diff --git a/packages/CLPBN/Makefile.in b/packages/CLPBN/Makefile.in
index 1835b0468..c82037a58 100644
--- a/packages/CLPBN/Makefile.in
+++ b/packages/CLPBN/Makefile.in
@@ -93,6 +93,11 @@ CLPBN_HMMER_EXAMPLES= \
 	$(CLPBN_EXDIR)/HMMer/scan.yap \
 	$(CLPBN_EXDIR)/HMMer/score.yap
 
+CLPBN_LEARNING_EXAMPLES= \
+	$(CLPBN_EXDIR)/learning/school_params.yap \
+	$(CLPBN_EXDIR)/learning/sprinkler_params.yap \
+	$(CLPBN_EXDIR)/learning/train.yap
+
 CLPBN_EXAMPLES= \
 	$(CLPBN_EXDIR)/burglary-alarm.fg \
 	$(CLPBN_EXDIR)/burglary-alarm.yap \
@@ -102,6 +107,7 @@ CLPBN_EXAMPLES= \
 	$(CLPBN_EXDIR)/comp_workshops.yap \
 	$(CLPBN_EXDIR)/social_domain1.yap \
 	$(CLPBN_EXDIR)/social_domain2.yap \
+	$(CLPBN_EXDIR)/sprinkler.pfl \
 	$(CLPBN_EXDIR)/sprinkler.yap \
 	$(CLPBN_EXDIR)/workshop_attrs.yap
 
@@ -112,10 +118,12 @@ install: $(CLBN_TOP) $(CLBN_PROGRAMS) $(CLPBN_PROGRAMS)
 	mkdir -p $(DESTDIR)$(EXDIR)
 	mkdir -p $(DESTDIR)$(EXDIR)/School
 	mkdir -p $(DESTDIR)$(EXDIR)/HMMer
+	mkdir -p $(DESTDIR)$(EXDIR)/learning
 	for h in $(CLPBN_TOP); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
 	for h in $(CLPBN_PROGRAMS); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR)/clpbn; done
 	for h in $(CLPBN_LEARNING_PROGRAMS); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR)/clpbn/learning; done
 	for h in $(CLPBN_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR); done
 	for h in $(CLPBN_SCHOOL_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/School; done
 	for h in $(CLPBN_HMMER_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/HMMer; done
+	for h in $(CLPBN_LEARNING_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/learning; done
 
diff --git a/packages/CLPBN/clpbn.yap b/packages/CLPBN/clpbn.yap
index f0b6b5870..59ee14c78 100644
--- a/packages/CLPBN/clpbn.yap
+++ b/packages/CLPBN/clpbn.yap
@@ -512,6 +512,9 @@ clpbn_run_solver(pcg, LVs, LPs, State) :-
 
 add_keys(Key1+V1,_Key2,Key1+V1).
 
+%
+% only useful for probabilistic context-free grammars
+%
 clpbn_init_graph(pcg) :- !,
 	pcg_init_graph.
 clpbn_init_graph(_).
diff --git a/packages/CLPBN/clpbn/dists.yap b/packages/CLPBN/clpbn/dists.yap
index 830a44c18..8f71e6fae 100644
--- a/packages/CLPBN/clpbn/dists.yap
+++ b/packages/CLPBN/clpbn/dists.yap
@@ -293,7 +293,7 @@ empty_dist(Dist, TAB) :-
 dist_new_table(DistId, NewMat) :-
 	use_parfactors(on), !,
 	matrix_to_list(NewMat, List),
-	pfl:set_pfl_parameters(DistId, List).
+	pfl:new_pfl_parameters(DistId, List).
 dist_new_table(Id, NewMat) :-
 	matrix_to_list(NewMat, List),
 	recorded(clpbn_dist_db, db(Id, Key, _, A, B, C, D), R),
diff --git a/packages/CLPBN/learning/example/school_params.yap b/packages/CLPBN/examples/learning/school_params.yap
similarity index 100%
rename from packages/CLPBN/learning/example/school_params.yap
rename to packages/CLPBN/examples/learning/school_params.yap
diff --git a/packages/CLPBN/examples/learning/sprinkler_params.yap b/packages/CLPBN/examples/learning/sprinkler_params.yap
new file mode 100644
index 000000000..f29ffad86
--- /dev/null
+++ b/packages/CLPBN/examples/learning/sprinkler_params.yap
@@ -0,0 +1,40 @@
+% learn the distributions for the sprinkler network.
+
+:- ['../sprinkler.pfl'].
+
+:- use_module(library(clpbn/learning/em)).
+
+%% data(t,t,t,t).
+data(t,f,_,t).
+%% data(_,t,_,t).
+%% data(t,t,f,f).
+%% data(t,t,f,t).
+%% data(t,_,_,t).
+%% data(t,f,t,t).
+%% data(t,t,f,t).
+%% data(t,_,f,f).
+%% data(t,t,f,f).
+%% data(f,f,t,t).
+%% data(t,t,_,f).
+%% data(t,f,f,t).
+%% data(t,f,t,t).
+
+%:- clpbn:set_clpbn_flag(em_solver,gibbs).
+%:- clpbn:set_clpbn_flag(em_solver,jt).
+%:- clpbn:set_clpbn_flag(em_solver,ve).
+:- clpbn:set_clpbn_flag(em_solver,bp).
+
+timed_main :-
+	statistics(runtime, _),
+	main(Lik),
+	statistics(runtime, [T,_]),
+	format('Took ~d msec and Lik ~3f~n',[T,Lik]).
+
+main(Lik) :-
+	findall(X,scan_data(X),L),
+	em(L,0.01,10,_,Lik).
+
+scan_data(example([wet_grass(W),sprinkler(S),rain(R),cloudy(C)])) :-
+	data(W, S, R, C).
+
+
diff --git a/packages/CLPBN/learning/example/train.yap b/packages/CLPBN/examples/learning/train.yap
similarity index 100%
rename from packages/CLPBN/learning/example/train.yap
rename to packages/CLPBN/examples/learning/train.yap
diff --git a/packages/CLPBN/examples/sprinkler.pfl b/packages/CLPBN/examples/sprinkler.pfl
new file mode 100644
index 000000000..bc86dd776
--- /dev/null
+++ b/packages/CLPBN/examples/sprinkler.pfl
@@ -0,0 +1,33 @@
+
+:- style_check(all).
+
+:- ensure_loaded(library(pfl)).
+
+% 1. define the domain of the random variables
+%    (not necessary if they are boolean).
+
+% 2. define the parfactors
+
+bayes cloudy ; cloudy_table ; [].
+
+bayes sprinkler, cloudy ; sprinkler_table ; [].
+
+bayes rain, cloudy ; rain_table ; [].
+
+bayes wet_grass, sprinkler, rain ; wet_grass_table ; [].
+
+
+% 3. define the CPTs.
+
+wet_grass_table([1.0,0.1,0.1,0.01,
+                 0.0,0.9,0.9,0.99]).
+
+sprinkler_table([0.5,0.9,
+                 0.5,0.1]).
+
+rain_table([0.8,0.2,
+            0.2,0.8]).
+
+cloudy_table([0.5,0.5]).
+
+
diff --git a/packages/CLPBN/learning/em.yap b/packages/CLPBN/learning/em.yap
index 3de3cfeb4..9efd15aa5 100644
--- a/packages/CLPBN/learning/em.yap
+++ b/packages/CLPBN/learning/em.yap
@@ -76,15 +76,21 @@ handle_em(error(repeated_parents)) :-
 % and more detailed info on distributions, namely with a list of all instances for the distribution.
 init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
 	clpbn_flag(em_solver, Solver),
+	% only used for PCGs
 	clpbn_init_graph(Solver),
+	% create the ground network
 	call_run_all(Items),
%	randomise_all_dists,
+	% set initial values for the distributions
 	uniformise_all_dists,
+	% get all variables to marginalise
 	attributes:all_attvars(AllVars0),
+	% and order them
 	sort_vars_by_key(AllVars0,AllVars,[]),
 	% remove variables that do not have to do with this query.
%	check_for_hidden_vars(AllVars1, AllVars1, AllVars),
 	different_dists(AllVars, AllDists, AllDistInstances, MargVars),
+	% set up the solver by doing the parameter-independent work.
 	clpbn_init_solver(Solver, MargVars, AllVars, _, SolverVars).
 
 % loop for as long as you want.
@@ -116,15 +122,32 @@ ltables([Id-T|Tables], [Key-LTable|FTables]) :-
 
 % collect the different dists we are going to learn next.
 different_dists(AllVars, AllDists, AllInfo, MargVars) :-
-	all_dists(AllVars, Dists0),
+	all_dists(AllVars, AllVars, Dists0),
 	sort(Dists0, Dists1),
 	group(Dists1, AllDists, AllInfo, MargVars0, []),
 	sort(MargVars0, MargVars).
 
-all_dists([], []).
-all_dists([V|AllVars], [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
+%
+% V -> the Id defining V. We get:
+%	the random variables that are parents
+%	the cases that can happen, e.g. if we have A <- B, C,
+%	A and B are boolean w/o evidence, and C is f, the cases could be
+%	[0,0,1], [0,1,1], [1,0,0], [1,1,0],
+%	Hiddens will be C
+%
+all_dists([], _, []).
+all_dists([V|AllVars], AllVars0, [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
+	clpbn:use_parfactors(on), !,
+	clpbn:get_atts(V, [key(K)]),
+	pfl:factor(bayes,Id,[K|PKeys],_,_,_),
+	find_variables(PKeys, AllVars0, Parents),
+	generate_hidden_cases([V|Parents], CompactCases, Hiddens),
+	uncompact_cases(CompactCases, Cases),
+	all_dists(AllVars, AllVars0, Dists).
+all_dists([V|AllVars], AllVars0, [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
+	% V is an instance of Id
 	clpbn:get_atts(V, [dist(Id,Parents)]),
-	sort([V|Parents], Sorted), 
+	sort([V|Parents], Sorted),
 	length(Sorted, LengSorted),
 	length(Parents, LengParents),
 	(
@@ -133,15 +156,31 @@ all_dists([V|AllVars], [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
 		true
 	;
 		throw(error(repeated_parents))
-	), 
+	),
 	generate_hidden_cases([V|Parents], CompactCases, Hiddens),
 	uncompact_cases(CompactCases, Cases),
-	all_dists(AllVars, Dists).
+	all_dists(AllVars, AllVars0, Dists).
+
+find_variables([], _AllVars0, []).
+find_variables([K|PKeys], AllVars0, [Parent|Parents]) :-
+	find_variable(K, AllVars0, Parent),
+	find_variables(PKeys, AllVars0, Parents).
+
+find_variable(K, [Parent|_AllVars0], Parent) :-
+	clpbn:get_atts(Parent, [key(K0)]), K0 =@= K, !.
+find_variable(K, [_|AllVars0], Parent) :-
+	find_variable(K, AllVars0, Parent).
 
 generate_hidden_cases([], [], []).
 generate_hidden_cases([V|Parents], [P|Cases], Hiddens) :-
 	clpbn:get_atts(V, [evidence(P)]), !,
 	generate_hidden_cases(Parents, Cases, Hiddens).
+generate_hidden_cases([V|Parents], [Cases|MoreCases], [V|Hiddens]) :-
+	clpbn:use_parfactors(on), !,
+	clpbn:get_atts(V, [key(K)]),
+	pfl:skolem(K,D), length(D,Sz),
+	gen_cases(0, Sz, Cases),
+	generate_hidden_cases(Parents, MoreCases, Hiddens).
 generate_hidden_cases([V|Parents], [Cases|MoreCases], [V|Hiddens]) :-
 	clpbn:get_atts(V, [dist(Id,_)]),
 	get_dist_domain_size(Id, Sz),
diff --git a/packages/CLPBN/learning/learn_utils.yap b/packages/CLPBN/learning/learn_utils.yap
index 713f19da4..fee6cf5df 100644
--- a/packages/CLPBN/learning/learn_utils.yap
+++ b/packages/CLPBN/learning/learn_utils.yap
@@ -36,8 +36,10 @@ run_all(M:Gs) :-
 	run_all(Gs,M).
 
 run_all([],_).
+run_all([example(Gs0)|Gs],M) :-
+	run_all(Gs0,M),
+	run_all(Gs,M).
 run_all([G|Gs],M) :-
-%	(G = _:ge(ybr136w,t8,23,-1) -> nb_getval(clpbn_tables, Tab), writeln(Tab) ; true ),
 	( call(M:G) -> true ; throw(bad_call(M:G)) ),
 	run_all(Gs,M).
diff --git a/packages/CLPBN/pfl.yap b/packages/CLPBN/pfl.yap
index 49e4da970..a57bad804 100644
--- a/packages/CLPBN/pfl.yap
+++ b/packages/CLPBN/pfl.yap
@@ -21,7 +21,7 @@
 	  append/3,
 	  member/2]).
 
-:- dynamic factor/5, skolem_in/2, skolem/2, preprocess/3, evidence/2, id/1.
+:- dynamic factor/6, skolem_in/2, skolem/2, preprocess/3, evidence/2, id/1.
 
 :- reexport(library(clpbn),
 	[clpbn_flag/2 as pfl_flag,
@@ -55,7 +55,7 @@ user:term_expansion( markov((Formula ; Phi ; Constraints)), pfl:factor(markov,Id
 	process_args(Formula, Id, 0, _, FList, []).
 user:term_expansion( Id@N, L ) :-
 	atom(Id), number(N), !,
-	N1 is N + 1, 
+	N1 is N + 1,
 	findall(G,generate_entity(1, N1, Id, G), L).
 user:term_expansion( Goal, [] ) :-
 	preprocess(Goal, Sk,Var), !,
@@ -112,6 +112,10 @@ process_arg(Sk::D, Id, _I) -->
 process_arg(Sk, Id, _I) -->
 	!,
 	{
+	  % if :: has been used before for this skolem,
+	  % just keep on using it;
+	  % otherwise, assume the domain is t,f.
+	  ( \+ \+ skolem(Sk,_D) -> true ; new_skolem(Sk,[t,f]) ),
 	  assert(skolem_in(Sk, Id))
 	},
 	[Sk].
@@ -121,7 +125,7 @@ new_skolem(Sk,D) :-
 	skolem(Sk1, D1),
 	Sk1 =@= Sk,
 	!,
-	D1 = D.
+	( D1 = D -> true ; throw(pfl(permission_error(redefining_domain(Sk),D:D1)))).
 new_skolem(Sk,D) :-
 	interface_predicate(Sk),
 	assert(skolem(Sk, D)).
@@ -155,7 +159,7 @@ get_pfl_parameters(Id,Out) :-
 
 new_pfl_parameters(Id, NewPhi) :-
-	retract(factor(Type.Id,FList,FV,_Phi,Constraints)),
+	retract(factor(Type,Id,FList,FV,_Phi,Constraints)),
 	assert(factor(Type,Id,FList,FV,NewPhi,Constraints)),
 	fail.
 new_pfl_parameters(_Id, _NewPhi).
@@ -179,5 +183,3 @@ get_factor_pvariable(Id,Var) :-
 	factor(_Type, Id,FList,_FV,_Phi,_Constraints),
 	member(Var, FList).
 
-
-
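
The two new example files above are meant to be used together: sprinkler.pfl declares the network and its initial CPTs, while sprinkler_params.yap loads it, collects the data/4 facts as example/1 terms, and hands them to em/5. A minimal session sketch, assuming YAP is started inside packages/CLPBN/examples/learning (or the installed $(EXDIR)/learning directory) so that the relative path '../sprinkler.pfl' resolves:

	% load the example; this pulls in ../sprinkler.pfl and library(clpbn/learning/em)
	?- [sprinkler_params].

	% run EM with the stopping parameters hard-wired in main/1 and get the final likelihood
	?- main(Lik).

	% or time the run as well
	?- timed_main.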
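When EM re-estimates a table, dist_new_table/2 (see the dists.yap hunk) now goes through pfl:new_pfl_parameters/2, which rewrites the Phi argument of the matching pfl:factor/6 fact. A small sketch for inspecting the result; show_learned_params/0 is a hypothetical helper, not part of the package, and it simply prints whatever is currently stored in each bayes factor, be it the original table goal or the learned parameter list:

	% dump every Bayesian parfactor: its id, child and parent keys, and parameters;
	% relies only on the factor(Type, Id, FList, FV, Phi, Constraints) layout
	% used by new_pfl_parameters/2 above
	show_learned_params :-
	    pfl:factor(bayes, Id, [Key|Parents], _FV, Phi, _Constraints),
	    format('factor ~w: ~w <- ~w~n  parameters: ~w~n', [Id, Key, Parents, Phi]),
	    fail.
	show_learned_params.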