From 66d14116dde1251fcaa09261d671bfe7d0ba38f6 Mon Sep 17 00:00:00 2001 From: vsc Date: Mon, 6 Aug 2007 14:55:43 +0000 Subject: [PATCH] improve CLP(BN)/BNT interface git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1918 b08c6af1-5177-4d33-ba66-4b1c6b8b522a --- C/globals.c | 4 +- C/grow.c | 5 +- C/write.c | 3 +- CLPBN/clpbn.yap | 24 ++- CLPBN/clpbn/aggregates.yap | 3 +- CLPBN/clpbn/bnt.yap | 201 ++++++++++++++++++++++--- CLPBN/clpbn/dists.yap | 2 + CLPBN/clpbn/examples/School/tables.yap | 4 +- CLPBN/learning/bnt_parms.yap | 121 +++++++++++++++ changes-5.1.html | 2 + library/dgraphs.yap | 22 +-- library/matlab.yap | 1 + 12 files changed, 351 insertions(+), 41 deletions(-) create mode 100644 CLPBN/learning/bnt_parms.yap diff --git a/C/globals.c b/C/globals.c index d99fbb6a2..b8e2b17e2 100644 --- a/C/globals.c +++ b/C/globals.c @@ -1151,9 +1151,9 @@ RecoverDelayArena(Term delay_arena) { attvar_record *pt = DelayArenaPt(delay_arena), *max = DelayTop(); - - if (max == pt-DelayArenaSz(delay_arena)) + if (max == pt-DelayArenaSz(delay_arena)) { SetDelayTop(pt); + } } #endif diff --git a/C/grow.c b/C/grow.c index 5bfe2da1f..0ff043957 100644 --- a/C/grow.c +++ b/C/grow.c @@ -698,12 +698,11 @@ static_growglobal(long size, CELL **ptr, CELL *hsplit) do_grow = FALSE; } } + size0 = size; if (size < ((char *)H0-omax)/8) size = ((char *)H0-omax)/8; if (do_grow) { - size0 = size = AdjustPageSize(size); - } else { - size0 = size; + size = AdjustPageSize(size); } /* adjust to a multiple of 256) */ Yap_ErrorMessage = NULL; diff --git a/C/write.c b/C/write.c index cd45e70a2..92f0a831a 100644 --- a/C/write.c +++ b/C/write.c @@ -333,6 +333,7 @@ write_var(CELL *t, struct write_globs *wglb) /* make sure we don't get no creepy spaces where they shouldn't be */ lastw = separator; if (CellPtr(t) < H0) { + Int vcount = (H0-t)/(sizeof(attvar_record)/sizeof(CELL)); #if COROUTINING #if DEBUG if (Yap_Portray_delays) { @@ -365,7 +366,7 @@ write_var(CELL *t, struct write_globs *wglb) } #endif wrputc('D', wglb->writewch); - wrputn((Int) ((attvar_record *)H0-(attvar_record *)t),wglb->writewch); + wrputn(vcount,wglb->writewch); #endif } else { wrputn(((Int) (t- H0)),wglb->writewch); diff --git a/CLPBN/clpbn.yap b/CLPBN/clpbn.yap index ff9206ce6..7bea2badf 100644 --- a/CLPBN/clpbn.yap +++ b/CLPBN/clpbn.yap @@ -81,6 +81,9 @@ clpbn_flag(bnt_solver,Before,After) :- clpbn_flag(bnt_path,Before,After) :- retract(bnt:bnt_path(Before)), assert(bnt:bnt_path(After)). +clpbn_flag(bnt_model,Before,After) :- + retract(bnt:bnt_model(Before)), + assert(bnt:bnt_model(After)). {Var = Key with Dist} :- put_atts(El,[key(Key),dist(DistInfo,Parents)]), @@ -121,12 +124,15 @@ project_attributes(GVars, AVars) :- AVars = [_|_], solver(Solver), ( GVars = [_|_] ; Solver = graphs), !, - sort_vars_by_key(AVars,SortedAVars,DiffVars), + clpbn_vars(AVars, DiffVars, AllVars), get_clpbn_vars(GVars,CLPBNGVars), - incorporate_evidence(SortedAVars, AllVars), write_out(Solver,CLPBNGVars, AllVars, DiffVars). project_attributes(_, _). +clpbn_vars(AVars, DiffVars, AllVars) :- + sort_vars_by_key(AVars,SortedAVars,DiffVars), + incorporate_evidence(SortedAVars, AllVars). + get_clpbn_vars([],[]). get_clpbn_vars([V|GVars],[V|CLPBNGVars]) :- get_atts(V, [key(_)]), !, @@ -236,12 +242,24 @@ bind_clpbns(Key, Dist, Parents, Key1, Dist1, Parents1) :- Key == Key1, !, get_dist(Dist,Type,Domain,Table), get_dist(Dist1,Type1,Domain1,Table1), - ( Dist == Dist1, Parents == Parents1 -> true ; throw(error(domain_error(bayesian_domain),bind_clpbns(var(Key, Type, Domain, Table, Parents),var(Key1, Type1, Domain1, Table1, Parents1))))). + ( Dist == Dist1, same_parents(Parents,Parents1) -> true ; throw(error(domain_error(bayesian_domain),bind_clpbns(var(Key, Type, Domain, Table, Parents),var(Key1, Type1, Domain1, Table1, Parents1))))). bind_clpbns(Key, _, _, _, Key1, _, _, _) :- Key\=Key1, !, fail. bind_clpbns(_, _, _, _, _, _, _, _) :- format(user_error, 'unification of two bayesian vars not supported~n', []). +same_parents([],[]). +same_parents([P|Parents],[P1|Parents1]) :- + same_node(P,P1), + same_parents(Parents,Parents1). + +same_node(P,P1) :- P == P1, !. +same_node(P,P1) :- + get_atts( P,[key(K)]), + get_atts(P1,[key(K)]), + P = P1. + + bind_evidence_from_extra_var(Ev1,Var) :- get_atts(Var, [evidence(Ev0)]),!,Ev0 = Ev1. bind_evidence_from_extra_var(Ev1,Var) :- diff --git a/CLPBN/clpbn/aggregates.yap b/CLPBN/clpbn/aggregates.yap index 45546e919..4cf3eb902 100644 --- a/CLPBN/clpbn/aggregates.yap +++ b/CLPBN/clpbn/aggregates.yap @@ -295,5 +295,6 @@ sm([_|Vs], C, El) :- get_vdist_size(V, Sz) :- clpbn:get_atts(V, [dist(Dist,_)]), - get_dist_domain_size(Dist, Sz), + get_dist_domain_size(Dist, Sz). + diff --git a/CLPBN/clpbn/bnt.yap b/CLPBN/clpbn/bnt.yap index d67a31ce5..86c80efd0 100644 --- a/CLPBN/clpbn/bnt.yap +++ b/CLPBN/clpbn/bnt.yap @@ -1,14 +1,18 @@ - :- module(bnt, [do_bnt/3, + create_bnt_graph/2, check_if_bnt_done/1]). :- use_module(library('clpbn/display'), [ clpbn_bind_vals/3]). :- use_module(library('clpbn/dists'), [ - get_dist_domain_size/2, - get_dist_domain/2, - get_dist_params/2]). + get_dist_domain_size/2, + get_dist_domain/2, + get_dist_params/2 + ]). + +:- use_module(library('clpbn/discrete_utils'), [ + reorder_CPT/5]). :- use_module(library(matlab), [start_matlab/1, close_matlab/0, @@ -16,6 +20,7 @@ matlab_eval_string/1, matlab_eval_string/2, matlab_matrix/4, + matlab_vector/2, matlab_sequence/3, matlab_initialized_cells/4, matlab_get_variable/2, @@ -30,6 +35,12 @@ dgraph_edges/2 ]). +:- use_module(library(lists), [ + member/2]). + +:- use_module(library(ordsets), [ + ord_insert/3]). + :- yap_flag(write_strings,on). % syntactic sugar for matlab_call. @@ -42,7 +53,7 @@ G <-- Y :- :- dynamic bnt/1. -:- dynamic bnt_solver/1, bnt_path/1. +:- dynamic bnt_solver/1, bnt_path/1, bnt_model/1. % belprop bnt_solver(jtree). @@ -50,7 +61,13 @@ bnt_solver(jtree). bnt_path('/u/vitor/Yap/CLPBN/FullBNT-1.0.3/BNT'). - +% +% What BNT are we using: +% a propositional one +% a tied parameter one. +% +%bnt_model(propositional). +bnt_model(tied). /***************************************** @@ -71,16 +88,23 @@ check_if_bnt_done(Var) :- do_bnt([], _, _) :- !. do_bnt(QueryVars, AllVars, AllDiffs) :- - init_matlab, - extract_graph(AllVars, Graph), - dgraph_top_sort(Graph, SortedVertices), - number_graph(SortedVertices, NumberedVertices, 0, Size), - init_bnet(SortedVertices, NumberedVertices, Size), + create_bnt_graph(AllVars, _, SortedVertices, NumberedVertices, Size), set_inference, add_evidence(SortedVertices, Size, NumberedVertices), marginalize(QueryVars, Ps), clpbn_bind_vals(QueryVars, Ps, AllDiffs). +create_bnt_graph(AllVars, Representatives) :- + create_bnt_graph(AllVars, Representatives, _, _, _). + +create_bnt_graph(AllVars, Representatives, SortedVertices, NumberedVertices, Size) :- + init_matlab, + sort_nodes(AllVars, SortedVertices), + number_graph(SortedVertices, NumberedVertices, 0, Size), + bnt_model(ModelType), + init_bnet(ModelType, SortedVertices, NumberedVertices, Size, Representatives). + + % make sure MATLAB works. init_matlab :- bnt(on), !. @@ -97,6 +121,75 @@ start_matlab :- start_matlab :- start_matlab('matlab -nojvm -nosplash'). +sort_nodes(AllVars, SortedVertices) :- + bnt_model(tied), !, + extract_tied(AllVars, SortedVertices). +sort_nodes(AllVars, SortedVertices) :- + bnt_model(propositional), !, + extract_graph(AllVars, Graph), + dgraph_top_sort(Graph, SortedVertices). + +extract_tied(AllVars, SortedVars) :- + extract_kvars(AllVars,KVars), + keysort(KVars,SVars), + split_tied_vars(SVars,TVars, Vertices), + tied_graph(TVars,TGraph,Vertices), + dgraph_top_sort(TGraph, Sort), + distribute_tied_variables(Sort, TVars, 1, SortedVars). + +extract_kvars([],[]). +extract_kvars([V|AllVars],[N-i(V,Parents)|KVars]) :- + clpbn:get_atts(V, [dist(N,Parents)]), + extract_kvars(AllVars,KVars). + +split_tied_vars([],[],[]). +split_tied_vars([N-i(V,Par)|More],[N-g(Vs,Ns,Es)|TVars],[N|LNs]) :- + get_pars(Par,N,V,NPs,[],Es0,Es), + get_tied(More,N,Vs,[V],Ns,NPs,Es,Es0,SVars), + split_tied_vars(SVars,TVars,LNs). + +get_pars([],_,_,NPs,NPs,Es,Es). +get_pars([V|Par],N,V0,NPs,NPs0,Es,Es0) :- + clpbn:get_atts(V, [dist(N,_)]), !, + get_pars(Par,N,V0,NPs,NPs0,Es,[V-V0|Es0]). +get_pars([V|Par],N,V0,NPs,NPs0,Es,Es0) :- + clpbn:get_atts(V, [dist(M,_)]), + ord_insert(NPs0,M,NPsI), + get_pars(Par,N,V0,NPs,NPsI,Es,Es0). + +get_tied([N-i(V,Par)|More],N,Vs,Vs0,Ns,NPs,Es,Es0,SVars) :- !, + get_pars(Par,N,V,NPsI,NPs,EsI,Es0), + get_tied(More,N,Vs,[V|Vs0],Ns,NPsI,Es,EsI,SVars). +get_tied(More,_,Vs,Vs,Ns,Ns,Es,Es,More). + +tied_graph(TVars,Graph,Vertices) :- + dgraph_new(Graph0), + dgraph_add_vertices(Vertices, Graph0, Graph1), + get_tied_edges(TVars,Edges), + dgraph_add_edges(Edges, Graph1, Graph). + +get_tied_edges([],[]). +get_tied_edges([N-g(_,Vs,_)|TGraph],Edges) :- + add_tied(Vs,N,Edges,Edges0), + get_tied_edges(TGraph,Edges0). + +add_tied([],_,Edges,Edges). +add_tied([N1|Vs],N,[N1-N|Edges],Edges0) :- + add_tied(Vs,N,Edges,Edges0). + +distribute_tied_variables([], _, _, []). +distribute_tied_variables([N|Sort], TVars, I0, SortedVars) :- + member(N-g(Vs,_,_),TVars), + distribute_tied(Vs,I0,In,SortedVars,SortedVars0), + distribute_tied_variables(Sort, TVars, In, SortedVars0). + +distribute_tied([],I,I,Vs,Vs). +distribute_tied([V|Vs],I0,In,[V|NVs],NVs0) :- + I is I0+1, + put_atts(V, [bnt_id(I0)]), +% clpbn:get_atts(V,[key(K)]), + distribute_tied(Vs,I,In,NVs,NVs0). + extract_graph(AllVars, Graph) :- dgraph_new(Graph0), dgraph_add_vertices(AllVars, Graph0, Graph1), @@ -117,14 +210,20 @@ number_graph([], [], I, I). number_graph([V|SortedGraph], [I|Is], I0, IF) :- I is I0+1, put_atts(V, [bnt_id(I)]), +% clpbn:get_atts(V,[key(K)]), +% write(I:K),nl, number_graph(SortedGraph, Is, I, IF). -init_bnet(SortedGraph, NumberedGraph, Size) :- +init_bnet(propositional, SortedGraph, NumberedGraph, Size, []) :- build_dag(SortedGraph, Size), - matlab_sequence(1,Size,discrete_nodes), - mksizes(SortedGraph, Size), + init_discrete_nodes(SortedGraph, Size), bnet <-- mk_bnet(dag, node_sizes, \discrete, discrete_nodes), - dump_cpts(SortedGraph, NumberedGraph). + dump_cpts(SortedGraph, NumberedGraph), + matlab_eval_string('bnet.CPD{3}',S),format('~s~n',[S]). +init_bnet(tied, SortedGraph, NumberedGraph, Size, Representatives) :- + build_dag(SortedGraph, Size), + init_discrete_nodes(SortedGraph, Size), + dump_tied_cpts(SortedGraph, NumberedGraph, Representatives). build_dag(SortedVertices, Size) :- get_numbered_edges(SortedVertices, Edges), @@ -145,6 +244,10 @@ add_numbered_edges([P|Ps], N, [PN-N|Edges], Edges0) :- v2number(V,N) :- get_atts(V,[bnt_id(N)]). +init_discrete_nodes(SortedGraph, Size) :- + matlab_sequence(1,Size,discrete_nodes), + mksizes(SortedGraph, Size). + mkdag(N,Els) :- Tot is N*N, functor(Dag,dag,Tot), @@ -177,13 +280,71 @@ get_szs([V|SortedVertices],[LD|Sizes]) :- dump_cpts([], []). dump_cpts([V|SortedGraph], [I|Is]) :- - clpbn:get_atts(V, [dist(Id,_)]), + clpbn:get_atts(V, [dist(Id,Parents)]), get_dist_params(Id,CPT), - mkcpt(bnet,I,CPT), + reorder_cpt(CPT,V,Parents,Tab), + mkcpt(bnet,I,Tab), dump_cpts(SortedGraph, Is). -mkcpt(BayesNet, V, Tab) :- - (BayesNet.'CPD'({V})) <-- tabular_CPD(BayesNet,V,Tab). +% +% This is complicated, the BNT and we have different orders +% +reorder_cpt(CPT,_, [], CPT) :- !. +reorder_cpt(CPT,V,Parents,Tab) :- + % get BNT label + get_sizes_and_ids(Parents,Ids), + % sort to BNT + keysort(Ids,NIds), + % get vars in order + extract_vars(NIds, [], NParents), + % do the actual work + reorder_CPT([V|Parents],CPT,[V|NParents],STab,_), + STab=..[_|Tab]. + +get_sizes_and_ids([],[]). +get_sizes_and_ids([V|Parents],[Id-V|Ids]) :- + get_atts(V, [bnt_id(Id)]), + get_sizes_and_ids(Parents,Ids). + +extract_vars([], L, L). +extract_vars([_-V|NIds], NParents, Vs) :- + extract_vars(NIds, [V|NParents], Vs). + +mkcpt(BayesNet, I, Tab) :- + (BayesNet.'CPD'({I})) <-- tabular_CPD(BayesNet,I,Tab). + +dump_tied_cpts(Graph, Is, Reps) :- + create_class_vector(Graph, Is, Classes, Reps0), + matlab_vector(Classes, eclass), + keysort(Reps0,Reps1), + representatives(Reps1,Reps), + bnet <-- mk_bnet(dag, node_sizes, \discrete, discrete_nodes, \equiv_class, eclass), + dump_tied_cpts(Reps). + +create_class_vector([], [], [],[]). +create_class_vector([V|Graph], [I|Is], [Id|Classes], [Id-v(V,I,Parents)|Sets]) :- + clpbn:get_atts(V, [dist(Id,Parents)]), + create_class_vector(Graph, Is,Classes,Sets). + +representatives([],[]). +representatives([Class-Rep|Reps1],[Class-Rep|Reps]) :- + nonrepresentatives(Reps1, Class, Reps2), + representatives(Reps2,Reps). + +nonrepresentatives([Class-_|Reps1], Class, Reps2) :- !, + nonrepresentatives(Reps1, Class, Reps2). +nonrepresentatives(Reps, _, Reps). + + +dump_tied_cpts([]). +dump_tied_cpts([Class-v(V,Id,Parents)|SortedGraph]) :- + get_dist_params(Class,CPT), + reorder_cpt(CPT,V,Parents,NCPT), + mktiedcpt(bnet,Id,Class,NCPT), + dump_tied_cpts(SortedGraph). + +mktiedcpt(BayesNet, V, Class, Tab) :- + (BayesNet.'CPD'({Class})) <-- tabular_CPD(BayesNet,V,Tab). set_inference :- bnt_solver(Solver), @@ -198,7 +359,7 @@ init_solver(likelihood_weighting) :- init_solver(enumerative) :- engine <-- enumerative_inf_engine(bnet). init_solver(gibbs) :- - engine <-- gibbs_inf_engine(bnet). + engine <-- gibbs_sampling_inf_engine(bnet). init_solver(global_joint) :- engine <-- global_joint_inf_engine(bnet). init_solver(pearl) :- diff --git a/CLPBN/clpbn/dists.yap b/CLPBN/clpbn/dists.yap index 6ca5e7d7f..967dfca5e 100644 --- a/CLPBN/clpbn/dists.yap +++ b/CLPBN/clpbn/dists.yap @@ -5,6 +5,7 @@ :- module(clpbn_dist,[ dist/1, dist/3, + dists/1, get_dist/4, get_dist_domain/2, get_dist_params/2, @@ -66,6 +67,7 @@ new_id(Id) :- Id1 is Id+1, assert(id(Id1)). +dists(X) :- id(X1), X is X1-1. dist(V, Id, Parents) :- var(V), !, diff --git a/CLPBN/clpbn/examples/School/tables.yap b/CLPBN/clpbn/examples/School/tables.yap index 240b0c4fb..c3f9b8878 100644 --- a/CLPBN/clpbn/examples/School/tables.yap +++ b/CLPBN/clpbn/examples/School/tables.yap @@ -46,10 +46,10 @@ build_sats_table(LSats, Key, Table) :- */ build_rating_table(LSats, Key, Table) :- - cpt_average(LSats, Key, [h,m,l], 0.95, Table). + cpt_average(LSats, Key, [h,m,l], 1.00, Table). build_grades_table(LGrades, Key, Table) :- - cpt_average(LGrades, Key, [a,b,c,d], 0.95, Table). + cpt_average(LGrades, Key, [a,b,c,d], 1.00, Table). /* build_grades_table(LGrades, Key, Table) :- diff --git a/CLPBN/learning/bnt_parms.yap b/CLPBN/learning/bnt_parms.yap new file mode 100644 index 000000000..d3e8d9734 --- /dev/null +++ b/CLPBN/learning/bnt_parms.yap @@ -0,0 +1,121 @@ +% +% Learn parameters using the BNT toolkit +% + +:- yap_flag(unknown,error). + +:- style_check(all). + +:- module(bnt_parameters, [learn_parameters/2]). + +:- use_module(library('clpbn'), [ + clpbn_flag/3]). + +:- use_module(library('clpbn/bnt'), [ + create_bnt_graph/2]). + +:- use_module(library('clpbn/display'), [ + clpbn_bind_vals/3]). + +:- use_module(library('clpbn/dists'), [ + get_dist_domain/2 + ]). + +:- use_module(library(matlab), [matlab_initialized_cells/4, + matlab_call/2, + matlab_get_variable/2 + ]). + +:- dynamic bnt_em_max_iter/1. +bnt_em_max_iter(10). + + +% syntactic sugar for matlab_call. +:- op(800,yfx,<--). + +G <-- Y :- + matlab_call(Y,G). + + +learn_parameters(Items, Tables) :- + run_all(Items), + clpbn_flag(solver, OldSolver, bnt), + clpbn_flag(bnt_model, Old, tied), + attributes:all_attvars(AVars), + % sort and incorporte evidence + clpbn_vars(AVars, AllVars), + length(AllVars,NVars), + create_bnt_graph(AllVars, Reps), + mk_sample(AllVars,NVars,EvVars), + bnt_learn_parameters(NVars,EvVars), + get_parameters(Reps, Tables), + clpbn_flag(solver, bnt, OldSolver), + clpbn_flag(bnt_model, tied, Old). + +run_all([]). +run_all([G|Gs]) :- + call(user:G), + run_all(Gs). + +clpbn_vars(Vs,BVars) :- + get_clpbn_vars(Vs,CVs), + keysort(CVs,KVs), + merge_vars(KVs,BVars). + +get_clpbn_vars([],[]). +get_clpbn_vars([V|GVars],[K-V|CLPBNGVars]) :- + clpbn:get_atts(V, [key(K)]), !, + get_clpbn_vars(GVars,CLPBNGVars). +get_clpbn_vars([_|GVars],CLPBNGVars) :- + get_clpbn_vars(GVars,CLPBNGVars). + +merge_vars([],[]). +merge_vars([K-V|KVs],[V|BVars]) :- + get_var_has_same_key(KVs,K,V,KVs0), + merge_vars(KVs0,BVars). + +get_var_has_same_key([K-V|KVs],K,V,KVs0) :- !, + get_var_has_same_key(KVs,K,V,KVs0). +get_var_has_same_key(KVs,_,_,KVs). + + +mk_sample(AllVars,NVars, LL) :- + add2sample(AllVars, LN), + length(LN,LL), + matlab_initialized_cells( NVars, 1, LN, sample). + +add2sample([], []). +add2sample([V|Vs],[val(VId,1,Val)|Vals]) :- + clpbn:get_atts(V, [evidence(Ev),dist(Id,_)]), !, + bnt:get_atts(V,[bnt_id(VId)]), + get_dist_domain(Id, Domain), + evidence_val(Ev,1,Domain,Val), + add2sample(Vs, Vals). +add2sample([_V|Vs],Vals) :- + add2sample(Vs, Vals). + +evidence_val(Ev,Val,[Ev|_],Val) :- !. +evidence_val(Ev,I0,[_|Domain],Val) :- + I1 is I0+1, + evidence_val(Ev,I1,Domain,Val). + +bnt_learn_parameters(_,_) :- + engine <-- jtree_inf_engine(bnet), +% engine <-- var_elim_inf_engine(bnet), +% engine <-- gibbs_sampling_inf_engine(bnet), +% engine <-- belprop_inf_engine(bnet), +% engine <-- pearl_inf_engine(bnet), + bnt_em_max_iter(MaxIters), + [new_bnet, trace] <-- learn_params_em(engine, sample, MaxIters). + + +get_parameters([],[]). +get_parameters([Rep-v(_,_,_)|Reps],[CPT|CPTs]) :- + get_new_table(Rep,CPT), + get_parameters(Reps,CPTs). + +get_new_table(Rep,CPT) :- + s <-- struct(new_bnet.'CPD'({Rep})), + matlab_get_variable( s.'CPT', CPT). + + diff --git a/changes-5.1.html b/changes-5.1.html index 61fdb2ec2..2096cc9f4 100644 --- a/changes-5.1.html +++ b/changes-5.1.html @@ -16,6 +16,8 @@

Yap-5.1.3: