From 2f6ec5ff59fb6144447914213910a18af703119b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=ADtor=20Santos=20Costa?= <vsc@dcc.fc.up.pt>
Date: Wed, 30 Nov 2011 13:04:13 +0000
Subject: [PATCH] improve learning for CLP(BN).

---
 packages/CLPBN/clpbn.yap                 |  50 +++++++++
 packages/CLPBN/clpbn/bp.yap              |  10 +-
 packages/CLPBN/clpbn/table.yap           |  34 +++++-
 packages/CLPBN/clpbn/ve.yap              |   1 +
 packages/CLPBN/learning/aleph_params.yap | 131 ++++++++++++-----------
 packages/CLPBN/learning/em.yap           |   8 +-
 6 files changed, 156 insertions(+), 78 deletions(-)

diff --git a/packages/CLPBN/clpbn.yap b/packages/CLPBN/clpbn.yap
index ef70663b6..8dbcebc92 100644
--- a/packages/CLPBN/clpbn.yap
+++ b/packages/CLPBN/clpbn.yap
@@ -11,6 +11,8 @@
 		  clpbn_init_solver/5,
 		  clpbn_run_solver/4,
 		  clpbn_init_graph/1,
+		  probability/2,
+		  conditional_probability/3,
 		  op( 500, xfy, with)]).
 
 :- use_module(library(atts)).
@@ -113,6 +115,8 @@
 solver(ve).
 em_solver(ve).
 
+:- meta_predicate probability(:,-), conditional_probability(:,:,-).
+
 %output(xbif(user_error)).
 %output(gviz(user_error)).
 output(no).
@@ -468,3 +472,49 @@ clpbn_finalize_solver(State) :-
 	arg(Last, State, Info),
 	finalize_bp_solver(Info).
 clpbn_finalize_solver(_State).
+
+probability(Goal, Prob) :-
+	findall(Prob, do_probability(Goal, [], Prob), [Prob]).
+	
+conditional_probability(Goal, ListOfGoals, Prob) :-
+	\+ ground(ListOfGoals), !,
+	throw(error(ground(ListOfGoals),conditional_probability(Goal, ListOfGoals, Prob))).
+conditional_probability(Goal, ListOfGoals, Prob) :-
+	findall(Prob, do_probability(Goal, ListOfGoals, Prob), [Prob]).
+
+do_probability(Goal, ListOfGoals, Prob) :-
+	evidence_to_var(Goal, C, NGoal, V),
+	call_residue(run( ListOfGoals, NGoal), Vars), !,
+	match_probability(Vars, C, V, Prob).
+
+run(ListOfGoals,Goal) :-
+	do(ListOfGoals),
+	call(Goal).
+
+do(M:ListOfGoals) :-
+	do(ListOfGoals, M).
+do([]).
+
+do([], _M).
+do(G.ListOfGoals, M) :-
+	M:G,
+	do(ListOfGoals, M).
+
+evidence_to_var(M:Goal, C, M:VItem, V) :- !,
+	evidence_to_var(Goal, C, VItem, V).
+evidence_to_var(Goal, C, VItem, V) :-
+	Goal =.. [L|Args],
+	variabilise_last(Args, C, NArgs, V),
+	VItem =.. [L|NArgs].
+	
+variabilise_last([Arg], Arg, [V], V).
+variabilise_last([Arg1,Arg2|Args], Arg, Arg1.NArgs, V) :-
+	variabilise_last(Arg2.Args, Arg, NArgs, V).
+
+match_probability([p(V0=C)=Prob|_], C, V, Prob) :-
+	V0 == V,
+	!.
+match_probability([_|Probs], C, V, Prob) :-
+	match_probability(Probs, C, V, Prob).
+
+
diff --git a/packages/CLPBN/clpbn/bp.yap b/packages/CLPBN/clpbn/bp.yap
index 5ff389ef1..e5b00b4da 100644
--- a/packages/CLPBN/clpbn/bp.yap
+++ b/packages/CLPBN/clpbn/bp.yap
@@ -58,8 +58,7 @@ init_bp_solver(_, AllVars, _, (BayesNet, DistIds)) :-
     sort(DistIds0, DistIds),
     %(num_bayes_nets(0) -> writeln(vars:VarsInfo) ; true),
     %(num_bayes_nets(0) -> writeln(dists:DistsInfo) ; true),
-    create_network(VarsInfo, BayesNet),
-    get_extra_vars_info(AllVars, ExtraVarsInfo).
+    create_network(VarsInfo, BayesNet).
     %set_extra_vars_info(BayesNet, ExtraVarsInfo).
     
 
@@ -99,17 +98,10 @@ get_evidence(_V, -1). % no evidence !!!
 get_extra_vars_info([], []).
 get_extra_vars_info([V|Vs], [v(VarId, Label, Domain)|VarsInfo]) :-
     get_atts(V, [id(VarId)]), !,
-writeln(k:V),
     clpbn:get_atts(V, [key(Key),dist(DistId, _)]),
-writeln(j:Key),
     term_to_atom(Key, Label),
-writeln(i:Label),
     get_dist_domain(DistId, Domain0),
-writeln(a:Domain0),
     numbers2atoms(Domain0, Domain),
-writeln(b:Domain),
-    get_extra_vars_info(Vs, VarsInfo).
-get_extra_vars_info([_|Vs], VarsInfo) :-
     get_extra_vars_info(Vs, VarsInfo).
 
 
diff --git a/packages/CLPBN/clpbn/table.yap b/packages/CLPBN/clpbn/table.yap
index a3a5e16cb..9153c6ec9 100644
--- a/packages/CLPBN/clpbn/table.yap
+++ b/packages/CLPBN/clpbn/table.yap
@@ -12,6 +12,8 @@
 	 clpbn_tableallargs/1,
 	 clpbn_table_nondet/1,
 	 clpbn_tabled_clause/2,
+	 clpbn_tabled_clause_ref/3,
+	 clpbn_tabled_retract/2,
 	 clpbn_tabled_abolish/1,
 	 clpbn_tabled_asserta/1,
 	 clpbn_tabled_assertz/1,
@@ -31,6 +33,8 @@
 
 :- meta_predicate clpbn_table(:),
 	clpbn_tabled_clause(:.?),
+	clpbn_tabled_clause_ref(:.?,?),
+	clpbn_tabled_retract(:),
 	clpbn_tabled_abolish(:),
 	clpbn_tabled_asserta(:), 
 	clpbn_tabled_assertz(:), 
@@ -121,6 +125,9 @@ clpbn_table(F/N,M) :-
 		  % enter evidence after binding.
 		  ( var(A0) -> A0 = V2 ; put_evidence(A0, V2) )
 		;
+		  clpbn:clpbn_flag(solver,none) ->
+		  true
+	        ;
 		  throw(error(tabled_clpbn_predicate_should_never_fail,S))
 		)
 	       )
@@ -179,7 +186,7 @@ clpbn_table_nondet(F/N,M) :-
 	NKey =.. [NF|Args],
 	asserta(clpbn_table(Key, M, NKey)),
 	assert(
-	       (M:Key :- writeln(in:Key),
+	       (M:Key :- % writeln(in:Key),
 	        b_getval(clpbn_tables, Tab),
 		( b_hash_lookup(Key, Out, Tab) ->
 		  fail
@@ -217,6 +224,31 @@ clpbn_tabled_clause(Head, M, Body) :-
 	clpbn_table(Head, M, THead),
 	clause(M:THead, Body).
 
+clpbn_tabled_clause_ref(M:Head, Body, Ref) :- !,
+	clpbn_tabled_clause_ref(Head, M, Body, Ref).
+clpbn_tabled_clause_ref(Head, Body, Ref) :-
+	prolog_load_context(module, M),
+	clpbn_tabled_clause_ref(Head, M, Body, Ref).
+
+clpbn_tabled_clause_ref(M:Head, _, Body, Ref) :- !,
+	clpbn_tabled_clause_ref(Head, M, Body, Ref).
+clpbn_tabled_clause_ref(Head, M, Body, Ref) :-
+	clpbn_table(Head, M, THead),
+	clause(M:THead, Body, Ref).
+
+
+clpbn_tabled_retract(M:Head) :- !,
+	clpbn_tabled_retract(Head, M).
+clpbn_tabled_retract(Head) :-
+	prolog_load_context(module, M),
+	clpbn_tabled_retract(Head, M).
+
+clpbn_tabled_retract(M:Head, _) :- !,
+	clpbn_tabled_retract(Head, M).
+clpbn_tabled_retract(Head, M) :-
+	clpbn_table(Head, M, THead),
+	retract(M:THead).
+
 
 clpbn_tabled_assertz(M:Clause) :- !,
 	clpbn_tabled_assertz2(Clause, M).
diff --git a/packages/CLPBN/clpbn/ve.yap b/packages/CLPBN/clpbn/ve.yap
index 54ce81bff..91766d8ba 100644
--- a/packages/CLPBN/clpbn/ve.yap
+++ b/packages/CLPBN/clpbn/ve.yap
@@ -103,6 +103,7 @@ solve_ve([LVs|_], [NVs0|_], Ps) :-
 	sort(LV0, LV),
 	% construct the graph
 	find_all_table_deps(Tables0, LV),
+%writeln((Li: LVs: LV)),
 	process(LVi, LVs, tab(Dist,_,_)),
 %writeln(m:Dist),matrix:matrix_to_list(Dist,LD),writeln(LD),
 %exps(LD,LDE),writeln(LDE),
diff --git a/packages/CLPBN/learning/aleph_params.yap b/packages/CLPBN/learning/aleph_params.yap
index dc2a6a165..8162c8bf7 100644
--- a/packages/CLPBN/learning/aleph_params.yap
+++ b/packages/CLPBN/learning/aleph_params.yap
@@ -1,8 +1,13 @@
 %
 % Interface the Aleph ILP system to CLP(BN)
 %
-% Relies on the Aleph cost function.
-% It assumes Aleph work as usual,  but some variables are of type random.
+% Aleph was written by Ashwin Srinivasan
+%
+% this code relies on hacked version of Aleph, contact
+% vsc@dcc.fc.up.pt
+%
+% Aleph generates clauses as usual,
+% but some variables are of special type random.
 %
 :- module(clpbn_aleph,
 	  [init_clpbn_cost/0,
@@ -33,6 +38,7 @@
 	 clpbn_tabled_asserta/2,
 	 clpbn_tabled_assertz/1,
 	 clpbn_tabled_clause/2,
+	 clpbn_tabled_clause_ref/3,
 	 clpbn_tabled_number_of_clauses/2,
 	 clpbn_is_tabled/1,
 	 clpbn_reset_tables/0,
@@ -42,7 +48,7 @@
 % Tell Aleph not to use default solver during saturation
 %
 % all work will be done by EM 
-:- set_clpbn_flag(solver,none).
+%:- set_clpbn_flag(solver,none).
 
 %
 % This is the Aleph interface
@@ -58,85 +64,50 @@
 :- user:set(skolem_examples, false).
 
 % avoid doing CLP(BN) stuff except at start
-:- user:set(sat_start_hook, clpbn_aleph:disable_solver).
-:- user:set(sat_stop_hook, clpbn_aleph:enable_solver).
+:- user:set(sat_begin_hook, clpbn_aleph:disable_solver).
+:- user:set(sat_end_hook, clpbn_aleph:enable_solver).
 
-:- user:set(reduce_start_hook, clpbn_aleph:disable_solver).
-:- user:set(reduce_stop_hook, clpbn_aleph:enable_solver).
+:- user:set(reduce_begin_hook, clpbn_aleph:disable_solver).
+:- user:set(reduce_end_hook, clpbn_aleph:enable_solver).
 
-:- user:set(record_testclause_hook, clpbn_aleph:do_nothing).
+:- user:set(best_clause_hook, clpbn_aleph:add_new_clause).
 
-:- user:set(newbest_hook, clpbn_aleph:store_theory).
-
-disable_solver(_) :-
-	clpbn_flag(solver, Old, none),
-	nb_setval(old_clpbn_solver, Old).
 disable_solver(_,_) :-
+	disable_solver.
+disable_solver(_) :-
+	disable_solver.
+
+disable_solver :-
 	clpbn_flag(solver, Old, none),
 	nb_setval(old_clpbn_solver, Old).
 
+enable_solver(_) :-
+	enable_solver.
+enable_solver(_,_) :-
+	enable_solver.
+
 enable_solver :-
 	nb_getval(old_clpbn_solver, Old),
 	set_clpbn_flag(solver, Old).
-enable_solver(_,_) :-
-	nb_getval(old_clpbn_solver, Old),
-	set_clpbn_flag(solver, Old).
 
-do_nothing(_).
-
-% backup current best theory in DB.
-store_theory(_,_,_) :-
-	eraseall(best_theory),
+% step 1: update distributions to better values.
+add_new_clause(_,(H :- _),_,_) :-
+	(
+	  clpbn_is_tabled(user:H)
+	 ->
+	  update_tabled_theory(H)
+        ;
+	  update_theory(H)
+        ),
 	fail.
-store_theory(_,(H:-_),_) :-
-	clpbn_is_tabled(user:H), !,
-	store_tabled_theory(H).
-store_theory(_,(H:-_),_) :- !,
-	store_theory(H).
-store_theory(_,H,_) :-
-	store_theory(H).
-
-store_tabled_theory(H) :-
-	clpbn_tabled_clause(user:H,B),
-	add_correct_cpt(B,NB),
-	store_cl((H:-NB)),
-	fail.
-store_tabled_theory(_).
-	
-store_theory(H) :-
-	clause(user:H,B),
-	add_correct_cpt(B,NB),
-	store_cl((H:-NB)),
-	fail.
-store_theory(_).
-
-add_correct_cpt((G,B),(G,NB)) :- !,
-	add_correct_cpt(B,NB).
-add_correct_cpt((clpbn:{V = K with Tab }), ({V = K with NTab})) :-
-	correct_tab(Tab,K,NTab).
-add_correct_cpt(({V = K with Tab }), ({V = K with NTab})) :-
-	correct_tab(Tab,K,NTab).
-
-correct_tab(p(Vs,_),K,p(Vs,TDist)) :-
-	get_dist_key(Id, K),
-	get_dist_params(Id, TDist).
-correct_tab(p(Vs,_,Ps),K,p(Vs,TDist,Ps)) :-
-	get_dist_key(Id, K),
-	get_dist_params(Id, TDist).
-
-store_cl(Cl) :-
-	recordz(best_theory, Cl, _).
-
-:- user:set(best_clause_hook, clpbn_aleph:add_new_clause).
-
+% step 2: add clause
 add_new_clause(_,(_ :- true),_,_) :- !.
 add_new_clause(_,(H :- B),_,_) :-
 %trace,
 %	user:db_usage,
 %	user:db_dynamic,
 	domain(H, K, V, D),
-	user:cost((H :- B), _, Cost),
-	format(user_error,'Current Cost: ~w.~n', [Cost]),
+	user:cost((H :- B), _, _Cost),
 	rewrite_body(B, IB, Vs, _, ( !, { V = K with p(D, CPTList, Vs) })),
 	% need to remember which CPT we want
 	get_dist_key(Id, K),
@@ -157,8 +128,38 @@ add_new_clause(_,(H :- B),_,_) :-
 	).
 
 
+update_tabled_theory(H) :-
+	clpbn_tabled_clause_ref(user:H,B,Ref),
+	add_correct_cpt(B,NB),
+	erase(Ref),
+	clpbn_tabled_assertz((user:(H:-NB))),
+	fail.
+update_tabled_theory(_).
+	
+update_theory(H) :-
+	clause(user:H,B,Ref),
+	add_correct_cpt(B,NB),
+	erase(Ref),
+	assert((user:H:-NB)),
+	fail.
+update_theory(_).
+
+add_correct_cpt((G,B),(G,NB)) :-
+	add_correct_cpt(B,NB).
+add_correct_cpt((clpbn:{V = K with Tab }), ({V = K with NTab})) :-
+	correct_tab(Tab,K,NTab).
+add_correct_cpt(({V = K with Tab }), ({V = K with NTab})) :-
+	correct_tab(Tab,K,NTab).
+
+correct_tab(p(Vs,_),K,p(Vs,TDist)) :-
+	get_dist_key(Id, K),
+	get_dist_params(Id, TDist).
+correct_tab(p(Vs,_,Ps),K,p(Vs,TDist,Ps)) :-
+	get_dist_key(Id, K),
+	get_dist_params(Id, TDist).
+
 % user-defined cost function, Aleph knows about this (and only about this). 
-user:cost((H :- B),Inf,Score) :- !,
+user:cost((H :- B),Inf,Score) :-
 	domain(H, K, V, D),
 	check_info(Inf),
 	rewrite_body(B, IB, Vs, Ds, ( !, { V = K with p(D, CPTList, Vs) })),
diff --git a/packages/CLPBN/learning/em.yap b/packages/CLPBN/learning/em.yap
index 960e10ad5..3de3cfeb4 100644
--- a/packages/CLPBN/learning/em.yap
+++ b/packages/CLPBN/learning/em.yap
@@ -5,13 +5,15 @@
 :- module(clpbn_em, [em/5]).
 
 :- use_module(library(lists),
-	      [append/3]).
+	      [append/3,
+	       delete/3]).
 
 :- use_module(library(clpbn),
 	      [clpbn_init_graph/1,
 	       clpbn_init_solver/5,
 	       clpbn_run_solver/4,
 	       clpbn_finalize_solver/1,
+	       conditional_probability/3,
 	       clpbn_flag/2]).
 
 :- use_module(library('clpbn/dists'),
@@ -203,8 +205,7 @@ compute_parameters([], [], _, Lik, Lik, _).
 compute_parameters([Id-Samples|Dists], [Id-NewTable|Tables],  MDistTable, Lik0, Lik, LPs:MargVars) :-
 	empty_dist(Id, Table0),
 	add_samples(Samples, Table0, MDistTable),
-%matrix_to_list(Table0,Mat), 
-%format(user_error, 'FINAL ~d ~w~n', [Id,Mat]),
+%matrix_to_list(Table0,Mat), lists:sumlist(Mat, Sum), format(user_error, 'FINAL ~d ~w ~w~n', [Id,Sum,Mat]),
 	soften_sample(Table0, SoftenedTable),
 %	matrix:matrix_sum(Table0,TotM),
 	normalise_counts(SoftenedTable, NewTable),
@@ -240,3 +241,4 @@ backtrack_run_all([Item|_], Mod) :-
 backtrack_run_all([_|Items], Mod) :-
 	backtrack_run_all(Items, Mod).
 backtrack_run_all([], _).
+