improve stochastic grammar learning (work in progress).

2009-05-26 10:49:04 -05:00
parent de7474a5d9
commit a10bf47a0c
4 changed files with 166 additions and 18 deletions
--- a/packages/CLPBN/clpbn.yap
+++ b/packages/CLPBN/clpbn.yap
@@ -8,7 +8,8 @@
 		  clpbn_init_solver/4,
 		  clpbn_run_solver/3,
 		  clpbn_init_solver/5,
-		  clpbn_run_solver/4]).
+		  clpbn_run_solver/4,
+		  clpbn_init_graph/1]).

 :- use_module(library(atts)).
 :- use_module(library(lists)).
@@ -53,6 +54,12 @@
 	       run_gibbs_solver/3
 	      ]).

+:- use_module('clpbn/pgrammar',
+	      [init_pcg_solver/4,
+	       run_pcg_solver/3,
+	       pcg_init_graph/0
+	      ]).
+
 :- use_module('clpbn/graphs',
 	      [
 	       clpbn2graph/1
@@ -289,8 +296,7 @@ bind_clpbn(T, Var, _, _, _) :- nonvar(T),
 	!, ( add_evidence(Var,T) -> true ; writeln(T:Var), fail ).
 bind_clpbn(T, Var, Key, Dist, Parents) :- var(T),
 	get_atts(T, [key(Key1),dist(Dist1,Parents1)]),
-writeln(eq:Key:Key1),
-(
+	(
 	 bind_clpbns(Key, Dist, Parents, Key1, Dist1, Parents1)
 	->
 	 (
@@ -382,6 +388,8 @@ clpbn_init_solver(vel, LVs, Vs0, VarsWithUnboundKeys, State) :-
 	init_vel_solver(LVs, Vs0, VarsWithUnboundKeys, State).
 clpbn_init_solver(jt, LVs, Vs0, VarsWithUnboundKeys, State) :-
 	init_jt_solver(LVs, Vs0, VarsWithUnboundKeys, State).
+clpbn_init_solver(pcg, LVs, Vs0, VarsWithUnboundKeys, State) :-
+	init_pcg_solver(LVs, Vs0, VarsWithUnboundKeys, State).

 %
 % LVs is the list of lists of variables to marginalise
@@ -390,7 +398,7 @@ clpbn_init_solver(jt, LVs, Vs0, VarsWithUnboundKeys, State) :-
 % 
 %
 clpbn_run_solver(LVs, LPs, State) :-
-       	solver(Solver, State),
+       	solver(Solver),
 	clpbn_run_solver(Solver, LVs, LPs, State).

 clpbn_run_solver(gibbs, LVs, LPs, State) :-
@@ -399,6 +407,11 @@ clpbn_run_solver(vel, LVs, LPs, State) :-
 	run_vel_solver(LVs, LPs, State).
 clpbn_run_solver(jt, LVs, LPs, State) :-
 	run_jt_solver(LVs, LPs, State).
+clpbn_run_solver(pcg, LVs, LPs, State) :-
+	run_pcg_solver(LVs, LPs, State).

 add_keys(Key1+V1,_Key2,Key1+V1).

+clpbn_init_graph(pcg) :- !,
+	pcg_init_graph.
+clpbn_init_graph(_).
--- a/packages/CLPBN/clpbn/evidence.yap
+++ b/packages/CLPBN/clpbn/evidence.yap
@@ -119,10 +119,10 @@ check_stored_evidence(_, _).

 add_evidence(K, V) :-
 	evidence(K, Ev), !,
+	store_evidence(V, Ev),
 	clpbn:put_atts(V, [evidence(Ev)]).
 add_evidence(_, _).

-
 check_for_evidence(_, V, Vf, Vf, C, C) :-
 	clpbn:get_atts(V, [evidence(_)]), !.
 check_for_evidence(K, _, Vf0, Vff, C0, Ci) :-
--- a/packages/CLPBN/clpbn/pgrammar.yap
+++ b/packages/CLPBN/clpbn/pgrammar.yap
@@ -2,8 +2,12 @@

 :- style_check(all).

-:- module(clpbn_pgrammar,[grammar_prob/2,
-			  grammar_mle/2]).
+:- module(clpbn_pgrammar,[grammar_to_atts/1,
+			  grammar_prob/2,
+			  grammar_mle/2,
+			  init_pcg_solver/4,
+			  run_pcg_solver/3,
+			  pcg_init_graph/0]).

 :- load_files([library(clpbn)],
 	      [ if(not_loaded),
@@ -14,11 +18,20 @@
 	      [ sum_list/2
 	      ]).

+:- use_module([library(matrix)],
+	      [ matrix_new/3,
+		matrix_add/3,
+		matrix_get/3,
+		matrix_op/4,
+		matrix_op_to_all/4,
+		matrix_set_all/2
+	      ]).
+
 :- op(600, xfy,'::').

-:- dynamic id/4.
+:- dynamic id/4, dist_id/2, new_proof/2.

-:- meta_predicate grammar_prob(:,-).
+:- meta_predicate grammar_prob(:,-), grammar_mle(:,-), grammar_to_atts(:).

 grammar_prob(M:S, P) :- !,
 	grammar_prob(S, M, P).
@@ -94,10 +107,11 @@ add_to_predicate(M:EH1,M:EH,M:H0,NH,NB,Key,Choice,P,Id,(EH1:-NB)) :-
 	EH=NH.

 p_rule(_,_,_,_) :-
-	nb_setval(grammar_fast,on), !.
+	nb_getval(grammar_fast,on), !.
 p_rule(M,EH,Key,Choice) :-
 	all_tabs(M,EH,Dom,Opt),
-	{ Choice = Key with p(Dom,Opt) }.
+	{ AttVar = Key with p(Dom,Opt) },
+	Choice = AttVar.

 ensure_tabled(M,H0,EH) :-
 	predicate_property(M:H0, tabled), !,
@@ -178,3 +192,112 @@ extract_logprobability([P1|Ps], LogP0, LogP) :-
 	LogPI is LogDP+LogP0,
 	extract_logprobability(Ps, LogPI, LogP).

+grammar_to_atts(M:S) :- !,
+	grammar_to_atts(S, M).
+grammar_to_atts(S) :-
+	source_module(M),
+	grammar_to_atts(S, M).
+
+grammar_to_atts(S, M) :-
+	nb_setval(grammar_fast,on),
+	get_internal(S, InternalS, Proof),
+	path_choices(M:InternalS,Proof).
+
+path_choices(InternalS, Proof) :-
+	new_id(Id),
+	call(InternalS),
+	/* use Ids because we may have repeated examples */
+	assert(new_proof(Id,Proof)).
+
+new_id(Id) :-
+	(nb_getval(grammar_id,Id) ->
+	 I1 is Id+1,
+	 nb_setval(grammar_id,I1)
+	;
+	 nb_setval(grammar_id,1),
+	 Id = 0
+	).
+
+find_dom(K, Vs, Ps) :-
+	findall(V,id(_,K,_,V),Vs),
+	gen_ps(Vs, Ps).
+
+gen_ps([], []).
+gen_ps([_|Vs], [1.0|Ps]) :-
+	gen_ps(Vs, Ps).
+
+init_pcg_solver(_, _, _, _).
+
+run_pcg_solver(LVs, LPs, _) :-
+	init_prob_array(Array, ExArray),
+	add_proofs_to_array(Array, ExArray),
+	matrix:matrix_to_list(Array,L), writeln(L),
+	out_to_vs(LVs, LPs, Array).
+
+add_proofs_to_array(Array, ExArray) :-
+	nb_getval(grammar_id,IdMax),
+	from(0,IdMax,Id),
+	matrix_set_all(ExArray,0.0),
+	sum_proofs(Id, ExArray),
+%	matrix:matrix_to_list(ExArray,L0), writeln(Id:L0),
+	matrix_op(ExArray, Array, +, Array),
+%	matrix:matrix_to_list(Array,L0),writeln(i:L0),
+	fail.
+add_proofs_to_array(_,_).
+
+from(I,_,I).
+from(I0,Id,I) :-
+	I1 is I0+1,
+	I1 < Id,
+	from(I1,Id,I).
+
+sum_proofs(Id, ExArray) :-
+	findall(P, add_proof(Id, ExArray, P), Ps),
+	sum_list(Ps,TotP),
+	matrix_op_to_all(ExArray,/,TotP,ExArray).
+
+add_proof(Id, ExArray, P) :-
+	new_proof(Id,Proof),
+	extract_probability(Proof, P),
+	add_to_array(Proof, P, ExArray).
+
+add_to_array(p(Id, Goals), P, Array) :-
+	add(Id, P, Array),
+	add_to_array_goals(Goals, P, Array).
+
+add_to_array_goals([], _, _).
+add_to_array_goals([G|Goals], P, Array) :-
+	add_to_array(G, P, Array),
+	add_to_array_goals(Goals, P, Array).
+
+init_prob_array(Array, ExArray) :-
+	predicate_property(id(_,_,_,_),number_of_clauses(Cls)),
+	matrix_new(floats, [Cls], Array),
+	matrix_new(floats, [Cls], ExArray).
+
+add(Id, P, Array) :-
+	matrix_add(Array, [Id], P).
+
+out_to_vs([], [], _).
+out_to_vs([[V]|LVs], [Ps|LPs], Array) :-
+	clpbn:get_atts(V, [key(Key)]),
+	findall(P,count_for_key(Key,P,Array),Ps),
+	out_to_vs(LVs, LPs, Array).
+
+count_for_key(Key,P,Array) :-
+	id(Id,Key,_,_),
+	matrix_get(Array, [Id], P).
+
+% generate attributed variables to make EM happy.
+% just as few as possible
+%
+pcg_init_graph :-
+	setof(K,I^P^S^id(I,K,P,S),Ks),
+	generate_atts(Ks).
+
+generate_atts([]).
+generate_atts([Key|KVs]) :-
+	find_dom(Key, Dom, Ps),
+	{ _ = Key with p(Dom,Ps) },
+	generate_atts(KVs).
+
--- a/packages/CLPBN/learning/em.yap
+++ b/packages/CLPBN/learning/em.yap
@@ -8,7 +8,8 @@
 	      [append/3]).

 :- use_module(library(clpbn),
-	      [clpbn_init_solver/5,
+	      [clpbn_init_graph/1,
+	       clpbn_init_solver/5,
 	       clpbn_run_solver/4,
 	       clpbn_flag/2]).

@@ -61,9 +62,7 @@ em(_, _, _, Tables, Likelihood) :-

 handle_em(error(repeated_parents)) :-
 	assert(em_found(_, -inf)),
-	fail.
-	
-	
+	fail.	

 % This gets you an initial configuration. If there is a lot of evidence
 % tables may be filled in close to optimal, otherwise they may be
@@ -75,7 +74,9 @@ handle_em(error(repeated_parents)) :-
 % the list of distributions for which we want to compute parameters,
 % and more detailed info on distributions, namely with a list of all instances for the distribution.
 init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
-	run_all(Items),
+	clpbn_flag(em_solver, Solver),
+	clpbn_init_graph(Solver),
+	call_run_all(Items),
 %	randomise_all_dists,
 	uniformise_all_dists,
 	attributes:all_attvars(AllVars0),
@@ -83,14 +84,13 @@ init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
 	% remove variables that do not have to do with this query.
 %	check_for_hidden_vars(AllVars1, AllVars1, AllVars),
 	different_dists(AllVars, AllDists, AllDistInstances, MargVars),
-	clpbn_flag(em_solver, Solver),
 	clpbn_init_solver(Solver, MargVars, AllVars, _, SolverVars).

 % loop for as long as you want.
 em_loop(Its, Likelihood0, State, MaxError, MaxIts, LikelihoodF, FTables) :-
 	estimate(State, LPs),
 	maximise(State, Tables, LPs, Likelihood),
-%	writeln(Likelihood:Its:Likelihood0:Tables),
+	writeln(Likelihood:Its:Likelihood0:Tables),
 	(
 	    (
 	     abs((Likelihood - Likelihood0)/Likelihood) < MaxError
@@ -226,4 +226,16 @@ run_sample([C|Cases], [P|Ps], Table) :-
 	matrix_add(Table, C, P),
 	run_sample(Cases, Ps, Table).

+call_run_all(Mod:Items) :-
+	clpbn_flag(em_solver, pcg),
+	backtrack_run_all(Items, Mod).
+call_run_all(Items) :-
+	clpbn_flag(em_solver, pcg),
+	run_all(Items).

+backtrack_run_all([Item|_], Mod) :-
+	call(Mod:Item),
+	fail.
+backtrack_run_all([_|Items], Mod) :-
+	backtrack_run_all(Items, Mod).
+backtrack_run_all([], _).