From f6c5d16f6340ea60452cc80ff8f4202b52ef5a82 Mon Sep 17 00:00:00 2001
From: Vitor Santos Costa <vsc@dcc.fc.up.pt>
Date: Mon, 3 Nov 2008 16:02:15 +0000
Subject: [PATCH] use avg CPT type. how to deal with it is a solver problem,
 not an app issue.

---
 CLPBN/clpbn/aggregates.yap               | 100 +++++++++++++----------
 CLPBN/clpbn/dists.yap                    |   8 ++
 CLPBN/clpbn/examples/School/schema.yap   |   6 +-
 CLPBN/clpbn/vel.yap                      |  29 +++++--
 CLPBN/learning/example/school_params.yap |   2 +-
 5 files changed, 91 insertions(+), 54 deletions(-)

diff --git a/CLPBN/clpbn/aggregates.yap b/CLPBN/clpbn/aggregates.yap
index 997c86ebe..05299400e 100644
--- a/CLPBN/clpbn/aggregates.yap
+++ b/CLPBN/clpbn/aggregates.yap
@@ -1,9 +1,11 @@
-
+%
+% generate explicit CPTs
+%
 :- module(clpbn_aggregates, [
-	cpt_average/4,
-	cpt_average/5,
-	cpt_max/4,
-	cpt_min/4
+	cpt_average/6,
+	cpt_average/7,
+	cpt_max/6,
+	cpt_min/6
 	]).
 
 :- use_module(library(clpbn), [{}/1]).
@@ -11,6 +13,7 @@
 :- use_module(library(lists),
 	[last/2,
 	sumlist/2,
+	sum_list/3,
 	max_list/2,
 	min_list/2
     ]).
@@ -22,66 +25,77 @@
 
 :- use_module(dists, [get_dist_domain_size/2]).
 
-cpt_average(Vars, Key, Els0, CPT) :-
-	build_avg_table(Vars, Els0, Key, 1.0, CPT).
+cpt_average(AllVars, Key, Els0, Tab, Vs, NewVs) :-
+	cpt_average(AllVars, Key, Els0, 1.0, Tab, Vs, NewVs).
 
-cpt_average(Vars, Key, Els0, Softness, CPT) :-
-	build_avg_table(Vars, Els0, Key, Softness, CPT).
+% support variables with evidence from domain. This should make everyone's life easier.
+cpt_average([_|Vars], Key, Els0, Softness, p(Els0, CPT, NewEls), Vs, NewVs) :-
+	find_evidence(Vars, 0, TotEvidence, RVars),
+	build_avg_table(RVars, Vars, Els0, Key, TotEvidence, Softness, MAT, NewEls, Vs, NewVs),
+	matrix_to_list(MAT, CPT).
 
-cpt_max(Vars, Key, Els0, CPT) :-
-	build_max_table(Vars, Els0, Els0, Key, 1.0, CPT).
+find_evidence([], TotEvidence, TotEvidence, []).
+find_evidence([V|Vars], TotEvidence0, TotEvidence, RVars) :-
+	clpbn:get_atts(V,[evidence(Ev)]), !,
+	TotEvidenceI is TotEvidence0+Ev,
+	find_evidence(Vars, TotEvidenceI, TotEvidence, RVars).
+find_evidence([V|Vars], TotEvidence0, TotEvidence, [V|RVars]) :-
+	find_evidence(Vars, TotEvidence0, TotEvidence, RVars).
 
-cpt_min(Vars, Key, Els0, CPT) :-
-	build_min_table(Vars, Els0, Els0, Key, 1.0, CPT).
+cpt_max([_|Vars], Key, Els0, CPT, Vs, NewVs) :-
+	build_max_table(Vars, Els0, Els0, Key, 1.0, CPT, Vs, NewVs).
 
-build_avg_table(Vars, Domain, _, Softness, p(Domain, CPT, Vars)) :-
+cpt_min([_|Vars], Key, Els0, CPT, Vs, NewVs) :-
+	build_min_table(Vars, Els0, Els0, Key, 1.0, CPT, Vs, NewVs).
+
+build_avg_table(Vars, OVars, Domain, _, TotEvidence, Softness, CPT, Vars, Vs, Vs) :-
 	length(Domain, SDomain),
 	int_power(Vars, SDomain, 1, TabSize),
-	TabSize =< 16, 
+	TabSize =< 256, 
 	/* case gmp is not there !! */
 	TabSize > 0, !,
-	average_cpt(Vars, Domain, Softness, CPT).
-build_avg_table(Vars, Domain, Key, Softness, p(Domain, CPT, [V1,V2])) :-
+	average_cpt(Vars, OVars, Domain, TotEvidence,  Softness, CPT).
+build_avg_table(Vars, OVars, Domain, Key, TotEvidence, Softness, CPT, [V1,V2], Vs, [V1,V2|NewVs]) :-
 	length(Vars,L),
 	LL1 is L//2,
 	LL2 is L-LL1,
 	list_split(LL1, Vars, L1, L2),
 	Min = 0,
 	length(Domain,Max1), Max is Max1-1,
-	build_intermediate_table(LL1, sum(Min,Max), L1, V1, Key,  1.0, 0, I1),
-	build_intermediate_table(LL2, sum(Min,Max), L2, V2, Key, 1.0, I1, _),
-	average_cpt([V1,V2], Domain, Softness, CPT).
+	build_intermediate_table(LL1, sum(Min,Max), L1, V1, Key,  1.0, 0, I1, Vs, Vs1),
+	build_intermediate_table(LL2, sum(Min,Max), L2, V2, Key, 1.0, I1, _, Vs1, NewVs),
+	average_cpt([V1,V2], OVars, Domain, TotEvidence, Softness, CPT).
 	
-build_max_table(Vars, Domain, Softness, p(Domain, CPT, Vars)) :-
+build_max_table(Vars, Domain, Softness, p(Domain, CPT, Vars), Vs, Vs) :-
 	length(Domain, SDomain),
 	int_power(Vars, SDomain, 1, TabSize),
 	TabSize =< 16, 
 	/* case gmp is not there !! */
 	TabSize > 0, !,
 	max_cpt(Vars, Domain, Softness, CPT).
-build_max_table(Vars, Domain, Softness, p(Domain, CPT, [V1,V2])) :-
+build_max_table(Vars, Domain, Softness, p(Domain, CPT, [V1,V2]), Vs, [V1,V2|NewVs]) :-
 	length(Vars,L),
 	LL1 is L//2,
 	LL2 is L-LL1,
 	list_split(LL1, Vars, L1, L2),
-	build_intermediate_table(LL1, max(Domain,CPT), L1, V1, Key, 1.0,  0, I1),
-	build_intermediate_table(LL2, max(Domain,CPT), L2, V2, Key, 1.0, I1, _),
+	build_intermediate_table(LL1, max(Domain,CPT), L1, V1, Key, 1.0,  0, I1, Vs, Vs1),
+	build_intermediate_table(LL2, max(Domain,CPT), L2, V2, Key, 1.0, I1, _, Vs1, NewVs),
 	max_cpt([V1,V2], Domain, Softness, CPT).
 	
-build_min_table(Vars, Domain, Softness, p(Domain, CPT, Vars)) :-
+build_min_table(Vars, Domain, Softness, p(Domain, CPT, Vars), Vs, Vs) :-
 	length(Domain, SDomain),
 	int_power(Vars, SDomain, 1, TabSize),
 	TabSize =< 16, 
 	/* case gmp is not there !! */
 	TabSize > 0, !,
 	min_cpt(Vars, Domain, Softness, CPT).
-build_min_table(Vars, Domain, Softness, p(Domain, CPT, [V1,V2])) :-
+build_min_table(Vars, Domain, Softness, p(Domain, CPT, [V1,V2]), Vs, [V1,V2|NewVs]) :-
 	length(Vars,L),
 	LL1 is L//2,
 	LL2 is L-LL1,
 	list_split(LL1, Vars, L1, L2),
-	build_intermediate_table(LL1, min(Domain,CPT), L1, V1, Key, 1.0,  0, I1),
-	build_intermediate_table(LL2, min(Domain,CPT), L2, V2, Key, 1.0, I1, _),
+	build_intermediate_table(LL1, min(Domain,CPT), L1, V1, Key, 1.0,  0, I1, Vs, Vs1),
+	build_intermediate_table(LL2, min(Domain,CPT), L2, V2, Key, 1.0, I1, _, Vs1, NewVs),
 	min_cpt([V1,V2], Domain, Softness, CPT).
 	
 int_power([], _, TabSize, TabSize).
@@ -89,17 +103,17 @@ int_power([_|L], X, I0, TabSize) :-
 	I is I0*X,
 	int_power(L, X, I, TabSize).
 
-build_intermediate_table(1,_,[V],V, _, _, I, I) :- !.
-build_intermediate_table(2, Op, [V1,V2], V, Key, Softness, I0, If) :- !,
+build_intermediate_table(1,_,[V],V, _, _, I, I, Vs, Vs) :- !.
+build_intermediate_table(2, Op, [V1,V2], V, Key, Softness, I0, If, Vs, Vs) :- !,
 	If is I0+1,
 	generate_tmp_random(Op, 2, [V1,V2], V, Key, Softness, I0).
-build_intermediate_table(N, Op, L, V, Key, Softness, I0, If) :-
+build_intermediate_table(N, Op, L, V, Key, Softness, I0, If, Vs, [V1,V2|NewVs]) :-
 	LL1 is N//2,
 	LL2 is N-LL1,
 	list_split(LL1, L, L1, L2),
 	I1 is I0+1,
-	build_intermediate_table(LL1, Op, L1, V1, Key, Softness, I1, I2),
-	build_intermediate_table(LL2, Op, L2, V2, Key, Softness, I2, If),
+	build_intermediate_table(LL1, Op, L1, V1, Key, Softness, I1, I2, Vs, Vs1),
+	build_intermediate_table(LL2, Op, L2, V2, Key, Softness, I2, If, Vs1, NewVs),
 	generate_tmp_random(Op, N, [V1,V2], V, Key, Softness, I0).
 
 % averages are transformed into sums.
@@ -129,26 +143,26 @@ list_split(I, [H|L], [H|L1], L2) :-
 % generate actual table, instead of trusting the solver
 %
 
-average_cpt(Vs,Vals,_,CPT) :-
+average_cpt(Vs, OVars, Vals, Base, _, MCPT) :-
 	get_ds_lengths(Vs,Lengs),
-	sumlist(Lengs, Tot),
-	length(Vals,SVals),
+	length(OVars, N),
+	length(Vals, SVals),
+	Tot is (N-1)*SVals,
 	Factor is SVals/Tot,
 	matrix_new(floats,[SVals|Lengs],MCPT),
-	fill_in_average(Lengs,Factor,MCPT),
-	matrix_to_list(MCPT,CPT).
+	fill_in_average(Lengs,Factor,Base,MCPT).
 
 get_ds_lengths([],[]).
 get_ds_lengths([V|Vs],[Sz|Lengs]) :-
 	get_vdist_size(V, Sz),
 	get_ds_lengths(Vs,Lengs).
 	
-fill_in_average(Lengs,SVals,MCPT) :-
+fill_in_average(Lengs, SVals, Base, MCPT) :-
 	generate(Lengs, Case),
-	average(Case, SVals, Val),
+	average(Case, SVals, Base, Val),
 	matrix_set(MCPT,[Val|Case],1.0),
 	fail.
-fill_in_average(_,_,_).
+fill_in_average(_,_,_,_).
 
 generate([], []).
 generate([N|Lengs], [C|Case]) :-
@@ -161,8 +175,8 @@ from(I1,M,J) :-
 	I < M,
 	from(I,M,J).
 
-average(Case, SVals, Val) :-
-	sumlist(Case, Tot),
+average(Case, SVals, Base, Val) :-
+	sum_list(Case, Base, Tot),
 	Val is integer(round(Tot*SVals)).
 
 
diff --git a/CLPBN/clpbn/dists.yap b/CLPBN/clpbn/dists.yap
index 8320df5bb..2ba735907 100644
--- a/CLPBN/clpbn/dists.yap
+++ b/CLPBN/clpbn/dists.yap
@@ -68,6 +68,9 @@ where Id is the id,
   DSize is the domain size,
   Type is
     tab for tabular
+    avg for average
+    max for maximum
+    min for minimum
     trans  for HMMs
     continuous
   Domain is
@@ -98,6 +101,9 @@ dist(V, Id, Key, Parents) :-
 dist(V, Id, Key, Parents) :-
 	var(Key), !,
 	when(Key, dist(V, Id, Key, Parents)).
+dist(avg(Domain, Parents), avg(Domain), _, Parents).
+dist(max(Domain, Parents), max(Domain), _, Parents).
+dist(min(Domain, Parents), min(Domain), _, Parents).
 dist(p(Type, CPT), Id, Key, FParents) :-
 	copy_structure(Key, Key0),
 	distribution(Type, CPT, Id, Key0, [], FParents).
@@ -207,6 +213,8 @@ get_dsizes([P|Parents], [Sz|Sizes], Sizes0) :-
 get_dist_params(Id, Parms) :-
 	recorded(clpbn_dist_db, db(Id, _, Parms, _, _, _, _), _).
 
+get_dist_domain_size(avg(D,_), DSize) :- !,
+	length(D, DSize).
 get_dist_domain_size(Id, DSize) :-
 	recorded(clpbn_dist_db, db(Id, _, _, _, _, _, DSize), _).
 
diff --git a/CLPBN/clpbn/examples/School/schema.yap b/CLPBN/clpbn/examples/School/schema.yap
index 0c5b40b8b..dfeb48ee1 100644
--- a/CLPBN/clpbn/examples/School/schema.yap
+++ b/CLPBN/clpbn/examples/School/schema.yap
@@ -47,8 +47,7 @@ course_professor(Key, PKey) :-
 	
 course_rating(CKey, Rat) :-
 	setof(Sat, RKey^(registration_course(RKey,CKey), registration_satisfaction(RKey,Sat)), Sats),
-	build_rating_table(Sats, rating(CKey), Table),
-	{ Rat =  rating(CKey) with Table }.
+	{ Rat =  rating(CKey) with avg([h,m,l],Sats) }.
 
 course_difficulty(Key, Dif) :-
 	dif_table(Key, Dist),
@@ -64,8 +63,7 @@ student_intelligence(Key, Int) :-
 student_ranking(Key, Rank) :-
 	setof(Grade, CKey^(registration_student(CKey,Key),
 			 registration_grade(CKey, Grade)), Grades),
-	build_grades_table(Grades, ranking(Key), GradesTable),
-	{ Rank = ranking(Key) with GradesTable }.
+	{ Rank = ranking(Key) with avg([a,b,c,d],Grades) }.
 
 :- ensure_loaded(tables).
 
diff --git a/CLPBN/clpbn/vel.yap b/CLPBN/clpbn/vel.yap
index 94b285150..6f383eb69 100644
--- a/CLPBN/clpbn/vel.yap
+++ b/CLPBN/clpbn/vel.yap
@@ -31,12 +31,12 @@
 
 :- use_module(library('clpbn/dists'),
 	      [
+	       dist/4,
 	       get_dist_domain_size/2,
 	       get_dist_matrix/5]).
 
 :- use_module(library('clpbn/utils'), [
-	clpbn_not_var_member/2,
-	check_for_hidden_vars/3]).
+	clpbn_not_var_member/2]).
 
 :- use_module(library('clpbn/display'), [
 	clpbn_bind_vals/3]).
@@ -60,6 +60,10 @@
 	       append/3
 	      ]).
 
+:- use_module(library('clpbn/aggregates'),
+	      [cpt_average/6]).
+
+
 check_if_vel_done(Var) :-
 	get_atts(Var, [size(_)]), !.
 
@@ -70,14 +74,12 @@ vel([[]],_,_) :- !.
 vel([LVs],Vs0,AllDiffs) :-
 	init_vel_solver([LVs], Vs0, AllDiffs, State),
 	% variable elimination proper
-	run_vel_solver([LVs], [Ps], State),
-	% from array to list
-	list_from_CPT(Ps, LPs),
+	run_vel_solver([LVs], [LPs], State),
 	% bind Probs back to variables so that they can be output.
 	clpbn_bind_vals([LVs],[LPs],AllDiffs).
 
 init_vel_solver(Qs, Vs0, _, LVis) :-
-	check_for_hidden_vars(Vs0, Vs0, Vs1),
+	check_for_special_vars(Vs0, Vs1),
 	% LVi will have a  list of CLPBN variables
 	% Tables0 will have the full data on each variable
 	init_influences(Vs1, G, RG),
@@ -86,6 +88,21 @@ init_vel_solver(Qs, Vs0, _, LVis) :-
 	(clpbn:output(xbif(XBifStream)) -> clpbn2xbif(XBifStream,vel,Vs) ; true),
 	(clpbn:output(gviz(XBifStream)) -> clpbn2gviz(XBifStream,vel,Vs,_) ; true).
 
+check_for_special_vars([], []).
+check_for_special_vars([V|Vs0], [V|Vs1]) :-
+	clpbn:get_atts(V, [key(K), dist(Id,Parents)]), !,
+	simplify_dist(Id, V, K, Parents, Vs0, Vs00),
+	check_for_special_vars(Vs00, Vs1).
+check_for_special_vars([_|Vs0], Vs1) :-
+	check_for_special_vars(Vs0, Vs1).
+
+% transform aggregate distribution into tree
+simplify_dist(avg(Domain), V, Key, Parents, Vs0, VsF) :- !,
+	cpt_average([V|Parents], Key, Domain, NewDist, Vs0, VsF),
+	dist(NewDist, Id, Key, ParentsF),
+	clpbn:put_atts(V, [dist(Id,ParentsF)]).
+simplify_dist(_, _, _, _, Vs0, Vs0).
+
 init_vel_solver_for_questions([], _, _, [], []).
 init_vel_solver_for_questions([Vs|MVs], G, RG, [NVs|MNVs0], [NVs|LVis]) :-
 	influences(Vs, _, NVs0, G, RG),
diff --git a/CLPBN/learning/example/school_params.yap b/CLPBN/learning/example/school_params.yap
index 935797979..fe927141f 100644
--- a/CLPBN/learning/example/school_params.yap
+++ b/CLPBN/learning/example/school_params.yap
@@ -11,7 +11,7 @@ main :-
         em(L,0.01,10,CPTs,Lik),
         writeln(Lik:CPTs).
 
-missing(0.3).
+missing(0.1).
 
 % miss 30% of the examples.
 goal(professor_ability(P,V)) :-