update package locations to a subdir packages

2009-02-16 12:23:29 +00:00
parent 495ff55868
commit 9c9444bece
151 changed files with 62955 additions and 0 deletions
--- a/packages/CLPBN/learning/aleph_params.yap
+++ b/packages/CLPBN/learning/aleph_params.yap
@@ -0,0 +1,306 @@
+%
+% Interface the Aleph ILP system to CLP(BN)
+%
+% Relies on the Aleph cost function.
+% It assumes Aleph works as usual, but some variables are of type random.
+%
+:- module(clpbn_aleph,
+	  [init_clpbn_cost/0,
+	  random_type/2]).
+
+:- dynamic rt/2, inited/1.
+
+:- use_module(library('clpbn'),
+	[{}/1,
+	 clpbn_flag/2,
+	 clpbn_flag/3,
+         set_clpbn_flag/2]).
+
+:- use_module(library('clpbn/learning/em')).
+
+:- use_module(library('clpbn/matrix_cpt_utils'),
+	[uniform_CPT_as_list/2]).
+
+:- use_module(library('clpbn/dists'),
+	[reset_all_dists/0,
+	 get_dist_key/2,
+	 get_dist_params/2
+     ]).
+
+:- use_module(library('clpbn/table'),
+	[clpbn_tabled_abolish/1,
+	 clpbn_tabled_asserta/1,
+	 clpbn_tabled_asserta/2,
+	 clpbn_tabled_assertz/1,
+	 clpbn_tabled_clause/2,
+	 clpbn_tabled_number_of_clauses/2,
+	 clpbn_is_tabled/1,
+	 clpbn_tabled_dynamic/1]).
+
+%
+% Tell Aleph not to use default solver during saturation
+%
+% all work will be done by EM 
+:- set_clpbn_flag(solver,none).
+
+%
+% This is the Aleph interface
+% examples are stored as example(Id, Type, Example)
+% CPT domains are stored as random_type(KeySkeleton, ListOfValues).
+%
+
+:- use_module(library(lists),[append/3]).
+
+:- multifile user:cost/3.
+
+% handle uninstantiated examples as hidden variables.
+:- user:set(skolem_examples, false).
+
+% avoid doing CLP(BN) stuff except at start
+:- user:set(sat_start_hook, clpbn_aleph:disable_solver).
+:- user:set(sat_stop_hook, clpbn_aleph:enable_solver).
+
+:- user:set(reduce_start_hook, clpbn_aleph:disable_solver).
+:- user:set(reduce_stop_hook, clpbn_aleph:enable_solver).
+
+:- user:set(record_testclause_hook, clpbn_aleph:do_nothing).
+
+:- user:set(newbest_hook, clpbn_aleph:store_theory).
+
+% disable_solver/1, disable_solver/2: installed above as Aleph start hooks;
+% the hook arguments are ignored. Calls clpbn_flag/3 on the solver flag with
+% new value none (presumably switching the solver off -- confirm against
+% library clpbn) and keeps the previous value in the global variable
+% old_clpbn_solver.
+disable_solver(_HookArg) :-
+	clpbn_flag(solver, Previous, none),
+	nb_setval(old_clpbn_solver, Previous).
+disable_solver(_HookArg1, _HookArg2) :-
+	clpbn_flag(solver, Previous, none),
+	nb_setval(old_clpbn_solver, Previous).
+
+% enable_solver/0, enable_solver/2: counterpart of disable_solver; the hook
+% arguments are ignored. Reads the value saved in the global variable
+% old_clpbn_solver and sets the solver flag back to it.
+enable_solver :-
+	nb_getval(old_clpbn_solver, Saved),
+	set_clpbn_flag(solver, Saved).
+enable_solver(_HookArg1, _HookArg2) :-
+	nb_getval(old_clpbn_solver, Saved),
+	set_clpbn_flag(solver, Saved).
+
+% do_nothing(_): ignores its argument and always succeeds; installed above
+% as the record_testclause_hook.
+do_nothing(_).
+
+% backup current best theory in DB.
+% store_theory(_, Clause, _): installed above as the newbest_hook.
+% The first clause erases everything recorded under the key best_theory and
+% then fails on purpose, so that one of the next two clauses runs: the
+% clauses of the head predicate of Clause are read through the CLP(BN)
+% table interface when the head is tabled, and from module user otherwise.
+store_theory(_,_,_) :-
+	eraseall(best_theory),
+	fail.
+store_theory(_,(H:-_),_) :-
+	clpbn_is_tabled(user:H), !,
+	store_tabled_theory(H).
+store_theory(_,(H:-_),_) :-
+	store_theory(H).
+
+% store_tabled_theory(+H): failure-driven loop over the tabled clauses that
+% match H. Each body is passed through add_correct_cpt/2 and the resulting
+% clause is recorded with store_cl/1. The second clause makes the predicate
+% succeed once all clauses have been visited.
+store_tabled_theory(H) :-
+	clpbn_tabled_clause(user:H,B),
+	add_correct_cpt(B,NB),
+	store_cl((H:-NB)),
+	fail.
+store_tabled_theory(_).
+	
+% store_theory(+H): same as store_tabled_theory/1, but enumerates the
+% clauses with clause/2 in module user. Failure-driven loop; always
+% succeeds through the second clause.
+store_theory(H) :-
+	clause(user:H,B),
+	add_correct_cpt(B,NB),
+	store_cl((H:-NB)),
+	fail.
+store_theory(_).
+
+% add_correct_cpt(+Body, -NewBody): walk a conjunction, copying every goal
+% unchanged, until the final goal, which must be a CLP(BN) constraint
+% { V = K with Tab } (optionally qualified with clpbn:). The table of that
+% constraint is replaced by the one computed by correct_tab/3.
+% Fails if the last goal of Body is not such a constraint.
+add_correct_cpt((G,B),(G,NB)) :- !,
+	add_correct_cpt(B,NB).
+add_correct_cpt((clpbn:{V = K with Tab }), ({V = K with NTab})) :-
+	correct_tab(Tab,K,NTab).
+add_correct_cpt(({V = K with Tab }), ({V = K with NTab})) :-
+	correct_tab(Tab,K,NTab).
+
+% correct_tab(+Tab, +Key, -NewTab): NewTab is Tab with its second argument
+% replaced by the parameters obtained from get_dist_params/2 for the
+% distribution whose key is Key. Handles both p/2 and p/3 tables.
+correct_tab(p(Dom,_OldParams), Key, p(Dom,Params)) :-
+	get_dist_key(DistId, Key),
+	get_dist_params(DistId, Params).
+correct_tab(p(Dom,_OldParams,Extra), Key, p(Dom,Params,Extra)) :-
+	get_dist_key(DistId, Key),
+	get_dist_params(DistId, Params).
+
+% store_cl(+Cl): append Cl to the records kept under the key best_theory.
+store_cl(Cl) :-
+	recordz(best_theory, Cl, _).
+	
+
+:- user:set(best_clause_hook, clpbn_aleph:add_new_clause).
+
+% add_new_clause(_, Clause, _, _): installed above as the best_clause_hook.
+% Clauses with body true are ignored. Otherwise the body is rewritten with
+% rewrite_body/5 so that it ends in a cut followed by the CLP(BN)
+% constraint for the head, the table of that constraint is bound to the
+% parameters currently stored for the head's key, and the new clause is
+% added in front of the existing ones (through the table interface when
+% the head is tabled). When Aleph's verbosity setting is at least 1 the
+% resulting predicate is listed.
+add_new_clause(_,(_ :- true),_,_) :- !.
+add_new_clause(_,(H :- B),_,_) :-
+	user:db_usage,
+	user:db_dynamic,
+	domain(H, K, V, D),
+	rewrite_body(B, IB, Vs, _, ( !, { V = K with p(D, CPTList, Vs) })),
+	% need to remember which CPT we want
+	get_dist_key(Id, K),
+	get_dist_params(Id, CPTList),
+	(
+	    clpbn_is_tabled(user:H)
+	->
+	    clpbn_tabled_asserta(user:(H :- IB))
+	;
+	    asserta(user:(H :- IB))
+	),
+	% V is the random variable of the head, so the verbosity level needs
+	% a variable of its own: reusing V would either bind the head
+	% variable to a number or fail when V is already instantiated.
+	user:setting(verbosity,Verbosity),
+	( Verbosity >= 1 -> 
+	    user:p_message('CLP(BN) Theory'),
+	    functor(H,N,Ar), listing(user:N/Ar)
+	;
+	    true
+	).
+
+
+% user-defined cost function, Aleph knows about this (and only about this). 
+% cost(+Clause, +Inf, -Score).
+% First clause (Clause is H :- B): the body is rewritten so that it ends
+% with the CLP(BN) constraint for H using a uniform CPT, the clause is
+% temporarily asserted in front of the existing ones, cpt_score/1 is run,
+% and the clause is erased again whether or not scoring succeeded. Score is
+% the negated value returned by cpt_score/1; the call fails when
+% cpt_score/1 fails.
+% Second clause (any other term): Score is the negated result of
+% init_clpbn_cost/2.
+user:cost((H :- B),Inf,Score) :- !,
+	domain(H, K, V, D),
+	check_info(Inf),
+	rewrite_body(B, IB, Vs, Ds, ( !, { V = K with p(D, CPTList, Vs) })),
+	uniform_cpt([D|Ds], CPTList),
+	(
+	    clpbn_is_tabled(user:H)
+	->
+	    clpbn_tabled_asserta(user:(H :- IB), R)
+	;
+	    asserta(user:(H :- IB), R)
+	),
+	(
+	    cpt_score(Score0)
+	->
+	    erase(R),
+	    Score is -Score0
+        ;
+	    % illegal clause, just get out of here.
+	    erase(R),
+	    fail
+	).
+user:cost(H,_Inf,Score) :- !,
+	init_clpbn_cost(H, Score0),
+	Score is -Score0.
+
+% check_info(_): always succeeds.
+% It exists so that Aleph will actually compute coverage: Aleph computes
+% coverage only if cost actually checks Inf.
+check_info(_).
+
+% init_clpbn_cost(+H, -Score): score of the initial theory for the
+% predicate of H. The result is cached in inited/1, so the work below is
+% only done on the first call. On that call the predicate of H is
+% abolished and redeclared dynamic (through the table interface when
+% tabled), a single default clause with a uniform CPT is added, and the
+% score is computed with cpt_score/1.
+init_clpbn_cost(_, Score) :-
+	inited(Score), !.
+init_clpbn_cost(H, Score) :-
+	functor(H,N,A),
+	% get rid of Aleph crap
+	(
+	    clpbn_is_tabled(user:H)
+	->
+	     clpbn_tabled_abolish(user:N/A),
+ 	     clpbn_tabled_dynamic(user:N/A)
+	;
+	     abolish(user:N/A),
+	     % make it easy to add and remove clauses.
+ 	     dynamic(user:N/A)
+	),
+	domain(H, K, V, D),
+	uniform_cpt([D], CPTList),
+	% This will be the default clause, called when the other rules fail.
+	(
+	    clpbn_is_tabled(user:H)
+	->
+	     clpbn_tabled_assertz(user:(H :- !, { V = K with p(D, CPTList) }))
+	 ;
+	     assert(user:(H :- !, { V = K with p(D, CPTList) }))
+	 ),
+	cpt_score(Score),
+	assert(inited(Score)).
+
+% receives H, and generates a key K, a random variable RV, and a domain  D.
+% First clause: the last argument of the modeh declaration for H must be
+% +RType, where RType was declared with random_type/2; D is the list of
+% values given in that declaration.
+% Second clause: fall back on a user-defined domain/2 relation from key to
+% domain.
+domain(H, K, RV, D) :-
+	functor(H,Name,Arity),
+	functor(Pred,Name,Arity),
+	(
+	    recorded(aleph,modeh(_,Pred),_)
+	-> 
+	    true
+	;
+	    user:'$aleph_global'(modeh,modeh(_,Pred))
+	),
+	arg(Arity,Pred,+RType),
+	rt(RType,D), !,
+	key_from_head(H,K,RV).
+domain(H, K, V, D) :-
+	% the guard must test the predicate that is called below, which is
+	% domain/2 (the previous guard tested domain/1).
+	current_predicate(_,user:domain(_,_)),
+	key_from_head(H,K,V),
+	user:domain(K,D).
+
+% key_from_head(+H, -K, -V): V is the last argument of H. K is built from
+% the remaining arguments, with a functor name made by concatenating the
+% name of H with the current number of clauses of its predicate (taken
+% from the table interface when H is tabled).
+key_from_head(H,K,V) :-
+	H =.. [Name|Args],
+	(
+	    clpbn_is_tabled(user:H)
+	->
+	    clpbn_tabled_number_of_clauses(user:H,NClauses)
+	;
+	    predicate_property(user:H,number_of_clauses(NClauses))
+	),
+	atomic_concat(Name,NClauses,NName),
+	append(H0L,[V],Args),
+	K =.. [NName|H0L].
+
+% transforms a body into something that is going to be called.
+% rewrite_body(+G0, -Goals, -Vars, -Domains, +Tail)
+% receives G0, and generates a conjunction of goals qualified with user:,
+% a list of variables, and a list of domains (one entry per goal accepted
+% by rewrite_goal/4).
+% receives also a Tail with the constraint to append at the end.
+rewrite_body((A,B), (user:NA,NB), [V|Vs], [D|Ds], Tail) :-
+	rewrite_goal(A, V, D, NA), !,
+	rewrite_body(B, NB, Vs, Ds, Tail).
+% commit here too: without the cut a conjunction could also be taken as a
+% single goal by the clauses below when a caller backtracks (cost/3 does
+% when scoring fails), producing a second, different rewrite.
+rewrite_body((A,B), (user:A,NB), Vs, Ds, Tail) :- !,
+	rewrite_body(B,NB, Vs, Ds, Tail).
+rewrite_body(A,(user:NA,Tail), [V], [D], Tail) :- 
+	rewrite_goal(A, V, D, NA), !.
+rewrite_body(A, (user:A,Tail), [], [], Tail).
+
+% rewrite_goal(+A, -V, -D, -NA): succeeds only when the modeb declaration
+% for A has -RType as its last argument and RType was declared with
+% random_type/2. In that case D is the declared list of values and NA is A
+% with its last argument replaced by the fresh variable V. Other goals make
+% this predicate fail, so they need not be rewritten.
+rewrite_goal(A,V,D,NA) :-
+	functor(A,Name,Arity),
+	functor(Pred,Name,Arity),
+	(
+	    recorded(aleph,modeb(_,Pred),_)
+	-> 
+	    true
+	;
+	    user:'$aleph_global'(modeb,modeb(_,Pred))
+	),
+	arg(Arity,Pred,-RType),
+	rt(RType,D), !,
+	A =.. [Name|Args],
+	replace_last_var(Args,V,NArgs),
+	NA =.. [Name|NArgs].
+
+% replace_last_var(+Args, +New, -NewArgs): NewArgs is Args with its last
+% element replaced by New. Fails on the empty list.
+replace_last_var([_Last], New, [New]) :- !.
+replace_last_var([Head|Tail], New, [Head|NewTail]) :-
+	replace_last_var(Tail, New, NewTail).
+
+
+%
+% This is the key
+%
+% cpt_score(-Lik): run EM (tolerance 0.01, at most 10 iterations) over all
+% positive examples, with the solver flag temporarily set to the value of
+% the em_solver flag. Lik is the likelihood returned by em/5.
+% The previous solver is restored on success, on failure and when em/5
+% raises an exception; cost/3 relies on this predicate failing for illegal
+% clauses, so the flag must not be left changed in that case.
+cpt_score(Lik) :-
+	findall(user:Ex, user:example(_,pos,Ex),  Exs),
+	clpbn_flag(solver, Solver),
+	clpbn_flag(em_solver, EMSolver),
+	set_clpbn_flag(solver, EMSolver),
+	reset_all_dists,
+	(
+	    catch(em(Exs, 0.01, 10, _Tables, Lik0),
+		  Error,
+		  ( set_clpbn_flag(solver, Solver), throw(Error) ))
+	->
+	    set_clpbn_flag(solver, Solver),
+	    Lik = Lik0
+	;
+	    set_clpbn_flag(solver, Solver),
+	    fail
+	).
+
+% complete_clpbn_cost(_AlephClause): placeholder; always succeeds and does
+% nothing.
+complete_clpbn_cost(_AlephClause).
+
+% random_type(+A, +B): record the fact rt(A,B). domain/4 and rewrite_goal/4
+% look rt/2 up with a mode type as first argument and use the second
+% argument as the domain of that type.
+random_type(A,B) :-
+	assert(rt(A,B)).
+
+
+% uniform_cpt(+Ds, -CPTList): Ds is a list of domains (lists of values).
+% Their lengths are passed to uniform_CPT_as_list/2, which returns CPTList.
+uniform_cpt(Ds, CPTList) :-
+	lengths(Ds, Ls),
+	uniform_CPT_as_list(Ls, CPTList).
+
+% lengths(+Lists, -Sizes): Sizes holds the length of each list in Lists,
+% in the same order.
+lengths([], []).
+lengths([Domain|Domains], [Size|Sizes]) :-
+	length(Domain, Size),
+	lengths(Domains, Sizes).
+
--- a/packages/CLPBN/learning/bnt_parms.yap
+++ b/packages/CLPBN/learning/bnt_parms.yap
@@ -0,0 +1,121 @@
+%
+% Learn parameters using the BNT toolkit
+%
+
+:- yap_flag(unknown,error).
+
+:- style_check(all).
+
+:- module(bnt_parameters, [learn_parameters/2]).
+
+:- use_module(library('clpbn'), [
+	clpbn_flag/3]).
+
+:- use_module(library('clpbn/bnt'), [
+	create_bnt_graph/2]).
+
+:- use_module(library('clpbn/display'), [
+	clpbn_bind_vals/3]).
+
+:- use_module(library('clpbn/dists'), [
+				       get_dist_domain/2
+				      ]).
+
+:- use_module(library(matlab), [matlab_initialized_cells/4,
+				matlab_call/2,
+				matlab_get_variable/2
+			      ]).
+
+:- dynamic bnt_em_max_iter/1.
+bnt_em_max_iter(10).
+
+
+% syntactic sugar for matlab_call.
+:- op(800,yfx,<--).
+
+% G <-- Y: calls matlab_call(Y, G), i.e. the right-hand side is the call
+% and the left-hand side is what matlab_call/2 receives as second argument.
+G <-- Y :-
+	matlab_call(Y,G).
+
+
+% learn_parameters(+Items, -Tables): run the goals in Items, collect the
+% attributed variables that carry a CLP(BN) key, build the BNT graph and
+% the evidence sample, run BNT's EM and read back one table per entry
+% returned by create_bnt_graph/2.
+% The solver and bnt_model flags are switched to bnt and tied for the
+% duration of the call and switched back at the end.
+learn_parameters(Items, Tables) :-
+	run_all(Items),
+	clpbn_flag(solver, OldSolver, bnt),
+	clpbn_flag(bnt_model, Old, tied),
+	attributes:all_attvars(AVars),
+	% sort and incorporate evidence
+	clpbn_vars(AVars, AllVars),
+	length(AllVars,NVars),
+	create_bnt_graph(AllVars, Reps),
+	mk_sample(AllVars,NVars,EvVars),
+	bnt_learn_parameters(NVars,EvVars),
+	get_parameters(Reps, Tables),
+	clpbn_flag(solver, bnt, OldSolver),
+	clpbn_flag(bnt_model, tied, Old).
+
+% run_all(+Goals): call every goal of the list, in order, in module user.
+% Fails as soon as one goal fails.
+run_all([]).
+run_all([Goal|Goals]) :-
+	call(user:Goal),
+	run_all(Goals).
+
+% clpbn_vars(+Vs, -BVars): keep the variables of Vs that have a CLP(BN) key
+% attribute, sort them by key and merge variables that share a key.
+clpbn_vars(Vs,BVars) :-
+	get_clpbn_vars(Vs,CVs),
+	keysort(CVs,KVs),
+	merge_vars(KVs,BVars).
+	
+% get_clpbn_vars(+Vars, -Pairs): Pairs has a Key-Var entry for every
+% variable of Vars with a clpbn key(Key) attribute; other variables are
+% dropped.
+get_clpbn_vars([],[]).
+get_clpbn_vars([V|GVars],[K-V|CLPBNGVars]) :-
+	clpbn:get_atts(V, [key(K)]), !,
+	get_clpbn_vars(GVars,CLPBNGVars).
+get_clpbn_vars([_|GVars],CLPBNGVars) :-
+	get_clpbn_vars(GVars,CLPBNGVars).
+
+% merge_vars(+SortedPairs, -Vars): SortedPairs is a key-sorted list of
+% Key-Var pairs. Consecutive entries with the same key are unified into a
+% single variable, which appears once in Vars.
+merge_vars([],[]).
+merge_vars([K-V|KVs],[V|BVars]) :-
+	get_var_has_same_key(KVs,K,V,KVs0),
+	merge_vars(KVs0,BVars).
+	
+% get_var_has_same_key(+Pairs, +Key, ?Var, -Rest): consume the leading
+% entries of Pairs that unify with Key-Var (thereby unifying their
+% variables with Var); Rest is what remains.
+get_var_has_same_key([Key-Var|Pairs], Key, Var, Rest)  :- !,
+	get_var_has_same_key(Pairs, Key, Var, Rest).
+get_var_has_same_key(Rest, _, _, Rest).
+
+
+% mk_sample(+AllVars, +NVars, -LL): build the list of evidence entries for
+% AllVars with add2sample/2, return its length in LL, and pass the list to
+% matlab_initialized_cells/4 together with the sizes NVars and 1 and the
+% name sample.
+mk_sample(AllVars,NVars, LL) :-
+	add2sample(AllVars, LN),
+	length(LN,LL),
+	matlab_initialized_cells( NVars, 1, LN, sample).
+
+% add2sample(+Vars, -Vals): for every variable with both evidence and a
+% distribution, emit val(VId,1,Val), where VId is the variable's bnt_id
+% attribute and Val is the 1-based position of the evidence value in the
+% distribution's domain. Variables without evidence are skipped.
+add2sample([],  []).
+add2sample([V|Vs],[val(VId,1,Val)|Vals]) :-
+	clpbn:get_atts(V, [evidence(Ev),dist(Id,_)]), !,
+	bnt:get_atts(V,[bnt_id(VId)]),
+	get_dist_domain(Id, Domain),
+	evidence_val(Ev,1,Domain,Val),
+	add2sample(Vs, Vals).
+add2sample([_V|Vs],Vals) :-
+	add2sample(Vs, Vals).
+
+% evidence_val(+Ev, +Pos0, +Domain, -Pos): Pos is Pos0 plus the number of
+% elements that precede the first element of Domain unifying with Ev.
+% Fails when no element of Domain unifies with Ev.
+evidence_val(Ev, Pos, [Ev|_], Pos) :- !.
+evidence_val(Ev, Pos0, [_|Rest], Pos) :-
+	Pos1 is Pos0+1,
+	evidence_val(Ev, Pos1, Rest, Pos).
+
+% bnt_learn_parameters(_, _): both arguments are ignored. Creates a
+% junction-tree inference engine for the Matlab variable bnet and runs
+% learn_params_em on the Matlab variable sample, for at most
+% bnt_em_max_iter/1 iterations. The results are left in the Matlab
+% variables new_bnet and trace. The commented-out lines are alternative
+% inference engines.
+bnt_learn_parameters(_,_) :-
+	engine <-- jtree_inf_engine(bnet),
+%	engine <-- var_elim_inf_engine(bnet),
+%	engine <-- gibbs_sampling_inf_engine(bnet),
+%	engine <-- belprop_inf_engine(bnet),
+%	engine <-- pearl_inf_engine(bnet),
+	bnt_em_max_iter(MaxIters),
+	[new_bnet, trace] <-- learn_params_em(engine, sample, MaxIters).
+
+
+% get_parameters(+Reps, -CPTs): Reps is a list of Rep-v(_,_,_) pairs; CPTs
+% holds, in the same order, the table fetched for each Rep by
+% get_new_table/2.
+get_parameters([],[]).
+get_parameters([Rep-v(_,_,_)|Reps],[CPT|CPTs]) :-
+	get_new_table(Rep,CPT),
+	get_parameters(Reps,CPTs).
+	
+% get_new_table(+Rep, -CPT): copy the CPD entry Rep of the Matlab variable
+% new_bnet into the Matlab struct s and fetch its CPT field.
+get_new_table(Rep,CPT) :-
+	s <-- struct(new_bnet.'CPD'({Rep})),
+	matlab_get_variable( s.'CPT', CPT).
+	
+	
--- a/packages/CLPBN/learning/em.yap
+++ b/packages/CLPBN/learning/em.yap
@@ -0,0 +1,229 @@
+%
+% The world famous EM algorithm, in a nutshell
+%
+
+:- module(clpbn_em, [em/5]).
+
+:- use_module(library(lists),
+	      [append/3]).
+
+:- use_module(library(clpbn),
+	      [clpbn_init_solver/5,
+	       clpbn_run_solver/4,
+	       clpbn_flag/2]).
+
+:- use_module(library('clpbn/dists'),
+	      [get_dist_domain_size/2,
+	       empty_dist/2,
+	       dist_new_table/2,
+	       get_dist_key/2,
+	       randomise_all_dists/0,
+	       uniformise_all_dists/0]).
+
+:- use_module(library('clpbn/connected'),
+	      [clpbn_subgraphs/2]).
+
+:- use_module(library('clpbn/learning/learn_utils'),
+	      [run_all/1,
+	       clpbn_vars/2,
+	       normalise_counts/2,
+	       compute_likelihood/3,
+	       soften_sample/2]).
+
+:- use_module(library(lists),
+	      [member/2]).
+
+:- use_module(library(matrix),
+	      [matrix_add/3,
+	       matrix_to_list/2]).
+
+:- use_module(library(rbtrees),
+	      [rb_new/1,
+	       rb_insert/4,
+	       rb_lookup/3]).
+
+:- use_module(library('clpbn/utils'),
+	      [
+	       check_for_hidden_vars/3,
+	       sort_vars_by_key/3]).
+
+:- meta_predicate em(:,+,+,-,-), init_em(:,-).
+
+% em(:Items, +MaxError, +MaxIts, -Tables, -Likelihood)
+% The first clause initialises the EM state from the goals in Items, runs
+% em_loop/7, stores the outcome as an em_found/2 fact and then fails on
+% purpose, which undoes all bindings made while running. The second clause
+% retrieves (and removes) the stored result.
+% When init_em/2 throws error(repeated_parents), handle_em/1 stores
+% em_found(_, -inf) instead.
+em(Items, MaxError, MaxIts, Tables, Likelihood) :-
+	catch(init_em(Items, State),Error,handle_em(Error)),
+	em_loop(0, 0.0, State, MaxError, MaxIts, Likelihood, Tables),
+	assert(em_found(Tables, Likelihood)),
+	fail.
+% get rid of new random variables the easy way :)
+em(_, _, _, Tables, Likelihood) :-
+	retract(em_found(Tables, Likelihood)).
+
+
+% handle_em(+Error): recovery goal for the catch/3 in em/5. For
+% error(repeated_parents) it records a result with likelihood -inf and
+% fails, so that em/5 falls through to its second clause.
+% NOTE(review): there is no clause for other errors, so any other
+% exception raised by init_em/2 is turned into plain failure of the first
+% clause of em/5 -- confirm this is intended.
+handle_em(error(repeated_parents)) :-
+	assert(em_found(_, -inf)),
+	fail.
+	
+	
+
+% This gets you an initial configuration. If there is a lot of evidence
+% tables may be filled in close to optimal, otherwise they may be
+% close to uniform.
+% it also gets you a run for random variables
+
+% state collects all Info we need for the EM algorithm
+% it includes the list of variables without evidence,
+% the list of distributions for which we want to compute parameters,
+% and more detailed info on distributions, namely with a list of all instances for the distribution.
+%
+% init_em(:Items, -State): State is
+% state(AllDists, AllDistInstances, MargVars, SolverVars), where the first
+% three components come from different_dists/4 and SolverVars is the
+% state returned by clpbn_init_solver/5 for the solver named by the
+% em_solver flag.
+init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
+	run_all(Items),
+%	randomise_all_dists,
+	uniformise_all_dists,
+	attributes:all_attvars(AllVars0),
+	sort_vars_by_key(AllVars0,AllVars,[]),
+	% remove variables that do not have to do with this query.
+%	check_for_hidden_vars(AllVars1, AllVars1, AllVars),
+	different_dists(AllVars, AllDists, AllDistInstances, MargVars),
+	clpbn_flag(em_solver, Solver),
+	clpbn_init_solver(Solver, MargVars, AllVars, _, SolverVars).
+
+% loop for as long as you want.
+% em_loop(+Its, +Likelihood0, +State, +MaxError, +MaxIts, -LikelihoodF, -FTables)
+% One iteration runs the E-step (estimate/2) and the M-step (maximise/4).
+% The loop stops when the relative change of the likelihood is below
+% MaxError or when Its has reached MaxIts; the final tables are then
+% converted to lists with ltables/2.
+em_loop(Its, Likelihood0, State, MaxError, MaxIts, LikelihoodF, FTables) :-
+	estimate(State, LPs),
+	maximise(State, Tables, LPs, Likelihood),
+%	writeln(Likelihood:Its:Likelihood0:Tables),
+	(
+	    (
+	     % same test as |(L-L0)/L| < MaxError, written without the
+	     % division so that a likelihood of zero cannot raise an error.
+	     abs(Likelihood - Likelihood0) < MaxError * abs(Likelihood)
+	    ;
+	     % >= rather than == so that a MaxIts below the start value
+	     % still terminates.
+	     Its >= MaxIts
+	    )	 
+	->
+	 ltables(Tables, FTables),
+	 LikelihoodF = Likelihood
+	;
+	 Its1 is Its+1,
+	 em_loop(Its1, Likelihood, State, MaxError, MaxIts, LikelihoodF, FTables)
+	).
+
+% ltables(+Tables, -FTables): turn each Id-Matrix pair into Key-List, where
+% Key is the distribution key of Id and List is the matrix as a list.
+ltables([], []).
+ltables([Id-T|Tables], [Key-LTable|FTables]) :-
+	matrix_to_list(T,LTable),
+	get_dist_key(Id, Key),
+	ltables(Tables, FTables).
+	 
+
+
+% collect the different dists we are going to learn next.
+% different_dists(+AllVars, -AllDists, -AllInfo, -MargVars):
+% AllDists is the list of distribution ids, AllInfo groups the i/4
+% instance records by id, and MargVars is the sorted list of the
+% hidden-variable lists collected by group//3.
+different_dists(AllVars, AllDists, AllInfo, MargVars) :-
+	all_dists(AllVars, Dists0),
+	sort(Dists0, Dists1),
+	group(Dists1, AllDists, AllInfo, MargVars0, []),
+	sort(MargVars0, MargVars).
+
+% all_dists(+Vars, -Dists): one i(Id, [V|Parents], Cases, Hiddens) record
+% per variable, where Id and Parents come from the variable's dist
+% attribute, Hiddens are the variables of the family without evidence and
+% Cases enumerates every assignment compatible with the evidence.
+% Throws error(repeated_parents) when a variable occurs more than once in
+% its own family.
+all_dists([], []).
+all_dists([V|AllVars], [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
+	clpbn:get_atts(V, [dist(Id,Parents)]),
+	% sort/2 removes duplicates, so a shorter list means a repetition.
+	sort([V|Parents], Sorted),
+	length(Sorted, LengSorted),
+        length(Parents, LengParents),
+	(
+	    LengParents+1 =:= LengSorted
+	-> 
+	    true
+	;
+	    throw(error(repeated_parents))
+	),	
+	generate_hidden_cases([V|Parents], CompactCases, Hiddens),
+	uncompact_cases(CompactCases, Cases),
+	all_dists(AllVars, Dists).
+
+% generate_hidden_cases(+Vars, -CompactCases, -Hiddens): for a variable
+% with evidence the compact case is the evidence value itself; for a
+% variable without evidence it is the list 0..Size-1 of all positions in
+% its domain, and the variable is added to Hiddens.
+generate_hidden_cases([], [], []).
+generate_hidden_cases([V|Parents], [P|Cases], Hiddens) :-
+	clpbn:get_atts(V, [evidence(P)]), !,
+	generate_hidden_cases(Parents, Cases, Hiddens).
+generate_hidden_cases([V|Parents], [Cases|MoreCases], [V|Hiddens]) :-
+	clpbn:get_atts(V, [dist(Id,_)]),
+	get_dist_domain_size(Id, Sz),
+	gen_cases(0, Sz, Cases),
+	generate_hidden_cases(Parents, MoreCases, Hiddens).
+	
+% gen_cases(+From, +To, -Cases): Cases is the list of integers From..To-1,
+% empty when From and To are the same.
+gen_cases(Limit, Limit, []) :- !.
+gen_cases(Current, Limit, [Current|Rest]) :-
+	Next is Current+1,
+	gen_cases(Next, Limit, Rest).
+
+% uncompact_cases(+CompactCases, -Cases): Cases is the list of all
+% solutions of is_case/2 for CompactCases.
+uncompact_cases(CompactCases, Cases) :-
+	findall(Case, is_case(CompactCases, Case), Cases).
+
+% is_case(+CompactCases, -Case): integer entries are copied as they are;
+% any other entry is treated as a list and one of its members is chosen,
+% with the remaining choices available on backtracking.
+is_case([], []).
+is_case([A|CompactCases], [A|Case]) :-
+	integer(A), !,
+	is_case(CompactCases, Case).
+is_case([L|CompactCases], [C|Case]) :-
+	member(C, L),
+	is_case(CompactCases, Case).
+
+% group(+Dists, -Ids, -AllInfo)//: Dists is a sorted list of i/4 records.
+% Consecutive records with the same id are grouped into one Id-Records
+% entry of AllInfo and the id is added to Ids. The DCG output list
+% receives the Hiddens list of every record whose Hiddens is not empty.
+group([], [], []) --> [].
+group([i(Id,Ps,Cs,[])|Dists1], [Id|Ids], [Id-[i(Id,Ps,Cs,[])|Extra]|AllInfo]) --> !,
+	same_id(Dists1, Id, Extra, Rest),
+	group(Rest, Ids, AllInfo).
+group([i(Id,Ps,Cs,Hs)|Dists1], [Id|Ids], [Id-[i(Id,Ps,Cs,Hs)|Extra]|AllInfo]) -->
+	[Hs],
+	same_id(Dists1, Id, Extra, Rest),
+	group(Rest, Ids, AllInfo).
+
+% same_id(+Dists, +Id, -Same, -Rest)//: Same is the leading run of records
+% of Dists whose id is Id and Rest is the remainder. As in group//3, the
+% non-empty Hiddens lists of the consumed records go to the DCG output.
+same_id([i(Id,Vs,Cases,[])|Dists1], Id, [i(Id, Vs, Cases, [])|Extra], Rest) --> !,
+	same_id(Dists1, Id, Extra, Rest).
+same_id([i(Id,Vs,Cases,Hs)|Dists1], Id, [i(Id, Vs, Cases, Hs)|Extra], Rest) --> !,
+	[Hs],
+	same_id(Dists1, Id, Extra, Rest).
+same_id(Dists, _, [], Dists) --> [].
+
+
+% compact_mvars(+MargVars, -CMVars): drop an element when it is identical
+% (==) to the one that follows it.
+% NOTE(review): no call to this predicate is visible in this file.
+compact_mvars([], []).
+compact_mvars([X1,X2|MargVars], CMVars) :- X1 == X2, !,
+	compact_mvars([X2|MargVars], CMVars).
+compact_mvars([X|MargVars], [X|CMVars]) :- !,
+	compact_mvars(MargVars, CMVars).
+
+% estimate(+State, -LPs): E-step. Runs the solver named by the em_solver
+% flag on the marginal variables of State, using the solver state saved by
+% init_em/2; LPs is what the solver returns.
+estimate(state(_, _, Margs, SolverState), LPs) :-
+	clpbn_flag(em_solver, Solver),
+	clpbn_run_solver(Solver, Margs, LPs, SolverState).
+
+% maximise(+State, -Tables, +LPs, -Likelihood): M-step. Builds a red-black
+% tree that maps each entry of MargVars to the corresponding entry of LPs
+% and then recomputes every distribution with compute_parameters/6,
+% accumulating the likelihood from 0.0.
+maximise(state(_,DistInstances,MargVars,_), Tables, LPs, Likelihood) :-
+	rb_new(MDistTable0),
+	create_mdist_table(MargVars, LPs, MDistTable0, MDistTable),
+	compute_parameters(DistInstances, Tables, MDistTable, 0.0, Likelihood, LPs:MargVars).
+
+% create_mdist_table(+MargVars, +LPs, +Tree0, -Tree): insert each pair of
+% corresponding elements (key from MargVars, value from LPs) into the
+% red-black tree.
+create_mdist_table([],[],MDistTable,MDistTable).
+create_mdist_table([Vs|MargVars],[Ps|LPs],MDistTable0,MDistTable) :-
+	rb_insert(MDistTable0, Vs, Ps, MDistTableI),
+	create_mdist_table(MargVars, LPs, MDistTableI ,MDistTable).
+
+% compute_parameters(+DistInstances, -Tables, +MDistTable, +Lik0, -Lik, +Info)
+% For each Id-Samples entry: start from the empty table of the
+% distribution, add the counts of all samples, soften and normalise the
+% counts, install the result as the new table of the distribution and add
+% the likelihood contribution (computed from the raw counts and the new
+% table) to the running total. The last argument is only passed along.
+% The sum of the raw counts used to be computed here but was never used,
+% so that call has been dropped.
+compute_parameters([], [], _, Lik, Lik, _).
+compute_parameters([Id-Samples|Dists], [Id-NewTable|Tables],  MDistTable, Lik0, Lik, LPs:MargVars) :-
+	empty_dist(Id, Table0),
+	add_samples(Samples, Table0, MDistTable),
+	soften_sample(Table0, SoftenedTable),
+	normalise_counts(SoftenedTable, NewTable),
+	compute_likelihood(Table0, NewTable, DeltaLik),
+	dist_new_table(Id, NewTable),
+	NewLik is Lik0+DeltaLik,
+	compute_parameters(Dists, Tables,  MDistTable, NewLik, Lik, LPs:MargVars).
+
+% add_samples(+Samples, +Table, +MDistTable): add the counts of every i/4
+% record to Table. A record with a single case and no hidden variables
+% contributes 1.0 to that case; otherwise the weights of the cases are
+% looked up in MDistTable under the record's list of hidden variables.
+add_samples([], _, _).
+add_samples([i(_,_,[Case],[])|Samples], Table, MDistTable) :- !,
+	matrix_add(Table,Case,1.0),
+	add_samples(Samples, Table, MDistTable).
+add_samples([i(_,_,Cases,Hiddens)|Samples], Table, MDistTable) :-
+	rb_lookup(Hiddens, Ps, MDistTable),
+	run_sample(Cases, Ps, Table),
+	add_samples(Samples, Table, MDistTable).
+
+% run_sample(+Cases, +Ps, +Table): add each weight of Ps to the entry of
+% Table indexed by the corresponding case. Both lists must have the same
+% length.
+run_sample([], [], _).
+run_sample([C|Cases], [P|Ps], Table) :-
+	matrix_add(Table, C, P),
+	run_sample(Cases, Ps, Table).
+
+
--- a/packages/CLPBN/learning/example/school_params.yap
+++ b/packages/CLPBN/learning/example/school_params.yap
@@ -0,0 +1,46 @@
+% learn distribution for school database.
+
+% we do not consider the aggregates yet.
+
+:- [pos:train].
+
+:- ['~/Yap/work/CLPBN/clpbn/examples/School/school_32'].
+
+:- ['~/Yap/work/CLPBN/learning/em'].
+
+% main: collect every solution of goal/1, run EM on them (tolerance 0.01,
+% at most 10 iterations) and print the likelihood and the learned tables.
+main :-
+        findall(X,goal(X),L),
+        em(L,0.01,10,CPTs,Lik),
+        writeln(Lik:CPTs).
+
+%
+% missing(P): threshold used by goal/1; an observed value is kept only when
+% a random draw is greater than P.
+% change to 0.05, 0.1, 0.2 to make things simpler/harder
+%
+missing(0.3).
+
+% goal(-Query): one query per fact of the pos module, for each of the six
+% attributes below. The value argument is bound to the observed value only
+% when a random draw exceeds the threshold given by missing/1; otherwise it
+% is left unbound. The same threshold applies to all six attributes.
+goal(professor_ability(P,V)) :-
+        pos:professor_ability(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
+goal(professor_popularity(P,V)) :-
+        pos:professor_popularity(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
+goal(registration_grade(P,V)) :-
+        pos:registration_grade(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
+goal(student_intelligence(P,V)) :-
+        pos:student_intelligence(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
+goal(course_difficulty(P,V)) :-
+        pos:course_difficulty(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
+goal(registration_satisfaction(P,V)) :-
+        pos:registration_satisfaction(P,V1),
+	missing(X),
+        ( random > X -> V = V1 ; true).
--- a/packages/CLPBN/learning/example/train.yap
+++ b/packages/CLPBN/learning/example/train.yap
--- a/packages/CLPBN/learning/learn_utils.yap
+++ b/packages/CLPBN/learning/learn_utils.yap
@@ -0,0 +1,100 @@
+%
+% Utilities for learning
+%
+
+:- module(clpbn_learn_utils, [run_all/1,
+			      clpbn_vars/2,
+			      normalise_counts/2,
+			      compute_likelihood/3,
+			      soften_sample/2,
+			      soften_sample/3]).
+
+:- use_module(library(clpbn),
+	      [clpbn_flag/2]).
+
+:- use_module(library('clpbn/table'),
+	      [clpbn_reset_tables/0]).
+
+:- use_module(library(matrix),
+	      [matrix_agg_lines/3,
+	       matrix_op_to_lines/4,
+	       matrix_agg_cols/3,
+	       matrix_op_to_cols/4,
+	       matrix_to_logs/2,
+	       matrix_op/4,
+	       matrix_sum/2,
+	       matrix_to_list/2,
+	       matrix_op_to_all/4]).
+
+:- meta_predicate run_all(:).
+
+% run_all(:Goals): run a list of goals. The first two clauses handle a
+% plain list and call each goal as it is. The third clause handles a
+% module-qualified list M:Gs: it resets the CLP(BN) tables and runs the
+% goals in module M through run_all/2.
+% NOTE(review): run_all/1 is declared as a meta-predicate above, so the
+% argument presumably always arrives in the M:Gs form -- confirm.
+run_all([]).
+run_all([G|Gs]) :-
+	call(G),
+	run_all(Gs).
+run_all(M:Gs) :-
+	clpbn_reset_tables,
+	run_all(Gs,M).
+
+% run_all(+Goals, +M): call each goal in module M. When a goal fails,
+% bad:M:G is printed and a break level is entered; the remaining goals are
+% run afterwards.
+run_all([],_).
+run_all([G|Gs],M) :-
+	( call(M:G) -> true ; writeln(bad:M:G), break),
+	run_all(Gs,M).
+
+% clpbn_vars(+Vs, -BVars): keep the variables of Vs that have a CLP(BN) key
+% attribute, sort them by key and merge variables that share a key.
+clpbn_vars(Vs,BVars) :-
+	get_clpbn_vars(Vs,CVs),
+	keysort(CVs,KVs),
+	merge_vars(KVs,BVars).
+	
+% get_clpbn_vars(+Vars, -Pairs): Pairs has a Key-Var entry for every
+% variable of Vars with a clpbn key(Key) attribute; other variables are
+% dropped.
+get_clpbn_vars([],[]).
+get_clpbn_vars([V|GVars],[K-V|CLPBNGVars]) :-
+	clpbn:get_atts(V, [key(K)]), !,
+	get_clpbn_vars(GVars,CLPBNGVars).
+get_clpbn_vars([_|GVars],CLPBNGVars) :-
+	get_clpbn_vars(GVars,CLPBNGVars).
+
+% merge_vars(+SortedPairs, -Vars): SortedPairs is a key-sorted list of
+% Key-Var pairs. Consecutive entries with the same key are unified into a
+% single variable, which appears once in Vars.
+merge_vars([],[]).
+merge_vars([K-V|KVs],[V|BVars]) :-
+	get_var_has_same_key(KVs,K,V,KVs0),
+	merge_vars(KVs0,BVars).
+	
+% get_var_has_same_key(+Pairs, +Key, ?Var, -Rest): consume the leading
+% entries of Pairs that unify with Key-Var (thereby unifying their
+% variables with Var); Rest is what remains.
+get_var_has_same_key([Key-Var|Pairs], Key, Var, Rest)  :- !,
+	get_var_has_same_key(Pairs, Key, Var, Rest).
+get_var_has_same_key(Rest, _, _, Rest).
+
+% soften_sample(+T0, -T): soften the count table T0 with the method named
+% by the parameter_softening flag (see soften_sample/3).
+soften_sample(T0,T) :-
+	clpbn_flag(parameter_softening, Soften),
+	soften_sample(Soften, T0, T).
+
+% soften_sample(+Method, +T0, -T): T is the count table T0 after
+% softening.
+%   no            - T is T0.
+%   m_estimate(M) - the column aggregates of T0 are multiplied by M and
+%                   the result is added to T0 column-wise.
+%   auto_m        - as m_estimate(M), with M the square root of the total
+%                   of the column aggregates.
+%   laplace       - 1 is added to every entry.
+soften_sample(no,T,T).
+soften_sample(m_estimate(M), T0, T) :-
+	matrix_agg_cols(T0,+,Cols),
+	matrix_op_to_all(Cols, *, M, R),
+	matrix_op_to_cols(T0,R,+,T).
+soften_sample(auto_m, T0,T) :-
+	matrix_agg_cols(T0,+,Cols),
+	matrix_sum(Cols,TotM),
+	M is sqrt(TotM),
+	matrix_op_to_all(Cols, *, M, R),
+	matrix_op_to_cols(T0,R,+,T).
+soften_sample(laplace,T0,T) :-
+	matrix_op_to_all(T0, +, 1, T).
+
+
+% normalise_counts(+MAT, -NMAT): aggregate MAT over lines with + and
+% divide MAT line-wise by the result.
+normalise_counts(MAT,NMAT) :-
+	matrix_agg_lines(MAT, +, Sum),
+	matrix_op_to_lines(MAT, Sum, /, NMAT).
+
+% compute_likelihood(+Table0, +NewTable, -DeltaLik): DeltaLik is the sum,
+% over all entries, of the count in Table0 times the logarithm of the
+% corresponding entry of NewTable.
+compute_likelihood(Table0, NewTable, DeltaLik) :-
+	matrix_to_logs(NewTable, Logs),
+	matrix_to_list(Table0,L1),
+	matrix_to_list(Logs,L2),
+	sum_prods(L1,L2,0,DeltaLik).
+
+% sum_prods(+Counts, +Logs, +Acc0, -Acc): Acc is Acc0 plus the sum of
+% Count*Log over corresponding elements of the two lists.
+% Entries with a zero count are skipped without touching the log value,
+% which may be the logarithm of zero. The test is arithmetic, so an
+% integer 0 is skipped just like 0.0.
+sum_prods([],[],DeltaLik,DeltaLik).
+sum_prods([Count|L1],[_|L2],DeltaLik0,DeltaLik) :-
+	Count =:= 0, !,
+	sum_prods(L1,L2,DeltaLik0,DeltaLik).
+sum_prods([Count|L1],[Log|L2],DeltaLik0,DeltaLik) :-
+	DeltaLik1 is DeltaLik0+Count*Log,
+	sum_prods(L1,L2,DeltaLik1,DeltaLik).
+
--- a/packages/CLPBN/learning/mle.yap
+++ b/packages/CLPBN/learning/mle.yap
@@ -0,0 +1,113 @@
+%
+% Maximum likelihood estimator and friends.
+%
+%
+% This assumes we have a single big example.
+%
+
+:- module(clpbn_mle, [learn_parameters/2,
+		      learn_parameters/3,
+		      parameters_from_evidence/3]).
+
+:- use_module(library('clpbn')).
+	      
+:- use_module(library('clpbn/learning/learn_utils'),
+	      [run_all/1,
+	       clpbn_vars/2,
+	       normalise_counts/2,
+	       soften_table/2,
+	       normalise_counts/2]).
+
+:- use_module(library('clpbn/dists'),
+	      [empty_dist/2,
+	       dist_new_table/2]).
+
+:- use_module(library(matrix),
+	      [matrix_inc/2]).
+
+
+% learn_parameters(+Items, -Tables): learn_parameters/3 with an empty list
+% of extras.
+learn_parameters(Items, Tables) :-
+	learn_parameters(Items, Tables, []).
+
+%
+% full evidence learning
+%
+% learn_parameters(+Items, -Tables, +Extras): run the goals in Items,
+% collect the CLP(BN) variables, turn them into a sorted sample (every
+% variable and every parent must have evidence, see add2sample/2) and
+% compute the tables from it. Extras is handed to compute_tables/3.
+learn_parameters(Items, Tables, Extras) :-
+	run_all(Items),
+	attributes:all_attvars(AVars),
+	% sort and incorporate evidence
+	clpbn_vars(AVars, AllVars),
+	mk_sample(AllVars, Sample),
+	compute_tables(Extras, Sample, Tables).
+
+% parameters_from_evidence(+AllVars, -Sample, +Extras): build the sorted
+% sample from the variables of AllVars that have evidence on themselves
+% and on all their parents, and compute the tables from it.
+% The computed tables are not returned (hence the underscore); the effect
+% of the call is the table update done inside estimator/2.
+parameters_from_evidence(AllVars, Sample, Extras) :-
+	mk_sample_from_evidence(AllVars, Sample),
+	compute_tables(Extras, Sample, _Tables).
+
+% mk_sample_from_evidence(+AllVars, -SortedSample): sample entries for the
+% variables accepted by add_evidence2sample/2, sorted with msort/2 (which
+% keeps duplicates).
+mk_sample_from_evidence(AllVars, SortedSample) :-
+	add_evidence2sample(AllVars, Sample),
+	msort(Sample, SortedSample).
+
+% mk_sample(+AllVars, -SortedSample): sample entries for all variables
+% (see add2sample/2), sorted with msort/2 (which keeps duplicates).
+mk_sample(AllVars, SortedSample) :-
+	add2sample(AllVars, Sample),
+	msort(Sample, SortedSample).
+
+%
+% assumes we have full data, meaning evidence for every variable 
+%
+% add2sample(+Vars, -Vals): one val(Id, [Ev|EParents]) entry per variable,
+% where Id is the variable's distribution, Ev its evidence and EParents the
+% evidence of its parents. Fails if any variable or parent lacks evidence.
+add2sample([],  []).
+add2sample([V|Vs],[val(Id,[Ev|EParents])|Vals]) :-
+	clpbn:get_atts(V, [evidence(Ev),dist(Id,Parents)]),
+	get_eparents(Parents, EParents),
+	add2sample(Vs, Vals).
+
+% get_eparents(+Parents, -EParents): the evidence value of each parent, in
+% order. Fails if a parent has no evidence.
+get_eparents([P|Parents], [E|EParents]) :-
+	clpbn:get_atts(P, [evidence(E)]),
+	get_eparents(Parents, EParents).
+get_eparents([], []).
+
+
+%
+% ignores variables without evidence, or without evidence
+% on a parent!
+%
+% add_evidence2sample(+Vars, -Vals): like add2sample/2, but a variable that
+% lacks evidence itself or on one of its parents is skipped instead of
+% making the whole call fail.
+add_evidence2sample([],  []).
+add_evidence2sample([V|Vs],[val(Id,[Ev|EParents])|Vals]) :-
+	clpbn:get_atts(V, [evidence(Ev),dist(Id,Parents)]),
+	get_eveparents(Parents, EParents), !,
+	add_evidence2sample(Vs, Vals).
+add_evidence2sample([_|Vs],Vals) :-
+	add_evidence2sample(Vs, Vals).
+
+% get_eveparents(+Parents, -EParents): the evidence value of each parent,
+% in order. Fails if a parent has no evidence, which makes
+% add_evidence2sample/2 skip the variable.
+% The recursion now goes through this predicate itself rather than through
+% the identically defined get_eparents/2, so the two can evolve
+% independently.
+get_eveparents([P|Parents], [E|EParents]) :-
+	clpbn:get_atts(P, [evidence(E)]),
+	get_eveparents(Parents, EParents).
+get_eveparents([], []).
+
+
+% compute_tables(+Parameters, +Sample, -NewTables): estimate one table per
+% distribution from the sorted Sample and then combine the result with
+% Parameters through add_priors/3.
+% NOTE(review): add_priors/3 is neither defined nor imported in the part
+% of this file that is visible here -- confirm where it comes from.
+compute_tables(Parameters, Sample, NewTables) :-
+	estimator(Sample, Tables),
+	add_priors(Parameters, Tables, NewTables).
+
+% estimator(+Samples, -Tables): Samples is a sorted list of val(Id, Case)
+% entries. For each distribution id the consecutive entries with that id
+% are counted into an empty table, the counts are softened and normalised,
+% and the result both replaces the table of the distribution and is added
+% to Tables.
+estimator([], []).
+estimator([val(Id,Sample)|Samples], [NewDist|Tables]) :-
+	empty_dist(Id, NewTable),
+	id_samples(Id, Samples, IdSamples, MoreSamples),
+	mle([Sample|IdSamples], NewTable),
+	% the softening predicate exported by clpbn_learn_utils is
+	% soften_sample/2; there is no soften_table/2 in that module, so it
+	% is called here with an explicit module qualification.
+	clpbn_learn_utils:soften_sample(NewTable, SoftenedTable),
+	normalise_counts(SoftenedTable, NewDist),
+	% replace matrix in distribution
+	dist_new_table(Id, NewDist),
+	estimator(MoreSamples, Tables).
+
+
+% id_samples(+Id, +Samples, -Cases, -Rest): Cases holds the cases of the
+% leading val(Id, Case) entries of Samples; Rest is the remainder of the
+% list, starting at the first entry with a different id.
+id_samples(_, [], [], []).
+id_samples(Id, [val(Id,Case)|Others], [Case|Cases], Rest) :- !,
+	id_samples(Id, Others, Cases, Rest).
+id_samples(_, Rest, [], Rest).
+
+% mle(+Samples, +Table): call matrix_inc/2 on Table once for every sample,
+% using the sample as the index.
+mle([Sample|IdSamples], Table) :-
+	matrix_inc(Table, Sample),
+	mle(IdSamples, Table).
+mle([], _).
+