update package locations to a subdir packages

This commit is contained in:
Vitor Santos Costa
2009-02-16 12:23:29 +00:00
parent 495ff55868
commit 9c9444bece
151 changed files with 62955 additions and 0 deletions

View File

@@ -0,0 +1,306 @@
%
% Interface the Aleph ILP system to CLP(BN)
%
% Relies on the Aleph cost function.
% It assumes Aleph works as usual, but some variables are of type random.
%
:- module(clpbn_aleph,
[init_clpbn_cost/0,
random_type/2]).
:- dynamic rt/2, inited/1.
:- use_module(library('clpbn'),
[{}/1,
clpbn_flag/2,
clpbn_flag/3,
set_clpbn_flag/2]).
:- use_module(library('clpbn/learning/em')).
:- use_module(library('clpbn/matrix_cpt_utils'),
[uniform_CPT_as_list/2]).
:- use_module(library('clpbn/dists'),
[reset_all_dists/0,
get_dist_key/2,
get_dist_params/2
]).
:- use_module(library('clpbn/table'),
[clpbn_tabled_abolish/1,
clpbn_tabled_asserta/1,
clpbn_tabled_asserta/2,
clpbn_tabled_assertz/1,
clpbn_tabled_clause/2,
clpbn_tabled_number_of_clauses/2,
clpbn_is_tabled/1,
clpbn_tabled_dynamic/1]).
%
% Tell Aleph not to use default solver during saturation
%
% all work will be done by EM
:- set_clpbn_flag(solver,none).
%
% This is the Aleph interface
% examples are stored as example(Id, Type, Example)
% CPT domains are stored as random_type(KeySkeleton, ListOfValues).
%
:- use_module(library(lists),[append/3]).
:- multifile user:cost/3.
% handle uninstantiated examples as hidden variables.
% (the Aleph setting skolem_examples is switched off for this purpose)
:- user:set(skolem_examples, false).
% avoid doing CLP(BN) stuff except at start
% The saturation and reduction hooks point at disable_solver/enable_solver,
% which park the current CLP(BN) solver flag and put it back afterwards.
% NOTE(review): the arity with which Aleph calls each hook is not visible
% here; this file defines disable_solver/1,2 and enable_solver/0,2 - confirm.
:- user:set(sat_start_hook, clpbn_aleph:disable_solver).
:- user:set(sat_stop_hook, clpbn_aleph:enable_solver).
:- user:set(reduce_start_hook, clpbn_aleph:disable_solver).
:- user:set(reduce_stop_hook, clpbn_aleph:enable_solver).
% do_nothing/1 is a no-op that ignores its argument.
:- user:set(record_testclause_hook, clpbn_aleph:do_nothing).
% store_theory/3 backs the current theory up in the internal database;
% presumably Aleph calls it whenever a new best clause is found.
:- user:set(newbest_hook, clpbn_aleph:store_theory).
% disable_solver(_)
% Hook: set the CLP(BN) solver flag to none and remember the solver that
% was active in the global variable old_clpbn_solver.
disable_solver(_Arg) :-
    clpbn_flag(solver, Previous, none),
    nb_setval(old_clpbn_solver, Previous).

% disable_solver(_, _)
% Same hook for callers that pass two arguments; both are ignored.
disable_solver(Arg, _) :-
    disable_solver(Arg).

% enable_solver
% Hook: put back the solver saved by disable_solver.
enable_solver :-
    nb_getval(old_clpbn_solver, Saved),
    set_clpbn_flag(solver, Saved).

% enable_solver(_, _)
% Same hook for callers that pass two arguments; both are ignored.
enable_solver(_, _) :-
    enable_solver.

% do_nothing(_)
% Hook that accepts anything and has no effect.
do_nothing(_).
% Backup of the current best theory in the internal database, under the
% key best_theory.
%
% store_theory(_, +Clause, _)
% Registered in this file as Aleph's newbest_hook. The first clause only
% erases the previous backup and fails, so every call continues with the
% second clause, which handles clauses of the form (Head :- Body) and
% dispatches on whether the head predicate is tabled.
store_theory(_, _, _) :-
    eraseall(best_theory),
    fail.
store_theory(_, (Head :- _), _) :-
    (   clpbn_is_tabled(user:Head)
    ->  store_tabled_theory(Head)
    ;   store_theory(Head)
    ).

% store_tabled_theory(+Head)
% Failure-driven loop over the tabled clauses of Head: each body gets its
% CPT refreshed and the clause is recorded. Always succeeds.
store_tabled_theory(Head) :-
    clpbn_tabled_clause(user:Head, Body),
    add_correct_cpt(Body, FixedBody),
    store_cl((Head :- FixedBody)),
    fail.
store_tabled_theory(_).

% store_theory(+Head)
% Same loop for ordinary (non-tabled) clauses in module user.
store_theory(Head) :-
    clause(user:Head, Body),
    add_correct_cpt(Body, FixedBody),
    store_cl((Head :- FixedBody)),
    fail.
store_theory(_).
% add_correct_cpt(+Body, -NewBody)
% Walks down a conjunction to its last goal, which must be a constraint
% { Var = Key with Table }, optionally qualified with clpbn:. The table is
% replaced through correct_tab/3; the clpbn: qualification is not kept in
% the result. Any other final goal makes the predicate fail.
add_correct_cpt((Goal, Rest), (Goal, NewRest)) :- !,
    add_correct_cpt(Rest, NewRest).
add_correct_cpt((clpbn:{Var = Key with Table }), ({Var = Key with NewTable})) :-
    correct_tab(Table, Key, NewTable).
add_correct_cpt(({Var = Key with Table }), ({Var = Key with NewTable})) :-
    correct_tab(Table, Key, NewTable).

% correct_tab(+Table, +Key, -NewTable)
% Rebuilds a p/2 or p/3 table term: the second argument is replaced by the
% parameters that get_dist_params/2 returns for the distribution whose key
% is Key; the other arguments are copied unchanged.
correct_tab(p(Domain, _), Key, p(Domain, Params)) :-
    get_dist_key(DistId, Key),
    get_dist_params(DistId, Params).
correct_tab(p(Domain, _, Parents), Key, p(Domain, Params, Parents)) :-
    get_dist_key(DistId, Key),
    get_dist_params(DistId, Params).
% store_cl(+Clause)
% Appends Clause to the entries recorded under the key best_theory.
store_cl(Clause) :-
    recordz(best_theory, Clause, _Ref).
:- user:set(best_clause_hook, clpbn_aleph:add_new_clause).

% add_new_clause(_, +Clause, _, _)
% Registered above as Aleph's best_clause_hook. Clauses with body true are
% ignored. For any other (H :- B) the body is rewritten so that it ends
% with a cut and the CLP(BN) constraint for H's random variable, using the
% parameters currently stored for the key, and the new clause is added in
% front of the existing clauses of H (tabled or plain). When Aleph's
% verbosity setting is at least 1 the resulting predicate is listed.
add_new_clause(_,(_ :- true),_,_) :- !.
add_new_clause(_,(H :- B),_,_) :-
    user:db_usage,
    user:db_dynamic,
    domain(H, K, V, D),
    rewrite_body(B, IB, Vs, _, ( !, { V = K with p(D, CPTList, Vs) })),
    % need to remember which CPT we want
    get_dist_key(Id, K),
    get_dist_params(Id, CPTList),
    (   clpbn_is_tabled(user:H)
    ->  clpbn_tabled_asserta(user:(H :- IB))
    ;   asserta(user:(H :- IB))
    ),
    % The verbosity level needs its own variable: V is the random variable
    % taken from the head of the clause, so reusing it here would unify the
    % head's last argument with the verbosity setting (or fail against it).
    user:setting(verbosity, Verbosity),
    (   Verbosity >= 1
    ->  user:p_message('CLP(BN) Theory'),
        functor(H,N,Ar), listing(user:N/Ar)
    ;   true
    ).
% user-defined cost function, Aleph knows about this (and only about this).
%
% user:cost(+Clause, +Info, -Score)
% For a clause (Head :- Body): the body is rewritten to end in a cut plus
% the CLP(BN) constraint with a uniform CPT, the clause is asserted
% temporarily in front of Head's clauses, cpt_score/1 is run, and the
% temporary clause is erased again in every case. Score is the negated
% result of cpt_score/1; if cpt_score/1 fails the clause is treated as
% illegal and cost/3 fails.
% For anything else the score comes from init_clpbn_cost/2, negated.
user:cost((Head :- Body), Info, Score) :- !,
    domain(Head, Key, RV, Dom),
    check_info(Info),
    rewrite_body(Body, NewBody, Parents, ParentDoms,
                 ( !, { RV = Key with p(Dom, CPT, Parents) })),
    uniform_cpt([Dom|ParentDoms], CPT),
    (   clpbn_is_tabled(user:Head)
    ->  clpbn_tabled_asserta(user:(Head :- NewBody), Ref)
    ;   asserta(user:(Head :- NewBody), Ref)
    ),
    (   cpt_score(Lik)
    ->  Outcome = scored(Lik)
    ;   % illegal clause, just get out of here.
        Outcome = illegal
    ),
    erase(Ref),
    Outcome = scored(Lik0),
    Score is -Lik0.
user:cost(Head, _Info, Score) :- !,
    init_clpbn_cost(Head, Lik),
    Score is -Lik.

% this is here so that Aleph will actually compute coverage. Aleph computes
% coverage only if cost actually checks Inf.
check_info(_).
% init_clpbn_cost(+Head, -Score)
% Computes the score of the initial theory once and caches it in inited/1;
% later calls return the cached value regardless of Head.
% On the first call the predicate of Head is wiped and made dynamic (or
% tabled-dynamic), a single default clause with a uniform CPT is added,
% and cpt_score/1 provides the score.
init_clpbn_cost(_, Score) :-
    inited(Score), !.
init_clpbn_cost(Head, Score) :-
    functor(Head, Name, Arity),
    % get rid of whatever Aleph left behind for this predicate
    (   clpbn_is_tabled(user:Head)
    ->  clpbn_tabled_abolish(user:Name/Arity),
        clpbn_tabled_dynamic(user:Name/Arity)
    ;   abolish(user:Name/Arity),
        % make it easy to add and remove clauses.
        dynamic(user:Name/Arity)
    ),
    domain(Head, Key, RV, Dom),
    uniform_cpt([Dom], CPT),
    % This will be the default clause, called when the other rules fail.
    (   clpbn_is_tabled(user:Head)
    ->  clpbn_tabled_assertz(user:(Head :- !, { RV = Key with p(Dom, CPT) }))
    ;   assert(user:(Head :- !, { RV = Key with p(Dom, CPT) }))
    ),
    cpt_score(Score),
    assert(inited(Score)).
% domain(+H, -K, -RV, -D)
% receives H, and generates a key K, a random variable RV, and a domain D.
%
% First choice: H has a modeh declaration (looked up in the recorded
% database under aleph, or else in user:'$aleph_global'/2) whose last
% argument is +Type, and Type was declared through random_type/2; D is
% then the list of values stored for Type.
% Fallback: the user supplies the domain through user:domain(Key, Domain).
domain(H, K, RV, D) :-
    functor(H,Name,Arity),
    functor(Pred,Name,Arity),
    (   recorded(aleph,modeh(_,Pred),_)
    ->  true
    ;   user:'$aleph_global'(modeh,modeh(_,Pred))
    ),
    arg(Arity,Pred,+RType),
    rt(RType,D), !,
    key_from_head(H,K,RV).
domain(H, K, V, D) :-
    % The guard must test the predicate that is actually called below,
    % i.e. domain/2 (key, domain), not domain/1.
    current_predicate(_, user:domain(_,_)),
    key_from_head(H,K,V),
    user:domain(K,D).
% key_from_head(+Head, -Key, -RV)
% RV is the last argument of Head. Key has the remaining arguments of Head
% and a functor name made by concatenating Head's name with the number of
% clauses its predicate currently has (tabled or plain), so that each new
% clause gets a key of its own.
key_from_head(Head, Key, RV) :-
    Head =.. [Name|Args],
    (   clpbn_is_tabled(user:Head)
    ->  clpbn_tabled_number_of_clauses(user:Head, Count)
    ;   predicate_property(user:Head, number_of_clauses(Count))
    ),
    atomic_concat(Name, Count, KeyName),
    append(KeyArgs, [RV], Args),
    Key =.. [KeyName|KeyArgs].
% rewrite_body(+Body, -NewBody, -RVs, -Domains, +Tail)
% Turns an Aleph clause body into the body that will actually be called.
% Every goal is qualified with user:. Goals accepted by rewrite_goal/4 are
% replaced by their rewritten version and contribute one variable to RVs
% and one domain to Domains; all other goals are copied as they are.
% Tail (the constraint to run last) is appended after the final goal.
rewrite_body((Goal, Rest), (user:NewGoal, NewRest), [RV|RVs], [Dom|Doms], Tail) :-
    rewrite_goal(Goal, RV, Dom, NewGoal), !,
    rewrite_body(Rest, NewRest, RVs, Doms, Tail).
rewrite_body((Goal, Rest), (user:Goal, NewRest), RVs, Doms, Tail) :-
    rewrite_body(Rest, NewRest, RVs, Doms, Tail).
rewrite_body(Goal, (user:NewGoal, Tail), [RV], [Dom], Tail) :-
    rewrite_goal(Goal, RV, Dom, NewGoal), !.
rewrite_body(Goal, (user:Goal, Tail), [], [], Tail).
% rewrite_goal(+Goal, -RV, -Domain, -NewGoal)
% Succeeds only for body goals with a modeb declaration (looked up in the
% recorded database under aleph, or else in user:'$aleph_global'/2) whose
% last argument is -Type, where Type was declared through random_type/2.
% NewGoal is Goal with its last argument replaced by the fresh variable RV,
% and Domain is the value list stored for Type. All other goals make the
% predicate fail, so they need not be rewritten.
rewrite_goal(Goal, RV, Dom, NewGoal) :-
    functor(Goal, Name, Arity),
    functor(Mode, Name, Arity),
    (   recorded(aleph, modeb(_, Mode), _)
    ->  true
    ;   user:'$aleph_global'(modeb, modeb(_, Mode))
    ),
    arg(Arity, Mode, -Type),
    rt(Type, Dom), !,
    Goal =.. [Name|Args],
    replace_last_var(Args, RV, NewArgs),
    NewGoal =.. [Name|NewArgs].
% replace_last_var(+List, +New, -NewList)
% NewList is List with its last element replaced by New.
% Fails for the empty list.
replace_last_var([_Last], New, [New]) :- !.
replace_last_var([Item|Items], New, [Item|NewItems]) :-
    replace_last_var(Items, New, NewItems).
%
% This is the key
%
% cpt_score(-Lik)
% Scores the current theory: all positive Aleph examples are collected,
% the solver flag is switched to the EM solver, all distributions are
% reset, and em/5 is run with a tolerance of 0.01 and an iteration
% parameter of 10. Lik is the likelihood returned by em/5.
%
% The previous solver is put back on success AND on failure: the cost
% function relies on this predicate failing for illegal clauses, and that
% failure must not leave the EM solver installed as the active solver.
cpt_score(Lik) :-
    findall(user:Ex, user:example(_,pos,Ex), Exs),
    clpbn_flag(solver, Solver),
    clpbn_flag(em_solver, EMSolver),
    set_clpbn_flag(solver, EMSolver),
    (   reset_all_dists,
        em(Exs, 0.01, 10, _Tables, Lik0)
    ->  set_clpbn_flag(solver, Solver),
        Lik = Lik0
    ;   set_clpbn_flag(solver, Solver),
        fail
    ).
% complete_clpbn_cost(_)
% Placeholder: succeeds for any argument and does nothing.
complete_clpbn_cost(_Clause).

% random_type(+Type, +Values)
% Declares Type as a random type whose domain is Values by storing an
% rt/2 fact; domain/4 and rewrite_goal/4 look these facts up.
random_type(Type, Values) :-
    assert(rt(Type, Values)).
% uniform_cpt(+Domains, -CPTList)
% Builds a uniform CPT, as a list, from the sizes of the given domains.
uniform_cpt(Domains, CPTList) :-
    lengths(Domains, Sizes),
    uniform_CPT_as_list(Sizes, CPTList).

% lengths(+Lists, -Lengths)
% Lengths holds the length of each list in Lists, in the same order.
lengths([], []).
lengths([List|Lists], [Size|Sizes]) :-
    length(List, Size),
    lengths(Lists, Sizes).

View File

@@ -0,0 +1,121 @@
%
% Learn parameters using the BNT toolkit
%
:- yap_flag(unknown,error).
:- style_check(all).
:- module(bnt_parameters, [learn_parameters/2]).
:- use_module(library('clpbn'), [
clpbn_flag/3]).
:- use_module(library('clpbn/bnt'), [
create_bnt_graph/2]).
:- use_module(library('clpbn/display'), [
clpbn_bind_vals/3]).
:- use_module(library('clpbn/dists'), [
get_dist_domain/2
]).
:- use_module(library(matlab), [matlab_initialized_cells/4,
matlab_call/2,
matlab_get_variable/2
]).
% Iteration limit passed to learn_params_em; dynamic, so it can be
% replaced at run time.
:- dynamic bnt_em_max_iter/1.
bnt_em_max_iter(10).

% syntactic sugar for matlab_call.
% Target <-- Expr calls matlab_call(Expr, Target).
:- op(800,yfx,<--).
Target <-- Expr :-
    matlab_call(Expr, Target).
% learn_parameters(+Items, -Tables)
% Runs the goals in Items, builds a BNT graph over the resulting CLP(BN)
% variables, loads the evidence as a sample, lets BNT learn the parameters
% and reads the new tables back.
% While it runs, the solver flag is set to bnt and the bnt_model flag to
% tied; the previous values are put back at the end of a successful run.
learn_parameters(Goals, Tables) :-
    run_all(Goals),
    clpbn_flag(solver, SavedSolver, bnt),
    clpbn_flag(bnt_model, SavedModel, tied),
    attributes:all_attvars(AttVars),
    % sort and incorporate evidence
    clpbn_vars(AttVars, Vars),
    length(Vars, NVars),
    create_bnt_graph(Vars, Reps),
    mk_sample(Vars, NVars, EvCount),
    bnt_learn_parameters(NVars, EvCount),
    get_parameters(Reps, Tables),
    clpbn_flag(solver, bnt, SavedSolver),
    clpbn_flag(bnt_model, tied, SavedModel).
% run_all(+Goals)
% Calls every goal of the list in module user, in order; fails as soon as
% one of them fails.
run_all([]).
run_all([Goal|Goals]) :-
    call(user:Goal),
    run_all(Goals).
% clpbn_vars(+AttVars, -Vars)
% Keeps the variables that carry a clpbn key attribute, sorts them by key
% and merges variables that share a key.
clpbn_vars(AttVars, Vars) :-
    get_clpbn_vars(AttVars, Keyed),
    keysort(Keyed, Sorted),
    merge_vars(Sorted, Vars).

% get_clpbn_vars(+AttVars, -Pairs)
% Pairs is a Key-Var list for the variables with a key attribute;
% variables without one are skipped.
get_clpbn_vars([], []).
get_clpbn_vars([Var|Vars], [Key-Var|Pairs]) :-
    clpbn:get_atts(Var, [key(Key)]), !,
    get_clpbn_vars(Vars, Pairs).
get_clpbn_vars([_|Vars], Pairs) :-
    get_clpbn_vars(Vars, Pairs).

% merge_vars(+SortedPairs, -Vars)
% Emits one variable per run of pairs; see get_var_has_same_key/4.
merge_vars([], []).
merge_vars([Key-Var|Pairs], [Var|Vars]) :-
    get_var_has_same_key(Pairs, Key, Var, Remaining),
    merge_vars(Remaining, Vars).

% get_var_has_same_key(+Pairs, +Key, ?Var, -Remaining)
% Drops the leading pairs that unify with Key-Var (unifying their
% variables with Var) and returns what is left.
get_var_has_same_key([Key-Var|Pairs], Key, Var, Remaining) :- !,
    get_var_has_same_key(Pairs, Key, Var, Remaining).
get_var_has_same_key(Pairs, _, _, Pairs).
% mk_sample(+AllVars, +NVars, -NEvidence)
% Collects the evidence of AllVars as val/3 terms, stores them through
% matlab_initialized_cells/4 under the name sample (NVars by 1), and
% returns how many evidence entries there were.
mk_sample(AllVars, NVars, NEvidence) :-
    add2sample(AllVars, Entries),
    length(Entries, NEvidence),
    matlab_initialized_cells( NVars, 1, Entries, sample).
% add2sample(+Vars, -Entries)
% For every variable with both evidence and a distribution, emits
% val(BntId, 1, Index), where Index is the 1-based position of the
% evidence value in the distribution's domain. Other variables are
% skipped.
add2sample([], []).
add2sample([Var|Vars], [val(BntId,1,Index)|Entries]) :-
    clpbn:get_atts(Var, [evidence(Ev),dist(DistId,_)]), !,
    bnt:get_atts(Var, [bnt_id(BntId)]),
    get_dist_domain(DistId, Domain),
    evidence_val(Ev, 1, Domain, Index),
    add2sample(Vars, Entries).
add2sample([_Var|Vars], Entries) :-
    add2sample(Vars, Entries).

% evidence_val(+Ev, +Start, +Domain, -Index)
% Index is Start plus the offset of the first element of Domain that
% unifies with Ev; fails if there is none.
evidence_val(Ev, Index, [Ev|_], Index) :- !.
evidence_val(Ev, Pos, [_|Domain], Index) :-
    Next is Pos+1,
    evidence_val(Ev, Next, Domain, Index).
% bnt_learn_parameters(_, _)
% Creates a junction-tree inference engine for the Matlab variable bnet
% and runs learn_params_em on the Matlab variable sample, with the
% iteration limit taken from bnt_em_max_iter/1. The results are left in
% the Matlab variables new_bnet and trace. Both arguments are ignored.
bnt_learn_parameters(_, _) :-
    engine <-- jtree_inf_engine(bnet),
    % alternative engines, kept for reference:
    % engine <-- var_elim_inf_engine(bnet),
    % engine <-- gibbs_sampling_inf_engine(bnet),
    % engine <-- belprop_inf_engine(bnet),
    % engine <-- pearl_inf_engine(bnet),
    bnt_em_max_iter(Limit),
    [new_bnet, trace] <-- learn_params_em(engine, sample, Limit).
% get_parameters(+Reps, -CPTs)
% Reps is a list of Rep-v(_,_,_) pairs; CPTs holds, in the same order, the
% table fetched for each Rep by get_new_table/2.
get_parameters([], []).
get_parameters([Node-v(_,_,_)|Nodes], [Table|Tables]) :-
    get_new_table(Node, Table),
    get_parameters(Nodes, Tables).

% get_new_table(+Rep, -CPT)
% Copies the CPD entry of new_bnet selected by Rep into the Matlab
% variable s and reads the CPT field of s back.
get_new_table(Node, Table) :-
    s <-- struct(new_bnet.'CPD'({Node})),
    matlab_get_variable( s.'CPT', Table).

View File

@@ -0,0 +1,229 @@
%
% The world famous EM algorithm, in a nutshell
%
:- module(clpbn_em, [em/5]).
:- use_module(library(lists),
[append/3]).
:- use_module(library(clpbn),
[clpbn_init_solver/5,
clpbn_run_solver/4,
clpbn_flag/2]).
:- use_module(library('clpbn/dists'),
[get_dist_domain_size/2,
empty_dist/2,
dist_new_table/2,
get_dist_key/2,
randomise_all_dists/0,
uniformise_all_dists/0]).
:- use_module(library('clpbn/connected'),
[clpbn_subgraphs/2]).
:- use_module(library('clpbn/learning/learn_utils'),
[run_all/1,
clpbn_vars/2,
normalise_counts/2,
compute_likelihood/3,
soften_sample/2]).
:- use_module(library(lists),
[member/2]).
:- use_module(library(matrix),
[matrix_add/3,
matrix_to_list/2]).
:- use_module(library(rbtrees),
[rb_new/1,
rb_insert/4,
rb_lookup/3]).
:- use_module(library('clpbn/utils'),
[
check_for_hidden_vars/3,
sort_vars_by_key/3]).
:- meta_predicate em(:,+,+,-,-), init_em(:,-).

% em(:Items, +MaxError, +MaxIts, -Tables, -Likelihood)
% Runs EM over the goals in Items. The first clause does all the work,
% stores the answer in em_found/2 and then fails, which undoes every
% binding made while running; the second clause hands the stored answer
% back.
em(Goals, MaxError, MaxIts, Tables, Likelihood) :-
    catch(init_em(Goals, State), Err, handle_em(Err)),
    em_loop(0, 0.0, State, MaxError, MaxIts, Likelihood, Tables),
    assert(em_found(Tables, Likelihood)),
    fail.
% get rid of new random variables the easy way :)
em(_, _, _, Tables, Likelihood) :-
    retract(em_found(Tables, Likelihood)).

% handle_em(+Error)
% Recovery goal for the catch/3 above. For error(repeated_parents) an
% answer with unbound tables and likelihood -inf is stored, then the goal
% fails. There is no clause for other error terms, so they make the
% recovery goal (and hence the catch/3 call) fail as well.
handle_em(error(repeated_parents)) :-
    assert(em_found(_, -inf)),
    fail.
% init_em(:Items, -State)
% This gets you an initial configuration: the goals in Items are run and
% all distributions are made uniform.
%
% State = state(Dists, DistInstances, MargVars, SolverState) collects all
% the info the EM loop needs:
%   Dists          the distributions whose parameters are to be learned,
%   DistInstances  for each distribution, the list of all its instances,
%   MargVars       the groups of variables without evidence,
%   SolverState    whatever clpbn_init_solver/5 returns for the em_solver.
init_em(Goals, state( Dists, DistInstances, MargVars, SolverState)) :-
    run_all(Goals),
    % randomise_all_dists,
    uniformise_all_dists,
    attributes:all_attvars(AttVars),
    sort_vars_by_key(AttVars, Vars, []),
    % remove variables that do not have to do with this query.
    % check_for_hidden_vars(AllVars1, AllVars1, AllVars),
    different_dists(Vars, Dists, DistInstances, MargVars),
    clpbn_flag(em_solver, Solver),
    clpbn_init_solver(Solver, MargVars, Vars, _, SolverState).
% em_loop(+Its, +Likelihood0, +State, +MaxError, +MaxIts, -LikelihoodF, -FTables)
% loop for as long as you want.
% One iteration is an estimate/2 step followed by a maximise/4 step. The
% loop stops when the relative change of the likelihood drops below
% MaxError or when the iteration counter reaches MaxIts; the final tables
% are then converted by ltables/2.
em_loop(Its, Likelihood0, State, MaxError, MaxIts, LikelihoodF, FTables) :-
    estimate(State, LPs),
    maximise(State, Tables, LPs, Likelihood),
    % writeln(Likelihood:Its:Likelihood0:Tables),
    (
        (
            % A likelihood of exactly 0 cannot be used as the divisor of
            % the relative change; in that case we have converged only if
            % the previous value was 0 too.
            (   Likelihood =:= 0
            ->  Likelihood0 =:= 0
            ;   abs((Likelihood - Likelihood0)/Likelihood) < MaxError
            )
        ;
            % >= rather than == so that a limit that is not an exact
            % integer match (a float, or a value already exceeded) still
            % ends the loop.
            Its >= MaxIts
        )
    ->
        ltables(Tables, FTables),
        LikelihoodF = Likelihood
    ;
        Its1 is Its+1,
        em_loop(Its1, Likelihood, State, MaxError, MaxIts, LikelihoodF, FTables)
    ).
% ltables(+IdTables, -KeyTables)
% Converts each Id-Matrix pair into Key-List: the matrix becomes a list
% and the distribution id is replaced by its key.
ltables([], []).
ltables([DistId-Matrix|Rest], [Key-Values|Converted]) :-
    matrix_to_list(Matrix, Values),
    get_dist_key(DistId, Key),
    ltables(Rest, Converted).
% different_dists(+AllVars, -AllDists, -AllInfo, -MargVars)
% collect the different dists we are going to learn next.
% AllDists lists the distribution ids, AllInfo pairs each id with its
% instances, and MargVars is the sorted list of hidden-variable groups
% gathered while grouping.
different_dists(AllVars, AllDists, AllInfo, MargVars) :-
    all_dists(AllVars, Instances),
    sort(Instances, Ordered),
    group(Ordered, AllDists, AllInfo, Unsorted, []),
    sort(Unsorted, MargVars).
% all_dists(+Vars, -Instances)
% For every variable builds i(DistId, [Var|Parents], Cases, Hiddens):
% Hiddens are the members of [Var|Parents] without evidence and Cases are
% all value combinations consistent with the evidence.
% Throws error(repeated_parents) when [Var|Parents] contains duplicates.
all_dists([], []).
all_dists([Var|Vars], [i(DistId, [Var|Parents], Cases, Hiddens)|Instances]) :-
    clpbn:get_atts(Var, [dist(DistId,Parents)]),
    sort([Var|Parents], Distinct),
    length(Distinct, NDistinct),
    length(Parents, NParents),
    % sort/2 removes duplicates, so a shorter list means a repeated parent
    (   NParents+1 =\= NDistinct
    ->  throw(error(repeated_parents))
    ;   true
    ),
    generate_hidden_cases([Var|Parents], Compact, Hiddens),
    uncompact_cases(Compact, Cases),
    all_dists(Vars, Instances).
% generate_hidden_cases(+Vars, -CompactCases, -Hiddens)
% For a variable with evidence the compact case is the evidence itself.
% For a variable without evidence it is the list 0..Size-1 of its
% distribution's domain indices, and the variable is added to Hiddens.
generate_hidden_cases([], [], []).
generate_hidden_cases([Var|Vars], [Ev|Cases], Hiddens) :-
    clpbn:get_atts(Var, [evidence(Ev)]), !,
    generate_hidden_cases(Vars, Cases, Hiddens).
generate_hidden_cases([Var|Vars], [Options|Cases], [Var|Hiddens]) :-
    clpbn:get_atts(Var, [dist(DistId,_)]),
    get_dist_domain_size(DistId, Size),
    gen_cases(0, Size, Options),
    generate_hidden_cases(Vars, Cases, Hiddens).

% gen_cases(+From, +To, -List)
% List is From, From+1, ..., To-1.
gen_cases(Limit, Limit, []) :- !.
gen_cases(Current, Limit, [Current|More]) :-
    Next is Current+1,
    gen_cases(Next, Limit, More).
% uncompact_cases(+CompactCases, -Cases)
% Expands a compact case (integers mixed with lists of alternatives) into
% the list of all concrete cases.
uncompact_cases(Compact, Cases) :-
    findall(Concrete, is_case(Compact, Concrete), Cases).

% is_case(+CompactCase, -Case)
% Integers are copied; for anything else one member is chosen, with the
% other members as alternatives on backtracking.
is_case([], []).
is_case([Fixed|Compact], [Fixed|Case]) :-
    integer(Fixed), !,
    is_case(Compact, Case).
is_case([Options|Compact], [Choice|Case]) :-
    member(Choice, Options),
    is_case(Compact, Case).
% group(+Instances, -Ids, -Info)//
% Instances is a list of i(Id, Vars, Cases, Hiddens) terms with equal ids
% adjacent. Ids receives each id once and Info pairs each id with its
% instances. The DCG output list collects the Hiddens list of every
% instance that has hidden variables.
group([], [], []) --> [].
group([i(Id,Vars,Cases,[])|More], [Id|Ids], [Id-[i(Id,Vars,Cases,[])|Same]|Info]) --> !,
    same_id(More, Id, Same, Others),
    group(Others, Ids, Info).
group([i(Id,Vars,Cases,Hidden)|More], [Id|Ids], [Id-[i(Id,Vars,Cases,Hidden)|Same]|Info]) -->
    [Hidden],
    same_id(More, Id, Same, Others),
    group(Others, Ids, Info).

% same_id(+Instances, +Id, -Same, -Others)//
% Same is the leading run of instances with the given Id and Others is
% what follows it; Hiddens lists are emitted as in group//3.
same_id([i(Id,Vars,Cases,[])|More], Id, [i(Id, Vars, Cases, [])|Same], Others) --> !,
    same_id(More, Id, Same, Others).
same_id([i(Id,Vars,Cases,Hidden)|More], Id, [i(Id, Vars, Cases, Hidden)|Same], Others) --> !,
    [Hidden],
    same_id(More, Id, Same, Others).
same_id(Instances, _, [], Instances) --> [].
% compact_mvars(+List, -Compacted)
% Removes adjacent elements that are identical (==), keeping one of each
% run.
compact_mvars([], []).
compact_mvars([First,Second|Rest], Compacted) :- First == Second, !,
    compact_mvars([Second|Rest], Compacted).
compact_mvars([Item|Rest], [Item|Compacted]) :- !,
    compact_mvars(Rest, Compacted).
% estimate(+State, -LPs)
% E-step: runs the solver named by the em_solver flag on the marginal
% variables of State, using the solver state stored there.
estimate(state(_, _, MargVars, SolverState), LPs) :-
    clpbn_flag(em_solver, Solver),
    clpbn_run_solver(Solver, MargVars, LPs, SolverState).
% maximise(+State, -Tables, +LPs, -Likelihood)
% M-step: indexes the solver results by their group of hidden variables
% and recomputes the parameters of every distribution.
maximise(state(_,DistInstances,MargVars,_), Tables, LPs, Likelihood) :-
    rb_new(Empty),
    create_mdist_table(MargVars, LPs, Empty, Index),
    compute_parameters(DistInstances, Tables, Index, 0.0, Likelihood, LPs:MargVars).

% create_mdist_table(+MargVars, +LPs, +Tree0, -Tree)
% Inserts each element of MargVars into the red-black tree as a key, with
% the element of LPs at the same position as its value.
create_mdist_table([], [], Tree, Tree).
create_mdist_table([Vars|MargVars], [Probs|LPs], Tree0, Tree) :-
    rb_insert(Tree0, Vars, Probs, Tree1),
    create_mdist_table(MargVars, LPs, Tree1, Tree).
% compute_parameters(+DistInstances, -Tables, +Index, +Lik0, -Lik, +Extra)
% For every Id-Instances pair: start from an empty table, add the
% (expected) counts of all instances, soften and normalise the counts,
% install the result as the distribution's new table and add the
% likelihood contribution to the running total. Tables pairs each id with
% its new table. Extra (LPs:MargVars) is only passed along.
compute_parameters([], [], _, Lik, Lik, _).
compute_parameters([Id-Instances|Dists], [Id-NewTable|Tables], Index, Lik0, Lik, Probs:Margs) :-
    empty_dist(Id, Counts),
    add_samples(Instances, Counts, Index),
    soften_sample(Counts, Softened),
    % the total is computed but not used
    matrix:matrix_sum(Counts, _),
    normalise_counts(Softened, NewTable),
    compute_likelihood(Counts, NewTable, Delta),
    dist_new_table(Id, NewTable),
    Lik1 is Lik0+Delta,
    compute_parameters(Dists, Tables, Index, Lik1, Lik, Probs:Margs).
% add_samples(+Instances, +Table, +Index)
% Adds the counts of every instance to Table. An instance with a single
% case and no hidden variables counts 1.0 for that case; any other
% instance has the values stored in Index for its hidden variables spread
% over its cases.
add_samples([], _, _).
add_samples([i(_,_,[Only],[])|Instances], Table, Index) :- !,
    matrix_add(Table, Only, 1.0),
    add_samples(Instances, Table, Index).
add_samples([i(_,_,Cases,Hidden)|Instances], Table, Index) :-
    rb_lookup(Hidden, Weights, Index),
    run_sample(Cases, Weights, Table),
    add_samples(Instances, Table, Index).

% run_sample(+Cases, +Weights, +Table)
% Adds the weight at each position to the table entry of the case at the
% same position; both lists must have the same length.
run_sample([], [], _).
run_sample([Case|Cases], [Weight|Weights], Table) :-
    matrix_add(Table, Case, Weight),
    run_sample(Cases, Weights, Table).

View File

@@ -0,0 +1,46 @@
% learn distribution for school database.
% we do not consider the aggregates yet.
:- [pos:train].
:- ['~/Yap/work/CLPBN/clpbn/examples/School/school_32'].
:- ['~/Yap/work/CLPBN/learning/em'].
% main
% Collects every solution of goal/1, runs EM on them with tolerance 0.01
% and iteration parameter 10, and prints Likelihood:CPTs.
main :-
    findall(Goal, goal(Goal), Goals),
    em(Goals, 0.01, 10, CPTs, Lik),
    writeln(Lik:CPTs).
%
% change to 0.05, 0.1, 0.2 to make things simpler/harder
%
missing(0.3).

% goal(-Goal)
% Enumerates the training facts found in module pos, one clause per
% relation. Every clause uses the same missing/1 threshold: the value is
% kept only when the arithmetic term random evaluates to more than the
% threshold, otherwise the value argument of Goal is left unbound.
goal(professor_ability(P, V)) :-
    pos:professor_ability(P, Known),
    maybe_observe(Known, V).
goal(professor_popularity(P, V)) :-
    pos:professor_popularity(P, Known),
    maybe_observe(Known, V).
goal(registration_grade(P, V)) :-
    pos:registration_grade(P, Known),
    maybe_observe(Known, V).
goal(student_intelligence(P, V)) :-
    pos:student_intelligence(P, Known),
    maybe_observe(Known, V).
goal(course_difficulty(P, V)) :-
    pos:course_difficulty(P, Known),
    maybe_observe(Known, V).
goal(registration_satisfaction(P, V)) :-
    pos:registration_satisfaction(P, Known),
    maybe_observe(Known, V).

% maybe_observe(+Known, -Observed)
% Binds Observed to Known when random exceeds the missing/1 threshold and
% leaves it unbound otherwise.
maybe_observe(Known, Observed) :-
    missing(Threshold),
    ( random > Threshold -> Observed = Known ; true).

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,100 @@
%
% Utilities for learning
%
:- module(clpbn_learn_utils, [run_all/1,
clpbn_vars/2,
normalise_counts/2,
compute_likelihood/3,
soften_sample/2,
soften_sample/3]).
:- use_module(library(clpbn),
[clpbn_flag/2]).
:- use_module(library('clpbn/table'),
[clpbn_reset_tables/0]).
:- use_module(library(matrix),
[matrix_agg_lines/3,
matrix_op_to_lines/4,
matrix_agg_cols/3,
matrix_op_to_cols/4,
matrix_to_logs/2,
matrix_op/4,
matrix_sum/2,
matrix_to_list/2,
matrix_op_to_all/4]).
:- meta_predicate run_all(:).

% run_all(:Goals)
% Runs a list of goals. For a module-qualified list the CLP(BN) tables
% are reset first and the goals are called in that module by run_all/2.
% NOTE(review): with the meta_predicate declaration above the argument
% presumably always arrives as Module:List, which would leave the two
% plain-list clauses unused - confirm.
run_all([]).
run_all([Goal|Goals]) :-
    call(Goal),
    run_all(Goals).
run_all(Module:Goals) :-
    clpbn_reset_tables,
    run_all(Goals, Module).

% run_all(+Goals, +Module)
% Calls each goal once in Module. A failing goal is reported with
% writeln(bad:Module:Goal) and break/0 is called before moving on to the
% next goal.
run_all([], _).
run_all([Goal|Goals], Module) :-
    ( call(Module:Goal) -> true ; writeln(bad:Module:Goal), break),
    run_all(Goals, Module).
% clpbn_vars(+AttVars, -Vars)
% Selects the variables with a clpbn key attribute, orders them by key
% and collapses variables that share a key.
clpbn_vars(AttVars, Vars) :-
    get_clpbn_vars(AttVars, Keyed),
    keysort(Keyed, Ordered),
    merge_vars(Ordered, Vars).

% get_clpbn_vars(+AttVars, -Pairs)
% Builds Key-Var pairs; variables without a key attribute are dropped.
get_clpbn_vars([], []).
get_clpbn_vars([Var|Others], [Key-Var|Pairs]) :-
    clpbn:get_atts(Var, [key(Key)]), !,
    get_clpbn_vars(Others, Pairs).
get_clpbn_vars([_|Others], Pairs) :-
    get_clpbn_vars(Others, Pairs).

% merge_vars(+OrderedPairs, -Vars)
% One variable is produced per run handled by get_var_has_same_key/4.
merge_vars([], []).
merge_vars([Key-Var|Pairs], [Var|Vars]) :-
    get_var_has_same_key(Pairs, Key, Var, Leftover),
    merge_vars(Leftover, Vars).

% get_var_has_same_key(+Pairs, +Key, ?Var, -Leftover)
% Consumes the leading pairs that unify with Key-Var, unifying their
% variables with Var; Leftover is the rest of the list.
get_var_has_same_key([Key-Var|Pairs], Key, Var, Leftover) :- !,
    get_var_has_same_key(Pairs, Key, Var, Leftover).
get_var_has_same_key(Pairs, _, _, Pairs).
% soften_sample(+Table0, -Table)
% Softens a table of counts according to the parameter_softening flag.
soften_sample(Table0, Table) :-
    clpbn_flag(parameter_softening, Mode),
    soften_sample(Mode, Table0, Table).

% soften_sample(+Mode, +Table0, -Table)
%   no             the table is returned unchanged;
%   m_estimate(M)  the column aggregates, multiplied by M, are added to
%                  the table column-wise;
%   auto_m         like m_estimate, with M the square root of the sum of
%                  the column aggregates;
%   laplace        1 is added to every entry.
soften_sample(no, Table, Table).
soften_sample(m_estimate(M), Table0, Table) :-
    matrix_agg_cols(Table0, +, ColSums),
    matrix_op_to_all(ColSums, *, M, Scaled),
    matrix_op_to_cols(Table0, Scaled, +, Table).
soften_sample(auto_m, Table0, Table) :-
    matrix_agg_cols(Table0, +, ColSums),
    matrix_sum(ColSums, Total),
    M is sqrt(Total),
    matrix_op_to_all(ColSums, *, M, Scaled),
    matrix_op_to_cols(Table0, Scaled, +, Table).
soften_sample(laplace, Table0, Table) :-
    matrix_op_to_all(Table0, +, 1, Table).
% normalise_counts(+Counts, -Normalised)
% Divides the table line-wise by its line aggregates (sums).
normalise_counts(Counts, Normalised) :-
    matrix_agg_lines(Counts, +, LineSums),
    matrix_op_to_lines(Counts, LineSums, /, Normalised).
% compute_likelihood(+Counts, +NewTable, -DeltaLik)
% DeltaLik is the sum over all entries of Count * log(Parameter), with the
% counts taken from Counts and the parameters from NewTable.
compute_likelihood(Counts, NewTable, DeltaLik) :-
    matrix_to_logs(NewTable, LogTable),
    matrix_to_list(Counts, CountList),
    matrix_to_list(LogTable, LogList),
    sum_prods(CountList, LogList, 0, DeltaLik).

% sum_prods(+Counts, +Logs, +Acc0, -Acc)
% Accumulates Count*Log pairwise. Entries whose count is exactly 0.0 are
% skipped, so their logarithm never enters the sum.
sum_prods([], [], Acc, Acc).
sum_prods([0.0|Counts], [_|Logs], Acc0, Acc) :- !,
    sum_prods(Counts, Logs, Acc0, Acc).
sum_prods([Count|Counts], [Log|Logs], Acc0, Acc) :- !,
    Acc1 is Acc0+Count*Log,
    sum_prods(Counts, Logs, Acc1, Acc).

View File

@@ -0,0 +1,113 @@
%
% Maximum likelihood estimator and friends.
%
%
% This assumes we have a single big example.
%
:- module(clpbn_mle, [learn_parameters/2,
learn_parameters/3,
parameters_from_evidence/3]).
:- use_module(library('clpbn')).
:- use_module(library('clpbn/learning/learn_utils'),
[run_all/1,
clpbn_vars/2,
normalise_counts/2,
soften_table/2,
normalise_counts/2]).
:- use_module(library('clpbn/dists'),
[empty_dist/2,
dist_new_table/2]).
:- use_module(library(matrix),
[matrix_inc/2]).
% learn_parameters(+Items, -Tables)
% Same as learn_parameters/3 with an empty list of extras.
learn_parameters(Goals, Tables) :-
    learn_parameters(Goals, Tables, []).

%
% full evidence learning
%
% learn_parameters(+Items, -Tables, +Extras)
% Runs the goals, gathers the CLP(BN) variables they created, turns their
% evidence into a sorted sample and computes the tables from it. Extras
% is handed on to compute_tables/3.
learn_parameters(Goals, Tables, Extras) :-
    run_all(Goals),
    attributes:all_attvars(AttVars),
    % sort and incorporate evidence
    clpbn_vars(AttVars, Vars),
    mk_sample(Vars, Sample),
    compute_tables(Extras, Sample, Tables).
% parameters_from_evidence(+AllVars, -Sample, +Extras)
% Builds the sorted sample from the variables that have evidence and runs
% compute_tables/3 on it. The resulting tables are not returned.
parameters_from_evidence(AllVars, Sample, Extras) :-
    mk_sample_from_evidence(AllVars, Sample),
    compute_tables(Extras, Sample, _Tables).
% mk_sample_from_evidence(+AllVars, -SortedSample)
% Sample built by add_evidence2sample/2, sorted with msort/2 (duplicates
% are kept).
mk_sample_from_evidence(AllVars, SortedSample) :-
    add_evidence2sample(AllVars, Unsorted),
    msort(Unsorted, SortedSample).

% mk_sample(+AllVars, -SortedSample)
% Sample built by add2sample/2, sorted with msort/2 (duplicates are kept).
mk_sample(AllVars, SortedSample) :-
    add2sample(AllVars, Unsorted),
    msort(Unsorted, SortedSample).
%
% assumes we have full data, meaning evidence for every variable
%
% add2sample(+Vars, -Sample)
% Emits val(DistId, [Evidence|ParentEvidence]) for every variable. Fails
% if a variable or one of its parents has no evidence.
add2sample([], []).
add2sample([Var|Vars], [val(DistId,[Ev|ParentEvs])|Sample]) :-
    clpbn:get_atts(Var, [evidence(Ev),dist(DistId,Parents)]),
    get_eparents(Parents, ParentEvs),
    add2sample(Vars, Sample).

% get_eparents(+Parents, -Evidence)
% Evidence holds the evidence of each parent, in order.
get_eparents([Parent|Parents], [Ev|Evs]) :-
    clpbn:get_atts(Parent, [evidence(Ev)]),
    get_eparents(Parents, Evs).
get_eparents([], []).
%
% ignores variables that have no evidence themselves or that have a
% parent without evidence!
%
% add_evidence2sample(+Vars, -Sample)
% Emits val(DistId, [Evidence|ParentEvidence]) for every variable whose
% own evidence and all parents' evidence are available; the other
% variables are skipped.
add_evidence2sample([], []).
add_evidence2sample([V|Vs],[val(Id,[Ev|EParents])|Vals]) :-
    clpbn:get_atts(V, [evidence(Ev),dist(Id,Parents)]),
    get_eveparents(Parents, EParents), !,
    add_evidence2sample(Vs, Vals).
add_evidence2sample([_|Vs],Vals) :-
    add_evidence2sample(Vs, Vals).

% get_eveparents(+Parents, -Evidence)
% Evidence holds the evidence of each parent, in order; fails when some
% parent has none. The recursion goes through this predicate itself
% rather than through a sibling with a different name.
get_eveparents([P|Parents], [E|EParents]) :-
    clpbn:get_atts(P, [evidence(E)]),
    get_eveparents(Parents, EParents).
get_eveparents([], []).
% compute_tables(+Parameters, +Sample, -NewTables)
% Estimates one table per distribution from the sorted sample and then
% passes the tables, together with Parameters, through add_priors/3.
% NOTE(review): add_priors/3 is not defined in the part of the file seen
% here - confirm where it comes from.
compute_tables(Parameters, Sample, NewTables) :-
    estimator(Sample, Estimated),
    add_priors(Parameters, Estimated, NewTables).
% estimator(+SortedSample, -Tables)
% SortedSample is a list of val(DistId, Case) terms with equal ids
% adjacent. For every distribution id the cases are counted into an empty
% table, the counts are softened and normalised, and the result is
% installed as the distribution's new table. Tables holds the new tables
% in the order of the ids.
estimator([], []).
estimator([val(Id,Sample)|Samples], [NewDist|Tables]) :-
    empty_dist(Id, NewTable),
    id_samples(Id, Samples, IdSamples, MoreSamples),
    mle([Sample|IdSamples], NewTable),
    % The softening predicate provided by the learning utilities is
    % soften_sample/2 (module clpbn_learn_utils); it is called with an
    % explicit module qualification so that it does not depend on this
    % file's import list.
    clpbn_learn_utils:soften_sample(NewTable, SoftenedTable),
    normalise_counts(SoftenedTable, NewDist),
    % replace matrix in distribution
    dist_new_table(Id, NewDist),
    estimator(MoreSamples, Tables).
% id_samples(+Id, +Samples, -IdSamples, -MoreSamples)
% Splits Samples at the end of the leading run of val(Id, Case) terms:
% IdSamples gets the cases of that run, MoreSamples the remainder.
id_samples(_, [], [], []).
id_samples(Id, [val(Id,Case)|Rest], [Case|Cases], Remainder) :- !,
    id_samples(Id, Rest, Cases, Remainder).
id_samples(_, Rest, [], Rest).
% mle(+Cases, +Table)
% Calls matrix_inc/2 on Table once for every case in the list.
mle([Case|Cases], Table) :-
    matrix_inc(Table, Case),
    mle(Cases, Table).
mle([], _).