support for learning CLP(BN) programs from Aleph

This commit is contained in:
Vitor Santos Costa 2008-11-13 09:03:53 +00:00
parent 1c334ea198
commit 99248795c9
8 changed files with 264 additions and 14 deletions

View File

@ -52,6 +52,7 @@ CLPBN_PROGRAMS= \
$(CLPBN_SRCDIR)/xbif.yap
CLPBN_LEARNING_PROGRAMS= \
$(CLPBN_LEARNING_SRCDIR)/aleph_parms.yap \
$(CLPBN_LEARNING_SRCDIR)/bnt_parms.yap \
$(CLPBN_LEARNING_SRCDIR)/em.yap \
$(CLPBN_LEARNING_SRCDIR)/learn_utils.yap \

View File

@ -84,8 +84,8 @@
:- dynamic solver/1,output/1,use/1,suppress_attribute_display/1, parameter_softening/1, em_solver/1.
solver(em).
em_solver(em).
solver(vel).
em_solver(vel).
%output(xbif(user_error)).
%output(gviz(user_error)).
@ -125,6 +125,8 @@ clpbn_flag(parameter_softening,Before,After) :-
assert(parameter_softening(After)).
{_} :-
solver(none), !.
{Var = Key with Dist} :-
put_atts(El,[key(Key),dist(DistInfo,Parents)]),
dist(Dist, DistInfo, Key, Parents),
@ -323,7 +325,7 @@ bind_clpbns(Key, Dist, Parents, Key1, Dist1, Parents1) :-
->
true
;
throw(error(domain_error(bayesian_domain),bind_clpbns(var(Key, Type, Domain, Table, Parents),var(Key1, Type1, Domain1, Table1, Parents1))))
throw(error(domain_error(bayesian_domain),bind_clpbns(var(Dist, Key, Type, Domain, Table, Parents),var(Dist1, Key1, Type1, Domain1, Table1, Parents1))))
).
bind_clpbns(Key, _, _, _, Key1, _, _, _) :-
Key\=Key1, !, fail.

View File

@ -14,7 +14,7 @@
get_dist_domain/2,
get_dist_params/2,
get_dist_domain_size/2,
get_dist_tparams/2,
get_dist_params/2,
get_dist_key/2,
get_evidence_position/3,
get_evidence_from_position/3,
@ -24,7 +24,8 @@
randomise_all_dists/0,
randomise_dist/1,
uniformise_all_dists/0,
uniformise_dist/1
uniformise_dist/1,
reset_all_dists/0
]).
:- use_module(library(lists),[is_list/1,nth0/3]).
@ -93,6 +94,10 @@ new_id(Id) :-
Id1 is Id+1,
assert(id(Id1)).
reset_id :-
retract(id(_)),
assert(id(1)).
dists(X) :- id(X1), X is X1-1.
dist(V, Id, Key, Parents) :-
@ -296,3 +301,17 @@ uniformise_dist(Dist) :-
dist_new_table(Dist, NewCPT).
reset_all_dists :-
recorded(clpbn_dist_psizes, _, R),
erase(R),
fail.
reset_all_dists :-
recorded(clpbn_dist_db, _, R),
erase(R),
fail.
reset_all_dists :-
reset_id,
fail.
reset_all_dists.

View File

@ -17,6 +17,7 @@
multiply_possibly_deterministic_factors/3,
random_CPT/2,
uniform_CPT/2,
uniform_CPT_as_list/2,
normalise_CPT_on_lines/3]).
:- use_module(dists,
@ -250,6 +251,10 @@ generate_random_entries(Size, [R|Randoms]) :-
Size1 is Size-1,
generate_random_entries(Size1, Randoms).
uniform_CPT_as_list(Dims, L) :-
uniform_CPT(Dims, M),
matrix_to_list(M, L).
uniform_CPT(Dims, M) :-
matrix_new_set(floats,Dims,1.0,M1),
normalise_possibly_deterministic_CPT(M1, M).

View File

@ -100,9 +100,9 @@ run_vel_solver([LVs|MoreLVs], [Ps|MorePs], [NVs0|MoreLVis]) :-
find_all_table_deps(Tables0, LV),
process(LVi, LVs, tab(Dist,_,_)),
% move from potentials back to probabilities
list_from_CPT(Dist, LPs),
normalise_CPT(Dist,MPs),
list_from_CPT(MPs, Ps),
length(Ps,_Len),
run_vel_solver(MoreLVs, MorePs, MoreLVis).
%

View File

@ -0,0 +1,217 @@
%
% Interface the Aleph ILP system to CLP(BN)
%
% Relies on the Aleph cost function.
% It assumes Aleph work as usual, but some variables are of type random.
%
:- module(clpbn_aleph,
[init_clpbn_cost/0,
random_type/2]).
:- dynamic rt/2, inited/1.
:- use_module(library('clpbn/learning/em')).
:- use_module(library('clpbn'),
[{}/1,
clpbn_flag/2,
clpbn_flag/3,
set_clpbn_flag/2]).
:- use_module(library('clpbn/matrix_cpt_utils'),
[uniform_CPT_as_list/2]).
:- use_module(library('clpbn/dists'),
[reset_all_dists/0,
get_dist_key/2,
get_dist_params/2
]).
%
% Tell Aleph not to use default solver during saturation
%
% all work will be done by EM
:- set_clpbn_flag(solver,none).
%
% This is the Aleph interface
% examples are stored as example(Id, Type, Example)
% CPT domains are stored as random_type(KeySkeleton, ListOfValues).
%
:- use_module(library(lists),[append/3]).
:- multifile user:cost/3.
% handle uninstantiated examples as hidden variables.
:- user:set(skolem_examples, false).
% avoid doing CLP(BN) stuff except at start
:- user:set(sat_start_hook, clpbn_aleph:disable_solver).
:- user:set(sat_stop_hook, clpbn_aleph:enable_solver).
:- user:set(reduce_start_hook, clpbn_aleph:disable_solver).
:- user:set(reduce_stop_hook, clpbn_aleph:enable_solver).
disable_solver(_) :-
clpbn_flag(solver, Old, none),
nb_setval(old_clpbn_solver, Old).
disable_solver(_,_) :-
clpbn_flag(solver, Old, none),
nb_setval(old_clpbn_solver, Old).
enable_solver :-
nb_getval(old_clpbn_solver, Old),
set_clpbn_flag(solver, Old).
enable_solver(_,_) :-
nb_getval(old_clpbn_solver, Old),
set_clpbn_flag(solver, Old).
:- user:set(best_clause_hook, clpbn_aleph:add_new_clause).
add_new_clause(_,(_ :- true),_,_) :- !.
add_new_clause(_,(H :- B),_,_) :-
user:db_usage,
user:db_dynamic,
domain(H, K, V, D),
rewrite_body(B, IB, Vs, _, ( !, { V = K with p(D, CPTList, Vs) })),
% need to remember which CPT we want
get_dist_key(Id, K),
get_dist_params(Id, CPTList),
asserta(user:(H :- IB)),
user:setting(verbosity,V),
( V >= 1 ->
user:p_message('CLP(BN) Theory'),
functor(H,N,Ar), listing(user:N/Ar)
;
true
).
% user-defined cost function, Aleph knows about this (and only about this).
user:cost((H :- B),Inf,Score) :- !,
domain(H, K, V, D),
check_info(Inf),
rewrite_body(B, IB, Vs, Ds, ( !, { V = K with p(D, CPTList, Vs) })),
uniform_cpt([D|Ds], CPTList),
asserta(user:(H :- IB), R),
(
cpt_score(Score0)
->
erase(R),
Score is -Score0
;
% illegal clause, just get out of here.
erase(R),
fail
).
user:cost(H,_Inf,Score) :- !,
init_clpbn_cost(H, Score0),
Score is -Score0.
% this is here so that Aleph will actually compute coverage. Aleph computes
% coverage only if cost actually checks Inf.
check_info(_).
init_clpbn_cost(_, Score) :-
inited(Score), !.
init_clpbn_cost(H, Score) :-
functor(H,N,A),
% get rid of Aleph crap
abolish(user:N/A),
% make it easy to add and remove clauses.
dynamic(user:N/A),
domain(H, K, V, D),
uniform_cpt([D], CPTList),
% This will be the default cause, called when the other rules fail.
assert(user:(H :- !, { V = K with p(D, CPTList) })),
cpt_score(Score),
assert(inited(Score)).
% receives H, and generates a key K, a random variable RV, and a domain D.
domain(H, K, RV, D) :-
functor(H,Name,Arity),
functor(Pred,Name,Arity),
(
recorded(aleph,modeh(_,Pred),_)
->
true
;
user:'$aleph_global'(modeh,modeh(_,Pred))
),
arg(Arity,Pred,+RType),
rt(RType,D), !,
key_from_head(H,K,RV).
domain(H, K, V, D) :-
current_predicate(_,user:domain(_)),
key_from_head(H,K,V),
user:domain(K,D).
key_from_head(H,K,V) :-
H =.. [Name|Args],
predicate_property(user:H,number_of_clauses(NClauses)),
atomic_concat(Name,NClauses,NName),
append(H0L,[V],Args),
K =.. [NName|H0L].
% transforms_body into something that is going to be called
% receives G0, and generates a list of goals, a list of variables, and a list of domains.
% receives also a Tail with the constraint to append at the end.
rewrite_body((A,B), (user:NA,NB), [V|Vs], [D|Ds], Tail) :-
rewrite_goal(A, V, D, NA), !,
rewrite_body(B, NB, Vs, Ds, Tail).
rewrite_body((A,B), (user:A,NB), Vs, Ds, Tail) :-
rewrite_body(B,NB, Vs, Ds, Tail).
rewrite_body(A,(user:NA,Tail), [V], [D], Tail) :-
rewrite_goal(A, V, D, NA), !.
rewrite_body(A, (user:A,Tail), [], [], Tail).
% so they need not be rewritten.
rewrite_goal(A,V,D,NA) :-
functor(A,Name,Arity),
functor(Pred,Name,Arity),
(
recorded(aleph,modeb(_,Pred),_)
->
true
;
user:'$aleph_global'(modeb,modeb(_,Pred))
),
arg(Arity,Pred,-RType),
rt(RType,D), !,
A =.. [Name|Args],
replace_last_var(Args,V,NArgs),
NA =.. [Name|NArgs].
replace_last_var([_],V,[V]) :- !.
replace_last_var([A|Args],V,[A|NArgs]) :-
replace_last_var(Args,V,NArgs).
%
% This is the key
%
cpt_score(Lik) :-
findall(user:Ex, user:example(_,pos,Ex), Exs),
clpbn_flag(solver, Solver),
clpbn_flag(em_solver, EMSolver),
set_clpbn_flag(solver, EMSolver),
reset_all_dists,
em(Exs, 0.1, 10, _Tables, Lik),
set_clpbn_flag(solver, Solver).
complete_clpbn_cost(_AlephClause).
random_type(A,B) :-
assert(rt(A,B)).
uniform_cpt(Ds, CPTList) :-
lengths(Ds, Ls),
uniform_CPT_as_list(Ls, CPTList).
lengths([], []).
lengths([D|Ds], [L|Ls]) :-
length(D, L),
lengths(Ds, Ls).

View File

@ -51,7 +51,12 @@
em(Items, MaxError, MaxIts, Tables, Likelihood) :-
init_em(Items, State),
em_loop(0, 0.0, State, MaxError, MaxIts, Likelihood, Tables).
em_loop(0, 0.0, State, MaxError, MaxIts, Likelihood, Tables),
assert(em_found(Tables, Likelihood)),
fail.
% get rid of new random variables the easy way :)
em(_, _, _, Tables, Likelihood) :-
retract(em_found(Tables, Likelihood)).
% This gets you an initial configuration. If there is a lot of evidence
% tables may be filled in close to optimal, otherwise they may be
@ -78,7 +83,7 @@ init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
em_loop(Its, Likelihood0, State, MaxError, MaxIts, LikelihoodF, FTables) :-
estimate(State, LPs),
maximise(State, Tables, LPs, Likelihood),
writeln(Likelihood:Likelihood0:Tables),
writeln(Likelihood:Its:Likelihood0:Tables),
(
(
abs((Likelihood - Likelihood0)/Likelihood) < MaxError
@ -170,24 +175,25 @@ estimate(state(_, _, Margs, SolverState), LPs) :-
maximise(state(_,DistInstances,MargVars,_), Tables, LPs, Likelihood) :-
rb_new(MDistTable0),
create_mdist_table(MargVars,LPs,MDistTable0,MDistTable),
compute_parameters(DistInstances, Tables, MDistTable, 0.0, Likelihood).
create_mdist_table(MargVars, LPs, MDistTable0, MDistTable),
compute_parameters(DistInstances, Tables, MDistTable, 0.0, Likelihood, LPs:MargVars).
create_mdist_table([],[],MDistTable,MDistTable).
create_mdist_table([Vs|MargVars],[Ps|LPs],MDistTable0,MDistTable) :-
rb_insert(MDistTable0, Vs, Ps, MDistTableI),
create_mdist_table(MargVars, LPs, MDistTableI ,MDistTable).
compute_parameters([], [], _, Lik, Lik).
compute_parameters([Id-Samples|Dists], [Id-NewTable|Tables], MDistTable, Lik0, Lik) :-
compute_parameters([], [], _, Lik, Lik, _).
compute_parameters([Id-Samples|Dists], [Id-NewTable|Tables], MDistTable, Lik0, Lik, LPs:MargVars) :-
empty_dist(Id, Table0),
add_samples(Samples, Table0, MDistTable),
soften_sample(Table0, SoftenedTable),
matrix:matrix_sum(Table0,TotM),writeln(Id-TotM),
normalise_counts(SoftenedTable, NewTable),
compute_likelihood(Table0, NewTable, DeltaLik),
dist_new_table(Id, NewTable),
NewLik is Lik0+DeltaLik,
compute_parameters(Dists, Tables, MDistTable, NewLik, Lik).
compute_parameters(Dists, Tables, MDistTable, NewLik, Lik, LPs:MargVars).
add_samples([], _, _).
add_samples([i(_,_,[Case],[])|Samples], Table, MDistTable) :- !,

View File

@ -34,7 +34,7 @@ run_all(M:Gs) :-
run_all([],_).
run_all([G|Gs],M) :-
call(M:G),
( call(M:G) -> true ; writeln(bad:M:G), break),
run_all(Gs,M).
clpbn_vars(Vs,BVars) :-