start with pfl learning.

This commit is contained in:
Vitor Santos Costa 2012-06-22 19:00:12 +01:00
parent 5fe052a3ef
commit 87e0b67e76
10 changed files with 141 additions and 14 deletions

View File

@ -93,6 +93,11 @@ CLPBN_HMMER_EXAMPLES= \
$(CLPBN_EXDIR)/HMMer/scan.yap \
$(CLPBN_EXDIR)/HMMer/score.yap
CLPBN_LEARNING_EXAMPLES= \
$(CLPBN_EXDIR)/learning/school_params.yap \
$(CLPBN_EXDIR)/learning/sprinkler_params.yap \
$(CLPBN_EXDIR)/learning/train.yap
CLPBN_EXAMPLES= \
$(CLPBN_EXDIR)/burglary-alarm.fg \
$(CLPBN_EXDIR)/burglary-alarm.yap \
@ -102,6 +107,7 @@ CLPBN_EXAMPLES= \
$(CLPBN_EXDIR)/comp_workshops.yap \
$(CLPBN_EXDIR)/social_domain1.yap \
$(CLPBN_EXDIR)/social_domain2.yap \
$(CLPBN_EXDIR)/sprinkler.pfl \
$(CLPBN_EXDIR)/sprinkler.yap \
$(CLPBN_EXDIR)/workshop_attrs.yap
@ -112,10 +118,12 @@ install: $(CLBN_TOP) $(CLBN_PROGRAMS) $(CLPBN_PROGRAMS)
mkdir -p $(DESTDIR)$(EXDIR)
mkdir -p $(DESTDIR)$(EXDIR)/School
mkdir -p $(DESTDIR)$(EXDIR)/HMMer
mkdir -p $(DESTDIR)$(EXDIR)/learning
for h in $(CLPBN_TOP); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR); done
for h in $(CLPBN_PROGRAMS); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR)/clpbn; done
for h in $(CLPBN_LEARNING_PROGRAMS); do $(INSTALL_DATA) $$h $(DESTDIR)$(SHAREDIR)/clpbn/learning; done
for h in $(CLPBN_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR); done
for h in $(CLPBN_SCHOOL_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/School; done
for h in $(CLPBN_HMMER_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/HMMer; done
for h in $(CLPBN_LEARNING_EXAMPLES); do $(INSTALL_DATA) $$h $(DESTDIR)$(EXDIR)/learning; done

View File

@ -512,6 +512,9 @@ clpbn_run_solver(pcg, LVs, LPs, State) :-
add_keys(Key1+V1,_Key2,Key1+V1).
%
% only useful for probabilistic context free grammars
%
clpbn_init_graph(pcg) :- !,
pcg_init_graph.
clpbn_init_graph(_).

View File

@ -293,7 +293,7 @@ empty_dist(Dist, TAB) :-
dist_new_table(DistId, NewMat) :-
use_parfactors(on), !,
matrix_to_list(NewMat, List),
pfl:set_pfl_parameters(DistId, List).
pfl:new_pfl_parameters(DistId, List).
dist_new_table(Id, NewMat) :-
matrix_to_list(NewMat, List),
recorded(clpbn_dist_db, db(Id, Key, _, A, B, C, D), R),

View File

@ -0,0 +1,40 @@
% learn distribution for school database.
:- ['../sprinkler.pfl'].
:- use_module(library(clpbn/learning/em)).
%% data(t,t,t,t).
data(t,f,_,t).
%% data(_,t,_,t).
%% data(t,t,f,f).
%% data(t,t,f,t).
%% data(t,_,_,t).
%% data(t,f,t,t).
%% data(t,t,f,t).
%% data(t,_,f,f).
%% data(t,t,f,f).
%% data(f,f,t,t).
%% data(t,t,_,f).
%% data(t,f,f,t).
%% data(t,f,t,t).
%:- clpbn:set_clpbn_flag(em_solver,gibbs).
%:- clpbn:set_clpbn_flag(em_solver,jt).
%:- clpbn:set_clpbn_flag(em_solver,ve).
:- clpbn:set_clpbn_flag(em_solver,bp).
timed_main :-
statistics(runtime, _),
main(Lik),
statistics(runtime, [T,_]),
format('Took ~d msec and Lik ~3f~n',[T,Lik]).
main(Lik) :-
findall(X,scan_data(X),L),
em(L,0.01,10,_,Lik).
scan_data(example([wet_grass(W),sprinkler(S),rain(R),cloudy(C)])) :-
data(W, S, R, C).

View File

@ -0,0 +1,33 @@
:- style_check(all).
:- ensure_loaded(library(pfl)).
% 1. define domain of random variables
% not necessary if they are boolean.
% 2. define parfactors
bayes cloudy ; cloudy_table ; [].
bayes sprinkler, cloudy ; sprinkler_table ; [].
bayes rain, cloudy ; rain_table ; [].
bayes wet_grass, sprinkler, rain ; wet_grass_table ; [].
% 3. define CPTs.
wet_grass_table([1.0,0.1,0.1,0.01,
0.0,0.9,0.9,0.99]).
sprinkler_table([0.5,0.9,
0.5,0.1]).
rain_table([0.8,0.2,
0.2,0.8]).
cloudy_table([0.5,0.5]).

View File

@ -76,15 +76,21 @@ handle_em(error(repeated_parents)) :-
% and more detailed info on distributions, namely with a list of all instances for the distribution.
init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
clpbn_flag(em_solver, Solver),
% only used for PCGs
clpbn_init_graph(Solver),
% create the ground network
call_run_all(Items),
% randomise_all_dists,
% set initial values for distributions
uniformise_all_dists,
% get all variablews to marginalise
attributes:all_attvars(AllVars0),
% and order them
sort_vars_by_key(AllVars0,AllVars,[]),
% remove variables that do not have to do with this query.
% check_for_hidden_vars(AllVars1, AllVars1, AllVars),
different_dists(AllVars, AllDists, AllDistInstances, MargVars),
% setup solver by doing parameter independent work.
clpbn_init_solver(Solver, MargVars, AllVars, _, SolverVars).
% loop for as long as you want.
@ -116,15 +122,32 @@ ltables([Id-T|Tables], [Key-LTable|FTables]) :-
% collect the different dists we are going to learn next.
different_dists(AllVars, AllDists, AllInfo, MargVars) :-
all_dists(AllVars, Dists0),
all_dists(AllVars, AllVars, Dists0),
sort(Dists0, Dists1),
group(Dists1, AllDists, AllInfo, MargVars0, []),
sort(MargVars0, MargVars).
all_dists([], []).
all_dists([V|AllVars], [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
%
% V -> to Id defining V. We get:
% the random variables that are parents
% the cases that can happen, eg if we have A <- B, C
% A and B are boolean w/o evidence, and C is f, the cases could be
% [0,0,1], [0,1,1], [1,0,0], [1,1,0],
% Hiddens will be C
%
all_dists([], _, []).
all_dists([V|AllVars], AllVars0, [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
clpbn:use_parfactors(on), !,
clpbn:get_atts(V, [key(K)]),
pfl:factor(bayes,Id,[K|PKeys],_,_,_),
find_variables(PKeys, AllVars0, Parents),
generate_hidden_cases([V|Parents], CompactCases, Hiddens),
uncompact_cases(CompactCases, Cases),
all_dists(AllVars, AllVars0, Dists).
all_dists([V|AllVars], AllVars0, [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
% V is an instance of Id
clpbn:get_atts(V, [dist(Id,Parents)]),
sort([V|Parents], Sorted),
sort([V|Parents], Sorted),
length(Sorted, LengSorted),
length(Parents, LengParents),
(
@ -133,15 +156,31 @@ all_dists([V|AllVars], [i(Id, [V|Parents], Cases, Hiddens)|Dists]) :-
true
;
throw(error(repeated_parents))
),
),
generate_hidden_cases([V|Parents], CompactCases, Hiddens),
uncompact_cases(CompactCases, Cases),
all_dists(AllVars, Dists).
all_dists(AllVars, AllVars0, Dists).
find_variables([], _AllVars0, []).
find_variables([K|PKeys], AllVars0, [Parent|Parents]) :-
find_variable(K, AllVars0, Parent),
find_variables(PKeys, AllVars0, Parents).
find_variable(K, [Parent|_AllVars0], Parent) :-
clpbn:get_atts(Parent, [key(K0)]), K0 =@= K, !.
find_variable(K, [_|AllVars0], Parent) :-
find_variable(K, AllVars0, Parent).
generate_hidden_cases([], [], []).
generate_hidden_cases([V|Parents], [P|Cases], Hiddens) :-
clpbn:get_atts(V, [evidence(P)]), !,
generate_hidden_cases(Parents, Cases, Hiddens).
generate_hidden_cases([V|Parents], [Cases|MoreCases], [V|Hiddens]) :-
clpbn:use_parfactors(on), !,
clpbn:get_atts(V, [key(K)]),
pfl:skolem(K,D), length(D,Sz),
gen_cases(0, Sz, Cases),
generate_hidden_cases(Parents, MoreCases, Hiddens).
generate_hidden_cases([V|Parents], [Cases|MoreCases], [V|Hiddens]) :-
clpbn:get_atts(V, [dist(Id,_)]),
get_dist_domain_size(Id, Sz),

View File

@ -36,8 +36,10 @@ run_all(M:Gs) :-
run_all(Gs,M).
run_all([],_).
run_all([example(Gs0)|Gs],M) :-
run_all(Gs0,M),
run_all(Gs,M).
run_all([G|Gs],M) :-
% (G = _:ge(ybr136w,t8,23,-1) -> nb_getval(clpbn_tables, Tab), writeln(Tab) ; true ),
( call(M:G) -> true ; throw(bad_call(M:G)) ),
run_all(Gs,M).

View File

@ -21,7 +21,7 @@
append/3,
member/2]).
:- dynamic factor/5, skolem_in/2, skolem/2, preprocess/3, evidence/2, id/1.
:- dynamic factor/6, skolem_in/2, skolem/2, preprocess/3, evidence/2, id/1.
:- reexport(library(clpbn),
[clpbn_flag/2 as pfl_flag,
@ -55,7 +55,7 @@ user:term_expansion( markov((Formula ; Phi ; Constraints)), pfl:factor(markov,Id
process_args(Formula, Id, 0, _, FList, []).
user:term_expansion( Id@N, L ) :-
atom(Id), number(N), !,
N1 is N + 1,
N1 is N + 1,
findall(G,generate_entity(1, N1, Id, G), L).
user:term_expansion( Goal, [] ) :-
preprocess(Goal, Sk,Var), !,
@ -112,6 +112,10 @@ process_arg(Sk::D, Id, _I) -->
process_arg(Sk, Id, _I) -->
!,
{
% if :: been used before for this skolem
% just keep on using it,
% otherwise, assume it is t,f
( \+ \+ skolem(Sk,_D) -> true ; new_skolem(Sk,[t,f]) ),
assert(skolem_in(Sk, Id))
},
[Sk].
@ -121,7 +125,7 @@ new_skolem(Sk,D) :-
skolem(Sk1, D1),
Sk1 =@= Sk,
!,
D1 = D.
( D1 = D -> true ; throw(pfl(permission_error(redefining_domain(Sk),D:D1)))).
new_skolem(Sk,D) :-
interface_predicate(Sk),
assert(skolem(Sk, D)).
@ -155,7 +159,7 @@ get_pfl_parameters(Id,Out) :-
new_pfl_parameters(Id, NewPhi) :-
retract(factor(Type.Id,FList,FV,_Phi,Constraints)),
retract(factor(Type,Id,FList,FV,_Phi,Constraints)),
assert(factor(Type,Id,FList,FV,NewPhi,Constraints)),
fail.
new_pfl_parameters(_Id, _NewPhi).
@ -179,5 +183,3 @@ get_factor_pvariable(Id,Var) :-
factor(_Type, Id,FList,_FV,_Phi,_Constraints),
member(Var, FList).