improve stochastic grammar learning (work in progress).

This commit is contained in:
Vitor Santos Costa 2009-05-26 10:49:04 -05:00
parent de7474a5d9
commit a10bf47a0c
4 changed files with 166 additions and 18 deletions

View File

@ -8,7 +8,8 @@
clpbn_init_solver/4,
clpbn_run_solver/3,
clpbn_init_solver/5,
clpbn_run_solver/4]).
clpbn_run_solver/4,
clpbn_init_graph/1]).
:- use_module(library(atts)).
:- use_module(library(lists)).
@ -53,6 +54,12 @@
run_gibbs_solver/3
]).
:- use_module('clpbn/pgrammar',
[init_pcg_solver/4,
run_pcg_solver/3,
pcg_init_graph/0
]).
:- use_module('clpbn/graphs',
[
clpbn2graph/1
@ -289,8 +296,7 @@ bind_clpbn(T, Var, _, _, _) :- nonvar(T),
!, ( add_evidence(Var,T) -> true ; writeln(T:Var), fail ).
bind_clpbn(T, Var, Key, Dist, Parents) :- var(T),
get_atts(T, [key(Key1),dist(Dist1,Parents1)]),
writeln(eq:Key:Key1),
(
(
bind_clpbns(Key, Dist, Parents, Key1, Dist1, Parents1)
->
(
@ -382,6 +388,8 @@ clpbn_init_solver(vel, LVs, Vs0, VarsWithUnboundKeys, State) :-
init_vel_solver(LVs, Vs0, VarsWithUnboundKeys, State).
clpbn_init_solver(jt, LVs, Vs0, VarsWithUnboundKeys, State) :-
init_jt_solver(LVs, Vs0, VarsWithUnboundKeys, State).
clpbn_init_solver(pcg, LVs, Vs0, VarsWithUnboundKeys, State) :-
init_pcg_solver(LVs, Vs0, VarsWithUnboundKeys, State).
%
% LVs is the list of lists of variables to marginalise
@ -390,7 +398,7 @@ clpbn_init_solver(jt, LVs, Vs0, VarsWithUnboundKeys, State) :-
%
%
clpbn_run_solver(LVs, LPs, State) :-
solver(Solver, State),
solver(Solver),
clpbn_run_solver(Solver, LVs, LPs, State).
clpbn_run_solver(gibbs, LVs, LPs, State) :-
@ -399,6 +407,11 @@ clpbn_run_solver(vel, LVs, LPs, State) :-
run_vel_solver(LVs, LPs, State).
clpbn_run_solver(jt, LVs, LPs, State) :-
run_jt_solver(LVs, LPs, State).
clpbn_run_solver(pcg, LVs, LPs, State) :-
run_pcg_solver(LVs, LPs, State).
add_keys(Key1+V1,_Key2,Key1+V1).
clpbn_init_graph(pcg) :- !,
pcg_init_graph.
clpbn_init_graph(_).

View File

@ -119,10 +119,10 @@ check_stored_evidence(_, _).
add_evidence(K, V) :-
evidence(K, Ev), !,
store_evidence(V, Ev),
clpbn:put_atts(V, [evidence(Ev)]).
add_evidence(_, _).
check_for_evidence(_, V, Vf, Vf, C, C) :-
clpbn:get_atts(V, [evidence(_)]), !.
check_for_evidence(K, _, Vf0, Vff, C0, Ci) :-

View File

@ -2,8 +2,12 @@
:- style_check(all).
:- module(clpbn_pgrammar,[grammar_prob/2,
grammar_mle/2]).
:- module(clpbn_pgrammar,[grammar_to_atts/1,
grammar_prob/2,
grammar_mle/2,
init_pcg_solver/4,
run_pcg_solver/3,
pcg_init_graph/0]).
:- load_files([library(clpbn)],
[ if(not_loaded),
@ -14,11 +18,20 @@
[ sum_list/2
]).
:- use_module([library(matrix)],
[ matrix_new/3,
matrix_add/3,
matrix_get/3,
matrix_op/4,
matrix_op_to_all/4,
matrix_set_all/2
]).
:- op(600, xfy,'::').
:- dynamic id/4.
:- dynamic id/4, dist_id/2, new_proof/2.
:- meta_predicate grammar_prob(:,-).
:- meta_predicate grammar_prob(:,-), grammar_mle(:,-), grammar_to_atts(:).
grammar_prob(M:S, P) :- !,
grammar_prob(S, M, P).
@ -94,10 +107,11 @@ add_to_predicate(M:EH1,M:EH,M:H0,NH,NB,Key,Choice,P,Id,(EH1:-NB)) :-
EH=NH.
p_rule(_,_,_,_) :-
nb_setval(grammar_fast,on), !.
nb_getval(grammar_fast,on), !.
p_rule(M,EH,Key,Choice) :-
all_tabs(M,EH,Dom,Opt),
{ Choice = Key with p(Dom,Opt) }.
{ AttVar = Key with p(Dom,Opt) },
Choice = AttVar.
ensure_tabled(M,H0,EH) :-
predicate_property(M:H0, tabled), !,
@ -178,3 +192,112 @@ extract_logprobability([P1|Ps], LogP0, LogP) :-
LogPI is LogDP+LogP0,
extract_logprobability(Ps, LogPI, LogP).
grammar_to_atts(M:S) :- !,
grammar_to_atts(S, M).
grammar_to_atts(S) :-
source_module(M),
grammar_to_atts(S, M).
grammar_to_atts(S, M) :-
nb_setval(grammar_fast,on),
get_internal(S, InternalS, Proof),
path_choices(M:InternalS,Proof).
path_choices(InternalS, Proof) :-
new_id(Id),
call(InternalS),
/* use Ids because we may have repeated examples */
assert(new_proof(Id,Proof)).
new_id(Id) :-
(nb_getval(grammar_id,Id) ->
I1 is Id+1,
nb_setval(grammar_id,I1)
;
nb_setval(grammar_id,1),
Id = 0
).
find_dom(K, Vs, Ps) :-
findall(V,id(_,K,_,V),Vs),
gen_ps(Vs, Ps).
gen_ps([], []).
gen_ps([_|Vs], [1.0|Ps]) :-
gen_ps(Vs, Ps).
init_pcg_solver(_, _, _, _).
run_pcg_solver(LVs, LPs, _) :-
init_prob_array(Array, ExArray),
add_proofs_to_array(Array, ExArray),
matrix:matrix_to_list(Array,L), writeln(L),
out_to_vs(LVs, LPs, Array).
add_proofs_to_array(Array, ExArray) :-
nb_getval(grammar_id,IdMax),
from(0,IdMax,Id),
matrix_set_all(ExArray,0.0),
sum_proofs(Id, ExArray),
% matrix:matrix_to_list(ExArray,L0), writeln(Id:L0),
matrix_op(ExArray, Array, +, Array),
% matrix:matrix_to_list(Array,L0),writeln(i:L0),
fail.
add_proofs_to_array(_,_).
from(I,_,I).
from(I0,Id,I) :-
I1 is I0+1,
I1 < Id,
from(I1,Id,I).
sum_proofs(Id, ExArray) :-
findall(P, add_proof(Id, ExArray, P), Ps),
sum_list(Ps,TotP),
matrix_op_to_all(ExArray,/,TotP,ExArray).
add_proof(Id, ExArray, P) :-
new_proof(Id,Proof),
extract_probability(Proof, P),
add_to_array(Proof, P, ExArray).
add_to_array(p(Id, Goals), P, Array) :-
add(Id, P, Array),
add_to_array_goals(Goals, P, Array).
add_to_array_goals([], _, _).
add_to_array_goals([G|Goals], P, Array) :-
add_to_array(G, P, Array),
add_to_array_goals(Goals, P, Array).
init_prob_array(Array, ExArray) :-
predicate_property(id(_,_,_,_),number_of_clauses(Cls)),
matrix_new(floats, [Cls], Array),
matrix_new(floats, [Cls], ExArray).
add(Id, P, Array) :-
matrix_add(Array, [Id], P).
out_to_vs([], [], _).
out_to_vs([[V]|LVs], [Ps|LPs], Array) :-
clpbn:get_atts(V, [key(Key)]),
findall(P,count_for_key(Key,P,Array),Ps),
out_to_vs(LVs, LPs, Array).
count_for_key(Key,P,Array) :-
id(Id,Key,_,_),
matrix_get(Array, [Id], P).
% generate attributed variables to make EM happy.
% just as few as possible
%
pcg_init_graph :-
setof(K,I^P^S^id(I,K,P,S),Ks),
generate_atts(Ks).
generate_atts([]).
generate_atts([Key|KVs]) :-
find_dom(Key, Dom, Ps),
{ _ = Key with p(Dom,Ps) },
generate_atts(KVs).

View File

@ -8,7 +8,8 @@
[append/3]).
:- use_module(library(clpbn),
[clpbn_init_solver/5,
[clpbn_init_graph/1,
clpbn_init_solver/5,
clpbn_run_solver/4,
clpbn_flag/2]).
@ -61,9 +62,7 @@ em(_, _, _, Tables, Likelihood) :-
handle_em(error(repeated_parents)) :-
assert(em_found(_, -inf)),
fail.
fail.
% This gets you an initial configuration. If there is a lot of evidence
% tables may be filled in close to optimal, otherwise they may be
@ -75,7 +74,9 @@ handle_em(error(repeated_parents)) :-
% the list of distributions for which we want to compute parameters,
% and more detailed info on distributions, namely with a list of all instances for the distribution.
init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
run_all(Items),
clpbn_flag(em_solver, Solver),
clpbn_init_graph(Solver),
call_run_all(Items),
% randomise_all_dists,
uniformise_all_dists,
attributes:all_attvars(AllVars0),
@ -83,14 +84,13 @@ init_em(Items, state( AllDists, AllDistInstances, MargVars, SolverVars)) :-
% remove variables that do not have to do with this query.
% check_for_hidden_vars(AllVars1, AllVars1, AllVars),
different_dists(AllVars, AllDists, AllDistInstances, MargVars),
clpbn_flag(em_solver, Solver),
clpbn_init_solver(Solver, MargVars, AllVars, _, SolverVars).
% loop for as long as you want.
em_loop(Its, Likelihood0, State, MaxError, MaxIts, LikelihoodF, FTables) :-
estimate(State, LPs),
maximise(State, Tables, LPs, Likelihood),
% writeln(Likelihood:Its:Likelihood0:Tables),
writeln(Likelihood:Its:Likelihood0:Tables),
(
(
abs((Likelihood - Likelihood0)/Likelihood) < MaxError
@ -226,4 +226,16 @@ run_sample([C|Cases], [P|Ps], Table) :-
matrix_add(Table, C, P),
run_sample(Cases, Ps, Table).
call_run_all(Mod:Items) :-
clpbn_flag(em_solver, pcg),
backtrack_run_all(Items, Mod).
call_run_all(Items) :-
clpbn_flag(em_solver, pcg),
run_all(Items).
backtrack_run_all([Item|_], Mod) :-
call(Mod:Item),
fail.
backtrack_run_all([_|Items], Mod) :-
backtrack_run_all(Items, Mod).
backtrack_run_all([], _).