significant improvements on viterbi performance for HMMs.

git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1389 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
vsc 2005-09-09 17:22:33 +00:00
parent 787d2daa8a
commit de3bd15889
4 changed files with 221 additions and 190 deletions

80
CLPBN/clpbn/hmm.yap Normal file
View File

@ -0,0 +1,80 @@
:- module(hmm, [init_hmm/0,
hmm_state/1,
emission/4]).
:- ensure_loaded(library(clpbn)).
:- use_module(library(lists),
[nth/3]).
:- meta_predicate hmm_state(:).
:- dynamic hmm_tabled/1.
:- attribute emission/1.
:- ensure_loaded(library('clpbn/viterbi')).
init_hmm :-
retractall(hmm_tabled(_)).
hmm_state(Mod:A) :- !, hmm_state(A,Mod).
hmm_state(A) :- prolog_flag(typein_module,Mod), hmm_state(A,Mod).
hmm_state(Mod:N/A,_) :- !,
hmm_state(N/A,Mod).
hmm_state((A,B),Mod) :- !,
hmm_state(A,Mod),
hmm_state(B,Mod).
hmm_state(N/A,Mod) :-
atom_codes(N,[TC|_]),
atom_codes(T,[TC]),
build_args(A,LArgs,KArgs,Last),
Key =.. [T|KArgs],
Head =.. [N|LArgs],
asserta_static( Mod:(Head :-
(
hmm:hmm_tabled(Key)
->
% leave work for solver!
%
%format(' ~w~n',[Key]),
Last = Key, !
% clpbn:put_atts(Last,[key(Key)]), !
;
% first time we saw this entry
%format('+~w~n',[Key]),
%write(Key),nl,
%(Key = d(30,46) -> start_low_level_trace ; stop_low_level_trace),
assert(hmm:hmm_tabled(Key)), fail
)
)
).
build_args(3,[A,B,C],[A,B],C).
build_args(2,[A,B],[A],B).
find_var(Key,Last) :-
array_element(hmm_tree,1,Tree),
lookup(Key, Tree, Last).
emission(Vals,CPT,Ev,V) :-
cvt_vals(Vals,LVals),
once(nth(Nth, LVals, Ev)),
find_probs(CPT,Nth,Prob),
put_atts(V,[emission(Prob)]).
cvt_vals(aminoacids,[a, c, d, e, f, g, h, i, k, l, m, n, p, q, r, s, t, v, w, y]).
cvt_vals(bool,[t,f]).
cvt_vals(bases,[a,c,g,t]).
cvt_vals([A|B],[A|B]).
% first, try standard representation
find_probs(Logs,Nth,Log) :-
arg(Nth,Logs,Log).

View File

@ -1,8 +1,13 @@
:- module(topsort, [topsort/2]). :- module(topsort, [topsort/2,
topsort/3,
reversed_topsort/3]).
:- use_module(library(ordsets), :- use_module(library(ordsets),
[ord_subtract/3]). [ord_subtract/3,
ord_insert/3]).
:- attribute index/1,count/1.
/* simple implementation of a topological sorting algorithm */ /* simple implementation of a topological sorting algorithm */
/* graph is as Node-[Parents] */ /* graph is as Node-[Parents] */
@ -14,14 +19,36 @@ topsort(Graph0,Sorted) :-
delete_parents(Graph1, Included, NoParents), delete_parents(Graph1, Included, NoParents),
topsort(NoParents, SortedRest). topsort(NoParents, SortedRest).
add_parentless([], Sorted, [], [], Sorted). topsort([], Sorted0, Sorted0) :- !.
add_parentless([Node-[]|Graph0], [Node|Sorted], [Node|Included], Graph1, SortedRest) :- !, topsort(Graph0,Sorted0, Sorted) :-
add_parentless(Graph0, Sorted, Included, Graph1, SortedRest). add_parentless(Graph0, Sorted, IncludedI, Graph1, SortedRest),
add_parentless([Node|Graph0], Sorted, Included, [Node|Graph1], SortedRest) :- sort(IncludedI, Included),
add_parentless(Graph0, Sorted, Included, Graph1, SortedRest). delete_parents(Graph1, Included, NoParents),
topsort(NoParents, Sorted0, SortedRest).
%
% add the first elements found by topsort to the end of the list, so we
% have: a-> [], b -> [], c->[a,b], d ->[b,c] gives [d,c,a,b|Sorted0]
%
reversed_topsort([], Sorted, Sorted) :- !.
reversed_topsort(Graph0, Sorted0, Sorted) :-
add_parentless(Graph0, [], SortedRest, New, Graph1, Sorted0),
delete_parents(Graph1, New, NoParents),
reversed_topsort(NoParents, SortedRest, Sorted).
add_parentless([], New, Sorted, New, [], Sorted).
add_parentless([Node-Parents|Graph0], New, Sorted, Included, Graph1, SortedRest) :-
% Parents = [], !,
ord_subtract(Parents,New,[]), !,
ord_insert(New, Node, NNew),
add_parentless(Graph0, NNew, Sorted, Included, Graph1, [Node|SortedRest]).
add_parentless([Node|Graph0], New, Sorted, Included, [Node|Graph1], SortedRest) :-
add_parentless(Graph0, New, Sorted, Included, Graph1, SortedRest).
delete_parents([], _, []). delete_parents([], _, []).
delete_parents([Node-Parents|Graph1], Included, [Node-NewParents|NoParents]) :- delete_parents([Node-Parents|Graph1], Included, [Node-NewParents|NoParents]) :-
ord_subtract(Parents, Included, NewParents), ord_subtract(Parents, Included, NewParents),
delete_parents(Graph1, Included, NoParents). delete_parents(Graph1, Included, NoParents).

View File

@ -3,7 +3,7 @@
clpbn_var_member/2, clpbn_var_member/2,
check_for_hidden_vars/3, check_for_hidden_vars/3,
sort_vars_by_key/3, sort_vars_by_key/3,
sort_vars_by_key_and_parents/3]). sort_vars_by_key_and_parents/4]).
% %
% It may happen that variables from a previous query may still be around. % It may happen that variables from a previous query may still be around.
@ -37,10 +37,10 @@ clpbn_not_var_member([V1|Vs], V) :- V1 \== V,
clpbn_not_var_member(Vs, V). clpbn_not_var_member(Vs, V).
sort_vars_by_key(AVars, SortedAVars, UnifiableVars) :- sort_vars_by_key(AVars, SortedAVars, Keys) :-
get_keys(AVars, KeysVars), get_keys(AVars, KeysVars),
keysort(KeysVars, KVars), keysort(KeysVars, KVars),
merge_same_key(KVars, SortedAVars, [], UnifiableVars). merge_same_key(KVars, SortedAVars, Keys).
get_keys([], []). get_keys([], []).
get_keys([V|AVars], [K-V|KeysVars]) :- get_keys([V|AVars], [K-V|KeysVars]) :-
@ -49,56 +49,13 @@ get_keys([V|AVars], [K-V|KeysVars]) :-
get_keys([_|AVars], KeysVars) :- % may be non-CLPBN vars. get_keys([_|AVars], KeysVars) :- % may be non-CLPBN vars.
get_keys(AVars, KeysVars). get_keys(AVars, KeysVars).
merge_same_key([], [], _, []). merge_same_key([], [], []).
merge_same_key([K1-V1,K2-V2|Vs], SortedAVars, Ks, UnifiableVars) :- merge_same_key([K1-V1|KVs], [V1|Vs], [K1|Ks]) :-
K1 == K2, !, V1 = V2, eat_same_key(KVs,K1,V1,RKVs),
merge_same_key([K1-V1|Vs], SortedAVars, Ks, UnifiableVars). merge_same_key(RKVs, Vs, Ks).
merge_same_key([K1-V1,K2-V2|Vs], [V1|SortedAVars], Ks, [K1|UnifiableVars]) :-
(in_keys(K1, Ks) ; \+ \+ K1 == K2), !,
add_to_keys(K1, Ks, NKs),
merge_same_key([K2-V2|Vs], SortedAVars, NKs, UnifiableVars).
merge_same_key([K-V|Vs], [V|SortedAVars], Ks, UnifiableVars) :-
add_to_keys(K, Ks, NKs),
merge_same_key(Vs, SortedAVars, NKs, UnifiableVars).
in_keys(K1,[K|_]) :- \+ \+ K1 = K, !. eat_same_key([K-V|KVs],K,V,RKVs) :- !,
in_keys(K1,[_|Ks]) :- eat_same_key(KVs,K,V,RKVs).
in_keys(K1,Ks). eat_same_key(KVs,_,_,KVs).
add_to_keys(K1, Ks, Ks) :- ground(K1), !.
add_to_keys(K1, Ks, [K1|Ks]).
sort_vars_by_key_and_parents(AVars, SortedAVars, UnifiableVars) :-
get_keys_and_parents(AVars, KeysVars),
keysort(KeysVars, KVars),
merge_same_key(KVars, SortedAVars, [], UnifiableVars).
get_keys_and_parents([], []).
get_keys_and_parents([V|AVars], [K-V|KeysVarsF]) :-
clpbn:get_atts(V, [key(K),dist(D,T,Parents)]), !,
add_parents(Parents,V,D,T,KeysVarsF,KeysVars0),
get_keys_and_parents(AVars, KeysVars0).
get_keys_and_parents([_|AVars], KeysVars) :- % may be non-CLPBN vars.
get_keys_and_parents(AVars, KeysVars).
add_parents(Parents,_,_,_,KeyVars,KeyVars) :-
all_vars(Parents), !.
add_parents(Parents,V,D,T,KeyVarsF,KeyVars0) :-
transform_parents(Parents,NParents,KeyVarsF,KeyVars0),
clpbn:put_atts(V, [dist(D,T,NParents)]).
all_vars([]).
all_vars([P|Parents]) :-
var(P),
all_vars(Parents).
transform_parents([],[],KeyVars,KeyVars).
transform_parents([P|Parents0],[P|NParents],KeyVarsF,KeyVars0) :-
var(P), !,
transform_parents(Parents0,NParents,KeyVarsF,KeyVars0).
transform_parents([P|Parents0],[V|NParents],[P-V|KeyVarsF],KeyVars0) :-
transform_parents(Parents0,NParents,KeyVarsF,KeyVars0).

View File

@ -1,165 +1,139 @@
:- style_check(all). %:- style_check(all).
:- module(viterbi, [viterbi/3]). :- module(viterbi, [viterbi/5]).
:- use_module(library(lists), :- use_module(library(lists),
[nth/3]). [nth/3]).
:- use_module(library('clpbn'), []). :- use_module(library(clpbn), []).
:- use_module(library('clpbn/utils'), [ :- attribute prob/1, emission/1, backp/1, ancestors/1.
sort_vars_by_key_and_parents/3]).
:- use_module(library('ugraphs'), [ viterbi(Start,End,Trace,Ticks,Slices) :-
vertices_edges_to_ugraph/3,
add_edges/3,
top_sort/3]).
:- attribute prob/1, emission/1, backp/1.
viterbi(Start,End,Trace) :-
attributes:all_attvars(Vars0), attributes:all_attvars(Vars0),
sort_vars_by_key_and_parents(Vars0,Vars,_), group_vars_by_key_and_parents(Vars0,Ticks,Slices),
add_emissions(Vars),
topsort_vars(Vars,SortedVars),
init_viterbi(Start), init_viterbi(Start),
viterbi_alg(SortedVars), viterbi_alg([Start|R],R),
backtrace(Start,End,[],Trace). backtrace(Start,End,[],Trace).
group_vars_by_key_and_parents(AVars, NTicks, Slices) :-
NTicks1 is NTicks+2,
functor(Hashes,slots,NTicks1),
NSlices is Slices+2,
build_slices(0,NTicks1,NSlices,Hashes),
get_keys(AVars, Hashes),
get_parents(AVars, Hashes).
add_emissions([]). build_slices(NTicks,NTicks,_,_) :- !.
add_emissions([Var|Vars]) :- build_slices(I0,NTicks,NSlices,Hashes) :-
add_emission(Var), functor(Slice,slices,NSlices),
add_emissions(Vars). I is I0+1,
arg(I,Hashes,Slice),
add_emission(Var) :- build_slices(I,NTicks,NSlices,Hashes).
clpbn:get_atts(Var,[key(K),evidence(Ev),dist(Vals,emission(CPT),Parents)]), !,
cvt_vals(Vals,LVals),
once(nth(Nth, LVals, Ev)),
find_probs(CPT,K,Nth,Prob),
adde2pars(Parents,Prob).
add_emission(_).
% get_keys([], _).
% well known domains get_keys([V|AVars], Trees) :-
% clpbn:get_atts(V, [key(K)]), !,
cvt_vals(aminoacids,[a, c, d, e, f, g, h, i, k, l, m, n, p, q, r, s, t, v, w, y]). arg(1,K,Time0),
cvt_vals(bool,[t,f]). Time is Time0+1,
cvt_vals(bases,[a,c,g,t]). arg(Time, Trees, Tree),
cvt_vals([A|B],[A|B]). make_key(K, TKey),
arg(TKey, Tree, List),
lookup(List, K, V),
get_keys(AVars, Trees).
get_keys([_|AVars], Trees) :- % may be non-CLPBN vars.
get_keys(AVars, Trees).
get_parents([], _).
get_parents([V|AVars], Trees) :-
clpbn:get_atts(V, [dist(D,T,Parents)]), !,
%clpbn:get_atts(V, [key(K)]), format('~w (~w): ~w~n',[V,K,Parents]),
add_parents(Parents,V,D,T,Trees),
get_parents(AVars, Trees).
get_parents([_|AVars], Trees) :- % may be non-CLPBN vars.
get_parents(AVars, Trees).
% first, try standard representation add_parents(Parents,V,D,T,Trees) :-
find_probs(Logs,_,Nth,Log) :- transform_parents(Parents,NParents,Copy,Trees),
compound(Logs), ( var(Copy) -> true ; clpbn:put_atts(V, [dist(D,T,NParents)]) ).
arg(Nth,Logs,Log), !.
% key independent, compacted
find_probs(A,_,Nth,Log) :- atom(A), !,
user:compacted_cpt(A,Logs),
arg(Nth,Logs,Log).
% key dependent, compacted
find_probs(I,K,Nth,Log) :- integer(I), !,
functor(K,A,_),
user:compacted_cpt(A,I,Logs),
arg(Nth,Logs,Log).
% now, try hacked representation
% get_norm(Norms,Nth,Norm).
%get_norm(Norms,_,Norms) :- number(Norms), !. transform_parents([],[],_,_).
%get_norm(Norms,Nth,Norm) :- transform_parents([P|Parents0],[P|NParents],Copy,Trees) :-
% arg(Nth,Norms,Norm). var(P), !,
inc_ancestors(P),
transform_parents(Parents0,NParents,Copy,Trees).
transform_parents([P|Parents0],[V|NParents],copy,Trees) :-
arg(1,P,Time0),
Time is Time0+1,
arg(Time, Trees, Tree),
make_key(P, TKey),
arg(TKey, Tree, List),
lookup(List, P, V),
inc_ancestors(V),
transform_parents(Parents0,NParents,copy,Trees).
adde2pars([],_). inc_ancestors(P) :-
adde2pars([V|Vs],P) :- get_atts(P,[ancestors(N)]), !,
put_atts(V,[emission(P)]), N1 is N+1,
adde2pars(Vs,P). %format(' ~w->~d:~n',[P,N1]),
put_atts(P,[ancestors(N1)]).
inc_ancestors(P) :-
%format(' ~w->1:~n',[P]),
put_atts(P,[ancestors(1)]).
topsort_vars(Vars,SortedVars) :- make_key(T,K) :-
vertices_edges_to_ugraph([],[],Graph0), arg(2,T,I), !,
sort_vars(Vars, Graph0, SortedVars). K is I+2.
make_key(_,1).
% lookup(Tree, K, V) :- var(Tree), !,
% take advantage of the fact that variables can be split by timestamp. Tree = [[K|V]|_].
% lookup([[K1|V]|_],K2,V) :- K1 == K2, !.
sort_vars(Vars, Graph0, SortedVars) :- lookup([_|List],K,V) :-
fetch_times(Vars,T0Vars), lookup(List,K,V).
keysort(T0Vars, TVars),
sort_times(TVars, Graph0, SortedVars).
fetch_times([], []).
fetch_times([V|Vs], [T-V|TVs]) :-
clpbn:get_atts(V,[key(K)]),
arg(1,K,T),
fetch_times(Vs, TVs).
sort_times([], _, []).
sort_times([T-V|TVs], Graph0, SortedVars) :-
fetch_same_time(TVs, T, Vars, NTVs),
fetch_parents([V|Vars],Edges),
add_edges(Graph0, Edges, Graph),
top_sort(Graph,SortedVars0,SortedVars),
sort_times(NTVs, Graph0, SortedVars0).
fetch_same_time([T-V|TVs], T, [V|Vs], TVs0) :- !,
fetch_same_time(TVs, T, Vs, TVs0).
fetch_same_time(TVs, _, [], TVs) :- !.
fetch_parents([],[]).
fetch_parents([V|Vars],EdgesF) :-
clpbn:get_atts(V,[dist(_,_,Parents)]),
exp_edges(Parents,V,EdgesF,Edges0),
fetch_parents(Vars,Edges0).
exp_edges([],_,Edges,Edges).
exp_edges([P|Parents],V,[V-P|Edges],Edges0) :-
exp_edges(Parents,V,Edges,Edges0).
extract_vars([],[]).
extract_vars([_-V|KVars],[V|Vars]) :-
extract_vars(KVars,Vars).
init_viterbi(V) :- init_viterbi(V) :-
put_atts(V,[prob(0)]). put_atts(V,[prob(0)]).
viterbi_alg([]). viterbi_alg(L0, Lf) :- L0 == Lf, !.
viterbi_alg([V|Vs]) :- viterbi_alg([V|Vs], Rs) :-
%format('<< ~w~n',[V]),
% get the current status % get the current status
get_atts(V,[prob(P0)]), !, get_atts(V,[prob(P0)]), !,
% clpbn:get_atts(V,[key(K)]),format('doing(~w)~n',[K]),
clpbn:get_atts(V,[dist(_,trans(Probs),States)]), clpbn:get_atts(V,[dist(_,trans(Probs),States)]),
% adjust to consider emission probabilities % adjust to consider emission probabilities
adjust_for_emission(V, P0, Pf), adjust_for_emission(V, P0, Pf),
propagate(Probs,States,Pf,V), propagate(Probs,States,Pf,V,Rs,NRs),
viterbi_alg(Vs). viterbi_alg(Vs,NRs).
viterbi_alg([_|Vs]) :-
viterbi_alg(Vs).
adjust_for_emission(V, P0, Pf) :- adjust_for_emission(V, P0, Pf) :-
get_atts(V,[emission(P)]), !, hmm:get_atts(V,[emission(P)]), !,
mprob(P0,P,Pf), Pf is P+P0,
put_atts(V,[prob(Pf)]). put_atts(V,[prob(Pf)]).
adjust_for_emission(_, P, P). adjust_for_emission(_, P, P).
propagate([],[],_,_). propagate([],[],_,_,Rs,Rs).
propagate([-inf|Probs],[_|States],Pf,V) :- !, propagate([Prob|Probs],[State|States],Pf,V,Rs,Rs0) :-
propagate(Probs,States,Pf,V). %format(' ~w~n',[State]),
propagate([Prob|Probs],[State|States],Pf,V) :- get_atts(State,[prob(P0),ancestors(N)]), !,
get_atts(State,[prob(P0)]), !,
mprob(Pf,Prob,P), mprob(Pf,Prob,P),
N1 is N-1,
(P > P0 -> (P > P0 ->
put_atts(State,[prob(P),backp(V)]) put_atts(State,[prob(P),backp(V),ancestors(N1)])
; ;
true put_atts(State,[ancestors(N1)])
), ),
% clpbn:get_atts(State,[key(K)]),format(' ~w: ~w -> ~w~n',[K,P0,P]), (N1 == 0 -> Rs = [State|NRs] ; Rs = NRs),
propagate(Probs,States,Pf,V). propagate(Probs,States,Pf,V,NRs,Rs0).
propagate([Prob|Probs],[State|States],Pf,V) :- propagate([Prob|Probs],[State|States],Pf,V,Rs,Rs0) :-
get_atts(State,[ancestors(N)]), !,
N1 is N-1,
mprob(Pf,Prob,P), mprob(Pf,Prob,P),
put_atts(State,[prob(P),backp(V)]), put_atts(State,[prob(P),backp(V),ancestors(N1)]),
% clpbn:get_atts(State,[key(K)]),format(' ~w: ~w!!~n',[K,P]), (N1 == 0 -> Rs = [State|NRs] ; Rs = NRs),
propagate(Probs,States,Pf,V). propagate(Probs,States,Pf,V,NRs,Rs0).
backtrace(Start,Var,Trace,Trace) :- Start == Var, !. backtrace(Start,Var,Trace,Trace) :- Start == Var, !.
backtrace(Start,Var,Trace0,Trace) :- backtrace(Start,Var,Trace0,Trace) :-
@ -167,11 +141,4 @@ backtrace(Start,Var,Trace0,Trace) :-
clpbn:get_atts(Var, [key(K)]), clpbn:get_atts(Var, [key(K)]),
backtrace(Start,V,[K|Trace0],Trace). backtrace(Start,V,[K|Trace0],Trace).
mprob(P0,P1,Pf) :- Pf is P0+P1.
mprob(*,_,-inf) :- !.
mprob(_,*,-inf) :- !.
mprob(P1,P2,P) :- P is P1+P2.