significant improvements on viterbi performance for HMMs.

git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1389 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
vsc 2005-09-09 17:22:33 +00:00
parent 787d2daa8a
commit de3bd15889
4 changed files with 221 additions and 190 deletions

80
CLPBN/clpbn/hmm.yap Normal file
View File

@ -0,0 +1,80 @@
:- module(hmm, [init_hmm/0,
hmm_state/1,
emission/4]).
:- ensure_loaded(library(clpbn)).
:- use_module(library(lists),
[nth/3]).
:- meta_predicate hmm_state(:).
:- dynamic hmm_tabled/1.
:- attribute emission/1.
:- ensure_loaded(library('clpbn/viterbi')).
init_hmm :-
retractall(hmm_tabled(_)).
hmm_state(Mod:A) :- !, hmm_state(A,Mod).
hmm_state(A) :- prolog_flag(typein_module,Mod), hmm_state(A,Mod).
hmm_state(Mod:N/A,_) :- !,
hmm_state(N/A,Mod).
hmm_state((A,B),Mod) :- !,
hmm_state(A,Mod),
hmm_state(B,Mod).
hmm_state(N/A,Mod) :-
atom_codes(N,[TC|_]),
atom_codes(T,[TC]),
build_args(A,LArgs,KArgs,Last),
Key =.. [T|KArgs],
Head =.. [N|LArgs],
asserta_static( Mod:(Head :-
(
hmm:hmm_tabled(Key)
->
% leave work for solver!
%
%format(' ~w~n',[Key]),
Last = Key, !
% clpbn:put_atts(Last,[key(Key)]), !
;
% first time we saw this entry
%format('+~w~n',[Key]),
%write(Key),nl,
%(Key = d(30,46) -> start_low_level_trace ; stop_low_level_trace),
assert(hmm:hmm_tabled(Key)), fail
)
)
).
build_args(3,[A,B,C],[A,B],C).
build_args(2,[A,B],[A],B).
find_var(Key,Last) :-
array_element(hmm_tree,1,Tree),
lookup(Key, Tree, Last).
emission(Vals,CPT,Ev,V) :-
cvt_vals(Vals,LVals),
once(nth(Nth, LVals, Ev)),
find_probs(CPT,Nth,Prob),
put_atts(V,[emission(Prob)]).
cvt_vals(aminoacids,[a, c, d, e, f, g, h, i, k, l, m, n, p, q, r, s, t, v, w, y]).
cvt_vals(bool,[t,f]).
cvt_vals(bases,[a,c,g,t]).
cvt_vals([A|B],[A|B]).
% first, try standard representation
find_probs(Logs,Nth,Log) :-
arg(Nth,Logs,Log).

View File

@ -1,8 +1,13 @@
:- module(topsort, [topsort/2]).
:- module(topsort, [topsort/2,
topsort/3,
reversed_topsort/3]).
:- use_module(library(ordsets),
[ord_subtract/3]).
[ord_subtract/3,
ord_insert/3]).
:- attribute index/1,count/1.
/* simple implementation of a topological sorting algorithm */
/* graph is as Node-[Parents] */
@ -14,14 +19,36 @@ topsort(Graph0,Sorted) :-
delete_parents(Graph1, Included, NoParents),
topsort(NoParents, SortedRest).
add_parentless([], Sorted, [], [], Sorted).
add_parentless([Node-[]|Graph0], [Node|Sorted], [Node|Included], Graph1, SortedRest) :- !,
add_parentless(Graph0, Sorted, Included, Graph1, SortedRest).
add_parentless([Node|Graph0], Sorted, Included, [Node|Graph1], SortedRest) :-
add_parentless(Graph0, Sorted, Included, Graph1, SortedRest).
topsort([], Sorted0, Sorted0) :- !.
topsort(Graph0,Sorted0, Sorted) :-
add_parentless(Graph0, Sorted, IncludedI, Graph1, SortedRest),
sort(IncludedI, Included),
delete_parents(Graph1, Included, NoParents),
topsort(NoParents, Sorted0, SortedRest).
%
% add the first elements found by topsort to the end of the list, so we
% have: a-> [], b -> [], c->[a,b], d ->[b,c] gives [d,c,a,b|Sorted0]
%
reversed_topsort([], Sorted, Sorted) :- !.
reversed_topsort(Graph0, Sorted0, Sorted) :-
add_parentless(Graph0, [], SortedRest, New, Graph1, Sorted0),
delete_parents(Graph1, New, NoParents),
reversed_topsort(NoParents, SortedRest, Sorted).
add_parentless([], New, Sorted, New, [], Sorted).
add_parentless([Node-Parents|Graph0], New, Sorted, Included, Graph1, SortedRest) :-
% Parents = [], !,
ord_subtract(Parents,New,[]), !,
ord_insert(New, Node, NNew),
add_parentless(Graph0, NNew, Sorted, Included, Graph1, [Node|SortedRest]).
add_parentless([Node|Graph0], New, Sorted, Included, [Node|Graph1], SortedRest) :-
add_parentless(Graph0, New, Sorted, Included, Graph1, SortedRest).
delete_parents([], _, []).
delete_parents([Node-Parents|Graph1], Included, [Node-NewParents|NoParents]) :-
ord_subtract(Parents, Included, NewParents),
delete_parents(Graph1, Included, NoParents).

View File

@ -3,7 +3,7 @@
clpbn_var_member/2,
check_for_hidden_vars/3,
sort_vars_by_key/3,
sort_vars_by_key_and_parents/3]).
sort_vars_by_key_and_parents/4]).
%
% It may happen that variables from a previous query may still be around.
@ -37,10 +37,10 @@ clpbn_not_var_member([V1|Vs], V) :- V1 \== V,
clpbn_not_var_member(Vs, V).
sort_vars_by_key(AVars, SortedAVars, UnifiableVars) :-
sort_vars_by_key(AVars, SortedAVars, Keys) :-
get_keys(AVars, KeysVars),
keysort(KeysVars, KVars),
merge_same_key(KVars, SortedAVars, [], UnifiableVars).
merge_same_key(KVars, SortedAVars, Keys).
get_keys([], []).
get_keys([V|AVars], [K-V|KeysVars]) :-
@ -49,56 +49,13 @@ get_keys([V|AVars], [K-V|KeysVars]) :-
get_keys([_|AVars], KeysVars) :- % may be non-CLPBN vars.
get_keys(AVars, KeysVars).
merge_same_key([], [], _, []).
merge_same_key([K1-V1,K2-V2|Vs], SortedAVars, Ks, UnifiableVars) :-
K1 == K2, !, V1 = V2,
merge_same_key([K1-V1|Vs], SortedAVars, Ks, UnifiableVars).
merge_same_key([K1-V1,K2-V2|Vs], [V1|SortedAVars], Ks, [K1|UnifiableVars]) :-
(in_keys(K1, Ks) ; \+ \+ K1 == K2), !,
add_to_keys(K1, Ks, NKs),
merge_same_key([K2-V2|Vs], SortedAVars, NKs, UnifiableVars).
merge_same_key([K-V|Vs], [V|SortedAVars], Ks, UnifiableVars) :-
add_to_keys(K, Ks, NKs),
merge_same_key(Vs, SortedAVars, NKs, UnifiableVars).
merge_same_key([], [], []).
merge_same_key([K1-V1|KVs], [V1|Vs], [K1|Ks]) :-
eat_same_key(KVs,K1,V1,RKVs),
merge_same_key(RKVs, Vs, Ks).
in_keys(K1,[K|_]) :- \+ \+ K1 = K, !.
in_keys(K1,[_|Ks]) :-
in_keys(K1,Ks).
add_to_keys(K1, Ks, Ks) :- ground(K1), !.
add_to_keys(K1, Ks, [K1|Ks]).
sort_vars_by_key_and_parents(AVars, SortedAVars, UnifiableVars) :-
get_keys_and_parents(AVars, KeysVars),
keysort(KeysVars, KVars),
merge_same_key(KVars, SortedAVars, [], UnifiableVars).
get_keys_and_parents([], []).
get_keys_and_parents([V|AVars], [K-V|KeysVarsF]) :-
clpbn:get_atts(V, [key(K),dist(D,T,Parents)]), !,
add_parents(Parents,V,D,T,KeysVarsF,KeysVars0),
get_keys_and_parents(AVars, KeysVars0).
get_keys_and_parents([_|AVars], KeysVars) :- % may be non-CLPBN vars.
get_keys_and_parents(AVars, KeysVars).
add_parents(Parents,_,_,_,KeyVars,KeyVars) :-
all_vars(Parents), !.
add_parents(Parents,V,D,T,KeyVarsF,KeyVars0) :-
transform_parents(Parents,NParents,KeyVarsF,KeyVars0),
clpbn:put_atts(V, [dist(D,T,NParents)]).
all_vars([]).
all_vars([P|Parents]) :-
var(P),
all_vars(Parents).
transform_parents([],[],KeyVars,KeyVars).
transform_parents([P|Parents0],[P|NParents],KeyVarsF,KeyVars0) :-
var(P), !,
transform_parents(Parents0,NParents,KeyVarsF,KeyVars0).
transform_parents([P|Parents0],[V|NParents],[P-V|KeyVarsF],KeyVars0) :-
transform_parents(Parents0,NParents,KeyVarsF,KeyVars0).
eat_same_key([K-V|KVs],K,V,RKVs) :- !,
eat_same_key(KVs,K,V,RKVs).
eat_same_key(KVs,_,_,KVs).

View File

@ -1,165 +1,139 @@
:- style_check(all).
%:- style_check(all).
:- module(viterbi, [viterbi/3]).
:- module(viterbi, [viterbi/5]).
:- use_module(library(lists),
[nth/3]).
:- use_module(library('clpbn'), []).
:- use_module(library(clpbn), []).
:- use_module(library('clpbn/utils'), [
sort_vars_by_key_and_parents/3]).
:- attribute prob/1, emission/1, backp/1, ancestors/1.
:- use_module(library('ugraphs'), [
vertices_edges_to_ugraph/3,
add_edges/3,
top_sort/3]).
:- attribute prob/1, emission/1, backp/1.
viterbi(Start,End,Trace) :-
viterbi(Start,End,Trace,Ticks,Slices) :-
attributes:all_attvars(Vars0),
sort_vars_by_key_and_parents(Vars0,Vars,_),
add_emissions(Vars),
topsort_vars(Vars,SortedVars),
group_vars_by_key_and_parents(Vars0,Ticks,Slices),
init_viterbi(Start),
viterbi_alg(SortedVars),
viterbi_alg([Start|R],R),
backtrace(Start,End,[],Trace).
group_vars_by_key_and_parents(AVars, NTicks, Slices) :-
NTicks1 is NTicks+2,
functor(Hashes,slots,NTicks1),
NSlices is Slices+2,
build_slices(0,NTicks1,NSlices,Hashes),
get_keys(AVars, Hashes),
get_parents(AVars, Hashes).
add_emissions([]).
add_emissions([Var|Vars]) :-
add_emission(Var),
add_emissions(Vars).
add_emission(Var) :-
clpbn:get_atts(Var,[key(K),evidence(Ev),dist(Vals,emission(CPT),Parents)]), !,
cvt_vals(Vals,LVals),
once(nth(Nth, LVals, Ev)),
find_probs(CPT,K,Nth,Prob),
adde2pars(Parents,Prob).
add_emission(_).
build_slices(NTicks,NTicks,_,_) :- !.
build_slices(I0,NTicks,NSlices,Hashes) :-
functor(Slice,slices,NSlices),
I is I0+1,
arg(I,Hashes,Slice),
build_slices(I,NTicks,NSlices,Hashes).
%
% well known domains
%
cvt_vals(aminoacids,[a, c, d, e, f, g, h, i, k, l, m, n, p, q, r, s, t, v, w, y]).
cvt_vals(bool,[t,f]).
cvt_vals(bases,[a,c,g,t]).
cvt_vals([A|B],[A|B]).
get_keys([], _).
get_keys([V|AVars], Trees) :-
clpbn:get_atts(V, [key(K)]), !,
arg(1,K,Time0),
Time is Time0+1,
arg(Time, Trees, Tree),
make_key(K, TKey),
arg(TKey, Tree, List),
lookup(List, K, V),
get_keys(AVars, Trees).
get_keys([_|AVars], Trees) :- % may be non-CLPBN vars.
get_keys(AVars, Trees).
get_parents([], _).
get_parents([V|AVars], Trees) :-
clpbn:get_atts(V, [dist(D,T,Parents)]), !,
%clpbn:get_atts(V, [key(K)]), format('~w (~w): ~w~n',[V,K,Parents]),
add_parents(Parents,V,D,T,Trees),
get_parents(AVars, Trees).
get_parents([_|AVars], Trees) :- % may be non-CLPBN vars.
get_parents(AVars, Trees).
% first, try standard representation
find_probs(Logs,_,Nth,Log) :-
compound(Logs),
arg(Nth,Logs,Log), !.
% key independent, compacted
find_probs(A,_,Nth,Log) :- atom(A), !,
user:compacted_cpt(A,Logs),
arg(Nth,Logs,Log).
% key dependent, compacted
find_probs(I,K,Nth,Log) :- integer(I), !,
functor(K,A,_),
user:compacted_cpt(A,I,Logs),
arg(Nth,Logs,Log).
% now, try hacked representation
% get_norm(Norms,Nth,Norm).
add_parents(Parents,V,D,T,Trees) :-
transform_parents(Parents,NParents,Copy,Trees),
( var(Copy) -> true ; clpbn:put_atts(V, [dist(D,T,NParents)]) ).
%get_norm(Norms,_,Norms) :- number(Norms), !.
%get_norm(Norms,Nth,Norm) :-
% arg(Nth,Norms,Norm).
transform_parents([],[],_,_).
transform_parents([P|Parents0],[P|NParents],Copy,Trees) :-
var(P), !,
inc_ancestors(P),
transform_parents(Parents0,NParents,Copy,Trees).
transform_parents([P|Parents0],[V|NParents],copy,Trees) :-
arg(1,P,Time0),
Time is Time0+1,
arg(Time, Trees, Tree),
make_key(P, TKey),
arg(TKey, Tree, List),
lookup(List, P, V),
inc_ancestors(V),
transform_parents(Parents0,NParents,copy,Trees).
adde2pars([],_).
adde2pars([V|Vs],P) :-
put_atts(V,[emission(P)]),
adde2pars(Vs,P).
inc_ancestors(P) :-
get_atts(P,[ancestors(N)]), !,
N1 is N+1,
%format(' ~w->~d:~n',[P,N1]),
put_atts(P,[ancestors(N1)]).
inc_ancestors(P) :-
%format(' ~w->1:~n',[P]),
put_atts(P,[ancestors(1)]).
topsort_vars(Vars,SortedVars) :-
vertices_edges_to_ugraph([],[],Graph0),
sort_vars(Vars, Graph0, SortedVars).
make_key(T,K) :-
arg(2,T,I), !,
K is I+2.
make_key(_,1).
%
% take advantage of the fact that variables can be split by timestamp.
%
sort_vars(Vars, Graph0, SortedVars) :-
fetch_times(Vars,T0Vars),
keysort(T0Vars, TVars),
sort_times(TVars, Graph0, SortedVars).
lookup(Tree, K, V) :- var(Tree), !,
Tree = [[K|V]|_].
lookup([[K1|V]|_],K2,V) :- K1 == K2, !.
lookup([_|List],K,V) :-
lookup(List,K,V).
fetch_times([], []).
fetch_times([V|Vs], [T-V|TVs]) :-
clpbn:get_atts(V,[key(K)]),
arg(1,K,T),
fetch_times(Vs, TVs).
sort_times([], _, []).
sort_times([T-V|TVs], Graph0, SortedVars) :-
fetch_same_time(TVs, T, Vars, NTVs),
fetch_parents([V|Vars],Edges),
add_edges(Graph0, Edges, Graph),
top_sort(Graph,SortedVars0,SortedVars),
sort_times(NTVs, Graph0, SortedVars0).
fetch_same_time([T-V|TVs], T, [V|Vs], TVs0) :- !,
fetch_same_time(TVs, T, Vs, TVs0).
fetch_same_time(TVs, _, [], TVs) :- !.
fetch_parents([],[]).
fetch_parents([V|Vars],EdgesF) :-
clpbn:get_atts(V,[dist(_,_,Parents)]),
exp_edges(Parents,V,EdgesF,Edges0),
fetch_parents(Vars,Edges0).
exp_edges([],_,Edges,Edges).
exp_edges([P|Parents],V,[V-P|Edges],Edges0) :-
exp_edges(Parents,V,Edges,Edges0).
extract_vars([],[]).
extract_vars([_-V|KVars],[V|Vars]) :-
extract_vars(KVars,Vars).
init_viterbi(V) :-
put_atts(V,[prob(0)]).
viterbi_alg([]).
viterbi_alg([V|Vs]) :-
viterbi_alg(L0, Lf) :- L0 == Lf, !.
viterbi_alg([V|Vs], Rs) :-
%format('<< ~w~n',[V]),
% get the current status
get_atts(V,[prob(P0)]), !,
% clpbn:get_atts(V,[key(K)]),format('doing(~w)~n',[K]),
clpbn:get_atts(V,[dist(_,trans(Probs),States)]),
% adjust to consider emission probabilities
adjust_for_emission(V, P0, Pf),
propagate(Probs,States,Pf,V),
viterbi_alg(Vs).
viterbi_alg([_|Vs]) :-
viterbi_alg(Vs).
propagate(Probs,States,Pf,V,Rs,NRs),
viterbi_alg(Vs,NRs).
adjust_for_emission(V, P0, Pf) :-
get_atts(V,[emission(P)]), !,
mprob(P0,P,Pf),
hmm:get_atts(V,[emission(P)]), !,
Pf is P+P0,
put_atts(V,[prob(Pf)]).
adjust_for_emission(_, P, P).
propagate([],[],_,_).
propagate([-inf|Probs],[_|States],Pf,V) :- !,
propagate(Probs,States,Pf,V).
propagate([Prob|Probs],[State|States],Pf,V) :-
get_atts(State,[prob(P0)]), !,
propagate([],[],_,_,Rs,Rs).
propagate([Prob|Probs],[State|States],Pf,V,Rs,Rs0) :-
%format(' ~w~n',[State]),
get_atts(State,[prob(P0),ancestors(N)]), !,
mprob(Pf,Prob,P),
N1 is N-1,
(P > P0 ->
put_atts(State,[prob(P),backp(V)])
put_atts(State,[prob(P),backp(V),ancestors(N1)])
;
true
put_atts(State,[ancestors(N1)])
),
% clpbn:get_atts(State,[key(K)]),format(' ~w: ~w -> ~w~n',[K,P0,P]),
propagate(Probs,States,Pf,V).
propagate([Prob|Probs],[State|States],Pf,V) :-
(N1 == 0 -> Rs = [State|NRs] ; Rs = NRs),
propagate(Probs,States,Pf,V,NRs,Rs0).
propagate([Prob|Probs],[State|States],Pf,V,Rs,Rs0) :-
get_atts(State,[ancestors(N)]), !,
N1 is N-1,
mprob(Pf,Prob,P),
put_atts(State,[prob(P),backp(V)]),
% clpbn:get_atts(State,[key(K)]),format(' ~w: ~w!!~n',[K,P]),
propagate(Probs,States,Pf,V).
put_atts(State,[prob(P),backp(V),ancestors(N1)]),
(N1 == 0 -> Rs = [State|NRs] ; Rs = NRs),
propagate(Probs,States,Pf,V,NRs,Rs0).
backtrace(Start,Var,Trace,Trace) :- Start == Var, !.
backtrace(Start,Var,Trace0,Trace) :-
@ -167,11 +141,4 @@ backtrace(Start,Var,Trace0,Trace) :-
clpbn:get_atts(Var, [key(K)]),
backtrace(Start,V,[K|Trace0],Trace).
mprob(*,_,-inf) :- !.
mprob(_,*,-inf) :- !.
mprob(P1,P2,P) :- P is P1+P2.
mprob(P0,P1,Pf) :- Pf is P0+P1.