This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/CLPBN/clpbn/ve.yap
2012-12-20 23:19:10 +00:00

480 lines
13 KiB
Prolog

/***********************************
Variable Elimination in Prolog
How to do it
Three steps:
build the graph:
- for all variables, find out
all tables they connect to;
multiply their size
order by size
*********************************/
:- module(clpbn_ve,
[ve/3,
check_if_ve_done/1,
init_ve_solver/4,
run_ve_solver/3,
init_ve_ground_solver/5,
run_ve_ground_solver/3,
call_ve_ground_solver/6
]).
:- use_module(library(atts)).
:- use_module(library(ordsets),
[ord_union/3,
ord_member/2
]).
:- use_module(library('clpbn/xbif'),
[clpbn2xbif/3]).
:- use_module(library('clpbn/graphviz'),
[clpbn2gviz/4]).
:- use_module(library('clpbn/dists'),
[dist/4,
get_dist_domain_size/2,
get_dist_params/2,
get_dist_domain_size/2,
get_dist_matrix/5
]).
:- use_module(library('clpbn/utils'),
[clpbn_not_var_member/2]).
:- use_module(library('clpbn/display'),
[clpbn_bind_vals/3]).
:- use_module(library('clpbn/connected'),
[init_influences/3,
influences/4,
factor_influences/4
]).
:- use_module(library(clpbn/matrix_cpt_utils)).
:- use_module(library(clpbn/numbers)).
:- use_module(library(lists),
[member/2,
append/3,
delete/3
]).
:- use_module(library(maplist)).
:- use_module(library(rbtrees)).
:- use_module(library(clpbn/vmap)).
:- use_module(library('clpbn/aggregates'),
[check_for_agg_vars/2]).
:- attribute size/1, all_diffs/1.
%
% uses a bipartite graph where bigraph(Vs, NFs, Fs)
% Vs=map variables to lists of factors
% NFs=number of factors
% Fs=map factor id -> f(Id, Vars, Table)
%
% check_if_ve_done(+Var)
% True when Var carries the size/1 attribute declared by this module.
% Commits to the first solution.
check_if_ve_done(Var) :-
    get_atts(Var, [size(_AnySize)]),
    !.
%
% new PFL like interface...
%
% call_ve_ground_solver(+QueryVars, +QueryKeys, +AllKeys, +Factors, +Evidence, -Output)
% Single-query entry point: wraps the query keys and query variables in
% one-element lists, runs the ground solver, and hands the resulting
% solutions to clpbn_bind_vals/3.
call_ve_ground_solver(QueryVars, QueryKeys, AllKeys, Factors, Evidence, Output) :-
    KeyLists = [QueryKeys],
    call_ve_ground_solver_for_probabilities(KeyLists, AllKeys, Factors, Evidence, Probs),
    VarLists = [QueryVars],
    clpbn_bind_vals(VarLists, Probs, Output).
% call_ve_ground_solver_for_probabilities(+QueryKeys, +AllKeys, +Factors, +Evidence, -Solutions)
% Initialises a ground solver state and immediately runs it on QueryKeys.
call_ve_ground_solver_for_probabilities(QueryKeys, AllKeys, Factors, Evidence, Solutions) :-
    init_ve_ground_solver(QueryKeys, AllKeys, Factors, Evidence, Solver),
    run_ve_ground_solver(QueryKeys, Solutions, Solver).
% simulate_ve_ground_solver(+_QueryVars, +QueryKeys, +AllKeys, +Factors, +Evidence, -Output)
% Single-query wrapper around simulate_ve_ground_solver_for_probabilities/5.
% The query variables are ignored; only the keys are used.
simulate_ve_ground_solver(_QueryVars, QueryKeys, AllKeys, Factors, Evidence, Output) :-
    KeyLists = [QueryKeys],
    simulate_ve_ground_solver_for_probabilities(KeyLists, AllKeys, Factors, Evidence, Output).
% simulate_ve_ground_solver_for_probabilities(+QueryKeys, +AllKeys, +Factors, +Evidence, -Solutions)
% Initialises a ground solver state and passes it to simulate_solver/3.
simulate_ve_ground_solver_for_probabilities(QueryKeys, AllKeys, Factors, Evidence, Solutions) :-
    init_ve_ground_solver(QueryKeys, AllKeys, Factors, Evidence, Solver),
    simulate_solver(QueryKeys, Solutions, Solver).
% init_ve_ground_solver(+_QueryKeys, +AllKeys, +Factors, +Evidence, -Solver)
% Translates keys, factors and evidence with keys_to_numbers/7 and packs
% the result into a solver state via init_ve/5.
% The query keys are not used at initialisation time.
init_ve_ground_solver(_QueryKeys, AllKeys, Factors, Evidence, Solver) :-
    keys_to_numbers(AllKeys, Factors, Evidence, Hash, Id, FactorIds, EvidenceIds),
    init_ve(FactorIds, EvidenceIds, Hash, Id, Solver).
%
% implementation of the well known variable elimination algorithm
%
% ve(+LLVs, +Vs0, +AllDiffs)
% Top-level CLP(BN) entry point.
% A single empty query needs no work.
ve([[]], _, _) :- !.
ve(Queries, AllVars, AllDiffs) :-
    % set up the solver state
    init_ve_solver(Queries, AllVars, AllDiffs, Solver),
    % variable elimination proper
    run_ve_solver(Queries, Marginals, Solver),
    % bind the probabilities back to the variables so they can be output
    clpbn_bind_vals(Queries, Marginals, AllDiffs).
% init_ve(+FactorIds, +EvidenceIds, +Hash, +Id, -VE)
% Packs the ground solver state as ve(FactorIds, Hash, Id, Ev), where Ev
% is a red-black tree built from the Key=Value pairs in EvidenceIds.
init_ve(FactorIds, EvidenceIds, Hash, Id, ve(FactorIds, Hash, Id, EvTree)) :-
    rb_new(Empty),
    foldl(evtotree, EvidenceIds, Empty, EvTree).

% evtotree(+Key=Value, +Tree0, -Tree)
% Fold step: insert one evidence binding into the tree.
evtotree(Key=Value, Tree0, Tree) :-
    rb_insert(Tree0, Key, Value, Tree).
% factor_to_graph(+f(Nodes,Sizes,_,Id), +Factors0, -Factors, +Edges0, -Edges, +I0, -I)
% Fold step over the ground factors.
% Fetches the parameters of factor Id from pfl, builds its CPT, stores
% f(I0, Vars, CPT) in the factor tree under key I0, and prepends one
% Node-I0 edge for every node of the factor. The counter advances by one.
factor_to_graph(f(Nodes, Sizes, _Pars0, Id), Factors0, Factors, Edges0, Edges, I0, I) :-
    I is I0+1,
    pfl:get_pfl_parameters(Id, Params),
    init_CPT(Params, Sizes, RawCPT),
    reorder_CPT(Nodes, RawCPT, FactorVars, CPT, _),
    rb_insert(Factors0, I0, f(I0, FactorVars, CPT), Factors),
    foldl(add_f_to_nodes(I0), Nodes, Edges0, Edges).
% add_f_to_nodes(+FactorId, +Node, +Edges0, -Edges)
% Prepends the edge Node-FactorId to the edge list.
add_f_to_nodes(FactorId, Node, Edges0, [Node-FactorId|Edges0]).
%
% Qs is a list of lists with all query vars (marginals)
% IQs is the corresponding list of integers
% LVs is a list of lists with all variables reachable from each query
% LVIs is the corresponding list of integers
% VMap is the map V->I
%
% init_ve_solver(+Qs, +Vs0, +_AllDiffs, -State)
% Builds state(IQs, LVIs, VMap, Bigraph, Ev) for the attributed-variable
% interface. The third argument is ignored.
init_ve_solver(Qs, Vs0, _, state(IQs, LVIs, VMap, Bigraph, Ev)) :-
    init_influences(Vs0, Graph, TGraph),
    % one list of CLP(BN) variables per query
    maplist(init_ve_solver_for_question(Graph, TGraph), Qs, ReachableVars),
    % number the reachable variables first, then the query variables
    init_vmap(EmptyMap),
    lvars_to_numbers(ReachableVars, LVIs, EmptyMap, MapWithReachable),
    lvars_to_numbers(Qs, IQs, MapWithReachable, VMap),
    vars_to_bigraph(VMap, Bigraph, Ev).
% init_ve_solver_for_question(+Graph, +TGraph, +QueryVars, -Vars)
% Vars is the sorted, duplicate-free result of influences/4 for one query.
init_ve_solver_for_question(Graph, TGraph, QueryVars, Vars) :-
    influences(QueryVars, Graph, TGraph, Unsorted),
    sort(Unsorted, Vars).
%
% construct a bipartite graph with vars and factors
% the nodes of the var graph just contain pointer to the factors
% the nodes of the factors contain a list of variables and a matrix
% also provide a matrix with evidence
%
% vars_to_bigraph(+VMap, -bigraph(VInfo, IF, Fs), -Evs)
% Folds id_to_factor/8 over every Var-Id pair of the variable map.
% Produces the factor tree Fs, the factor count IF, and the evidence
% list Evs, then derives the variable tree VInfo from the factors.
vars_to_bigraph(VMap, bigraph(VInfo, IF, Fs), Evs) :-
    vmap_to_list(VMap, VarIdPairs),
    rb_new(EmptyFs),
    foldl3(id_to_factor(VMap), VarIdPairs, 0, IF, EmptyFs, Fs, [], Evs),
    factors_to_vs(Fs, VInfo).
% id_to_factor(+VMap, +V-I, +IF0, -IF, +Fs0, -Fs, +Evs0, -Evs)
% Fold step over the Var-Id pairs of the variable map.
%
% Clause 1: V has evidence and none of its parents is in VMap.
% Only the evidence I=E is recorded; no factor is created.
id_to_factor(VMap, V-I, IF0, IF, Fs0, Fs, Evs0, Evs) :-
    clpbn:get_atts(V, [evidence(E), dist(_, Parents)]),
    checklist(noparent_of_interest(VMap), Parents),
    !,
    IF = IF0,
    Fs = Fs0,
    Evs = [I=E|Evs0].
% Clause 2: general case. Record evidence if there is any, then build
% the factor for V's distribution and store it under key IF0.
id_to_factor(VMap, V-I, IF0, IF, Fs0, Fs, Evs0, Evs) :-
    (   clpbn:get_atts(V, [evidence(E)])
    ->  Evs = [I=E|Evs0]
    ;   Evs = Evs0
    ),
    clpbn:get_atts(V, [dist(Dist, Parents)]),
    get_dist_params(Dist, Params),
    get_dist_domain_size(Dist, DomainSize),
    maplist(parent_to_id(VMap), Parents, ParentSizes, ParentIds),
    init_CPT(Params, [DomainSize|ParentSizes], RawCPT),
    reorder_CPT([I|ParentIds], RawCPT, FactorVars, CPT, _),
    NewFactor = f(IF0, FactorVars, CPT),
    rb_insert(Fs0, IF0, NewFactor, Fs),
    IF is IF0+1.
% noparent_of_interest(+VMap, +Parent)
% True when Parent has no entry in the variable map.
noparent_of_interest(VMap, Parent) :-
    \+ get_from_vmap(Parent, _Id, VMap).
% parent_to_id(+VMap, +Var, -DomainSize, -Id)
% Looks up the domain size of Var's distribution and Var's integer id
% in the variable map. Fails if Var is not in the map.
parent_to_id(VMap, Var, DomainSize, Id) :-
    clpbn:get_atts(Var, [dist(Dist, _Parents)]),
    get_dist_domain_size(Dist, DomainSize),
    get_from_vmap(Var, Id, VMap).
% factors_to_vs(+Fs, -VInfo)
% Builds the variable side of the bipartite graph from the factor tree.
%   Fs    - rbtree mapping factor id -> f(Id, VarIds, Table)
%   VInfo - rbtree mapping variable id -> list of factors mentioning it
factors_to_vs(Fs, VInfo) :-
    rb_visit(Fs, Pairs),
    % fsvs//1 translates ONE Id-Factor pair into VarId-FactorId edges.
    % Calling it directly on the whole list can never unify with its
    % head, so fold it over the pairs, threading the difference list.
    foldl(fsvs, Pairs, FVs, []),
    % sorting groups the edges by variable id, as add_vs/4 expects
    sort(FVs, SFVs),
    rb_new(VInfo0),
    add_vs(SFVs, Fs, VInfo0, VInfo).
% fsvs(+FactorId-f(_, VarIds, _), -Edges, ?Tail)
% Emits one VarId-FactorId edge for every variable of a single factor.
% Edges-Tail is a difference list.
fsvs(FactorId-f(_, VarIds, _), Edges, Tail) :-
    fvs(VarIds, FactorId, Edges, Tail).

% fvs(+VarIds, +FactorId, -Edges, ?Tail)
% Pairs every variable id with FactorId, in order.
fvs([], _FactorId, Tail, Tail).
fvs([VarId|VarIds], FactorId, [VarId-FactorId|Edges], Tail) :-
    fvs(VarIds, FactorId, Edges, Tail).
%
% construct variable nodes
%
% add_vs(+SortedEdges, +Fs, +VInfo0, -VInfo)
% SortedEdges is a list of Var-FactorId pairs grouped by Var.
% For each distinct Var, inserts Var -> [Factor, ...] into the variable
% tree, looking every factor id up in Fs.
add_vs([], _, VInfo, VInfo).
add_vs([Var-FId|Edges], Fs, VInfo0, VInfo) :-
    rb_lookup(FId, Factor, Fs),
    collect_factors(Edges, Fs, Var, MoreFactors, Remaining),
    rb_insert(VInfo0, Var, [Factor|MoreFactors], VInfo1),
    add_vs(Remaining, Fs, VInfo1, VInfo).

% collect_factors(+Edges, +Fs, +Var, -Factors, -Remaining)
% Consumes the leading run of edges that belong to Var, returning their
% factors; Remaining is the rest of the edge list.
collect_factors([], _Fs, _Var, [], []) :- !.
collect_factors([Var-FId|Edges], Fs, Var, [Factor|Factors], Remaining) :-
    !,
    rb_lookup(FId, Factor, Fs),
    collect_factors(Edges, Fs, Var, Factors, Remaining).
collect_factors(Edges, _Fs, _Var, [], Edges).
% solve each query independently;
% use findall/3 so that space is recovered without needing GC
% run_ve_ground_solver(+LQVs, -LLPs, +ve(FactorIds, Hash, Id, Ev))
% Builds the bipartite graph from the ground factors, maps the query
% keys to ids, and collects one probability list per query in LLPs.
run_ve_ground_solver(LQVs, LLPs, ve(FactorIds, Hash, Id, Ev)) :-
    % factor tree plus the Node-FactorId edge list
    rb_new(EmptyFs),
    foldl3(factor_to_graph, FactorIds, EmptyFs, Fs, [], Edges, 0, IF),
    % variable tree
    sort(Edges, SortedEdges),
    rb_new(EmptyVs),
    add_vs(SortedEdges, Fs, EmptyVs, VInfo),
    Bigraph = bigraph(VInfo, IF, Fs),
    lists_of_keys_to_ids(LQVs, QueryIds, Hash, _, Id, _),
    % solve/5 yields one answer per query on backtracking
    findall(Probs, solve(QueryIds, FactorIds, Bigraph, Ev, Probs), LLPs).
% solve(+Queries, +FIds, +Bigraph, +Evs, -Probs)
% Nondeterministic: solves the first query, then, on backtracking, each
% of the remaining ones in order.
solve([Query|_], FIds, Bigraph, Evs, Probs) :-
    factor_influences(FIds, Query, Evs, RelevantVars),
    do_solve(Query, RelevantVars, Bigraph, Evs, Probs).
solve([_|Queries], FIds, Bigraph, Evs, Probs) :-
    solve(Queries, FIds, Bigraph, Evs, Probs).
% do_solve(+IQVs, +IVs, +bigraph(OldVs, IF, _), +Ev, -Ps)
% Solves one ground query. Ev is an rbtree of evidence.
do_solve(IQVs, IVs, bigraph(OldVs, IF, _Fs), Ev, Ps) :-
    % keep only the part of the graph relevant to the query
    project_to_query_related(IVs, OldVs, QueryVs, QueryFs),
    % prune it further using the evidence (visited as Key-Value pairs)
    rb_visit(Ev, EvPairs),
    foldl2(clean_v_ev, EvPairs, QueryFs, PrunedFs, QueryVs, PrunedVs),
    % eliminate everything but the query variables
    eliminate(IQVs, digraph(PrunedVs, IF, PrunedFs), Dist),
    % move from potentials back to probabilities
    normalise_CPT(Dist, Normalised),
    list_from_CPT(Normalised, Ps).
% simulate_solver(+LQVs, -Choices, +VE)
% Maps the query keys to ids (exactly one query list is accepted, see the
% [QVs] pattern), calls factor_influences/4 to obtain LVs, and delegates
% to do_simulate/5.
% NOTE(review): the state pattern here is ve/5 (with a bigraph argument
% BG), whereas init_ve/5 in this file builds a ve/4 term
% ve(FactorIds, Hash, Id, Ev). As written, this head cannot unify with a
% state produced by init_ve_ground_solver/5 -- confirm the intended shape.
simulate_solver(LQVs, Choices, ve(FIds, Hash, Id, BG, Evs)) :-
lists_of_keys_to_ids(LQVs, [QVs], Hash, _, Id, _),
factor_influences(FIds, QVs, Evs, LVs),
do_simulate(QVs, LVs, BG, Evs, Choices).
% do_simulate(+IQVs, +IVs, +bigraph(OldVs, IF, _), +Ev, -Choices)
% Same preparation as do_solve/5 (project the graph to the query, prune
% with the evidence tree), but the final step is handed to
% simulate_eiminate/3 instead of eliminate/3.
% NOTE(review): simulate_eiminate/3 is not defined in the visible part of
% this file, and the name looks like a misspelling of "simulate_eliminate"
% -- confirm where it is meant to come from.
do_simulate(IQVs, IVs, bigraph(OldVs, IF, _Fs), Ev, Choices) :-
% get only what is relevant to query,
project_to_query_related(IVs, OldVs, SVs, Fs1),
% and also prune using evidence
rb_visit(Ev, EvL),
foldl2(clean_v_ev, EvL, Fs1, Fs2, SVs, EVs),
% eliminate
simulate_eiminate(IQVs, digraph(EVs, IF, Fs2), Choices).
% solve each query independently;
% use findall/3 so that space is recovered without needing GC
% run_ve_solver(+_Queries, -LLPs, +state(LQVs, LVs, _VMap, Bigraph, Ev))
% Collects one probability list per query stored in the state.
% The first argument is ignored; the numbered queries come from the state.
run_ve_solver(_, LLPs, state(LQVs, LVs, _VMap, Bigraph, Ev)) :-
    findall(Probs, solve_ve(LQVs, LVs, Bigraph, Ev, Probs), LLPs).
%
% IQVs are the current marginal,
% IVs are all variables related to that
% IFVs are the factors
% SVs are the variables
%
% solve_ve(+Queries, +RelatedVars, +Bigraph, +Ev, -Ps)
% Nondeterministic: solves the first query together with its list of
% related variables, then, on backtracking, each remaining pair.
% Ev is a list of evidence bindings.
solve_ve([Query|_], [Related|_], bigraph(OldVs, IF, _Fs), Ev, Ps) :-
    % keep only the part of the graph relevant to the query
    project_to_query_related(Related, OldVs, QueryVs, QueryFs),
    % prune it further using the evidence
    foldl2(clean_v_ev, Ev, QueryFs, PrunedFs, QueryVs, PrunedVs),
    % eliminate everything but the query variables
    eliminate(Query, digraph(PrunedVs, IF, PrunedFs), Dist),
    % move from potentials back to probabilities
    normalise_CPT(Dist, Normalised),
    list_from_CPT(Normalised, Ps).
solve_ve([_|Queries], [_|RelatedLists], Bigraph, Ev, Ps) :-
    solve_ve(Queries, RelatedLists, Bigraph, Ev, Ps).
%
% given our input queries, sort them and obtain the subgraphs of vars and facs.
%
% project_to_query_related(+IVs0, +OldVs, -NVs, -NFs)
% Restricts the graph to the variables in IVs0.
%   NVs - rbtree mapping each kept variable to its kept factors
%   NFs - rbtree of the kept factors
project_to_query_related(IVs0, OldVs, NVs, NFs) :-
    sort(IVs0, IVs),
    % membership tree for the kept variables
    rb_new(EmptyVs),
    foldl(cp_to_vs, IVs, EmptyVs, VarSet),
    % visit each kept variable, gathering its factors
    rb_new(EmptyFs),
    foldl(simplify_graph_node(OldVs, VarSet), IVs, VarFactorPairs, EmptyFs, NFs),
    list_to_rbtree(VarFactorPairs, NVs).
%
% auxiliary tree for fast access to vars.
%
% cp_to_vs(+Var, +Tree0, -Tree)
% Inserts Var as a key with a fresh, unbound value; the tree is used for
% membership tests only.
cp_to_vs(Var, Tree0, Tree) :-
    rb_insert(Tree0, Var, _Unused, Tree).
%
% construct a new, hopefully much smaller, graph
%
% simplify_graph_node(+OldVs, +NVs, +V, -(V-RemFs), +NFs0, -NFs)
% Fold step: looks up the factors of V in the old variable tree and keeps
% those accepted by check_factor/7, extending the new factor tree.
simplify_graph_node(OldVs, NVs, V, V-KeptFs, NFs0, NFs) :-
    rb_lookup(V, OldFactors, OldVs),
    foldl2(check_factor(V, NVs), OldFactors, NFs0, NFs, [], KeptFs).
%
% check if a factor belongs to the subgraph.
%
%
% Two cases: first time factor comes up: all its vars must be in subgraph
% second case: second time it comes up, it must be already in graph
%
% args: +Factor F, +current V (int), +rbtree with all Vs,
% -Factors in new Graph, +factors in current graph, -rbtree of factors
%
%
% check_factor(+V, +NVs, +F, +NFs0, -NFs, +RemFs, -NewRemFs)
% Fold step deciding whether factor F stays in the subgraph.
%
% Clause 1: V is the first variable of F. F is kept, and added to the new
% factor tree, only when all its other variables are in NVs.
check_factor(V, NVs, F, NFs0, NFs, RemFs, NewRemFs) :-
    F = f(Id, [V|OtherVars], _),
    !,
    (   checklist(check_v(NVs), OtherVars)
    ->  NewRemFs = [F|RemFs],
        rb_insert(NFs0, Id, F, NFs)
    ;   NewRemFs = RemFs,
        NFs = NFs0
    ).
% Clause 2: V is not the first variable of F. F is kept for V only if it
% is already in the new factor tree; the tree itself is unchanged.
check_factor(_V, _NVs, F, NFs, NFs, RemFs, NewRemFs) :-
    F = f(Id, _, _),
    (   rb_lookup(Id, F, NFs)
    ->  NewRemFs = [F|RemFs]
    ;   NewRemFs = RemFs
    ).

% check_v(+NVs, +V)
% True when V is a key of the tree NVs.
check_v(NVs, V) :-
    rb_lookup(V, _, NVs).
%
% simplify a variable with evidence
%
% clean_v_ev(+Binding, +FVs0, -FVs, +Vs0, -Vs)
% Fold step applying one piece of evidence. Binding is V=E or V-E.
% If V is in the variable tree it is removed and every factor that
% mentioned it is simplified; the choice is then committed.
clean_v_ev(Binding, FVs0, FVs, Vs0, Vs) :-
    (   Binding = (V=E)
    ;   Binding = V-E
    ),
    rb_delete(Vs0, V, Factors, Vs1),
    !,
    foldl2(simplify_f_ev(V, E), Factors, FVs0, FVs, Vs1, Vs).
% The variable is not in the graph: nothing changes.
clean_v_ev(_, FVs, FVs, Vs, Vs).
%
%
% tricky: clean a factor means also cleaning all back references.
%
% simplify_f_ev(+V, +E, +F, +Fs0, -Fs, +Vs0, -Vs)
% Fold step: projects evidence V=E out of factor F with
% project_from_CPT/6, stores the reduced factor under the same id, and
% replaces F by it in the entry of every remaining variable.
simplify_f_ev(V, E, F, Fs0, Fs, Vs0, Vs) :-
    F = f(Id, OldVars, OldCPT),
    project_from_CPT(V, E, OldCPT, OldVars, NewCPT, NewVars),
    Reduced = f(Id, NewVars, NewCPT),
    % update the factor tree
    rb_update(Fs0, Id, Reduced, Fs),
    % and all back references from the variable tree
    foldl(update_factors(F, Reduced), NewVars, Vs0, Vs).
% update all instances of F in var graph
% update_factors(+Old, +New, +V, +Vs0, -Vs)
% Replaces factor Old by New in the factor list stored for variable V.
update_factors(Old, New, V, Vs0, Vs) :-
    rb_update(Vs0, V, Current, Updated, Vs),
    maplist(replace_factor(Old, New), Current, Updated).

% replace_factor(+Old, +New, +Factor, -Result)
% Result is New when Factor unifies with Old, otherwise Factor itself.
replace_factor(Old, New, Old, New) :- !.
replace_factor(_Old, _New, Other, Other).
% eliminate(+QVs, +digraph(Vs, I, Fs), -Dist)
% Repeatedly picks a non-query variable with find_best/4, removes it and
% its factors, and inserts their summed-out product as a new factor with
% id I. When no variable can be picked, the remaining factors are
% combined into Dist.
eliminate(QVs, digraph(Vs0, I, Fs0), Dist) :-
    find_best(Vs0, QVs, BestV, BestFactors),
    !,
    % drop every factor that touches the chosen variable
    foldl2(del_fac, BestFactors, Fs0, Fs1, Vs0, Vs1),
    % drop the variable itself
    rb_delete(Vs1, BestV, Vs2),
    % build the replacement table
    multiply_and_delete(BestFactors, BestV, NewVars, NewCPT),
    % and put it back into the graph
    insert_fac(I, NewVars, NewCPT, Fs1, Fs, Vs2, Vs),
    NextI is I+1,
    eliminate(QVs, digraph(Vs, NextI, Fs), Dist).
eliminate(_QVs, digraph(_, _, Fs), Dist) :-
    combine_factors(Fs, Dist).
% find_best(+Vs, +QVs, -BestV, -VFs)
% Folds best_var/5 over the variable tree, starting from the sentinel
% i(+inf, -1, []). Fails when the sentinel survives, i.e. when no
% variable was selected.
find_best(Vs, QVs, BestV, VFs) :-
    Sentinel = i(+inf, -1, []),
    rb_key_fold(best_var(QVs), Vs, Sentinel, i(_Cost, BestV, VFs)),
    BestV \= -1,
    !.
% do not eliminate marginalised variables
% best_var(+QVs, +VarId, +Factors, +Best0, -Best)
% Fold step used by find_best/4. Best is i(Cost, VarId, Factors).
%
% Query variables are never candidates.
best_var(QVs, VarId, _Factors, Best, Best) :-
    member(VarId, QVs),
    !.
% Take VarId when its cost is strictly lower than the best so far. The
% cost is the product, over the variable's factors, of the number of
% variables in each factor.
best_var(_QVs, VarId, Factors, i(BestCost, _, _), i(Cost, VarId, Factors)) :-
    foldl(szfac, Factors, 1, Cost),
    Cost < BestCost,
    !.
% Otherwise keep the current best.
best_var(_, _VarId, _Factors, Best, Best).

% szfac(+Factor, +Acc0, -Acc)
% Multiplies the accumulator by the number of variables in Factor.
szfac(f(_, Vars, _), Acc0, Acc) :-
    length(Vars, NVars),
    Acc is Acc0*NVars.
% delete one factor, need to also touch all variables
% del_fac(+f(Id, Vars, _), +Fs0, -Fs, +Vs0, -Vs)
% Removes factor Id from the factor tree and from the factor list of
% every variable it mentions.
del_fac(f(Id, Vars, _), Fs0, Fs, Vs0, Vs) :-
    rb_delete(Fs0, Id, Fs),
    foldl(delete_fac_from_v(Id), Vars, Vs0, Vs).

% delete_fac_from_v(+Id, +Var, +Vs0, -Vs)
% Drops every factor with identifier Id from Var's factor list.
delete_fac_from_v(Id, Var, Vs0, Vs) :-
    rb_update(Vs0, Var, Current, Kept, Vs),
    exclude(factor_name(Id), Current, Kept).

% factor_name(?Id, ?Factor)
% True when Factor carries identifier Id.
factor_name(Id, f(Id, _, _)).
% insert one factor, need to touch all corresponding variables
% insert_fac(+Id, +Vars, +CPT, +Fs0, -Fs, +Vs0, -Vs)
% Adds factor f(Id, Vars, CPT) to the factor tree and to the front of the
% factor list of every variable in Vars.
insert_fac(Id, Vars, CPT, Fs0, Fs, Vs0, Vs) :-
    Factor = f(Id, Vars, CPT),
    rb_insert(Fs0, Id, Factor, Fs),
    foldl(insert_fac_in_v(Factor), Vars, Vs0, Vs).

% insert_fac_in_v(+Factor, +Var, +Vs0, -Vs)
% Prepends Factor to the factor list stored for Var.
insert_fac_in_v(Factor, Var, Vs0, Vs) :-
    rb_update(Vs0, Var, Current, [Factor|Current], Vs).
% combine_factors(+Fs, -Dist)
% Multiplies together all factors left in the factor tree; Dist is the
% resulting table. The variable list of the product is discarded.
combine_factors(Fs, Dist) :-
    rb_visit(Fs, Pairs),
    maplist(extract_factor, Pairs, Remaining),
    multiply(Remaining, _Vars, Dist).
% extract_factor(+Key-Factor, -Factor)
% Projects the value out of a Key-Value pair.
extract_factor(_Key-Factor, Factor).
% multiply_and_delete(+Factors, +V, -Vs, -T)
% Multiplies the factors of a non-empty list together and sums V out of
% the product with sum_out_from_CPT/5. Fails on an empty list.
multiply_and_delete([First|Rest], V, Vs, T) :-
    First = f(_Id, _Vs0, _T0),
    foldl(multiply_factor, Rest, First, f(_, ProductVars, ProductTable)),
    sum_out_from_CPT(V, ProductTable, ProductVars, T, Vs).
% multiply(+Factors, -Vs, -T)
% Folds multiply_factor/3 over a non-empty list of factors, starting from
% the first one; Vs and T are the variables and table of the product.
multiply([First|Rest], Vs, T) :-
    foldl(multiply_factor, Rest, First, Product),
    Product = f(_, Vs, T).
% multiply_factor(+Factor, +Acc0, -Acc)
% Fold step: multiplies Factor into the accumulated factor using
% multiply_CPTs/6. The identifier of the result is left unbound.
multiply_factor(f(_, FactorVars, FactorTable), f(_, AccVars, AccTable), f(_, Vars, Table)) :-
    multiply_CPTs(FactorTable, FactorVars, AccTable, AccVars, Table, Vars).