379 lines
11 KiB
Prolog
379 lines
11 KiB
Prolog
%
|
|
% generate explicit CPTs
|
|
%
|
|
:- module(clpbn_aggregates, [
|
|
check_for_agg_vars/2,
|
|
cpt_average/6,
|
|
cpt_average/7,
|
|
cpt_max/6,
|
|
cpt_min/6,
|
|
avg_factors/5
|
|
]).
|
|
|
|
:- use_module(library(clpbn), [{}/1]).
|
|
|
|
:- use_module(library(lists),
|
|
[last/2,
|
|
sumlist/2,
|
|
sum_list/3,
|
|
max_list/2,
|
|
min_list/2,
|
|
nth0/3
|
|
]).
|
|
|
|
:- use_module(library(matrix),
|
|
[matrix_new/3,
|
|
matrix_to_list/2,
|
|
matrix_set/3]).
|
|
|
|
:- use_module(library(clpbn/dists),
|
|
[
|
|
add_dist/6,
|
|
get_dist_domain_size/2]).
|
|
|
|
:- use_module(library(clpbn/matrix_cpt_utils),
|
|
[normalise_CPT_on_lines/3]).
|
|
|
|
:- use_module(library(pfl),
|
|
[skolem/2,
|
|
add_ground_factor/5]).
|
|
|
|
:- use_module(library(bhash)).
|
|
|
|
:- use_module(library(maplist)).
|
|
|
|
% check_for_agg_vars(+Vars, -Visited)
% Walks Vars from left to right.  A variable that carries both a key/1
% and a dist/2 CLP(BN) attribute is passed to simplify_dist/6, which
% returns the list of variables that remain to be visited.  Visited
% collects every variable that was actually walked, in order.
check_for_agg_vars([], []).
check_for_agg_vars([Var|Pending], [Var|Visited]) :-
	(   clpbn:get_atts(Var, [key(Key), dist(DistId,Parents)])
	->  simplify_dist(DistId, Var, Key, Parents, Pending, Pending1)
	;   % no key/dist attributes: nothing to simplify
	    Pending1 = Pending
	),
	check_for_agg_vars(Pending1, Visited).
|
|
|
|
% transform aggregate distribution into tree
|
|
% simplify_dist(+DistId, +Var, +Key, +Parents, +Pending0, -Pending)
% For an avg(Domain) distribution, builds an explicit table through
% cpt_average/6, registers it with add_dist/6 and re-attaches the new
% dist/2 attribute to Var.  Any other distribution is left untouched
% and the pending list is passed through unchanged.
simplify_dist(avg(Domain), Var, Key, Parents, Pending0, Pending) :- !,
	cpt_average([Var|Parents], Key, Domain, p(Dom, Table, NewParents), Pending0, Pending),
	add_dist(Dom, tab, Table, NewParents, Key, NewId),
	clpbn:put_atts(Var, [dist(NewId,NewParents)]).
simplify_dist(_, _, _, _, Pending, Pending).
|
|
|
|
%
|
|
% avg_factors(+Key, +Parents, +Smoothing, -NewParents, -Id)
% Looks up the domain of Key with skolem/2 and delegates to avg_table/10
% with no accumulated evidence, softness 1.0 and an empty list of extra
% keys.  Smoothing is accepted but not used.
avg_factors(Key, Parents, _Smoothing, NewParents, Id) :-
	% we keep ev as a list
	skolem(Key, Domain),
	TotEvidence = 0,
	Softness = 1.0,
	avg_table(Parents, Parents, Domain, Key, TotEvidence, Softness,
		  NewParents, [], _ExtraSkolems, Id).
|
|
|
|
% there are 4 cases:
|
|
% no evidence on top node
|
|
% evidence on top node compatible with values of parents
|
|
% evidence on top node *entailed* by values of parents (so there is no real connection)
|
|
% evidence incompatible with parents
|
|
% query_evidence(+Key, +EvHash, +Mat0, -Mat, +Parents0, -Parents, +Vs, +IVs, -NewVs)
% If EvHash holds evidence for Key, the table is normalised and
% check_consistency/11 decides which table, parents and variable list
% to return.  Without evidence the inputs Mat0, Parents0 and IVs are
% returned as they are.
query_evidence(Key, EvHash, Mat0, Mat, Parents0, Parents, Vs, IVs, NewVs) :-
	(   b_hash_lookup(Key, Ev, EvHash)
	->  normalise_CPT_on_lines(Mat0, Mat1, Lines),
	    check_consistency(Lines, Ev, Mat0, Mat1, Lines, Mat,
			      Parents0, Parents, Vs, IVs, NewVs)
	;   Mat = Mat0,
	    Parents = Parents0,
	    NewVs = IVs
	).
|
|
|
|
% hash_ev(+Key=Value, +Hash0, -Hash)
% Inserts one Key=Value evidence pair into a bhash table.
hash_ev(Key=Value, Hash0, Hash) :-
	b_hash_insert(Hash0, Key, Value, Hash).
|
|
|
|
% find_ev(+EvHash, +Key, +Keys0, -Keys, +Sum0, -Sum)
% When Key has a value in EvHash, that value is added to the running
% sum and the key list is unchanged.  Otherwise Key is pushed onto the
% key list and the sum is unchanged.
find_ev(EvHash, Key, Keys, Keys, Sum0, Sum) :-
	b_hash_lookup(Key, Value, EvHash), !,
	Sum is Sum0+Value.
find_ev(_EvHash, Key, Keys, [Key|Keys], Sum, Sum).
|
|
|
|
|
|
% +Vars -> Keys without ev
|
|
% +all keys
|
|
% +domain to project to
|
|
% +output key
|
|
% +sum of evidence
|
|
% +softness
|
|
% +final CPT
|
|
% - New Parents
|
|
% + - list of new keys
|
|
%
|
|
% avg_table(+Vars, +OVars, +Domain, +Key, +TotEvidence, +Softness,
%           -NewParents, +Vs0, -Vs, -Id)
% First clause: the table has between 1 and 256 cells, so one factor
% over [Key|OVars] is created directly.
% Second clause: Vars is split into two halves, each half is reduced to
% one summing node by intermediate_table/10, and the final factor is
% built over [Key,Left,Right].
avg_table(Vars, OVars, Domain, Key, TotEvidence, Softness, Vars, Vs, Vs, Id) :-
	length(Domain, DomainSize),
	int_power(Vars, DomainSize, 1, Cells),
	Cells =< 256,
	/* case gmp is not there !! */
	Cells > 0, !,
	average_cpt(Vars, OVars, Domain, TotEvidence, Softness, Matrix),
	matrix_to_list(Matrix, Flat),
	add_ground_factor(bayes, Domain, [Key|OVars], Flat, Id).
avg_table(Vars, OVars, Domain, Key, TotEvidence, Softness, [Left,Right], Vs, [Left,Right|NewVs], Id) :-
	length(Vars, Count),
	LeftCount is Count//2,
	RightCount is Count-LeftCount,
	list_split(LeftCount, Vars, LeftVars, RightVars),
	% sums range over domain positions 0..|Domain|-1
	length(Domain, DomainSize),
	Max is DomainSize-1,
	Op = sum(0,Max),
	intermediate_table(LeftCount, Op, LeftVars, Left, Key, 1.0, 0, NextI, Vs, Vs1),
	intermediate_table(RightCount, Op, RightVars, Right, Key, 1.0, NextI, _, Vs1, NewVs),
	average_cpt([Left,Right], OVars, Domain, TotEvidence, Softness, Matrix),
	matrix_to_list(Matrix, Flat),
	add_ground_factor(bayes, Domain, [Key,Left,Right], Flat, Id).
|
|
|
|
% intermediate_table(+N, +Op, +Nodes, -Top, +Key, +Softness, +I0, -I, +Vs0, -Vs)
% Reduces a list of N nodes to a single node Top by pairing them up
% recursively; every pairing is registered through extra_key_factor/7.
% I0/I thread the counter used to number the generated nodes.
%   N = 1: the node itself is the result, no factor is created.
%   N = 2: one factor over the two nodes.
%   N > 2: split in halves, reduce each half, then join both results.
intermediate_table(1, _, [Node], Node, _, _, I, I, Vs, Vs) :- !.
intermediate_table(2, Op, [A,B], Top, Key, Softness, I0, I, Vs, Vs) :- !,
	I is I0+1,
	extra_key_factor(Op, 2, [A,B], Top, Key, Softness, I0).
intermediate_table(N, Op, Nodes, Top, Key, Softness, I0, I, Vs, [Left,Right|NewVs]) :-
	LeftCount is N//2,
	RightCount is N-LeftCount,
	list_split(LeftCount, Nodes, LeftNodes, RightNodes),
	% I0 is reserved for the node that joins both halves
	I1 is I0+1,
	intermediate_table(LeftCount, Op, LeftNodes, Left, Key, Softness, I1, I2, Vs, Vs1),
	intermediate_table(RightCount, Op, RightNodes, Right, Key, Softness, I2, I, Vs1, NewVs),
	extra_key_factor(Op, N, [Left,Right], Top, Key, Softness, I0).
|
|
|
|
% extra_key_factor(+sum(Min,Max), +N, +[A,B], -Node, +Key, +Softness, +I)
% Creates the summing node 'AVG'(I,Key) over parents A and B.  Its
% domain is the integer range Min*N..Max*N.  The node is registered as a
% ground bayes factor and recorded in the pfl module database with
% assert/1.
extra_key_factor(sum(Min,Max), N, [A,B], Node, Key, Softness, I) :-
	Lower is Min*N,
	Upper is Max*N,
	generate_list(Lower, Upper, Range),
	sum_cpt([A,B], Range, Softness, CPT),
	Node = 'AVG'(I,Key),
	Family = [Node,A,B],
	add_ground_factor(bayes, Range, Family, CPT, Id),
	assert(pfl:currently_defined(Node)),
	assert(pfl:f(bayes, Id, Family)).
|
|
|
|
% cpt_average(+AllVars, +Key, +Domain, -Dist, +Vs0, -Vs)
% Same as cpt_average/7 with the softness fixed to 1.0.
cpt_average(AllVars, Key, Domain, Dist, Vs0, Vs) :-
	Softness = 1.0,
	cpt_average(AllVars, Key, Domain, Softness, Dist, Vs0, Vs).
|
|
|
|
% support variables with evidence from domain. This should make everyone's life easier.
|
|
% cpt_average(+[Child|Parents], +Key, +Domain, +Softness, -p(Domain,Table,NewParents), +Vs0, -Vs)
% Builds the table for an average node:
%   1. parents that already have evidence are removed and their values
%      summed (find_evidence/4);
%   2. the table is built over the remaining parents (build_avg_table/10);
%   3. evidence on the child itself, if any, is taken into account
%      (include_qevidence/8);
%   4. the resulting matrix is flattened into a list.
cpt_average([Child|Parents], Key, Domain, Softness, p(Domain, Table, NewParents), Vs0, Vs) :-
	find_evidence(Parents, 0, EvidenceSum, FreeParents),
	build_avg_table(FreeParents, Parents, Domain, Key, EvidenceSum, Softness,
			Mat0, Parents0, Vs0, IVs),
	include_qevidence(Child, Mat0, Mat, Parents0, NewParents, Vs0, IVs, Vs),
	matrix_to_list(Mat, Table).
|
|
|
|
% find all fixed kids, this simplifies significantly the function.
|
|
% find_evidence(+Vars, +Sum0, -Sum, -FreeVars)
% Splits Vars by the presence of an evidence/1 attribute: values of
% variables with evidence are added to the running sum, all other
% variables are kept, in order, in FreeVars.
find_evidence([], Sum, Sum, []).
find_evidence([Var|Vars], Sum0, Sum, FreeVars) :-
	(   clpbn:get_atts(Var, [evidence(Ev)])
	->  Sum1 is Sum0+Ev,
	    FreeVars = Rest
	;   Sum1 = Sum0,
	    FreeVars = [Var|Rest]
	),
	find_evidence(Vars, Sum1, Sum, Rest).
|
|
|
|
% cpt_max(+[Child|Parents], +Key, +Domain, -CPT, +Vs0, -Vs)
% Drops the child variable and hands the parents to build_max_table/8
% with softness 1.0; the domain is passed in both domain positions.
cpt_max([_Child|Parents], Key, Domain, CPT, Vs0, Vs) :-
	Softness = 1.0,
	build_max_table(Parents, Domain, Domain, Key, Softness, CPT, Vs0, Vs).
|
|
|
|
% cpt_min(+[Child|Parents], +Key, +Domain, -CPT, +Vs0, -Vs)
% Drops the child variable and hands the parents to build_min_table/8
% with softness 1.0; the domain is passed in both domain positions.
cpt_min([_Child|Parents], Key, Domain, CPT, Vs0, Vs) :-
	Softness = 1.0,
	build_min_table(Parents, Domain, Domain, Key, Softness, CPT, Vs0, Vs).
|
|
|
|
% build_avg_table(+Vars, +OVars, +Domain, +Key, +TotEvidence, +Softness,
%                 -CPT, -NewParents, +Vs0, -Vs)
% First clause: the table has between 1 and 256 cells and is generated
% directly over Vars.
% Second clause: Vars is split in two halves, each reduced to a single
% summing variable by build_intermediate_table/10; the table is then
% generated over those two variables, which are also prepended to the
% list of variables.
build_avg_table(Vars, OVars, Domain, _, TotEvidence, Softness, CPT, Vars, Vs, Vs) :-
	length(Domain, DomainSize),
	int_power(Vars, DomainSize, 1, Cells),
	Cells =< 256,
	/* case gmp is not there !! */
	Cells > 0, !,
	average_cpt(Vars, OVars, Domain, TotEvidence, Softness, CPT).
build_avg_table(Vars, OVars, Domain, Key, TotEvidence, Softness, CPT, [Left,Right], Vs, [Left,Right|NewVs]) :-
	length(Vars, Count),
	LeftCount is Count//2,
	RightCount is Count-LeftCount,
	list_split(LeftCount, Vars, LeftVars, RightVars),
	% sums range over domain positions 0..|Domain|-1
	length(Domain, DomainSize),
	Max is DomainSize-1,
	Op = sum(0,Max),
	build_intermediate_table(LeftCount, Op, LeftVars, Left, Key, 1.0, 0, NextI, Vs, Vs1),
	build_intermediate_table(RightCount, Op, RightVars, Right, Key, 1.0, NextI, _, Vs1, NewVs),
	average_cpt([Left,Right], OVars, Domain, TotEvidence, Softness, CPT).
|
|
|
|
% build_max_table(+Vars, +Domain, +Els, +Key, +Softness, -Dist, +Vs0, -Vs)
% Arity-8 entry point, as called by cpt_max/6.  Els duplicates Domain at
% that call site and is ignored.  Dist is p(Domain, CPT, Parents).
build_max_table(Vars, Domain, _Els, Key, Softness, Dist, Vs0, Vs) :-
	build_max_table_keyed(Vars, Domain, Key, Softness, Dist, Vs0, Vs).

% build_max_table(+Vars, +Domain, +Softness, -Dist, +Vs0, -Vs)
% Original arity-6 form, kept for compatibility.  It has no Key
% argument, so intermediate nodes are named with an unbound key, exactly
% as before.
build_max_table(Vars, Domain, Softness, Dist, Vs0, Vs) :-
	build_max_table_keyed(Vars, Domain, _Key, Softness, Dist, Vs0, Vs).

% build_max_table_keyed(+Vars, +Domain, +Key, +Softness, -Dist, +Vs0, -Vs)
% First clause: the table has between 1 and 16 cells and is generated
% directly.  Second clause: Vars is split in two halves, each reduced to
% one 'MAX' node, and the final table is built over those two nodes.
build_max_table_keyed(Vars, Domain, _Key, Softness, p(Domain, CPT, Vars), Vs, Vs) :-
	length(Domain, SDomain),
	int_power(Vars, SDomain, 1, TabSize),
	TabSize =< 16,
	/* case gmp is not there !! */
	TabSize > 0, !,
	max_cpt(Vars, Domain, Softness, CPT).
build_max_table_keyed(Vars, Domain, Key, Softness, p(Domain, CPT, [V1,V2]), Vs, [V1,V2|NewVs]) :-
	length(Vars, L),
	LL1 is L//2,
	LL2 is L-LL1,
	list_split(LL1, Vars, L1, L2),
	% NOTE(review): CPT is still unbound while the intermediate nodes are
	% created and is only filled in by max_cpt/4 below -- confirm that
	% generate_var/5 tolerates a table that is bound later.
	build_intermediate_table(LL1, max(Domain,CPT), L1, V1, Key, 1.0, 0, I1, Vs, Vs1),
	build_intermediate_table(LL2, max(Domain,CPT), L2, V2, Key, 1.0, I1, _, Vs1, NewVs),
	max_cpt([V1,V2], Domain, Softness, CPT).
|
|
|
|
% build_min_table(+Vars, +Domain, +Els, +Key, +Softness, -Dist, +Vs0, -Vs)
% Arity-8 entry point, as called by cpt_min/6.  Els duplicates Domain at
% that call site and is ignored.  Dist is p(Domain, CPT, Parents).
build_min_table(Vars, Domain, _Els, Key, Softness, Dist, Vs0, Vs) :-
	build_min_table_keyed(Vars, Domain, Key, Softness, Dist, Vs0, Vs).

% build_min_table(+Vars, +Domain, +Softness, -Dist, +Vs0, -Vs)
% Original arity-6 form, kept for compatibility.  It has no Key
% argument, so intermediate nodes are named with an unbound key, exactly
% as before.
build_min_table(Vars, Domain, Softness, Dist, Vs0, Vs) :-
	build_min_table_keyed(Vars, Domain, _Key, Softness, Dist, Vs0, Vs).

% build_min_table_keyed(+Vars, +Domain, +Key, +Softness, -Dist, +Vs0, -Vs)
% First clause: the table has between 1 and 16 cells and is generated
% directly.  Second clause: Vars is split in two halves, each reduced to
% one 'MIN' node, and the final table is built over those two nodes.
build_min_table_keyed(Vars, Domain, _Key, Softness, p(Domain, CPT, Vars), Vs, Vs) :-
	length(Domain, SDomain),
	int_power(Vars, SDomain, 1, TabSize),
	TabSize =< 16,
	/* case gmp is not there !! */
	TabSize > 0, !,
	min_cpt(Vars, Domain, Softness, CPT).
build_min_table_keyed(Vars, Domain, Key, Softness, p(Domain, CPT, [V1,V2]), Vs, [V1,V2|NewVs]) :-
	length(Vars, L),
	LL1 is L//2,
	LL2 is L-LL1,
	list_split(LL1, Vars, L1, L2),
	% NOTE(review): CPT is still unbound while the intermediate nodes are
	% created and is only filled in by min_cpt/4 below -- confirm that
	% generate_var/5 tolerates a table that is bound later.
	build_intermediate_table(LL1, min(Domain,CPT), L1, V1, Key, 1.0, 0, I1, Vs, Vs1),
	build_intermediate_table(LL2, min(Domain,CPT), L2, V2, Key, 1.0, I1, _, Vs1, NewVs),
	min_cpt([V1,V2], Domain, Softness, CPT).
|
|
|
|
% int_power(+List, +Base, +Acc0, -Result)
% Multiplies the accumulator by Base once per element of List, i.e.
% Result = Acc0 * Base^length(List).  The elements are not inspected.
int_power([], _Base, Result, Result).
int_power([_|Rest], Base, Acc0, Result) :-
	Acc is Acc0*Base,
	int_power(Rest, Base, Acc, Result).
|
|
|
|
% build_intermediate_table(+N, +Op, +Vars, -Top, +Key, +Softness, +I0, -I, +Vs0, -Vs)
% Reduces a list of N variables to a single variable Top by pairing
% them up recursively; every pairing creates a temporary random
% variable through generate_tmp_random/7.  I0/I thread the counter used
% to number the generated variables.
%   N = 1: the variable itself is the result.
%   N = 2: one temporary variable over the pair.
%   N > 2: split in halves, reduce each half, then join both results.
build_intermediate_table(1, _, [Var], Var, _, _, I, I, Vs, Vs) :- !.
build_intermediate_table(2, Op, [A,B], Top, Key, Softness, I0, I, Vs, Vs) :- !,
	I is I0+1,
	generate_tmp_random(Op, 2, [A,B], Top, Key, Softness, I0).
build_intermediate_table(N, Op, Vars, Top, Key, Softness, I0, I, Vs, [Left,Right|NewVs]) :-
	LeftCount is N//2,
	RightCount is N-LeftCount,
	list_split(LeftCount, Vars, LeftVars, RightVars),
	% I0 is reserved for the variable that joins both halves
	I1 is I0+1,
	build_intermediate_table(LeftCount, Op, LeftVars, Left, Key, Softness, I1, I2, Vs, Vs1),
	build_intermediate_table(RightCount, Op, RightVars, Right, Key, Softness, I2, I, Vs1, NewVs),
	generate_tmp_random(Op, N, [Left,Right], Top, Key, Softness, I0).
|
|
|
|
% averages are transformed into sums.
|
|
% generate_tmp_random(+Op, +N, +[V1,V2], -V, +Key, +Softness, +I)
% Creates the temporary random variable V that combines V1 and V2.
%   sum(Min,Max):    'AVG'(I,Key) over the integer range Min*N..Max*N,
%                    with a table computed by sum_cpt/4.
%   max(Domain,CPT): 'MAX'(I,Key) over Domain with the given table.
%   min(Domain,CPT): 'MIN'(I,Key) over Domain with the given table.
% The max/min clauses used to have arity 6 while every caller passes 7
% arguments, so they could never be selected; they now take (and ignore)
% the Softness argument like the sum clause.
generate_tmp_random(sum(Min,Max), N, [V1,V2], V, Key, Softness, I) :-
	Lower is Min*N,
	Upper is Max*N,
	generate_list(Lower, Upper, Nbs),
	sum_cpt([V1,V2], Nbs, Softness, CPT),
	generate_var('AVG'(I,Key), Nbs, CPT, [V1,V2], V).
%	write(sum(Nbs, CPT, [V1,V2])),nl, % debugging
generate_tmp_random(max(Domain,CPT), _, [V1,V2], V, Key, _Softness, I) :-
	generate_var('MAX'(I,Key), Domain, CPT, [V1,V2], V).
generate_tmp_random(min(Domain,CPT), _, [V1,V2], V, Key, _Softness, I) :-
	generate_var('MIN'(I,Key), Domain, CPT, [V1,V2], V).
|
|
|
|
% generate_var(+NodeKey, +Domain, +CPT, +Parents, -Var)
% Posts a CLP(BN) constraint that makes Var a random variable named
% NodeKey with the tabular distribution tab(Domain, CPT, Parents).
generate_var(NodeKey, Domain, CPT, Parents, Var) :-
	{ Var = NodeKey with tab(Domain, CPT, Parents) }.
|
|
|
|
% generate_list(+Low, +High, -List)
% List is the ascending list of integers Low..High, both inclusive.
% Fails when Low > High; without the guard below the recursion would
% count upwards forever in that case.
generate_list(M, M, [M]) :- !.
generate_list(I, M, [I|Nbs]) :-
	I < M,
	I1 is I+1,
	generate_list(I1, M, Nbs).
|
|
|
|
% list_split(+N, +List, -Prefix, -Suffix)
% Prefix holds the first N elements of List and Suffix the rest.
% Fails if List has fewer than N elements.
list_split(0, Suffix, [], Suffix) :- !.
list_split(N, [Head|Tail], [Head|Prefix], Suffix) :-
	N1 is N-1,
	list_split(N1, Tail, Prefix, Suffix).
|
|
|
|
%
|
|
% if we have evidence, we need to check if we are always consistent, never consistent, or can be consistent
|
|
%
|
|
% include_qevidence(+Var, +Mat0, -Mat, +Parents0, -Parents, +Vs, +IVs, -NewVs)
% If Var carries an evidence/1 attribute, the table is normalised and
% check_consistency/11 decides which table, parents and variable list
% to return.  Without evidence Mat0, Parents0 and IVs are returned as
% they are.
include_qevidence(Var, Mat0, Mat, Parents0, Parents, Vs, IVs, NewVs) :-
	(   clpbn:get_atts(Var, [evidence(Ev)])
	->  normalise_CPT_on_lines(Mat0, Mat1, Lines),
	    check_consistency(Lines, Ev, Mat0, Mat1, Lines, Mat,
			      Parents0, Parents, Vs, IVs, NewVs)
	;   Mat = Mat0,
	    Parents = Parents0,
	    NewVs = IVs
	).
|
|
|
|
% check_consistency(+Lines, +Ev, +Mat0, +Mat1, +Lines, -Mat,
%                   +Parents0, -Parents, +Vs, +IVs, -NewVs)
% Lines is the per-line list returned by normalise_CPT_on_lines/3 (it is
% passed in twice by the callers); Ev is a 0-based index into it.
%   - the entry at Ev equals the total of Lines: the normalised table
%     Mat1 is returned with no parents and the variable list Vs;
%   - the entry at Ev is 0.0: a domain_error(incompatible_evidence)
%     exception is thrown;
%   - otherwise: the table Mat0, Parents0 and the variable list IVs are
%     returned.
check_consistency(Lines, Ev, Mat0, Mat1, Lines, Mat, Parents0, Parents, Vs, IVs, NewVs) :-
	sumlist(Lines, Total),
	nth0(Ev, Lines, AtEv),
	(   AtEv == Total
	->  Mat1 = Mat,
	    Parents = [],
	    Vs = NewVs
	;   AtEv == 0.0
	->  throw(error(domain_error(incompatible_evidence),evidence(Ev)))
	;   Mat0 = Mat,
	    Parents = Parents0,
	    IVs = NewVs
	).
|
|
|
|
|
|
%
|
|
% generate actual table, instead of trusting the solver
|
|
%
|
|
|
|
% average_cpt(+Vars, +OVars, +Domain, +Base, +Softness, -Matrix)
% Allocates a floats matrix with one dimension for the domain followed
% by one dimension per variable in Vars, then lets fill_in_average/4 set
% the entries.  The divisor of the average is the length of OVars, not
% of Vars; Base is the evidence total passed on to the sum.  Softness is
% ignored.
average_cpt(Vars, OVars, Domain, Base, _Softness, Matrix) :-
	get_ds_lengths(Vars, Sizes),
	length(OVars, Divisor),
	length(Domain, DomainSize),
	Dims = [DomainSize|Sizes],
	matrix_new(floats, Dims, Matrix),
	fill_in_average(Sizes, Divisor, Base, Matrix).
|
|
|
|
% get_ds_lengths(+Vars, -Sizes)
% Maps every variable to its domain size, as given by get_vdist_size/2.
get_ds_lengths(Vars, Sizes) :-
	maplist(get_vdist_size, Vars, Sizes).
|
|
|
|
% fill_in_average(+Sizes, +N, +Base, +Matrix)
% Failure-driven loop over every index combination produced by
% generate/2: the entry indexed by the combination's average (see
% average/4) followed by the combination itself is set to 1.0.
% Always succeeds once all combinations have been tried.
fill_in_average(Sizes, N, Base, Matrix) :-
	(   generate(Sizes, Case),
	    average(Case, N, Base, Avg),
	    matrix_set(Matrix, [Avg|Case], 1.0),
	    fail
	;   true
	).
|
|
|
|
% generate(+Sizes, -Case)
% On backtracking enumerates every list Case that has one index per
% entry of Sizes, each index being drawn with from(0, Size, Index).
generate([], []).
generate([Size|Sizes], [Index|Indices]) :-
	from(0, Size, Index),
	generate(Sizes, Indices).
|
|
|
|
% from(+Low, +Limit, -X)
% On backtracking X takes the value Low and then every following integer
% that is strictly below Limit.  Low itself is always produced, whatever
% the limit.
from(Low, _Limit, Low).
from(Low, Limit, X) :-
	Next is Low+1,
	Next < Limit,
	from(Next, Limit, X).
|
|
|
|
% average(+Case, +N, +Base, -Val)
% Val is the total returned by sum_list(Case, Base, Total), divided by
% N and rounded to an integer.
% NOTE(review): sum_list/3 presumably uses Base as the initial
% accumulator -- confirm against library(lists).
average(Case, N, Base, Val) :-
	sum_list(Case, Base, Total),
	Mean is Total/N,
	Val is integer(round(Mean)).
|
|
|
|
|
|
% sum_cpt(+Vars, +Domain, +Softness, -CPT)
% Builds the table of a summing node as a flat list: a floats matrix
% with one dimension for the domain followed by one per variable is
% allocated, filled in by fill_in_sum/2 and converted to a list.
% Softness is ignored.
sum_cpt(Vars, Domain, _Softness, CPT) :-
	get_ds_lengths(Vars, Sizes),
	length(Domain, DomainSize),
	Dims = [DomainSize|Sizes],
	matrix_new(floats, Dims, Matrix),
	fill_in_sum(Sizes, Matrix),
	matrix_to_list(Matrix, CPT).
|
|
|
|
% fill_in_sum(+Sizes, +Matrix)
% Failure-driven loop over every index combination produced by
% generate/2: the entry indexed by the sum of the combination followed
% by the combination itself is set to 1.0.  Always succeeds once all
% combinations have been tried.
fill_in_sum(Sizes, Matrix) :-
	(   generate(Sizes, Case),
	    sumlist(Case, Sum),
	    matrix_set(Matrix, [Sum|Case], 1.0),
	    fail
	;   true
	).
|
|
|
|
|
|
% max_cpt(+Vars, +Domain, +Softness, -CPT)
% Builds the table of a maximum node as a flat list: a floats matrix
% with one dimension for the domain followed by one per variable is
% allocated, filled in by fill_in_max/2 and converted to a list.
% Softness is ignored.
max_cpt(Vars, Domain, _Softness, CPT) :-
	get_ds_lengths(Vars, Sizes),
	length(Domain, DomainSize),
	Dims = [DomainSize|Sizes],
	matrix_new(floats, Dims, Matrix),
	fill_in_max(Sizes, Matrix),
	matrix_to_list(Matrix, CPT).
|
|
|
|
% fill_in_max(+Sizes, +Matrix)
% Failure-driven loop over every index combination produced by
% generate/2: the entry indexed by the largest index of the combination
% followed by the combination itself is set to 1.0.  Always succeeds
% once all combinations have been tried.
fill_in_max(Sizes, Matrix) :-
	(   generate(Sizes, Case),
	    max_list(Case, Largest),
	    matrix_set(Matrix, [Largest|Case], 1.0),
	    fail
	;   true
	).
|
|
|
|
|
|
% min_cpt(+Vars, +Domain, +Softness, -CPT)
% Builds the table of a minimum node as a flat list: a floats matrix
% with one dimension for the domain followed by one per variable is
% allocated, filled in by fill_in_min/2 and converted to a list.
% Softness is ignored.
% This used to call fill_in_max/2, which produced the table of a
% maximum node instead.
min_cpt(Vs,Vals,_,CPT) :-
	get_ds_lengths(Vs,Lengs),
	length(Vals,SVals),
	matrix_new(floats,[SVals|Lengs],MCPT),
	fill_in_min(Lengs,MCPT),
	matrix_to_list(MCPT,CPT).

% fill_in_min(+Sizes, +Matrix)
% Failure-driven loop over every index combination produced by
% generate/2: the entry indexed by the smallest index of the combination
% followed by the combination itself is set to 1.0.  Always succeeds
% once all combinations have been tried.
% This used to select the entry with max_list/2 instead of min_list/2.
fill_in_min(Lengs,MCPT) :-
	generate(Lengs, Case),
	min_list(Case, Val),
	matrix_set(MCPT,[Val|Case],1.0),
	fail.
fill_in_min(_,_).
|
|
|
|
|
|
% get_vdist_size(+VarOrKey, -Size)
% For an unbound (attributed) variable the size is taken from the
% distribution stored in its dist/2 attribute.  For anything else the
% argument is treated as a key whose domain is looked up with skolem/2.
get_vdist_size(VarOrKey, Size) :-
	(   var(VarOrKey)
	->  clpbn:get_atts(VarOrKey, [dist(Dist,_)]),
	    get_dist_domain_size(Dist, Size)
	;   skolem(VarOrKey, Domain),
	    length(Domain, Size)
	).
|
|
|