This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/ProbLog/problog_lfi.yap

1183 lines
38 KiB
Prolog

%%% -*- Mode: Prolog; -*-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% $Date: 2011-12-05 14:07:19 +0100 (Mon, 05 Dec 2011) $
% $Revision: 6766 $
%
% This file is part of ProbLog
% http://dtai.cs.kuleuven.be/problog
%
% ProbLog was developed at Katholieke Universiteit Leuven
%
% Copyright 2009
% Angelika Kimmig, Vitor Santos Costa, Bernd Gutmann
%
% Main author of this file:
% Bernd Gutmann
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Artistic License 2.0
%
% Copyright (c) 2000-2006, The Perl Foundation.
%
% Everyone is permitted to copy and distribute verbatim copies of this
% license document, but changing it is not allowed. Preamble
%
% This license establishes the terms under which a given free software
% Package may be copied, modified, distributed, and/or
% redistributed. The intent is that the Copyright Holder maintains some
% artistic control over the development of that Package while still
% keeping the Package available as open source and free software.
%
% You are always permitted to make arrangements wholly outside of this
% license directly with the Copyright Holder of a given Package. If the
% terms of this license do not permit the full use that you propose to
% make of the Package, you should contact the Copyright Holder and seek
% a different licensing arrangement. Definitions
%
% "Copyright Holder" means the individual(s) or organization(s) named in
% the copyright notice for the entire Package.
%
% "Contributor" means any party that has contributed code or other
% material to the Package, in accordance with the Copyright Holder's
% procedures.
%
% "You" and "your" means any person who would like to copy, distribute,
% or modify the Package.
%
% "Package" means the collection of files distributed by the Copyright
% Holder, and derivatives of that collection and/or of those files. A
% given Package may consist of either the Standard Version, or a
% Modified Version.
%
% "Distribute" means providing a copy of the Package or making it
% accessible to anyone else, or in the case of a company or
% organization, to others outside of your company or organization.
%
% "Distributor Fee" means any fee that you charge for Distributing this
% Package or providing support for this Package to another party. It
% does not mean licensing fees.
%
% "Standard Version" refers to the Package if it has not been modified,
% or has been modified only in ways explicitly requested by the
% Copyright Holder.
%
% "Modified Version" means the Package, if it has been changed, and such
% changes were not explicitly requested by the Copyright Holder.
%
% "Original License" means this Artistic License as Distributed with the
% Standard Version of the Package, in its current version or as it may
% be modified by The Perl Foundation in the future.
%
% "Source" form means the source code, documentation source, and
% configuration files for the Package.
%
% "Compiled" form means the compiled bytecode, object code, binary, or
% any other form resulting from mechanical transformation or translation
% of the Source form.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Permission for Use and Modification Without Distribution
%
% (1) You are permitted to use the Standard Version and create and use
% Modified Versions for any purpose without restriction, provided that
% you do not Distribute the Modified Version.
%
% Permissions for Redistribution of the Standard Version
%
% (2) You may Distribute verbatim copies of the Source form of the
% Standard Version of this Package in any medium without restriction,
% either gratis or for a Distributor Fee, provided that you duplicate
% all of the original copyright notices and associated disclaimers. At
% your discretion, such verbatim copies may or may not include a
% Compiled form of the Package.
%
% (3) You may apply any bug fixes, portability changes, and other
% modifications made available from the Copyright Holder. The resulting
% Package will still be considered the Standard Version, and as such
% will be subject to the Original License.
%
% Distribution of Modified Versions of the Package as Source
%
% (4) You may Distribute your Modified Version as Source (either gratis
% or for a Distributor Fee, and with or without a Compiled form of the
% Modified Version) provided that you clearly document how it differs
% from the Standard Version, including, but not limited to, documenting
% any non-standard features, executables, or modules, and provided that
% you do at least ONE of the following:
%
% (a) make the Modified Version available to the Copyright Holder of the
% Standard Version, under the Original License, so that the Copyright
% Holder may include your modifications in the Standard Version. (b)
% ensure that installation of your Modified Version does not prevent the
% user installing or running the Standard Version. In addition, the
% modified Version must bear a name that is different from the name of
% the Standard Version. (c) allow anyone who receives a copy of the
% Modified Version to make the Source form of the Modified Version
% available to others under (i) the Original License or (ii) a license
% that permits the licensee to freely copy, modify and redistribute the
% Modified Version using the same licensing terms that apply to the copy
% that the licensee received, and requires that the Source form of the
% Modified Version, and of any works derived from it, be made freely
% available in that license fees are prohibited but Distributor Fees are
% allowed.
%
% Distribution of Compiled Forms of the Standard Version or
% Modified Versions without the Source
%
% (5) You may Distribute Compiled forms of the Standard Version without
% the Source, provided that you include complete instructions on how to
% get the Source of the Standard Version. Such instructions must be
% valid at the time of your distribution. If these instructions, at any
% time while you are carrying out such distribution, become invalid, you
% must provide new instructions on demand or cease further
% distribution. If you provide valid instructions or cease distribution
% within thirty days after you become aware that the instructions are
% invalid, then you do not forfeit any of your rights under this
% license.
%
% (6) You may Distribute a Modified Version in Compiled form without the
% Source, provided that you comply with Section 4 with respect to the
% Source of the Modified Version.
%
% Aggregating or Linking the Package
%
% (7) You may aggregate the Package (either the Standard Version or
% Modified Version) with other packages and Distribute the resulting
% aggregation provided that you do not charge a licensing fee for the
% Package. Distributor Fees are permitted, and licensing fees for other
% components in the aggregation are permitted. The terms of this license
% apply to the use and Distribution of the Standard or Modified Versions
% as included in the aggregation.
%
% (8) You are permitted to link Modified and Standard Versions with
% other works, to embed the Package in a larger work of your own, or to
% build stand-alone binary or bytecode versions of applications that
% include the Package, and Distribute the result without restriction,
% provided the result does not expose a direct interface to the Package.
%
% Items That are Not Considered Part of a Modified Version
%
% (9) Works (including, but not limited to, modules and scripts) that
% merely extend or make use of the Package, do not, by themselves, cause
% the Package to be a Modified Version. In addition, such works are not
% considered parts of the Package itself, and are not subject to the
% terms of this license.
%
% General Provisions
%
% (10) Any use, modification, and distribution of the Standard or
% Modified Versions is governed by this Artistic License. By using,
% modifying or distributing the Package, you accept this license. Do not
% use, modify, or distribute the Package, if you do not accept this
% license.
%
% (11) If your Modified Version has been derived from a Modified Version
% made by someone other than you, you are nevertheless required to
% ensure that your Modified Version complies with the requirements of
% this license.
%
% (12) This license does not grant you the right to use any trademark,
% service mark, tradename, or logo of the Copyright Holder.
%
% (13) This license includes the non-exclusive, worldwide,
% free-of-charge patent license to make, have made, use, offer to sell,
% sell, import and otherwise transfer the Package with respect to any
% patent claims licensable by the Copyright Holder that are necessarily
% infringed by the Package. If you institute patent litigation
% (including a cross-claim or counterclaim) against any party alleging
% that the Package constitutes direct or contributory patent
% infringement, then this Artistic License to you shall terminate on the
% date that such litigation is filed.
%
% (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT
% HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED
% WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
% PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT
% PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT
% HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
% INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
% OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
:-source.
:- module(problog_lfi,[do_learning/1,
do_learning/2,
create_ground_tunable_fact/2,
reset_learning/0
]).
% switch on all the checks to reduce bug searching time
:- style_check(all).
:- yap_flag(unknown,error).
% load modules from the YAP library
:- use_module(library(lists),[member/2,nth1/3,sum_list/2,min_list/2,max_list/2]).
:- use_module(library(system),[file_exists/1,exec/3,wait/2]).
% load our own modules
:- use_module('problog').
:- use_module('problog/logger').
:- use_module('problog/flags').
:- use_module('problog/os').
:- use_module('problog/completion').
:- use_module('problog/print_learning').
:- use_module('problog/utils_learning').
:- use_module('problog/utils').
:- use_module('problog/ad_converter').
% Run-time state of the learner, asserted and retracted while learning runs.
% learning_initialized/0: asserted at the end of init_learning/0 and
% retracted by reset_learning/0.
:- dynamic(learning_initialized/0).
% current_iteration/1: iteration counter; set to 0 by init_learning/0 and
% incremented by do_learning_intern/2.
:- dynamic(current_iteration/1).
% NOTE(review): query_all_scripts/2 is not referenced in the reviewed part
% of this file -- confirm whether it is still needed.
:- dynamic(query_all_scripts/2).
% last_llh/1: training-set log-likelihood stored by do_learning_intern/2
% and compared against the next iteration's value.
:- dynamic(last_llh/1).
% Predicates living in module user that this file reads (model clauses,
% evidence, training and test example IDs); their clauses may be interleaved.
:- discontiguous(user:myclause/1).
:- discontiguous(user:myclause/2).
:- discontiguous(user:known/3).
:- discontiguous(user:example/1).
:- discontiguous(user:test_example/1).
% bdd_cluster/2 of module completion may receive clauses from several files.
:- multifile(completion:bdd_cluster/2).
%:- multifile(completion:known_count/4).
% Load-time rewrite of user clauses of the form  myclause((Head <-- Body)).
% The clause is passed to term_expansion_intern_ad/4 together with the
% module currently being loaded and the tag lfi_learning; whatever that
% predicate returns replaces the original clause.
user:term_expansion(myclause(AD_Clause), Expanded) :-
    AD_Clause = (_Head <-- _Body),
    prolog_load_context(module, LoadModule),
    term_expansion_intern_ad(AD_Clause, LoadModule, lfi_learning, Expanded).
%========================================================================
%= Hack for Ingo, to allow tunable facts with body
%=
%= e.g. :- create_ground_tunable_fact( t(_) :: f(X), member(X,[a,b,c])).
%= will create
%= t(_) :: f(a).
%= t(_) :: f(b).
%= t(_) :: f(c).
%========================================================================
% create_ground_tunable_fact(+F,+B)
% Failure-driven loop: for every solution of B the fact F (instantiated by
% that solution) is handed once to problog_assert/1.  A failing
% problog_assert/1 is skipped silently; the predicate itself always
% succeeds once B has no more solutions.
create_ground_tunable_fact(F,B) :-
    (   call(B),
        once(problog_assert(F)),
        fail
    ;   true
    ).
%========================================================================
%= store the facts with the learned probabilities to a file
%= the file name is derived from the current iteration
%=
%========================================================================
% save_model
% Writes the current model to the file named by create_factprobs_file_name/2
% for the current iteration:
%  1. every user:ad_intern/3 entry (if that predicate exists) is printed
%     through print_ad_intern/4;
%  2. every probabilistic fact is printed as "P :: Fact. %ID=N" using the
%     probability stored in the factprob array.  Facts recognised by
%     is_mvs_aux_fact/1 are written behind a leading '%', i.e. commented out.
save_model :-
    current_iteration(Iter),
    create_factprobs_file_name(Iter, ModelFile),
    open(ModelFile, write, Out),
    forall(
        ( current_predicate(user:ad_intern/3),
          user:ad_intern(Clause, AD_ID, AuxFacts)
        ),
        print_ad_intern(Out, Clause, AD_ID, AuxFacts)
    ),
    forall(
        probabilistic_fact(_, Fact, FactID),
        ( array_element(factprob, FactID, Prob),
          (   is_mvs_aux_fact(Fact)
          ->  Template = '% ~10f :: ~q. %ID=~q~n'
          ;   Template = '~10f :: ~q. %ID=~q~n'
          ),
          format(Out, Template, [Prob,Fact,FactID])
        )
    ),
    close(Out).
% is_mvs_aux_fact(+Term)
% Succeeds if the functor name of Term starts with the prefix mvs_fact_.
is_mvs_aux_fact(Term) :-
    functor(Term, Name, _Arity),
    atomic_concat(mvs_fact_, _Suffix, Name).
% print_ad_intern/4 is used with two different argument orders.
%
%  print_ad_intern(+Handle,+(Head<--Body),+ID,+Facts)
%     Entry point called from save_model/0.  Writes one clause of the form
%     "myclause( (Head <-- Body) )." to Handle; ID is ignored.
%
%  print_ad_intern(+Head,+Facts,+Mass,+Handle)
%     Walks the ';'-separated alternatives of Head in parallel with the
%     list Facts.  Mass is the probability mass already written out.
%     Every alternative that still has an entry in Facts is printed by
%     print_ad_intern_one/5; the last alternative (Facts exhausted) gets
%     the remaining mass 1.0-Mass.
print_ad_intern(Handle,(Head<--Body),_ID,Facts) :-
format(Handle,'myclause( (',[]),
% start with no probability mass used up
print_ad_intern(Head,Facts,0.0,Handle),
format(Handle,' <-- ~q) ).~n',[Body]).
print_ad_intern((A1;B1),[A2|B2],Mass,Handle) :-
once(print_ad_intern_one(A1,A2,Mass,NewMass,Handle)),
format(Handle,'; ',[]),
print_ad_intern(B1,B2,NewMass,Handle).
% last alternative: no entry left in the fact list, print the remaining mass
print_ad_intern(_::Fact,[],Mass,Handle) :-
P2 is 1.0 - Mass,
format(Handle,'~f :: ~q',[P2,Fact]).
% print_ad_intern_one(+HeadAlternative,+AuxAlternative,+Mass,-NewMass,+Handle)
% Prints one head alternative "P :: Fact" to Handle.  P is the stored
% probability of the matching auxiliary fact scaled by the mass that is
% still free (1-Mass); NewMass is Mass plus the probability just printed.
print_ad_intern_one(_ :: Fact, _ :: AuxFact, Mass, NewMass, Handle) :-
    % the fact ID comes from ProbLog, the probability from our own table
    once(probabilistic_fact(_, AuxFact, AuxID)),
    array_element(factprob, AuxID, AuxProb),
    HeadProb is AuxProb * (1-Mass),
    NewMass is Mass+HeadProb,
    format(Handle, '~f :: ~q', [HeadProb,Fact]).
%========================================================================
%= initialize everything and perform Iterations times EM
%= can be called several times
%========================================================================
% do_learning(+Iterations)
% Same as do_learning(Iterations,-1).
do_learning(Iterations) :-
    do_learning(Iterations, -1).

% do_learning(+Iterations,+Epsilon)
% Initialises learning if necessary (init_learning/0), runs the EM loop
% (do_learning_intern/2) and finally copies the learned probabilities back
% (copy_back_fact_probabilities/0).  Fails unless Iterations is a positive
% integer and Epsilon is a number.
do_learning(Iterations, Epsilon) :-
    integer(Iterations),
    number(Epsilon),
    Iterations >= 1,
    once(init_learning),
    once(do_learning_intern(Iterations, Epsilon)),
    copy_back_fact_probabilities.
% do_learning_intern(+Iterations,+Epsilon)
% Runs up to Iterations EM iterations.  After each iteration the absolute
% change of the logger variable llh_training_set with respect to the value
% stored in last_llh/1 is computed; the loop continues only while that
% change is greater than Epsilon.
do_learning_intern(0,_) :-
!.
do_learning_intern(Iterations,Epsilon) :-
Iterations>0,
logger_start_timer(duration),
current_iteration(CurrentIteration),
!,
% Advance the stored counter first: the predicates called below that read
% current_iteration/1 see NextIteration, while the log output and the
% logger variable use CurrentIteration.
retractall(current_iteration(_)),
!,
NextIteration is CurrentIteration+1,
assertz(current_iteration(NextIteration)),
EndIteration is CurrentIteration+Iterations-1,
format_learning(1,'~nIteration ~d of ~d~n',[CurrentIteration,EndIteration]),
logger_set_variable(iteration,CurrentIteration),
write_probabilities_file,
once(llh_testset),
once(ground_truth_difference),
once(em_one_iteration),
% save the model every Log_Frequency iterations (never if the flag is =< 0)
problog_flag(log_frequency,Log_Frequency),
(
( Log_Frequency>0, 0 =:= CurrentIteration mod Log_Frequency)
->
once(save_model);
true
),
!,
(
% a previous LLH exists: replace it and measure the change
last_llh(Last_LLH)
->
(
retractall(last_llh(_)),
logger_get_variable(llh_training_set,Current_LLH),
assertz(last_llh(Current_LLH)),
!,
LLH_Diff is abs(Last_LLH-Current_LLH)
); (
% no previous LLH: store the current one and pick a difference that is
% greater than Epsilon so that the loop goes on
logger_get_variable(llh_training_set,Current_LLH),
assertz(last_llh(Current_LLH)),
LLH_Diff is Epsilon+1
)
),
logger_stop_timer(duration),
logger_write_data,
RemainingIterations is Iterations-1,
!,
garbage_collect,
!,
(
% stop early once the LLH change is no longer greater than Epsilon
LLH_Diff>Epsilon
->
do_learning_intern(RemainingIterations,Epsilon);
true
).
%========================================================================
%= find proofs and build bdds for all training and test examples
%=
%=
%========================================================================
% init_learning
% One-time set-up before the first EM iteration.  Does nothing if
% learning_initialized/0 is already present.  Otherwise it checks that the
% problogbdd_lfi file exists, validates the theory (check_theory/0), clears
% the output directories, allocates the static arrays, builds the BDD
% scripts (init_queries/0), sets current_iteration(0), marks learning as
% initialised and saves the initial model.
init_learning :-
learning_initialized,
!.
init_learning :-
convert_filename_to_problog_path('problogbdd_lfi', Path),
(
file_exists(Path)
->
true;
(
% print a warning and fail: learning cannot run without this file
problog_path(PD),
format(user_error, 'WARNING: Can not find file: problogbdd_lfi. Please place file in problog path: ~q~n',[PD]),
fail
)
),
% throws bad_theory(_) if the user's data is inconsistent
check_theory,
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Delete the stuff from the previous run
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
problog_flag(reuse_initialized_bdds,Re_Use_Flag),
(
% the BDD directory is emptied only when the flag is exactly false
Re_Use_Flag==false
->
empty_bdd_directory;
true
),
empty_output_directory,
logger_write_header,
format_learning(1,'Initializing everything~n',[]),
(
% test examples are optional
current_predicate(user:test_example/1)
->
(
succeeds_n_times(user:test_example(_),TestExampleCount),
format_learning(3,'~q test example(s)~n',[TestExampleCount])
);
true
),
succeeds_n_times(user:example(_),TrainingExampleCount),
format_learning(3,'~q training example(s)~n',[TrainingExampleCount]),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Create arrays for probabilities and counting tables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
once(initialize_fact_probabilities),
% all arrays get N entries, N being the value of problog:probclause_id/1
problog:probclause_id(N),
static_array(factprob_temp,N,float),
static_array(factusage,N,int),
static_array(known_count_true_training,N,int),
static_array(known_count_false_training,N,int),
static_array(known_count_true_test,N,int),
static_array(known_count_false_test,N,int),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% build BDD script for every example
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
once(init_queries),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% done
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
assertz(current_iteration(0)),
assertz(learning_initialized),
% writes the initial (iteration 0) model file
once(save_model),
format_learning(1,'~n',[]),
garbage_collect,
garbage_collect_atoms.
%========================================================================
%= This predicate checks some aspects of the data given by the user.
%= You know folks: Garbage in, garbage out.
%=
%========================================================================
% check_theory
% Sanity checks on the user's theory and data.  The first violated check
% prints a banner on user_error and throws bad_theory(Reason); the
% predicate succeeds if every check passes.
%   bad_theory(Head)                 Head is a probabilistic fact and also
%                                    the head of a myclause/2 rule
%   bad_theory(no_training_examples) user:example/1 has no solution
%   bad_theory(double_id(ID))        ID is a training and a test example
%   bad_theory(no_evidence)          no training example has known/3 evidence
%   bad_theory(bad_evidence(ID))     evidence is neither true nor false, or
%                                    the same atom is known true and false
check_theory :-
    (   % guarded like the other user predicates below, so that a theory
        % without any myclause/2 rule does not raise an unknown-procedure
        % error (this file sets the unknown flag to error)
        ( current_predicate(user:myclause/2), user:myclause(Head,Body), P :: Head )
    ->  format(user_error,'===============================================================~n',[]),
        format(user_error,' The theory contains an atom that appears both as probabilistic~n',[]),
        format(user_error,' fact and as head of an rule. This is not allowed.~2n',[]),
        format(user_error,' ~q~n',[P :: Head]),
        format(user_error,' ~q~2n',[myclause(Head,Body)]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(Head))
    ;   true
    ),
    (   ( current_predicate(user:example/1), user:example(_) )
    ->  true
    ;   format(user_error,'===============================================================~n',[]),
        format(user_error,' No training examples specified.~n',[]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(no_training_examples))
    ),
    (   ( current_predicate(user:test_example/1), user:example(ID), user:test_example(ID) )
    ->  format(user_error,'===============================================================~n',[]),
        format(user_error,' The example ~q appears both as test and as training example.~n',[ID]),
        format(user_error,' Example IDs from test and training examples must be disjoint.~2n',[]),
        format(user_error,' Do NOT bypass this test, since the implementation yields wrong resuls~n',[]),
        % the trailing ~n was missing, which glued the closing banner line
        % onto the end of this sentence
        format(user_error,' when an example ID appears both as test and training example.~n',[]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(double_id(ID)))
    ;   true
    ),
    (   ( current_predicate(user:known/3), user:example(ID2), user:known(ID2,_,_) )
    ->  true
    ;   format(user_error,'===============================================================~n',[]),
        format(user_error,' No evidence specified.~n',[]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(no_evidence))
    ),
    % user:known/3 is known to exist from here on (checked just above)
    (   ( user:known(ID,Foo,Evidence), (Evidence\=true,Evidence\=false) )
    ->  format(user_error,'===============================================================~n',[]),
        format(user_error,' Bad evidence for training example ~q: ~q.~n',[ID,known(ID,Foo,Evidence)]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(bad_evidence(ID)))
    ;   true
    ),
    (   ( user:known(ID,Foo,true), user:known(ID,Foo,false) )
    ->  format(user_error,'===============================================================~n',[]),
        format(user_error,' Bad evidence for training example ~q: ~q and ~q~n',[ID,known(ID,Foo,true),known(ID,Foo,false)]),
        format(user_error,'===============================================================~2n',[]),
        throw(bad_theory(bad_evidence(ID)))
    ;   true
    ).
%========================================================================
%= copy fact probabilities to array for speeding up the update
%=
%=
%========================================================================
% initialize_fact_probabilities
% Allocates the static float array factprob, sized by
% problog:probclause_id/1, and stores in it the probability that
% get_fact_probability/2 reports for every fact ID.
initialize_fact_probabilities :-
    problog:probclause_id(ArraySize),
    static_array(factprob, ArraySize, float),
    forall(
        get_fact_probability(ID, Prob),
        update_array(factprob, ID, Prob)
    ).
% copy_back_fact_probabilities
% For every tunable fact, reads its value from the factprob array and
% hands it to set_fact_probability/2.
copy_back_fact_probabilities :-
    forall(
        tunable_fact(ID, _),
        ( array_element(factprob, ID, Learned),
          set_fact_probability(ID, Learned)
        )
    ).
%========================================================================
%= This predicate goes over all training and test examples,
%= calls the inference method of ProbLog and stores the resulting
%= BDDs
%========================================================================
% init_queries
% Calls init_one_query/2 for every training example (user:example/1) and,
% if user:test_example/1 exists, for every test example.  Afterwards the
% lists of a(QueryID,ClusterID,Count) entries are built and stored in
% training_set_cluster_list/1 and test_set_cluster_list/1:
%   - flag cluster_bdds == true: the lists come from
%     create_training_query_cluster_list/1 and
%     create_test_query_cluster_list/1;
%   - otherwise every (QueryID,ClusterID) pair from bdd_cluster/2 is kept
%     with Count 1.
init_queries :-
    problog_flag(cluster_bdds,Cluster_BDDs),
    format_learning(2,'Build BDDs for examples~n',[]),
    forall(user:example(Training_ID),
           ( format_learning(3,'training example ~q: ',[Training_ID]),
             init_one_query(Training_ID,training)
           )
    ),
    % A leftover debugging call writeln(Training_ID) used to sit here; the
    % variable is unbound after forall/2, so it only printed a variable name.
    forall(( current_predicate(user:test_example/1),
             user:test_example(Test_ID)
           ),
           ( format_learning(3,'test example ~q: ',[Test_ID]),
             init_one_query(Test_ID,test)
           )
    ),
    (   Cluster_BDDs==true
    ->  format_learning(2,'Calculate MD5s for training example BDD scripts~n',[]),
        create_training_query_cluster_list(Training_Set_Cluster_List),
        format_learning(2,'Calculate MD5s for test example BDD scripts~n',[]),
        create_test_query_cluster_list(Test_Set_Cluster_List)
    ;   findall(a(QueryID,ClusterID,1),
                ( current_predicate(user:test_example/1),
                  user:test_example(QueryID),
                  bdd_cluster(QueryID,ClusterIDs),
                  member(ClusterID,ClusterIDs)
                ),
                Test_Set_Cluster_List),
        findall(a(QueryID,ClusterID,1),
                ( user:example(QueryID),
                  bdd_cluster(QueryID,ClusterIDs),
                  member(ClusterID,ClusterIDs)
                ),
                Training_Set_Cluster_List)
    ),
    assertz(training_set_cluster_list(Training_Set_Cluster_List)),
    assertz(test_set_cluster_list(Test_Set_Cluster_List)).
%========================================================================
%=
%========================================================================
% init_one_query(+QueryID,+Query_Type)
% If the known-values file for QueryID already exists it is consulted and
% nothing is recomputed.  Otherwise the evidence is propagated once through
% propagate_evidence/2 and the garbage collectors are run.
init_one_query(QueryID,Query_Type) :-
    (   create_known_values_file_name(QueryID,Known_File),
        file_exists(Known_File)
    ->  format_learning(3,'Will reuse existing BDD script ~q for example ~q.~n',[Known_File,QueryID]),
        consult(Known_File)
    ;   %FIXME
        % check whether we can read the BDD script for each cluster
        once(propagate_evidence(QueryID,Query_Type)),
        format_learning(3,'~n',[]),
        garbage_collect_atoms,
        garbage_collect
    ).
% create_test_query_cluster_list(-L2)
% Collects all (QueryID,ClusterID) pairs of the test examples, computes an
% MD5 per pair (calc_all_md5/2) and groups pairs with equal MD5.  L2 holds
% one a(QueryID,ClusterID,Count) per group: the first pair of the group
% and the group size.  If at least one pair exists, the reduction factor
% is reported through format_learning/3.
create_test_query_cluster_list(L2) :-
    findall(a(QID,CID),
            ( current_predicate(user:test_example/1),
              user:test_example(QID),
              bdd_cluster(QID,CIDs),
              member(CID,CIDs)
            ),
            Pairs),
    calc_all_md5(Pairs,Hashed),
    findall(a(FirstQID,FirstCID,GroupSize),
            ( % _MD5 is free in bagof/3, hence one solution per distinct MD5
              bagof(a(Q,C),member(a(Q,C,_MD5),Hashed),Group),
              Group = [a(FirstQID,FirstCID)|_],
              length(Group,GroupSize)
            ),
            L2),
    !,
    length(Pairs,Before),
    length(L2,After),
    (   Before =< 0
    ->  true
    ;   Ratio is After/Before,
        format_learning(3,' ~d cluster after splitting, ~d unique cluster ==> reduction factor of ~4f~n',[Before,After,Ratio])
    ).
% calc_all_md5(+Pairs,-Hashed)
% Maps every a(QueryID,ClusterID) in Pairs to a(QueryID,ClusterID,MD5),
% where MD5 is what calc_md5/2 returns for the file named by
% create_bdd_file_name/3.
calc_all_md5([],[]).
calc_all_md5([a(Query,Cluster)|Rest],[a(Query,Cluster,Digest)|RestHashed]) :-
    create_bdd_file_name(Query,Cluster,Script),
    calc_md5(Script,Digest),
    calc_all_md5(Rest,RestHashed).
% create_training_query_cluster_list(-L2)
% Collects all (QueryID,ClusterID) pairs of the training examples, computes
% an MD5 per pair (calc_all_md5/2) and groups pairs with equal MD5.  L2
% holds one a(QueryID,ClusterID,Count) per group: the first pair of the
% group and the group size.
% The reduction factor is only computed and reported when at least one
% pair exists; without this guard an empty pair list made Len2/Len1 divide
% by zero.  create_test_query_cluster_list/1 has the same guard.
create_training_query_cluster_list(L2) :-
    findall(a(QueryID,ClusterID),
            ( user:example(QueryID),
              bdd_cluster(QueryID,ClusterIDs),
              member(ClusterID,ClusterIDs)
            ),
            AllCluster),
    calc_all_md5(AllCluster,AllCluster2),
    findall(a(QueryID1,ClusterID1,Len),
            ( % _MD5 is free in bagof/3, hence one solution per distinct MD5
              bagof(a(QueryID,ClusterID),member(a(QueryID,ClusterID,_MD5),AllCluster2),L),
              nth1(1,L,a(QueryID1,ClusterID1)),
              length(L,Len)
            ),
            L2),
    length(AllCluster,Len1),
    length(L2,Len2),
    (   Len1>0
    ->  Reduction is Len2/Len1,
        format_learning(3,' ~d cluster after splitting, ~d unique cluster ==> reduction factor of ~4f~n',[Len1,Len2,Reduction])
    ;   true
    ).
%========================================================================
%=
%========================================================================
% reset_learning
% Undoes init_learning/0: removes the stored learner state, closes the
% static arrays, resets the completion module, empties the BDD and output
% directories and resets the logger.  Does nothing if learning has not
% been initialised.
reset_learning :-
    (   learning_initialized
    ->  retractall(current_iteration(_)),
        retractall(learning_initialized),
        % Also forget the log-likelihood of the last iteration; otherwise
        % the first iteration after the reset would compare its LLH with
        % the value left over from the previous run.
        retractall(last_llh(_)),
        retractall(training_set_cluster_list(_)),
        retractall(test_set_cluster_list(_)),
        close_static_array(factprob),
        close_static_array(factprob_temp),
        close_static_array(factusage),
        close_static_array(known_count_true_training),
        close_static_array(known_count_false_training),
        close_static_array(known_count_true_test),
        close_static_array(known_count_false_test),
        reset_completion,
        empty_bdd_directory,
        empty_output_directory,
        logger_reset_all_variables
    ;   true
    ).
%========================================================================
%= calculate the LLH on the test set and set the variable
%= in the logger module
%========================================================================
% llh_testset
% If user:test_example/1 exists: opens the test predictions file of the
% current iteration, sums the log-likelihood contribution of the known
% test counts over all probabilistic facts, adds the contribution of the
% test-set BDD clusters (evaluate_bdds/7 with type 'd' and symbol ':') and
% stores the result in the logger variable llh_test_set.
% If there is no user:test_example/1 the predicate just succeeds.
% Because of the cut, a failure after it makes llh_testset fail instead of
% falling through to the second clause.
llh_testset :-
current_predicate(user:test_example/1),
!,
current_iteration(Iteration),
create_test_predictions_file_name(Iteration,F),
open(F,'write',Handle),
% Any exception raised while summing is caught and replaced by the value
% of 0.0/0.0 (presumably NaN -- confirm for the Prolog system in use).
catch(
sum_forall(LProb,
(
probabilistic_fact(_,_,FactID),
array_element(factprob,FactID,PFact),
array_element(known_count_true_test,FactID,KK_True),
array_element(known_count_false_test,FactID,KK_False),
% KK_True*log(P) + KK_False*log(1-P); a log term is only evaluated when
% its count is positive
(
KK_True>0
->
Part1 is KK_True*log(PFact);
Part1 is 0.0
),
(
KK_False>0
->
LProb is Part1+KK_False*log(1-PFact);
LProb is Part1
)
),
PropagatedLLH
),_,PropagatedLLH is 0.0/0.0),
format(Handle,'prob_known_atoms(~15e).~n',[PropagatedLLH]),
test_set_cluster_list(AllCluster),
% deal with test examples where BDD needs to be evaluated
problog_flag(parallel_processes,Parallel_Processes),
once(evaluate_bdds(AllCluster,Handle,Parallel_Processes,'d',':',PropagatedLLH,LLH)),
logger_set_variable(llh_test_set,LLH),
close(Handle).
% no test examples: nothing to compute
llh_testset :-
true.
%========================================================================
%=
%=
%=
%========================================================================
% FIXME
% ground_truth_difference
% For every tunable fact that is not continuous and whose second
% tunable_fact/2 argument is bound, takes the absolute difference between
% that value and the fact's entry in factprob.  Mean, minimum and maximum
% of these differences are stored in the logger variables
% ground_truth_diff, ground_truth_mindiff and ground_truth_maxdiff
% (all 0.0 if no such fact exists).
ground_truth_difference :-
    findall(Delta,
            ( tunable_fact(ID,Truth),
              \+ continuous_fact(ID),
              nonvar(Truth),
              array_element(factprob,ID,Learned),
              Delta is abs(Truth-Learned)
            ),
            Deltas),
    (   Deltas = [_|_]
    ->  length(Deltas,Count),
        sum_list(Deltas,Total),
        min_list(Deltas,Smallest),
        max_list(Deltas,Largest),
        Mean is Total/Count
    ;   Smallest = 0.0,
        Largest = 0.0,
        Mean = 0.0
    ),
    logger_set_variable(ground_truth_diff,Mean),
    logger_set_variable(ground_truth_mindiff,Smallest),
    logger_set_variable(ground_truth_maxdiff,Largest).
%========================================================================
%=
%=
%========================================================================
% write_probabilities_file
% Writes one record per fact ID known to get_fact_probability/2 into the
% BDD input file of the current iteration.  The probability comes from the
% factprob array; the record name of a fact for which non_ground_fact/1
% holds gets the suffix "_*".
write_probabilities_file :-
    current_iteration(Iter),
    create_bdd_input_file_name(Iter,InputFile),
    open(InputFile,write,Out),
    forall(
        get_fact_probability(FactID,_),
        ( array_element(factprob,FactID,Current),
          (   non_ground_fact(FactID)
          ->  Template = '@x~q_*~n~15e~n1~nx~q~N'
          ;   Template = '@x~q~n~15e~n1~nx~q~N'
          ),
          format(Out,Template,[FactID,Current,FactID])
        )
    ),
    close(Out).
%========================================================================
%=
%=
%=
%========================================================================
% update_query(+QueryID,+ClusterID,+Method,-Command,-PID,-Output_File_Name)
% Starts problogbdd_lfi for one BDD cluster.  The command line is built
% from the input file of the current iteration, the cluster's BDD file,
% Method and QueryID; the second stream argument of exec/3 is a stream
% writing to Output_File_Name.  The process is not waited for here;
% Command and PID are returned to the caller.
update_query(QueryID,ClusterID,Method,Command,PID,Output_File_Name) :-
    current_iteration(Iter),
    create_bdd_input_file_name(Iter,ProbFile),
    create_bdd_output_file_name(QueryID,ClusterID,Iter,Output_File_Name),
    create_bdd_file_name(QueryID,ClusterID,ScriptFile),
    convert_filename_to_problog_path('problogbdd_lfi',Executable),
    CommandParts = [ Executable,
                     ' -i "', ProbFile, '"',
                     ' -l "', ScriptFile, '"',
                     ' -m ', Method,
                     ' -id ', QueryID
                   ],
    atomic_concat(CommandParts,Command),
    open(Output_File_Name,write,OutStream),
    exec(Command,[std,OutStream,std],PID),
    close(OutStream).
% update_query_wait(+QueryID,+ClusterID,+Count,+Symbol,+Command,+PID,+OutputFilename,-BDD_Probability)
% Waits for process PID, prints Symbol as progress marker and loads the
% result file through my_load_allinone/4.  A non-zero exit status is
% reported on user_error and raised as bdd_error(QueryID,Status).  The
% result file is deleted afterwards unless the flag retain_bdd_output is
% true.
update_query_wait(QueryID,_ClusterID,Count,Symbol,Command,PID,OutputFilename,BDD_Probability) :-
    wait(PID,Status),
    format_learning(4,'~w',[Symbol]),
    (   Status = 0
    ->  true
    ;   format(user_error,'SimpleCUDD stopped with error code ~q.~n', [Status]),
        format(user_error,'The command was~n ~q~n',[Command]),
        throw(bdd_error(QueryID,Status))
    ),
    once(my_load_allinone(OutputFilename,QueryID,Count,BDD_Probability)),
    problog_flag(retain_bdd_output,Keep_Output),
    (   Keep_Output \== true
    ->  delete_file_silently(OutputFilename)
    ;   true
    ).
%========================================================================
%=
%=
%=
%========================================================================
% my_load_allinone(+File,+QueryID,+Count,-BDD_Probability)
% Reads the result file File term by term through
% my_load_intern_allinone/6 and returns the query probability found there
% (the accumulator starts as the atom error).
% If processing fails, the stream is closed, a message is printed on
% user_error and error(my_load(File,QueryID)) is thrown.  The error
% handling used to be a separate clause with six arguments, which the
% four-argument call could never reach.
my_load_allinone(File,QueryID,Count,BDD_Probability) :-
    open(File,'read',Handle),
    (   read(Handle,Atom),
        once(my_load_intern_allinone(Atom,Handle,QueryID,Count,error,BDD_Probability))
    ->  close(Handle)
    ;   % do not leave the stream open on the error path
        close(Handle),
        format(user_error,'Error at ~q.~2n',[my_load(File,QueryID)]),
        throw(error(my_load(File,QueryID)))
    ).
% my_load_intern_allinone(+Term,+Handle,+QueryID,+Count,+Old_BDD_Probability,-BDD_Probability)
% Processes Term, the term most recently read from Handle, then reads the
% next term and recurses until end_of_file.  The fifth argument is an
% accumulator that starts as the atom error (see my_load_allinone/4) and
% is returned as BDD_Probability at end_of_file.
%   query_probability(QueryID,Prob): accumulator becomes Prob*Count; a
%       second such term throws error(bdd_output_contains_prob_twice(_)).
%   ec(QueryID,VarName,Value): adds Value*Count to factprob_temp and Count
%       to factusage at the fact ID obtained from split_atom_name/3.
%   anything else: reported on user_error and skipped.
my_load_intern_allinone(end_of_file,_,_,_,BDD_Probability,BDD_Probability) :-
!.
my_load_intern_allinone(query_probability(QueryID,Prob),Handle,QueryID,Count,Old_BDD_Probability,BDD_Probability) :-
!,
(
% the accumulator still holds its initial value: first probability seen
Old_BDD_Probability==error
->
true;
throw(error(bdd_output_contains_prob_twice(query_probability(QueryID,Prob))))
),
Prob2 is Prob*Count, % this will throw an exception if simplecudd delivers non-number garbage
read(Handle,X),
my_load_intern_allinone(X,Handle,QueryID,Count,Prob2,BDD_Probability).
my_load_intern_allinone(ec(QueryID,VarName,Value),Handle,QueryID,Count,Old_BDD_Probability,BDD_Probability) :-
!,
split_atom_name(VarName,FactID,_GroundID),
MultValue is Value*Count,
add_to_array_element(factprob_temp,FactID,MultValue,_NewEC),
add_to_array_element(factusage,FactID,Count,_NewDiv),
read(Handle,X),
my_load_intern_allinone(X,Handle,QueryID,Count,Old_BDD_Probability,BDD_Probability).
% catch-all: also reached by query_probability/2 or ec/3 terms that carry
% a different QueryID
my_load_intern_allinone(X,Handle,QueryID,Count,Old_BDD_Probability,BDD_Probability) :-
format(user_error,'Unknown atom ~q in results file.~n',[X]),
read(Handle,X2),
my_load_intern_allinone(X2,Handle,QueryID,Count,Old_BDD_Probability,BDD_Probability).
%========================================================================
%= Perform one iteration of EM
%========================================================================
% my_reset_static_array(+Name)
% Overwrites every element of the static array Name with zero: 0 for
% arrays of type int, 0.0 for arrays of type float.  Fails for any other
% element type.
my_reset_static_array(Name) :-
    %%% DELETE ME AFTER VITOR FIXED HIS BUG
    static_array_properties(Name,Size,Type),
    LastIndex is Size-1,
    (   Type==int
    ->  Zero = 0
    ;   Type==float
    ->  Zero = 0.0
    ;   fail
    ),
    forall(between(0,LastIndex,Index), update_array(Name,Index,Zero)).
% em_one_iteration
% One EM step over the training set:
%  1. rewrites the probabilities file and zeroes the accumulators
%     factprob_temp and factusage;
%  2. adds the known training counts of every tunable fact to the
%     accumulators and sums their log-likelihood contribution;
%  3. evaluates all training BDD clusters (evaluate_bdds/7 with type 'e'),
%     which adds further counts and yields the training LLH, stored in the
%     logger variable llh_training_set;
%  4. sets factprob of every tunable fact that was used to
%     (count + pc_numerator) / (usage + pc_denominator).
em_one_iteration :-
write_probabilities_file,
my_reset_static_array(factprob_temp),
my_reset_static_array(factusage),
current_iteration(Iteration),
create_training_predictions_file_name(Iteration,Name),
open(Name,'write',Handle),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% start calculate new values
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% process known_count information
% the blackboard key dummy accumulates the LLH across the failure-driven
% loop below
bb_put(dummy,0.0),
(
% go over all tunable facts and get their current probability
tunable_fact(FactID,_),
array_element(factprob,FactID,P),
% get known counts
array_element(known_count_true_training,FactID,KK_True),
array_element(known_count_false_training,FactID,KK_False),
KK_Sum is KK_True+KK_False,
% facts without any known count are skipped
KK_Sum>0,
% add counts
add_to_array_element(factprob_temp,FactID,KK_True,_NewValue),
add_to_array_element(factusage,FactID,KK_Sum,_NewCount),
% for LLH training set
(
KK_True>0
->
Part1 is KK_True*log(P);
Part1 is 0.0
),
(
KK_False>0
->
LProb is Part1 + KK_False*log(1-P);
LProb is Part1
),
bb_get(dummy,Old),
New is Old+LProb,
bb_put(dummy,New),
% force backtracking into the next tunable fact
fail;
true
),
bb_delete(dummy,LLH_From_True_BDDs),
format(Handle,'propagatedprob(~15e).~n',[LLH_From_True_BDDs]),
training_set_cluster_list(AllCluster),
problog_flag(parallel_processes,Parallel_Processes),
evaluate_bdds(AllCluster,Handle,Parallel_Processes,'e','.',LLH_From_True_BDDs,LLH),
logger_set_variable(llh_training_set,LLH),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% stop calculate new values
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
format_learning(2,'~n',[]),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% start copy new values
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
problog_flag(pc_numerator,Pseudo_Counts_Numerator),
problog_flag(pc_denominator,Pseudo_Counts_Denominator),
forall(
(
tunable_fact(FactID,_),
array_element(factusage,FactID,Used),
Used>0 % only update relevant facts
),
(
array_element(factprob_temp,FactID,NewValue),
% new estimate: accumulated count over usage, smoothed by the pseudo counts
NewP is (NewValue+ Pseudo_Counts_Numerator) / (Used+Pseudo_Counts_Denominator),
update_array(factprob,FactID,NewP)
)
),
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% stop copy new values
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
close(Handle).
%========================================================================
%= Call SimpleCUDD for each BDD Cluster script
%= L : a list containing 3-tuples a(QueryID,ClusterID,Count)
%= H : file handle for the log file
%= P : number of parallel SimpleCUDD processes
%= T : type of evaluation, either 'd' or 'e'
%= S : symbol to print after a process finished
%= OldLLH : accumulator for LLH
%= LLH : resulting LLH
%=
%= evaluate_bdds(+L,+H,+P,+T,+S,+OldLLH,-LLH)
%========================================================================
% No clusters left: the accumulated LLH is the result.
evaluate_bdds([],_,_,_,_,LLH,LLH).
% Otherwise split off the next batch with slice_n/4 (called with the
% process limit MaxJobs), run the start phase and then the stop phase for
% that batch while the bdd_evaluation timer is running, and recurse on
% the clusters that were left over.
evaluate_bdds([Cluster|Clusters],LogStream,MaxJobs,EvalType,DoneSymbol,LLH0,LLH) :-
	once(slice_n([Cluster|Clusters],MaxJobs,Batch,Remaining)),
	logger_start_timer(bdd_evaluation),
	once(evaluate_bdds_start(Batch,EvalType,BatchJobs)),
	once(evaluate_bdds_stop(BatchJobs,LogStream,DoneSymbol,LLH0,LLH1)),
	logger_stop_timer(bdd_evaluation),
	evaluate_bdds(Remaining,LogStream,MaxJobs,EvalType,DoneSymbol,LLH1,LLH).
% evaluate_bdds_start(+Clusters,+EvalType,-Jobs)
%
% Maps every a(QueryID,ClusterID,Count) entry to a
% job(QueryID,ClusterID,Count,Command,PID,OutputFilename) record whose
% last three fields are produced by update_query/6.
% NOTE(review): update_query/6 presumably launches the external
% evaluation process without waiting for it -- confirm at its definition.
evaluate_bdds_start([],_,[]).
evaluate_bdds_start([Cluster|Clusters],EvalType,[Job|Jobs]) :-
	% take both records apart before anything with side effects runs
	Cluster=a(QueryID,ClusterID,Count),
	Job=job(QueryID,ClusterID,Count,Command,PID,OutputFilename),
	once(update_query(QueryID,ClusterID,EvalType,Command,PID,OutputFilename)),
	evaluate_bdds_start(Clusters,EvalType,Jobs).
% evaluate_bdds_stop(+Jobs,+LogStream,+DoneSymbol,+LLH0,-LLH)
%
% For every job record: obtains the BDD probability through
% update_query_wait/8, logs it as a bdd_prob/3 fact on LogStream and adds
% Count*log(Probability) to the running log-likelihood.
evaluate_bdds_stop([],_,_,LLH,LLH).
evaluate_bdds_stop([Job|Jobs],LogStream,DoneSymbol,LLH0,LLH) :-
	Job=job(QueryID,ClusterID,Count,Command,PID,OutputFilename),
	once(update_query_wait(QueryID,ClusterID,Count,DoneSymbol,Command,PID,OutputFilename,Probability)),
	format(LogStream,'bdd_prob(~w,~w,~15e). % Count=~w~n',[QueryID,ClusterID,Probability,Count]),
	% if the arithmetic raises an error, continue with 0.0/0.0 as the LLH
	catch(LLH1 is LLH0 + Count*log(Probability),_Error,LLH1 is 0.0/0.0),
	evaluate_bdds_stop(Jobs,LogStream,DoneSymbol,LLH1,LLH).
%========================================================================
%=
%=
%========================================================================
%========================================================================
%= initialize the logger module and set the flags for learning
%= don't change anything here! use set_learning_flag/2 instead
%========================================================================
% init_flags
%
% Defines the learning flags of this module, all in the flag group
% learning_general. The default directories are the absolute file names
% of './queries' and './output'.
init_flags :-
	prolog_file_name('queries',Default_BDD_Dir),   % get absolute file name for './queries'
	prolog_file_name('output',Default_Output_Dir), % get absolute file name for './output'

	% directories
	problog_define_flag(bdd_directory, problog_flag_validate_directory,
			    'directory for BDD scripts', Default_BDD_Dir, learning_general),
	problog_define_flag(output_directory, problog_flag_validate_directory,
			    'directory for logfiles etc', Default_Output_Dir, learning_general,
			    flags:learning_output_dir_handler),

	% output and logging
	problog_define_flag(retain_bdd_output, problog_flag_validate_boolean,
			    'Keep output files from BDD tool', false, learning_general),
	problog_define_flag(log_frequency, problog_flag_validate_posint,
			    'log results every nth iteration', 1, learning_general),
	problog_define_flag(reuse_initialized_bdds, problog_flag_validate_boolean,
			    'Reuse BDDs from previous runs', false, learning_general),

	% pseudo counts used when the new fact probabilities are computed
	problog_define_flag(pc_numerator, problog_flag_validate_in_interval_right_open([0.0,+inf]),
			    'Add X to numerator (Pseudocounts)', 0.0, learning_general),
	problog_define_flag(pc_denominator, problog_flag_validate_in_interval_right_open([0.0,+inf]),
			    'Add X to denominator (Pseudocounts)', 0.0, learning_general),

	% BDD evaluation
	problog_define_flag(parallel_processes, problog_flag_validate_posint,
			    'Number of parallel BDD processes', 8, learning_general),
	problog_define_flag(cluster_bdds, problog_flag_validate_boolean,
			    'Cluster similar BDDs', true, learning_general).
% init_logger
%
% Registers all logger variables used by this module, in the order
% given by the table below. Each entry is a Name-Type pair.
init_logger :-
	init_logger_variables([
		iteration-int,
		duration-time,
		llh_training_set-float,
		llh_test_set-float,
		bdd_evaluation-time,
		ground_truth_diff-float,
		ground_truth_mindiff-float,
		ground_truth_maxdiff-float,
		train_bdd_script_generation-time,
		train_bdd_script_generation_grounding-time,
		train_bdd_script_generation_completion-time,
		train_bdd_script_generation_propagation-time,
		train_bdd_script_generation_splitting-time,
		train_bdd_script_generation_active_ground_atoms-int,
		train_bdd_script_generation_propagated_ground_atoms-int,
		test_bdd_script_generation-time,
		test_bdd_script_generation_grounding-time,
		test_bdd_script_generation_completion-time,
		test_bdd_script_generation_propagation-time,
		test_bdd_script_generation_splitting-time,
		test_bdd_script_generation_active_ground_atoms-int,
		test_bdd_script_generation_propagated_ground_atoms-int
	]).

% init_logger_variables(+Pairs)
%
% Private helper of init_logger/0: calls logger_define_variable/2 for
% every Name-Type pair, front to back.
init_logger_variables([]).
init_logger_variables([Name-Type|Pairs]) :-
	logger_define_variable(Name,Type),
	init_logger_variables(Pairs).
% Register init_flags/0 and init_logger/0 as initialization goals, so the
% learning flags and the logger variables are defined when this file is
% loaded.
:- initialization(init_flags).
:- initialization(init_logger).
%:- spy em_one_iteration.
%:- initialization(do_learning(100) ).