gradient is more complex to compute

This commit is contained in:
Vitor Santos Costa 2013-02-26 09:39:56 -06:00
parent 59d97f83b4
commit b49e9b908b
2 changed files with 50 additions and 219 deletions
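
The gist of the change, as read off the hunks below: stored BDDs gain a direction flag, bdd(Dir, Tree, MapList), so a tree kept in complemented form can have its probability and gradient flipped at evaluation time, and the sum-product and gradient walkers are rewritten around the sigmoid-transformed weight EP. Per node labelled with variable X (the gnoden clauses substitute 1-PR for PR):

    EP = 1/(1 + exp(-Slope*X))
    P  = EP*PL + (1-EP)*PR
    G  = EP*GL + (1-EP)*GR, plus (PL-PR)*EP*(1-EP)*Slope when X is the
         variable being differentiated

The Slope factor in the local derivative term is exactly what the previous code omitted, which is why the gradient is "more complex to compute".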

View File

@@ -12,9 +12,9 @@ problog_kbest_bdd(Goal, K, Prob, ok) :-
     bind_maplist(MapList, BoundVars),
     bdd_to_probability_sum_product(BDD, BoundVars, Prob).
-problog_kbest_as_bdd(Goal, K, bdd(Tree, MapList)) :-
+problog_kbest_as_bdd(Goal, K, bdd(Dir, Tree, MapList)) :-
     problog_kbest_to_bdd(Goal, K, BDD, MapList),
-    bdd_tree(BDD, bdd(_Dir, Tree, _Vars)),
+    bdd_tree(BDD, bdd(Dir, Tree, _Vars)),
     bdd_close(BDD).
 problog_kbest_to_bdd(Goal, K, BDD, MapList) :-
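
problog_kbest_as_bdd/3 now returns bdd(Dir, Tree, MapList), with Dir the direction flag reported by bdd_tree/2: 1 when Tree encodes the formula itself, anything else when it encodes the complement. A minimal sketch of a consumer, assuming the bind_maplist/1 and run_sp/4 helpers from the learning code in the second file (a hypothetical caller, not part of this commit):

% Hypothetical: evaluate a bdd/3 record, flipping the result when the
% stored tree encodes the complement of the query formula.
bdd3_probability(bdd(Dir, Tree, MapList), Slope, Prob) :-
    bind_maplist(MapList),
    run_sp(Tree, Slope, 1.0, Prob0),
    ( Dir == 1 -> Prob = Prob0 ; Prob is 1.0 - Prob0 ).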

View File

@@ -362,7 +362,7 @@ reset_learning :-
     retractall(current_iteration(_)),
     retractall(example_count(_)),
     retractall(query_probability_intern(_,_)),
-    retractall(query_gradient_intern(_,_,_)),
+    retractall(query_gradient_intern(_,_,_,_)),
     retractall(last_mse(_)),
     retractall(query_is_similar(_,_)),
     retractall(query_md5(_,_,_)),
@@ -581,7 +581,7 @@ init_learning :-
 empty_bdd_directory :-
     current_key(_,I),
     integer(I),
-    recorded(I,bdd(_,_),R),
+    recorded(I,bdd(_,_,_),R),
     erase(R),
     fail.
 empty_bdd_directory.
@@ -615,13 +615,14 @@ init_one_query(QueryID,Query,Type) :-
     format_learning(3,' Reuse existing BDD ~q~n~n',[QueryID]);
     (
     problog_flag(libbdd_init_method,(Query,Bdd,Call)),
-    Bdd = bdd(Tree, MapList),
+    Bdd = bdd(Dir, Tree, MapList),
     once(Call),
     rb_new(H0),
     maplist_to_hash(MapList, H0, Hash),
+    % writeln(Dir:Tree:MapList),
     tree_to_grad(Tree, Hash, [], Grad),
     %% %writeln(Call:Tree),
-    recordz(QueryID,bdd(Grad,MapList),_)
+    recordz(QueryID,bdd(Dir, Grad, MapList),_)
     )
     ),
@@ -706,17 +707,21 @@ get_prob(Node, Prob) :-
 gradient(QueryID, l, Slope) :-
     /* query_probability(21,6.775948e-01). */
-    recorded(QueryID, bdd(Tree, MapList), _),
+    recorded(QueryID, bdd(Dir, Tree, MapList), _),
     bind_maplist(MapList),
-    run_sp(Tree, Slope, 0.0, Prob),
+    run_sp(Tree, Slope, 1.0, Prob0),
+    (Dir == 1 -> Prob0 = Prob ; Prob is 1.0-Prob0),
+    %writeln(QueryID:Prob),
     assert(query_probability_intern(QueryID,Prob)),
     fail.
 gradient(_QueryID, l, _).
 gradient(QueryID, g, Slope) :-
-    recorded(QueryID, bdd(Tree, MapList), _),
+    recorded(QueryID, bdd(Dir, Tree, MapList), _),
     bind_maplist(MapList),
     member(I-_, MapList),
-    run_grad(Tree, I, Slope, 0.0, Grad),
+    run_grad(Tree, I, Slope, 0.0, Grad0),
+    ( Dir = 1 -> Grad = Grad0 ; Grad is -Grad0),
+    % writeln(grad(QueryID:I:Grad)),
     assert(query_gradient_intern(QueryID,I,p,Grad)),
     fail.
 gradient(QueryID, g, Slope) :-
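
Note the two flips above: a complemented tree (Dir other than 1) computes 1.0 - Prob0, and since the derivative of 1-P is -dP, its gradient is negated as well.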
@@ -732,7 +737,6 @@ tree_to_grad([Node|Tree], H, Grad0, Grad) :-
     node_to_gradient_node(Node, H, GNode),
     tree_to_grad(Tree, H, [GNode|Grad0], Grad).
-/* pp should never happen */
 node_to_gradient_node(pp(P-G,X,L,R), H, gnodep(P,G,X,Id,PL,GL,PR,GR)) :-
     rb_lookup(X,Id,H),
     (L == 1 -> GL=0, PL=1 ; L == 0 -> GL = 0, PL=0 ; L = PL-GL),
@@ -744,28 +748,27 @@ node_to_gradient_node(pn(P-G,X,L,R), H, gnoden(P,G,X,Id,PL,GL,PR,GR)) :-
 run_sp([], _, P0, P0).
 run_sp(gnodep(P,_G, X, _Id, PL, _GL, PR, _GR).Tree, Slope, _, PF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         (PR / (1.0 + exp(X * Slope))),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*PR,
     run_sp(Tree, Slope, P, PF).
 run_sp(gnoden(P,_G, X, _Id, PL, _GL, PR, _GR).Tree, Slope, _, PF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         ((1-PR) / (1.0 + exp(X * Slope))),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*(1.0 - PR),
     run_sp(Tree, Slope, P, PF).
 run_grad([], _I, _, G0, G0).
 run_grad([gnodep(P,G, X, Id, PL, GL, PR, GR)|Tree], I, Slope, _, GF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         (PR / (1.0 + exp(X * Slope))),
-    G0 is (GL / (1.0 + exp(-X * Slope)))+
-          (GR / (1.0 + exp(X * Slope))),
-    ( I == Id -> G is G0+(PL-PR)*(1.0 / (1.0 + exp(-X * Slope)))*(1.0 / (1.0 + exp(X * Slope))) ; G = G0 ),
+    EP is 1.0/(1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*PR,
+    G0 is EP*GL + (1.0-EP)*GR,
+    % don't forget the -X
+    ( I == Id -> G is G0+(PL-PR)*EP*(1-EP)*Slope ; G = G0 ),
     run_grad(Tree, I, Slope, G, GF).
 run_grad([gnoden(P,G, X, Id, PL, GL, PR, GR)|Tree], I, Slope, _, GF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         ((1-PR) / (1.0 + exp(X * Slope))),
-    G0 is (GL / (1.0 + exp(-X * Slope)))-
-          (-GR / (1.0 + exp(X * Slope))),
-    ( I == Id -> G is G0+(PL-(1-PR))*(1.0 / (1.0 + exp(-X * Slope)))*(1.0 / (1.0 + exp(X * Slope))) ; G = G0 ),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*(1.0 - PR),
+    G0 is EP*GL - (1.0 - EP) * GR,
+    ( I == Id -> G is G0+(PL+PR-1)*EP*(1-EP)*Slope ; G = G0 ),
     run_grad(Tree, I, Slope, G, GF).
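
The old clauses multiplied the local term by sigmoid(Slope*X)*sigmoid(-Slope*X), which equals EP*(1-EP) but drops the Slope factor required by the chain rule for d/dX of sigmoid(Slope*X); the new EP*(1-EP)*Slope term is the full derivative. A quick finite-difference check of the closed form on a single gnodep node (a standalone sketch with hypothetical predicate names, not code from this commit):

% Sketch: compare run_grad/5's closed-form node derivative against a
% numerical derivative. node_p/5 mirrors a gnodep node.
sigmoid_w(Slope, X, EP) :- EP is 1.0 / (1.0 + exp(-X * Slope)).

node_p(Slope, X, PL, PR, P) :-
    sigmoid_w(Slope, X, EP),
    P is EP * PL + (1.0 - EP) * PR.

node_dp_dx(Slope, X, PL, PR, G) :-
    sigmoid_w(Slope, X, EP),
    G is (PL - PR) * EP * (1.0 - EP) * Slope.

check_node_grad(Slope, X, PL, PR) :-
    H = 1.0e-6,
    X1 is X + H,
    X0 is X - H,
    node_p(Slope, X1, PL, PR, P1),
    node_p(Slope, X0, PL, PR, P0),
    Numeric is (P1 - P0) / (2.0 * H),
    node_dp_dx(Slope, X, PL, PR, Analytic),
    format('numeric ~w vs analytic ~w~n', [Numeric, Analytic]).

For example, check_node_grad(2.0, 0.3, 0.9, 0.2) prints two values that agree to several decimal places; dropping the Slope factor makes them differ by a factor of 2.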
@@ -1065,7 +1068,8 @@ add_gradient(Learning_Rate) :-
     bb_get(Key2,GradValue),
     inv_sigmoid(OldProbability,OldValue),
-    NewValue is OldValue -Learning_Rate*GradValue,
+    %writeln(FactID:OldValue +Learning_Rate*GradValue),
+    NewValue is OldValue +Learning_Rate*GradValue,
     sigmoid(NewValue,NewProbability),
     % Prevent "inf" by using values too close to 1.0
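
add_gradient/1 applies the step in logit space: the current probability is pulled back through inv_sigmoid/2, shifted by Learning_Rate times the accumulated gradient, and mapped back through sigmoid/2, so the result always stays inside (0,1). The sign flip here (now +) matches the flipped accumulation OldValue - Y*GradValue in gradient_descent below; the net update still descends the error. A minimal sketch of the round trip, assuming plain logistic functions (the module's own sigmoid/2 and inv_sigmoid/2 may also involve the slope flag):

% Sketch: one gradient step on a probability, taken in logit space.
sigmoid(X, P) :- P is 1.0 / (1.0 + exp(-X)).
inv_sigmoid(P, X) :- X is log(P / (1.0 - P)).

logit_step(OldProb, LearningRate, Grad, NewProb) :-
    inv_sigmoid(OldProb, OldValue),
    NewValue is OldValue + LearningRate * Grad,
    sigmoid(NewValue, NewProb).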
@@ -1079,7 +1083,6 @@ add_gradient(Learning_Rate) :-
 % vsc: avoid silly search
 gradient_descent :-
-    continuous_fact(_), !,
     current_iteration(Iteration),
     create_training_predictions_file_name(Iteration,File_Name),
     open(File_Name,'write',Handle),
@@ -1171,11 +1174,21 @@ gradient_descent :-
     ( % go over all tunable facts
-        tunable_fact(FactID,_),
-        (
-            continuous_fact(FactID)
-        ->
-            (
+        query_gradient(QueryID,FactID,p,GradValue),
+        atomic_concat(['grad_',FactID],Key),
+        % if the following query fails,
+        % it means, the fact is not used in the proof
+        % of QueryID, and the gradient is 0.0 and will
+        % not contribute to NewValue either way
+        % DON'T FORGET THIS IF YOU CHANGE SOMETHING HERE!
+        %writeln(u:QueryID:FactID:Y:GradValue),
+        bb_get(Key,OldValue),
+        NewValue is OldValue - Y*GradValue,
+        bb_put(Key,NewValue),
+        fail; % go to next fact
+        true
+    ),
+    ( continuous_fact(FactID),
         atomic_concat(['grad_mu_',FactID],Key),
         atomic_concat(['grad_sigma_',FactID],Key2),
@@ -1194,26 +1207,11 @@ gradient_descent :-
         NewValueSigma is OldValueSigma + Y*GradValueSigma,
         bb_put(Key,NewValueMu),
-        bb_put(Key2,NewValueSigma)
-    );
-    (
-        atomic_concat(['grad_',FactID],Key),
-        % if the following query fails,
-        % it means, the fact is not used in the proof
-        % of QueryID, and the gradient is 0.0 and will
-        % not contribute to NewValue either way
-        % DON'T FORGET THIS IF YOU CHANGE SOMETHING HERE!
-        query_gradient(QueryID,FactID,p,GradValue),
-        bb_get(Key,OldValue),
-        NewValue is OldValue + Y*GradValue,
-        bb_put(Key,NewValue)
-    )
-    ),
-    fail; % go to next fact
-    true
-    ),
+        bb_put(Key2,NewValueSigma),
+        fail
+    ;
+        true
+    ),
     once(update_query_cleanup(QueryID))
 )),
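
The rewritten update loop drops the tunable_fact/2 enumeration and instead backtracks directly over query_gradient/4; a fact that never occurs in the query's proofs yields no solution and so, as the retained comment warns, contributes nothing to any blackboard cell. Both branches use Prolog's failure-driven loop idiom, whose generic shape is (a sketch with a hypothetical wrapper name):

% Hypothetical illustration of the (generate, side effect, fail ; true)
% idiom: backtrack through every gradient of QueryID, update its
% blackboard cell, then succeed once when the generator is exhausted.
apply_gradients(QueryID, Y) :-
    (   query_gradient(QueryID, FactID, p, GradValue),
        atomic_concat(['grad_', FactID], Key),
        bb_get(Key, OldValue),
        NewValue is OldValue - Y * GradValue,
        bb_put(Key, NewValue),
        fail
    ;   true
    ).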
@@ -1232,7 +1230,6 @@ gradient_descent :-
         atomic_concat(['grad_',FactID],Key),
         bb_get(Key,V)
     ),Gradient_Values),
-
     (
         Gradient_Values==[]
     ->
@@ -1288,172 +1285,6 @@ gradient_descent :-
     !,
     forget_old_probabilities.
-
-% VSC: no continuous facts
-% simplify code
-gradient_descent :-
-    current_iteration(Iteration),
-    create_training_predictions_file_name(Iteration,File_Name),
-    open(File_Name,'write',Handle),
-    format(Handle,"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%~n",[]),
-    format(Handle,"% Iteration, train/test, QueryID, Query, GroundTruth, Prediction %~n",[]),
-    format(Handle,"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%~n",[]),
-    format_learning(2,'Gradient ',[]),
-    save_old_probabilities,
-    update_values,
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start set gradient to zero
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    forall(tunable_fact(FactID,_),
-        (
-            (
-                atomic_concat(['grad_',FactID],Key),
-                bb_put(Key,0.0)
-            )
-        )
-    ),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop gradient to zero
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start calculate gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    bb_put(mse_train_sum, 0.0),
-    bb_put(mse_train_min, 0.0),
-    bb_put(mse_train_max, 0.0),
-    bb_put(llh_training_queries, 0.0),
-    problog_flag(alpha,Alpha),
-    logger_set_variable(alpha,Alpha),
-    example_count(Example_Count),
-    forall(user:example(QueryID,Query,QueryProb,Type),
-        (
-            once(update_query(QueryID,'.',all)),
-            query_probability(QueryID,BDDProb),
-            format(Handle,'ex(~q,train,~q,~q,~10f,~10f).~n',[Iteration,QueryID,Query,QueryProb,BDDProb]),
-            (
-                QueryProb=:=0.0
-            ->
-                Y2=Alpha;
-                Y2=1.0
-            ),
-            (
-                (Type == '='; (Type == '<', BDDProb>QueryProb); (Type=='>',BDDProb<QueryProb))
-            ->
-                Y is Y2*2/Example_Count * (BDDProb-QueryProb);
-                Y=0.0
-            ),
-            % first do the calculations for the MSE on training set
-            (
-                (Type == '='; (Type == '<', BDDProb>QueryProb); (Type=='>',BDDProb<QueryProb))
-            ->
-                Squared_Error is (BDDProb-QueryProb)**2;
-                Squared_Error=0.0
-            ),
-            bb_get(mse_train_sum,Old_MSE_Train_Sum),
-            bb_get(mse_train_min,Old_MSE_Train_Min),
-            bb_get(mse_train_max,Old_MSE_Train_Max),
-            bb_get(llh_training_queries,Old_LLH_Training_Queries),
-            New_MSE_Train_Sum is Old_MSE_Train_Sum+Squared_Error,
-            New_MSE_Train_Min is min(Old_MSE_Train_Min,Squared_Error),
-            New_MSE_Train_Max is max(Old_MSE_Train_Max,Squared_Error),
-            New_LLH_Training_Queries is Old_LLH_Training_Queries+log(BDDProb),
-            bb_put(mse_train_sum,New_MSE_Train_Sum),
-            bb_put(mse_train_min,New_MSE_Train_Min),
-            bb_put(mse_train_max,New_MSE_Train_Max),
-            bb_put(llh_training_queries,New_LLH_Training_Queries),
-            ( % go over all tunable facts
-                query_gradient(QueryID,FactID,p,GradValue),
-                atomic_concat(['grad_',FactID],Key),
-                bb_get(Key,OldValue),
-                NewValue is OldValue + Y*GradValue,
-                bb_put(Key,NewValue),
-                fail; % go to next fact
-                true
-            ),
-            once(update_query_cleanup(QueryID))
-        )),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop calculate gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    !,
-    close(Handle),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start statistics on gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    findall(V, (
-        tunable_fact(FactID,_),
-        atomic_concat(['grad_',FactID],Key),
-        bb_get(Key,V)
-    ),Gradient_Values),
-    (
-        Gradient_Values==[]
-    ->
-        (
-            logger_set_variable(gradient_mean,0.0),
-            logger_set_variable(gradient_min,0.0),
-            logger_set_variable(gradient_max,0.0)
-        );
-        (
-            sum_list(Gradient_Values,GradSum),
-            max_list(Gradient_Values,GradMax),
-            min_list(Gradient_Values,GradMin),
-            length(Gradient_Values,GradLength),
-            GradMean is GradSum/GradLength,
-            logger_set_variable(gradient_mean,GradMean),
-            logger_set_variable(gradient_min,GradMin),
-            logger_set_variable(gradient_max,GradMax)
-        )
-    ),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop statistics on gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    bb_delete(mse_train_sum,MSE_Train_Sum),
-    bb_delete(mse_train_min,MSE_Train_Min),
-    bb_delete(mse_train_max,MSE_Train_Max),
-    bb_delete(llh_training_queries,LLH_Training_Queries),
-    MSE is MSE_Train_Sum/Example_Count,
-    logger_set_variable(mse_trainingset,MSE),
-    logger_set_variable(mse_min_trainingset,MSE_Train_Min),
-    logger_set_variable(mse_max_trainingset,MSE_Train_Max),
-    logger_set_variable(llh_training_queries,LLH_Training_Queries),
-    format_learning(2,'~n',[]),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start add gradient to current probabilities
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    (
-        problog_flag(line_search,false)
-    ->
-        problog_flag(learning_rate,LearningRate);
-        lineSearch(LearningRate,_)
-    ),
-    format_learning(3,'learning rate:~8f~n',[LearningRate]),
-    add_gradient(LearningRate),
-    logger_set_variable(learning_rate,LearningRate),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop add gradient to current probabilities
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    !,
-    forget_old_probabilities.
 %========================================================================
 %=