gradient is more complex to compute

This commit is contained in:
Vitor Santos Costa 2013-02-26 09:39:56 -06:00
parent 59d97f83b4
commit b49e9b908b
2 changed files with 50 additions and 219 deletions
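
The gist of the change, as read off the hunks below: stored BDDs gain a direction flag, bdd(Dir, Tree, MapList), so a tree kept in complemented form can have its probability and gradient flipped at evaluation time, and the sum-product and gradient walkers are rewritten around the sigmoid-transformed weight EP. Per node labelled with variable X (the gnoden clauses substitute 1-PR for PR):

    EP = 1/(1 + exp(-Slope*X))
    P  = EP*PL + (1-EP)*PR
    G  = EP*GL + (1-EP)*GR, plus (PL-PR)*EP*(1-EP)*Slope when X is the
         variable being differentiated

The Slope factor in the local derivative term is exactly what the previous code omitted, which is why the gradient is "more complex to compute".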

View File

@@ -12,9 +12,9 @@ problog_kbest_bdd(Goal, K, Prob, ok) :-
     bind_maplist(MapList, BoundVars),
     bdd_to_probability_sum_product(BDD, BoundVars, Prob).
-problog_kbest_as_bdd(Goal, K, bdd(Tree, MapList)) :-
+problog_kbest_as_bdd(Goal, K, bdd(Dir, Tree, MapList)) :-
     problog_kbest_to_bdd(Goal, K, BDD, MapList),
-    bdd_tree(BDD, bdd(_Dir, Tree, _Vars)),
+    bdd_tree(BDD, bdd(Dir, Tree, _Vars)),
     bdd_close(BDD).
 problog_kbest_to_bdd(Goal, K, BDD, MapList) :-
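
problog_kbest_as_bdd/3 now returns bdd(Dir, Tree, MapList), with Dir the direction flag reported by bdd_tree/2: 1 when Tree encodes the formula itself, anything else when it encodes the complement. A minimal sketch of a consumer, assuming the bind_maplist/1 and run_sp/4 helpers from the learning code in the second file (a hypothetical caller, not part of this commit):

% Hypothetical: evaluate a bdd/3 record, flipping the result when the
% stored tree encodes the complement of the query formula.
bdd3_probability(bdd(Dir, Tree, MapList), Slope, Prob) :-
    bind_maplist(MapList),
    run_sp(Tree, Slope, 1.0, Prob0),
    ( Dir == 1 -> Prob = Prob0 ; Prob is 1.0 - Prob0 ).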

View File

@@ -362,7 +362,7 @@ reset_learning :-
     retractall(current_iteration(_)),
     retractall(example_count(_)),
     retractall(query_probability_intern(_,_)),
-    retractall(query_gradient_intern(_,_,_)),
+    retractall(query_gradient_intern(_,_,_,_)),
     retractall(last_mse(_)),
     retractall(query_is_similar(_,_)),
     retractall(query_md5(_,_,_)),
@@ -581,7 +581,7 @@ init_learning :-
 empty_bdd_directory :-
     current_key(_,I),
     integer(I),
-    recorded(I,bdd(_,_),R),
+    recorded(I,bdd(_,_,_),R),
     erase(R),
     fail.
 empty_bdd_directory.
@@ -615,13 +615,14 @@ init_one_query(QueryID,Query,Type) :-
     format_learning(3,' Reuse existing BDD ~q~n~n',[QueryID]);
     (
     problog_flag(libbdd_init_method,(Query,Bdd,Call)),
-    Bdd = bdd(Tree, MapList),
+    Bdd = bdd(Dir, Tree, MapList),
     once(Call),
     rb_new(H0),
     maplist_to_hash(MapList, H0, Hash),
+    % writeln(Dir:Tree:MapList),
     tree_to_grad(Tree, Hash, [], Grad),
     %% %writeln(Call:Tree),
-    recordz(QueryID,bdd(Grad,MapList),_)
+    recordz(QueryID,bdd(Dir, Grad, MapList),_)
     )
     ),
@@ -706,17 +707,21 @@ get_prob(Node, Prob) :-
 gradient(QueryID, l, Slope) :-
     /* query_probability(21,6.775948e-01). */
-    recorded(QueryID, bdd(Tree, MapList), _),
+    recorded(QueryID, bdd(Dir, Tree, MapList), _),
     bind_maplist(MapList),
-    run_sp(Tree, Slope, 0.0, Prob),
+    run_sp(Tree, Slope, 1.0, Prob0),
+    (Dir == 1 -> Prob0 = Prob ; Prob is 1.0-Prob0),
+    %writeln(QueryID:Prob),
     assert(query_probability_intern(QueryID,Prob)),
     fail.
 gradient(_QueryID, l, _).
 gradient(QueryID, g, Slope) :-
-    recorded(QueryID, bdd(Tree, MapList), _),
+    recorded(QueryID, bdd(Dir, Tree, MapList), _),
     bind_maplist(MapList),
     member(I-_, MapList),
-    run_grad(Tree, I, Slope, 0.0, Grad),
+    run_grad(Tree, I, Slope, 0.0, Grad0),
+    ( Dir = 1 -> Grad = Grad0 ; Grad is -Grad0),
+    % writeln(grad(QueryID:I:Grad)),
     assert(query_gradient_intern(QueryID,I,p,Grad)),
     fail.
 gradient(QueryID, g, Slope) :-
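
Note the two flips above: a complemented tree (Dir other than 1) computes 1.0 - Prob0, and since the derivative of 1-P is -dP, its gradient is negated as well.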
@@ -732,7 +737,6 @@ tree_to_grad([Node|Tree], H, Grad0, Grad) :-
     node_to_gradient_node(Node, H, GNode),
     tree_to_grad(Tree, H, [GNode|Grad0], Grad).
-/* pp should never happen */
 node_to_gradient_node(pp(P-G,X,L,R), H, gnodep(P,G,X,Id,PL,GL,PR,GR)) :-
     rb_lookup(X,Id,H),
     (L == 1 -> GL=0, PL=1 ; L == 0 -> GL = 0, PL=0 ; L = PL-GL),
@@ -744,28 +748,27 @@ node_to_gradient_node(pn(P-G,X,L,R), H, gnoden(P,G,X,Id,PL,GL,PR,GR)) :-
 run_sp([], _, P0, P0).
 run_sp(gnodep(P,_G, X, _Id, PL, _GL, PR, _GR).Tree, Slope, _, PF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         (PR / (1.0 + exp(X * Slope))),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*PR,
     run_sp(Tree, Slope, P, PF).
 run_sp(gnoden(P,_G, X, _Id, PL, _GL, PR, _GR).Tree, Slope, _, PF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         ((1-PR) / (1.0 + exp(X * Slope))),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*(1.0 - PR),
     run_sp(Tree, Slope, P, PF).
 run_grad([], _I, _, G0, G0).
 run_grad([gnodep(P,G, X, Id, PL, GL, PR, GR)|Tree], I, Slope, _, GF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         (PR / (1.0 + exp(X * Slope))),
-    G0 is (GL / (1.0 + exp(-X * Slope)))+
-          (GR / (1.0 + exp(X * Slope))),
-    ( I == Id -> G is G0+(PL-PR)*(1.0 / (1.0 + exp(-X * Slope)))*(1.0 / (1.0 + exp(X * Slope))) ; G = G0 ),
+    EP is 1.0/(1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*PR,
+    G0 is EP*GL + (1.0-EP)*GR,
+    % don't forget the -X
+    ( I == Id -> G is G0+(PL-PR)*EP*(1-EP)*Slope ; G = G0 ),
     run_grad(Tree, I, Slope, G, GF).
 run_grad([gnoden(P,G, X, Id, PL, GL, PR, GR)|Tree], I, Slope, _, GF) :-
-    P is (PL / (1.0 + exp(-X * Slope)))+
-         ((1-PR) / (1.0 + exp(X * Slope))),
-    G0 is (GL / (1.0 + exp(-X * Slope)))-
-          (-GR / (1.0 + exp(X * Slope))),
-    ( I == Id -> G is G0+(PL-(1-PR))*(1.0 / (1.0 + exp(-X * Slope)))*(1.0 / (1.0 + exp(X * Slope))) ; G = G0 ),
+    EP is 1.0 / (1.0 + exp(-X * Slope)),
+    P is EP*PL + (1.0-EP)*(1.0 - PR),
+    G0 is EP*GL - (1.0 - EP) * GR,
+    ( I == Id -> G is G0+(PL+PR-1)*EP*(1-EP)*Slope ; G = G0 ),
     run_grad(Tree, I, Slope, G, GF).
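
The old clauses multiplied the local term by sigmoid(Slope*X)*sigmoid(-Slope*X), which equals EP*(1-EP) but drops the Slope factor required by the chain rule for d/dX of sigmoid(Slope*X); the new EP*(1-EP)*Slope term is the full derivative. A quick finite-difference check of the closed form on a single gnodep node (a standalone sketch with hypothetical predicate names, not code from this commit):

% Sketch: compare run_grad/5's closed-form node derivative against a
% numerical derivative. node_p/5 mirrors a gnodep node.
sigmoid_w(Slope, X, EP) :- EP is 1.0 / (1.0 + exp(-X * Slope)).

node_p(Slope, X, PL, PR, P) :-
    sigmoid_w(Slope, X, EP),
    P is EP * PL + (1.0 - EP) * PR.

node_dp_dx(Slope, X, PL, PR, G) :-
    sigmoid_w(Slope, X, EP),
    G is (PL - PR) * EP * (1.0 - EP) * Slope.

check_node_grad(Slope, X, PL, PR) :-
    H = 1.0e-6,
    X1 is X + H,
    X0 is X - H,
    node_p(Slope, X1, PL, PR, P1),
    node_p(Slope, X0, PL, PR, P0),
    Numeric is (P1 - P0) / (2.0 * H),
    node_dp_dx(Slope, X, PL, PR, Analytic),
    format('numeric ~w vs analytic ~w~n', [Numeric, Analytic]).

For example, check_node_grad(2.0, 0.3, 0.9, 0.2) prints two values that agree to several decimal places; dropping the Slope factor makes them differ by a factor of 2.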
@@ -1065,7 +1068,8 @@ add_gradient(Learning_Rate) :-
     bb_get(Key2,GradValue),
     inv_sigmoid(OldProbability,OldValue),
-    NewValue is OldValue -Learning_Rate*GradValue,
+    %writeln(FactID:OldValue +Learning_Rate*GradValue),
+    NewValue is OldValue +Learning_Rate*GradValue,
     sigmoid(NewValue,NewProbability),
     % Prevent "inf" by using values too close to 1.0
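
add_gradient/1 applies the step in logit space: the current probability is pulled back through inv_sigmoid/2, shifted by Learning_Rate times the accumulated gradient, and mapped back through sigmoid/2, so the result always stays inside (0,1). The sign flip here (now +) matches the flipped accumulation OldValue - Y*GradValue in gradient_descent below; the net update still descends the error. A minimal sketch of the round trip, assuming plain logistic functions (the module's own sigmoid/2 and inv_sigmoid/2 may also involve the slope flag):

% Sketch: one gradient step on a probability, taken in logit space.
sigmoid(X, P) :- P is 1.0 / (1.0 + exp(-X)).
inv_sigmoid(P, X) :- X is log(P / (1.0 - P)).

logit_step(OldProb, LearningRate, Grad, NewProb) :-
    inv_sigmoid(OldProb, OldValue),
    NewValue is OldValue + LearningRate * Grad,
    sigmoid(NewValue, NewProb).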
@@ -1079,7 +1083,6 @@ add_gradient(Learning_Rate) :-
 % vsc: avoid silly search
 gradient_descent :-
-    continuous_fact(_), !,
     current_iteration(Iteration),
     create_training_predictions_file_name(Iteration,File_Name),
     open(File_Name,'write',Handle),
@@ -1171,11 +1174,21 @@ gradient_descent :-
     ( % go over all tunable facts
-        tunable_fact(FactID,_),
-        (
-            continuous_fact(FactID)
-        ->
-            (
+        query_gradient(QueryID,FactID,p,GradValue),
+        atomic_concat(['grad_',FactID],Key),
+        % if the following query fails,
+        % it means, the fact is not used in the proof
+        % of QueryID, and the gradient is 0.0 and will
+        % not contribute to NewValue either way
+        % DON'T FORGET THIS IF YOU CHANGE SOMETHING HERE!
+        %writeln(u:QueryID:FactID:Y:GradValue),
+        bb_get(Key,OldValue),
+        NewValue is OldValue - Y*GradValue,
+        bb_put(Key,NewValue),
+        fail; % go to next fact
+        true
+    ),
+    ( continuous_fact(FactID),
         atomic_concat(['grad_mu_',FactID],Key),
         atomic_concat(['grad_sigma_',FactID],Key2),
@@ -1194,26 +1207,11 @@ gradient_descent :-
         NewValueSigma is OldValueSigma + Y*GradValueSigma,
         bb_put(Key,NewValueMu),
-        bb_put(Key2,NewValueSigma)
-    );
-    (
-        atomic_concat(['grad_',FactID],Key),
-        % if the following query fails,
-        % it means, the fact is not used in the proof
-        % of QueryID, and the gradient is 0.0 and will
-        % not contribute to NewValue either way
-        % DON'T FORGET THIS IF YOU CHANGE SOMETHING HERE!
-        query_gradient(QueryID,FactID,p,GradValue),
-        bb_get(Key,OldValue),
-        NewValue is OldValue + Y*GradValue,
-        bb_put(Key,NewValue)
-    )
-    ),
-    fail; % go to next fact
-    true
-    ),
+        bb_put(Key2,NewValueSigma),
+        fail
+    ;
+        true
+    ),
     once(update_query_cleanup(QueryID))
 )),
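
The rewritten update loop drops the tunable_fact/2 enumeration and instead backtracks directly over query_gradient/4; a fact that never occurs in the query's proofs yields no solution and so, as the retained comment warns, contributes nothing to any blackboard cell. Both branches use Prolog's failure-driven loop idiom, whose generic shape is (a sketch with a hypothetical wrapper name):

% Hypothetical illustration of the (generate, side effect, fail ; true)
% idiom: backtrack through every gradient of QueryID, update its
% blackboard cell, then succeed once when the generator is exhausted.
apply_gradients(QueryID, Y) :-
    (   query_gradient(QueryID, FactID, p, GradValue),
        atomic_concat(['grad_', FactID], Key),
        bb_get(Key, OldValue),
        NewValue is OldValue - Y * GradValue,
        bb_put(Key, NewValue),
        fail
    ;   true
    ).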
@@ -1232,7 +1230,6 @@ gradient_descent :-
         atomic_concat(['grad_',FactID],Key),
         bb_get(Key,V)
     ),Gradient_Values),
-
     (
         Gradient_Values==[]
     ->
@@ -1288,172 +1285,6 @@ gradient_descent :-
     !,
     forget_old_probabilities.
-
-% VSC: no continuous facts
-% simplify code
-gradient_descent :-
-    current_iteration(Iteration),
-    create_training_predictions_file_name(Iteration,File_Name),
-    open(File_Name,'write',Handle),
-    format(Handle,"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%~n",[]),
-    format(Handle,"% Iteration, train/test, QueryID, Query, GroundTruth, Prediction %~n",[]),
-    format(Handle,"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%~n",[]),
-    format_learning(2,'Gradient ',[]),
-    save_old_probabilities,
-    update_values,
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start set gradient to zero
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    forall(tunable_fact(FactID,_),
-        (
-            (
-                atomic_concat(['grad_',FactID],Key),
-                bb_put(Key,0.0)
-            )
-        )
-    ),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop gradient to zero
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start calculate gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    bb_put(mse_train_sum, 0.0),
-    bb_put(mse_train_min, 0.0),
-    bb_put(mse_train_max, 0.0),
-    bb_put(llh_training_queries, 0.0),
-    problog_flag(alpha,Alpha),
-    logger_set_variable(alpha,Alpha),
-    example_count(Example_Count),
-    forall(user:example(QueryID,Query,QueryProb,Type),
-        (
-            once(update_query(QueryID,'.',all)),
-            query_probability(QueryID,BDDProb),
-            format(Handle,'ex(~q,train,~q,~q,~10f,~10f).~n',[Iteration,QueryID,Query,QueryProb,BDDProb]),
-            (
-                QueryProb=:=0.0
-            ->
-                Y2=Alpha;
-                Y2=1.0
-            ),
-            (
-                (Type == '='; (Type == '<', BDDProb>QueryProb); (Type=='>',BDDProb<QueryProb))
-            ->
-                Y is Y2*2/Example_Count * (BDDProb-QueryProb);
-                Y=0.0
-            ),
-            % first do the calculations for the MSE on training set
-            (
-                (Type == '='; (Type == '<', BDDProb>QueryProb); (Type=='>',BDDProb<QueryProb))
-            ->
-                Squared_Error is (BDDProb-QueryProb)**2;
-                Squared_Error=0.0
-            ),
-            bb_get(mse_train_sum,Old_MSE_Train_Sum),
-            bb_get(mse_train_min,Old_MSE_Train_Min),
-            bb_get(mse_train_max,Old_MSE_Train_Max),
-            bb_get(llh_training_queries,Old_LLH_Training_Queries),
-            New_MSE_Train_Sum is Old_MSE_Train_Sum+Squared_Error,
-            New_MSE_Train_Min is min(Old_MSE_Train_Min,Squared_Error),
-            New_MSE_Train_Max is max(Old_MSE_Train_Max,Squared_Error),
-            New_LLH_Training_Queries is Old_LLH_Training_Queries+log(BDDProb),
-            bb_put(mse_train_sum,New_MSE_Train_Sum),
-            bb_put(mse_train_min,New_MSE_Train_Min),
-            bb_put(mse_train_max,New_MSE_Train_Max),
-            bb_put(llh_training_queries,New_LLH_Training_Queries),
-            ( % go over all tunable facts
-                query_gradient(QueryID,FactID,p,GradValue),
-                atomic_concat(['grad_',FactID],Key),
-                bb_get(Key,OldValue),
-                NewValue is OldValue + Y*GradValue,
-                bb_put(Key,NewValue),
-                fail; % go to next fact
-                true
-            ),
-            once(update_query_cleanup(QueryID))
-        )),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop calculate gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    !,
-    close(Handle),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start statistics on gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    findall(V, (
-        tunable_fact(FactID,_),
-        atomic_concat(['grad_',FactID],Key),
-        bb_get(Key,V)
-    ),Gradient_Values),
-    (
-        Gradient_Values==[]
-    ->
-        (
-            logger_set_variable(gradient_mean,0.0),
-            logger_set_variable(gradient_min,0.0),
-            logger_set_variable(gradient_max,0.0)
-        );
-        (
-            sum_list(Gradient_Values,GradSum),
-            max_list(Gradient_Values,GradMax),
-            min_list(Gradient_Values,GradMin),
-            length(Gradient_Values,GradLength),
-            GradMean is GradSum/GradLength,
-            logger_set_variable(gradient_mean,GradMean),
-            logger_set_variable(gradient_min,GradMin),
-            logger_set_variable(gradient_max,GradMax)
-        )
-    ),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop statistics on gradient
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    bb_delete(mse_train_sum,MSE_Train_Sum),
-    bb_delete(mse_train_min,MSE_Train_Min),
-    bb_delete(mse_train_max,MSE_Train_Max),
-    bb_delete(llh_training_queries,LLH_Training_Queries),
-    MSE is MSE_Train_Sum/Example_Count,
-    logger_set_variable(mse_trainingset,MSE),
-    logger_set_variable(mse_min_trainingset,MSE_Train_Min),
-    logger_set_variable(mse_max_trainingset,MSE_Train_Max),
-    logger_set_variable(llh_training_queries,LLH_Training_Queries),
-    format_learning(2,'~n',[]),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % start add gradient to current probabilities
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    (
-        problog_flag(line_search,false)
-    ->
-        problog_flag(learning_rate,LearningRate);
-        lineSearch(LearningRate,_)
-    ),
-    format_learning(3,'learning rate:~8f~n',[LearningRate]),
-    add_gradient(LearningRate),
-    logger_set_variable(learning_rate,LearningRate),
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    % stop add gradient to current probabilities
-    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-    !,
-    forget_old_probabilities.
 %========================================================================
 %=