/*  $Id: aggregate.pl,v 1.4 2008-07-22 23:34:49 vsc Exp $

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        wielemak@science.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 2008, University of Amsterdam

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    As a special exception, if you link this library with other files,
    compiled with a Free Software compiler, to produce an executable, this
    library does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

:- module(aggretate,
	  [ foreach/2,			% :Generator, :Goal
	    aggregate/3,		% +Templ, :Goal, -Result
	    aggregate/4,		% +Templ, +Discrim, :Goal, -Result
	    aggregate_all/3,		% +Templ, :Goal, -Result
	    aggregate_all/4,		% +Templ, +Discrim, :Goal, -Result
	    free_variables/4		% :Generator, :Template, +Vars0, -Vars
	  ]).
:- use_module(library(ordsets)).
:- use_module(library(pairs)).
:- use_module(library(error)).
:- use_module(library(lists)).

:- meta_predicate
	foreach(0,0),
	aggregate(?,0,-),
	aggregate(?,?,0,-),
	aggregate_all(?,0,-),
	aggregate_all(?,?,0,-).

/** <module> Aggregation operators on backtrackable predicates

This library provides aggregating operators  over   the  solutions  of a
predicate. The operations are a generalisation   of the bagof/3, setof/3
and findall/3 built-in predicates. The   defined  aggregation operations
are counting, computing the sum, minimum,   maximum,  a bag of solutions
and a set of solutions. We first   give  a simple example, computing the
country with the smallest area:

==
average_country_area(Name, Area) :-
	aggregate(min(A, N), country(N, A), min(Area, Name)).
==

There are four aggregation predicates, distinguished on two properties.

    $ aggregate vs. aggregate_all :
    The aggregate predicates use setof/3 (aggregate/4) or bagof/3
    (aggregate/3), dealing with existential qualified variables
    (Var^Goal) and providing multiple solutions for the remaining free
    variables in Goal. The aggregate_all/3 predicate uses findall/3,
    implicitely qualifying all free variables and providing exactly one
    solution, while aggregate_all/4 uses sort/2 over solutions and
    Distinguish (see below) generated using findall/3.

    $ The Distinguish argument :
    The versions with 4 arguments provide a Distinguish argument that
    allow for keeping duplicate bindings of a variable in the result.
    For example, if we wish to compute the total population of all
    countries we do not want to loose results because two countries
    have the same population.  Therefore we use:
    
    ==
	aggregate(sum(P), Name, country(Name, P), Total)
    ==

All aggregation predicates support the following operator below in
Template. In addition, they allow for an arbitrary named compound term
where each of the arguments is a term from the list below. I.e. the term
r(min(X), max(X)) computes both the minimum and maximum binding for X.

	* count
	Count number of solutions.  Same as sum(1).
	* sum(Expr)
	Sum of Expr for all solutions.
	* min(Expr)
	Minimum of Expr for all solutions.
	* min(Expr, Witness)
	A term min(Min, Witness), where Min is the minimal version
	of Expr over all Solution and Witness is any other template
	the applied to the solution that produced Min.  If multiple
	solutions provide the same minimum, Witness corresponds to
	the first solution.
	* max(Expr)
	Maximum of Expr for all solutions.
	* max(Expr, Witness)
	As min(Expr, Witness), but producing the maximum result.
	* set(X)
	An ordered set with all solutions for X.
	* bag(X)
	A list of all solutions for X.

---+++ Acknowledgements

_|The development of this library was sponsored by SecuritEase,
  http://www.securitease.com
|_

@compat Quintus, SICStus 4. The forall/2 is a SWI-Prolog built-in and
	term_variables/3 is a SWI-Prolog with a *|different definition|*.
@tbd	Analysing the aggregation template and compiling a predicate
	for the list aggregation can be done at compile time.
@tbd	aggregate_all/3 can be rewritten to run in constant space using
	non-backtrackable assignment on a term.
*/

		 /*******************************
		 *	     AGGREGATE		*
		 *******************************/

%%	aggregate(+Template, :Goal, -Result) is nondet.
%
%	Aggregate bindings in Goal according to Template.  The aggregate/3
%	version performs bagof/3 on Goal.

aggregate(Template, Goal0, Result) :-
	template_to_pattern(bag, Template, Pattern, Goal0, Goal, Aggregate),
	bagof(Pattern, Goal, List),
	aggregate_list(Aggregate, List, Result).

%%	aggregate(+Template, +Discriminator, :Goal, -Result) is nondet.
%
%	Aggregate bindings in Goal according to Template.  The aggregate/3
%	version performs setof/3 on Goal.

aggregate(Template, Discriminator, Goal0, Result) :-
	template_to_pattern(bag, Template, Pattern, Goal0, Goal, Aggregate),
	setof(Discriminator-Pattern, Goal, Pairs),
	pairs_values(Pairs, List),
	aggregate_list(Aggregate, List, Result).

%%	aggregate_all(+Template, :Goal, -Result) is semidet.
%
%	Aggregate bindings in Goal according to Template.  The aggregate_all/3
%	version performs findall/3 on Goal.

aggregate_all(Template, Goal0, Result) :-
	template_to_pattern(all, Template, Pattern, Goal0, Goal, Aggregate),
	findall(Pattern, Goal, List),
	aggregate_list(Aggregate, List, Result).

%%	aggregate_all(+Template, +Discriminator, :Goal, -Result) is semidet.
%
%	Aggregate bindings in Goal according to Template.  The aggregate_all/3
%	version performs findall/3 followed by sort/2 on Goal.

aggregate_all(Template, Discriminator, Goal0, Result) :-
	template_to_pattern(all, Template, Pattern, Goal0, Goal, Aggregate),
	findall(Discriminator-Pattern, Goal, Pairs0),
	sort(Pairs0, Pairs),
	pairs_values(Pairs, List),
	aggregate_list(Aggregate, List, Result).


template_to_pattern(_All, Template, Pattern, Goal0, Goal, Aggregate) :-
	template_to_pattern(Template, Pattern, Post, Vars, Aggregate),
	existential_vars(Goal0, Goal1, AllVars, Vars),
	clean_body((Goal1, Post), Goal2),
	add_existential_vars(AllVars, Goal2, Goal).

existential_vars(Var, Var) -->
	{ var(Var) }, !.
existential_vars(Var^G0, G) --> !,
	[Var],
	existential_vars(G0, G).
existential_vars(G, G) -->
	[].

add_existential_vars([], G, G).
add_existential_vars([H|T], G0, H^G1) :-
	add_existential_vars(T, G0, G1).


%%	clean_body(+Goal0, -Goal) is det.
%
%	Remove redundant =true= from Goal0.

clean_body((Goal0,Goal1), Goal) :- !,
	clean_body(Goal0, GoalA),
	clean_body(Goal1, GoalB),
	(   GoalA == true
	->  Goal = GoalB
	;   GoalB == true
	->  Goal = GoalA
	;   Goal = (GoalA,GoalB)
	).
clean_body(Goal, Goal).


%%	template_to_pattern(+Template, -Pattern, -Post, -Vars, -Agregate)
%
%	Determine which parts of the goal we must remember in the
%	findall/3 pattern.
%	
%	@param Post is a body-term that evaluates expressions to reduce
%		    storage requirements.
%	@param Vars is a list of intermediate variables that must be
%		    added to the existential variables for bagof/3.
%	@param Agregate defines the aggregation operation to execute. 

template_to_pattern(sum(X),	      X,	 true, 	  [],   sum) :- var(X), !.
template_to_pattern(sum(X0),	      X,	 X is X0, [X0], sum) :- !.
template_to_pattern(count,	      1,	 true,    [],   count) :- !.
template_to_pattern(min(X),	      X,	 true,    [],   min) :- var(X), !.
template_to_pattern(min(X0),	      X,	 X is X0, [X0], min) :- !.
template_to_pattern(min(X0, Witness), X-Witness, X is X0, [X0], min_witness) :- !.
template_to_pattern(max(X0),	      X,	 X is X0, [X0], max) :- !.
template_to_pattern(max(X0, Witness), X-Witness, X is X0, [X0], max_witness) :- !.
template_to_pattern(set(X),	      X,	 true,    [],   set) :- !.
template_to_pattern(bag(X),	      X,	 true,    [],   bag) :- !.
template_to_pattern(Term, Pattern, Goal, Vars, term(MinNeeded, Functor, AggregateArgs)) :-
	compound(Term), !,
	Term =.. [Functor|Args0],
	templates_to_patterns(Args0, Args, Goal, Vars, AggregateArgs),
	needs_one(AggregateArgs, MinNeeded),
	Pattern =.. [Functor|Args].
template_to_pattern(Term, _, _, _, _) :-
	type_error(aggregate_template, Term).

templates_to_patterns([], [], true, [], []).
templates_to_patterns([H0], [H], G, Vars, [A]) :- !,
	template_to_pattern(H0, H, G, Vars, A).
templates_to_patterns([H0|T0], [H|T], (G0,G), Vars, [A0|A]) :-
	template_to_pattern(H0, H, G0, V0, A0),
	append(V0, RV, Vars),
	templates_to_patterns(T0, T, G, RV, A).
			    
%%	needs_one(+Ops, -OneOrZero)
%
%	If one of the operations in Ops needs at least one answer,
%	unify OneOrZero to 1.  Else 0.

needs_one(Ops, 1) :-
	member(Op, Ops),
	needs_one(Op), !.
needs_one(_, 0).

needs_one(min).
needs_one(min_witness).
needs_one(max).
needs_one(max_witness).

%%	aggregate_list(+Op, +List, -Answer) is semidet.
%
%	Aggregate the answer  from  the   list  produced  by  findall/3,
%	bagof/3 or setof/3. The latter  two   cases  deal  with compound
%	answers.
%	
%	@tbd	Compile code for incremental state update, which we will use
%		for aggregate_all/3 as well.  We should be using goal_expansion
%		to generate these clauses.

aggregate_list(bag, List0, List) :- !,
	List = List0.
aggregate_list(set, List, Set) :- !,
	sort(List, Set).
aggregate_list(sum, List, Sum) :-
	sumlist(List, Sum).
aggregate_list(count, List, Count) :-
	length(List, Count).
aggregate_list(max, List, Sum) :-
	max_list(List, Sum).
aggregate_list(max_witness, List, max(Max, Witness)) :-
	max_pair(List, Max, Witness).
aggregate_list(min, List, Sum) :-
	min_list(List, Sum).
aggregate_list(min_witness, List, min(Min, Witness)) :-
	min_pair(List, Min, Witness).
aggregate_list(term(0, Functor, Ops), List, Result) :- !,
	maplist(state0, Ops, StateArgs, FinishArgs),
	State0 =.. [Functor|StateArgs],
	aggregate_term_list(List, Ops, State0, Result0),
	finish_result(Ops, FinishArgs, Result0, Result).
aggregate_list(term(1, Functor, Ops), [H|List], Result) :-
	H =.. [Functor|Args],
	maplist(state1, Ops, Args, StateArgs, FinishArgs),
	State0 =.. [Functor|StateArgs],
	aggregate_term_list(List, Ops, State0, Result0),
	finish_result(Ops, FinishArgs, Result0, Result).

aggregate_term_list([], _, State, State).
aggregate_term_list([H|T], Ops, State0, State) :-
	step_term(Ops, H, State0, State1),
	aggregate_term_list(T, Ops, State1, State).


%%	min_pair(+Pairs, -Key, -Value) is det.
%%	max_pair(+Pairs, -Key, -Value) is det.
%
%	True if Key-Value has the  smallest/largest   key  in  Pairs. If
%	multiple pairs share the smallest/largest key, the first pair is
%	returned.

min_pair([M0-W0|T], M, W) :-
	min_pair(T, M0, W0, M, W).

min_pair([], M, W, M, W).
min_pair([M0-W0|T], M1, W1, M, W) :-
	(   M0 > M1
	->  min_pair(T, M0, W0, M, W)
	;   min_pair(T, M1, W1, M, W)
	).

max_pair([M0-W0|T], M, W) :-
	max_pair(T, M0, W0, M, W).

max_pair([], M, W, M, W).
max_pair([M0-W0|T], M1, W1, M, W) :-
	(   M0 > M1
	->  max_pair(T, M0, W0, M, W)
	;   max_pair(T, M1, W1, M, W)
	).

%%	step(+AggregateAction, +New, +State0, -State1).

step(bag,   X, [X|L], L).
step(set,   X, [X|L], L).
step(count, _, X0, X1) :-
	succ(X0, X1).
step(sum,   X, X0, X1) :-
	X1 is X0+X.
step(max,   X, X0, X1) :-
	X1 is max(X0, X).
step(min,   X, X0, X1) :-
	X1 is min(X0, X).
step(max_witness, X-W, X0-W0, X1-W1) :-
	(   X > X0
	->  X1 = X, W1 = W
	;   X1 = X0, W1 = W0
	).
step(min_witness, X-W, X0-W0, X1-W1) :-
	(   X < X0
	->  X1 = X, W1 = W
	;   X1 = X0, W1 = W0
	).
step(term(Ops), Row, Row0, Row1) :-
	step_term(Ops, Row, Row0, Row1).

step_term(Ops, Row, Row0, Row1) :-
	functor(Row, Name, Arity),
	functor(Row1, Name, Arity),
	step_list(Ops, 1, Row, Row0, Row1).

step_list([], _, _, _, _).
step_list([Op|OpT], Arg, Row, Row0, Row1) :-
	arg(Arg, Row, X),
	arg(Arg, Row0, X0),
	arg(Arg, Row1, X1),
	step(Op, X, X0, X1),
	succ(Arg, Arg1),
	step_list(OpT, Arg1, Row, Row0, Row1).

finish_result(Ops, Finish, R0, R) :-
	functor(R0, Functor, Arity),
	functor(R, Functor, Arity),
	finish_result(Ops, Finish, 1, R0, R).

finish_result([], _, _, _, _).
finish_result([Op|OpT], [F|FT], I, R0, R) :-
	arg(I, R0, A0),
	arg(I, R, A),
	finish_result1(Op, F, A0, A),
	succ(I, I2),
	finish_result(OpT, FT, I2, R0, R).
	
finish_result1(bag, Bag0, [], Bag) :- !,
	Bag = Bag0.
finish_result1(set, Bag,  [], Set) :- !,
	sort(Bag, Set).
finish_result1(max_witness, _, M-W, R) :- !,
	R = max(M,W).
finish_result1(min_witness, _, M-W, R) :- !,
	R = min(M,W).
finish_result1(_, _, A, A).

%%	state0(+Op, -State, -Finish)

state0(bag,   L, L).
state0(set,   L, L).
state0(count, 0, _).
state0(sum,   0, _).

%%	state1(+Op, +First, -State, -Finish)

state1(bag, X, [X|L], L).
state1(set, X, [X|L], L).
state1(_,   X, X,     _).


		 /*******************************
		 *	       FOREACH		*
		 *******************************/

%%	foreach(:Generator, :Goal)
%
%	True if the conjunction of instances  of Goal using the bindings
%	from  Generator  is  true.  Unlike    forall/2,   which  runs  a
%	failure-driven loop that  proves  Goal   for  each  solution  of
%	Generator, foreach creates a  conjunction.   Each  member of the
%	conjunction is a copy of  Goal,   where  the variables it shares
%	with Generator are filled with the values from the corresponding
%	solution.
%	
%	The implementation executes forall/2 if   Goal  does not contain
%	any variables that are not shared with Generator.
%	
%	Here is an example:
%	
%	==
%	?- foreach(between(1,4,X), dif(X,Y)), Y = 5.
%	Y = 5
%	?- foreach(between(1,4,X), dif(X,Y)), Y = 3.
%	No
%	==
%	
%	@bug	Goal is copied repeatetly, which may cause problems if
%		attributed variables are involved.

foreach(Generator, Goal0) :-
	strip_module(Goal0, M, G),
	Goal = M:G,
	term_variables(Generator, GenVars0), sort(GenVars0, GenVars),
	term_variables(Goal, GoalVars0), sort(GoalVars0, GoalVars),
	ord_subtract(GoalVars, GenVars, SharedGoalVars),
	(   SharedGoalVars == []
	->  \+ (Generator, \+Goal)	% = forall(Generator, Goal)
	;   ord_intersection(GenVars, GoalVars, SharedVars),
	    Templ =.. [v|SharedVars],
	    SharedTempl =.. [v|SharedGoalVars],
	    findall(Templ, Generator, List),
	    prove_list(List, Templ, SharedTempl, Goal)
	).

prove_list([], _, _, _).
prove_list([H|T], Templ, SharedTempl, Goal) :-
	copy_term(Templ+SharedTempl+Goal,
		  H+SharedTempl+Copy),
	Copy,
	prove_list(T, Templ, SharedTempl, Goal).
	

%%	free_variables(:Generator, +Template, +VarList0, -VarList) is det.
%
%	In order to handle variables properly, we   have to find all the
%	universally quantified variables in the Generator. All variables
%	as yet unbound are universally quantified, unless
%   
%	    1. they occur in the template
%	    2. they are bound by X^P, setof, or bagof
%	
%	free_variables(Generator, Template, OldList, NewList) finds this
%	set, using OldList as an accumulator.
%	
%	@author Richard O'Keefe
%	@author Jan Wielemaker (made some SWI-Prolog enhancements)
%	@license Public domain (from DEC10 library).
%	@tbd Distinguish between control-structures and data terms.
%	@tbd Exploit our built-in term_variables/2 at some places?

free_variables(Term, Bound, VarList, [Term|VarList]) :-
	var(Term),
	term_is_free_of(Bound, Term),
	list_is_free_of(VarList, Term), !.
free_variables(Term, _Bound, VarList, VarList) :-
	var(Term), !.
free_variables(Term, Bound, OldList, NewList) :-
	explicit_binding(Term, Bound, NewTerm, NewBound), !,
	free_variables(NewTerm, NewBound, OldList, NewList).
free_variables(Term, Bound, OldList, NewList) :-
	functor(Term, _, N),
	free_variables(N, Term, Bound, OldList, NewList).

free_variables(0, _, _, VarList, VarList) :- !.
free_variables(N, Term, Bound, OldList, NewList) :-
	arg(N, Term, Argument),
	free_variables(Argument, Bound, OldList, MidList),
	M is N-1, !,
	free_variables(M, Term, Bound, MidList, NewList).

%   explicit_binding checks for goals known to existentially quantify
%   one or more variables.  In particular \+ is quite common.

explicit_binding(\+ _Goal,	       Bound, fail,	Bound      ) :- !.
explicit_binding(not(_Goal),	       Bound, fail,	Bound	   ) :- !.
explicit_binding(Var^Goal,	       Bound, Goal,	Bound+Var) :- !.
explicit_binding(setof(Var,Goal,Set),  Bound, Goal-Set, Bound+Var) :- !.
explicit_binding(bagof(Var,Goal,Bag),  Bound, Goal-Bag, Bound+Var) :- !.

%%	term_is_free_of(+Term, +Var) is semidet.
%
%	True if Var does not appear  in   Term.  This has been rewritten
%	from the DEC10 library source   to exploit our non-deterministic
%	arg/3.

term_is_free_of(Term, Var) :-
	\+ var_in_term(Term, Var).

var_in_term(Term, Var) :-
	Var == Term, !.
var_in_term(Term, Var) :-
	compound(Term),
	genarg(_, Term, Arg),
	var_in_term(Arg, Var), !.


%%	list_is_free_of(+List, +Var) is semidet.
%
%	True if Var is not in List.

list_is_free_of([Head|Tail], Var) :-
	Head \== Var, !,
	list_is_free_of(Tail, Var).
list_is_free_of([], _).


%	term_variables(+Term, +Vars0, -Vars) is det.
%
%	True if Vars is the union of variables in Term and Vars0.
%	We cannot have this as term_variables/3 is already defined
%	as a difference-list version of term_variables/2.

%term_variables(Term, Vars0, Vars) :-
%	term_variables(Term+Vars0, Vars).