/**
 * @file   lineutils.yap
 * @author VITOR SANTOS COSTA <vsc@VITORs-MBP.lan>
 * @date   Tue Nov 17 22:02:22 2015
 * 
 * @brief  line text processing.
 * 
 * 
*/

:- module(lineutils,
	  [search_for/2,
	   search_for/3,
	   scan_natural/3,
	   scan_integer/3,
	   natural/3,
	   integer/3,
	   blank/3,
	   split/2,
	   split/3,
	   split/4,
	   split/5,
       split_unquoted/3,
	   fields/2,
	   fields/3,
	   glue/3,
	   copy_line/2,
	   filter/3,
	   file_filter/3,
       file_select/2,
		file_filter_with_initialization/5,
		file_filter_with_start_end/5,
		file_filter_with_initialization/5 as file_filter_with_init,
	   process/2
	  ]).

/** @defgroup line_utils Line Manipulation Utilities
@ingroup library
@{

This package provides a set of useful predicates to manipulate
sequences of character codes, usually first read in as a line. It is
available by loading the library:
~~~~
:- use_module(library(lineutils)).
~~~~


*/

% Meta-predicate declarations.  A digit N marks an argument that is a
% closure called with N extra arguments; declaring it lets the goal be
% resolved in the caller's module instead of in lineutils.
% file_select/2 calls its goal with one extra argument (the line), just
% like process/2, so it is declared the same way.
:- meta_predicate
	filter(+,+,2),
	file_filter(+,+,2),
	file_filter_with_initialization(+,+,2,+,:),
	file_filter_with_start_end(+,+,2,2,2),
	file_select(+,1),
	process(+,1).

:- use_module(library(lists),
	      [member/2,
	       append/3]).

:- use_module(library(readutil),
	      [read_line_to_codes/2]).

/**
 @pred search_for(+ _Char_,+ _Line_)
  Succeeds if the character code  _Char_ occurs in the list of codes
  _Line_.

 @pred search_for(+ _Char_,+ _Line_,- _RestOfLine_)
  Skip  _Line_ up to and including the first occurrence of  _Char_,
  and unify  _RestOfLine_ with the codes that follow it.
*/
% The codes after the match do not matter here.  Requiring [] as the
% remainder would make the search fail whenever Char is not the last
% code of the line.
search_for(C,L) :-
	search_for(C, L, _).

% Commit to the first occurrence of C; otherwise drop one code and retry.
search_for(C) --> [C], !.
search_for(C) --> [_],
	search_for(C).

/** @pred scan_integer(? _Int_,+ _Line_,+ _RestOfLine_)

Scan the front of the list of codes  _Line_ for an integer  _Int_,
written as an optional minus sign followed by decimal digits, and
unify  _RestOfLine_ with the remainder of the line. If no digit is
found  _Int_ is 0.
*/
scan_integer(N) -->
	"-", !,
	scan_natural(0, N0),
	% arithmetic is a plain goal, so it must be wrapped in {}
	{ N is -N0 }.
scan_integer(N) -->
	scan_natural(0, N).

/** @pred integer(? _Int_,+ _Line_,+ _RestOfLine_)

Scan the front of the list of codes  _Line_ for an integer  _Int_,
written as an optional minus sign followed by decimal digits, and
unify  _RestOfLine_ with the remainder of the line. If no digit is
found  _Int_ is 0.
*/
integer(N) -->
	"-", !,
	natural(0, N0),
	% arithmetic is a plain goal, so it must be wrapped in {}
	{ N is -N0 }.
integer(N) -->
	natural(0, N).

/** @pred scan_natural(? _Nat_,+ _Line_,+ _RestOfLine_)

Scan the front of the list of codes  _Line_ for a natural number
_Nat_, zero or a positive integer, and unify  _RestOfLine_ with the
remainder of the line. If  _Line_ does not start with a digit,
_Nat_ is 0 and nothing is consumed.
*/
scan_natural(N) -->
	scan_natural(0, N).

% scan_natural(+N0, -N)//: N0 is the value of the digits read so far.
% Each further digit shifts the accumulator one decimal place.
scan_natural(N0,N) -->
	[C],
	{C >= 0'0, C =< 0'9 }, !,
	{ N1 is N0*10+(C-0'0) }, %'
	scan_natural(N1,N).
scan_natural(N,N) --> [].

/** @pred natural(? _Nat_,+ _Line_,+ _RestOfLine_)

Scan the front of the list of codes  _Line_ for a natural number
_Nat_, zero or a positive integer, and unify  _RestOfLine_ with the
remainder of the line. If  _Line_ does not start with a digit,
_Nat_ is 0 and nothing is consumed.
*/
natural(N) -->
	natural(0, N).

% natural(+N0, -N)//: N0 is the value of the digits read so far.
% Each further digit shifts the accumulator one decimal place.
natural(N0,N) -->
	[C],
	{C >= 0'0, C =< 0'9 }, !,
	{ N1 is N0*10+(C-0'0) }, %'
	natural(N1,N).
natural(N,N) --> [].

/** @pred skip_whitespace(- _Blanks_,+ _Line_,- _RestOfLine_)

Consume white space, namely space and tabbing characters, from the
front of the list of codes  _Line_.  _Blanks_ is unified with the
codes that were consumed and  _RestOfLine_ with what follows them.
The longest run is tried first.
*/
skip_whitespace([0' |Consumed]) -->
	[0' ],
	skip_whitespace(Consumed).
skip_whitespace([0'\t|Consumed]) -->
	[0'\t],
	skip_whitespace(Consumed).
skip_whitespace([]) -->
	!.

/** @pred blank(? _Blanks_,+ _Line_,- _RestOfLine_)

 _Blanks_ is a run of white space, namely space and tabbing
 characters, found at the front of the list of codes  _Line_, and
 _RestOfLine_ is what follows that run. The longest run is tried
 first.
*/
blank([0' |Consumed]) -->
	[0' ],
	blank(Consumed).
blank([0'\t|Consumed]) -->
	[0'\t],
	blank(Consumed).
blank([]) -->
	[].


/** @pred split(+ _Line_,- _Split_)

Unify  _Split_ with the list of strings obtained from  _Line_ by
using the blank characters (space and tab) as separators.
*/
split(Line, Words) :-
	split(Line, " 	", Words).

/** @pred split(+ _Line_,+ _Separators_,- _Split_)


Unify  _Split_ with the list of strings obtained from  _Line_ by
using the character codes in  _Separators_ as separators. Runs of
separators count as a single break, so no empty strings are
produced. As an example, consider:

~~~~~{.prolog}
?- split("Hello * I am free"," *",S).

S = ["Hello","I","am","free"] ?

no
~~~~~

*/
split(Line, Separators, Words) :-
	% the whole line must be consumed, hence the [] remainder
	split_at_blank(Separators, Words, Line, []).

% split_at_blank(+Separators, -Words, +Codes, -Rest)
%
% Between words: drop separator codes until a word starts, then hand
% over to split_/5 to collect it.  Written with explicit difference
% lists (Codes-Rest).
split_at_blank(Separators, Words, [Code|Codes], Rest) :-
	member(Code, Separators), !,
	split_at_blank(Separators, Words, Codes, Rest).
split_at_blank(Separators, [[Code|Word]|Words], [Code|Codes], Rest) :-
	!,
	split_(Separators, Word, Words, Codes, Rest).
split_at_blank(_, [], Rest, Rest).

% split_(+Separators, -Word, -Words, +Codes, -Rest)
%
% Inside a word: a separator closes the current word, any other code
% is appended to it, and end of input closes both the word and the
% list of words.
split_(Separators, [], Words, [Code|Codes], Rest) :-
	member(Code, Separators), !,
	split_at_blank(Separators, Words, Codes, Rest).
split_(Separators, [Code|Word], Words, [Code|Codes], Rest) :-
	!,
	split_(Separators, Word, Words, Codes, Rest).
split_(_, [], [], Rest, Rest).


% split(+Text, +SplitCodes, +DoubleQs, +SingleQs, -Strings)
%
% Break the list of codes Text into the list of code lists Strings.
%  - a code in SplitCodes closes the current string (which may be
%    empty) and is dropped;
%  - a code in SingleQs is an escape: it is dropped and the code that
%    follows it is copied literally;
%  - a code in DoubleQs opens a quoted section that extends up to the
%    next occurrence of the same code; everything in between (except
%    escapes) is copied literally and the two quote codes are dropped.
% The call fails if a quoted section is not closed or if an escape is
% the last code of Text.
split(Text, SplitCodes, DoubleQs, SingleQs, Strings) :-
	split_element(SplitCodes, DoubleQs, SingleQs, Strings, Text, []).

% split_element//4: outside quotes.  Strings is either unbound (no
% string under construction) or a partial list [Current|More].
split_element(SplitCodes,  DoubleQs, SingleQs, Strings) -->
    [C],
    !,
    split_element(SplitCodes,  DoubleQs, SingleQs, Strings, C).
% End of input with no string under construction.
split_element(_SplitCodes,  _DoubleQs, _SingleQs, []) --> !.
% End of input: close the string under construction.
split_element(_SplitCodes,  _DoubleQs, _SingleQs, [[]]) --> [].

% split_element//5: dispatch on the code C that was just read.
split_element(SplitCodes,  DoubleQs, SingleQs, Strings, C) -->
	{ member( C, SingleQs ) },
	!,
	 [C2],
	{ Strings = [[C2|String]|More] },
	split_element(SplitCodes,  DoubleQs, SingleQs, [String| More]).
split_element(SplitCodes,  DoubleQs, SingleQs, [[]|Strings], C) -->
	{ member( C, SplitCodes ) },
	!,
	split_element(SplitCodes,  DoubleQs, SingleQs, Strings).
split_element(SplitCodes,  DoubleQs, SingleQs, Strings, C) -->
	{ member( C, DoubleQs ) } ,
	!,
	% remember which quote opened the section: only C closes it
	split_within(SplitCodes,  C-DoubleQs, SingleQs, Strings).
split_element(SplitCodes,  DoubleQs, SingleQs, [[C|String]|Strings], C) -->
	split_element(SplitCodes,  DoubleQs, SingleQs, [String|Strings]).

% split_within//4: inside a quoted section.  The second argument is
% the pair OpeningQuote-DoubleQs built by split_element//5.
split_within(SplitCodes,  QuoteInfo, SingleQs, Strings) -->
    [C],
    split_within(SplitCodes,  QuoteInfo, SingleQs, Strings, C).

split_within(SplitCodes,  QuoteInfo, SingleQs, Strings, C) -->
	{ member( C, SingleQs ) },
	!,
	 [C2],
	{ Strings = [[C2|String]|More] },
	split_within(SplitCodes,  QuoteInfo, SingleQs, [String| More]).
% Closing quote: the pair lives in the SECOND argument, so that is
% where it has to be matched; resume normal scanning with the plain
% list of quote codes.
split_within(SplitCodes,  C-DoubleQs, SingleQs, Strings, C) -->
	!,
	split_element(SplitCodes,  DoubleQs, SingleQs, Strings).
split_within(SplitCodes,  QuoteInfo, SingleQs, [[C|String]|Strings], C) -->
	split_within(SplitCodes,  QuoteInfo, SingleQs, [String|Strings]).


/** @pred split_quoted(+ _Line_,+ _Separators_, GroupQuotes, SingleQuotes, - _Split_)


Unify  _Split_ with the list of strings obtained from  _Line_ by
using the character codes in  _Separators_ as separators, while treating text within quotes as a single unit. As an
example, consider:

~~~~~{.prolog}
?- split_quoted("Hello * I \"am free\""," *",S).

S = ["Hello","I","am free"] ?

no
~~~~~

*/
% split_quoted(-Quoted, ?Unused)//
%
% Read codes up to and including a double quote and unify Quoted with
% them (the quote is part of Quoted).  A backslash is copied together
% with the code that follows it.  The second argument is only passed
% along and never inspected.  No clause commits, so further solutions
% exist on backtracking.
split_quoted([0'"], _Unused) --> %0'"
    [0'"]. %0'"
split_quoted([0'\\, Code|Text], Unused) -->
    [0'\\, Code],
    split_quoted(Text, Unused).
split_quoted([Code|Text], Unused) -->
    [Code],
    split_quoted(Text, Unused).

/** @pred fields(+ _Line_,- _Split_)

Unify  _Split_ with the list of strings obtained from  _Line_ by
using the blank characters (space and tab) as field separators.

*/
% Delegates to fields/3.  The separators are spelled as an explicit
% code list so that the call does not depend on the double_quotes flag.
fields(String, Strings) :-
	fields(String, [0' , 0'\t], Strings).

/** @pred fields(+ _Line_,+ _Separators_,- _Split_)

Unify  _Split_ with the list of strings obtained from  _Line_ by
using the character codes in  _Separators_ as separators for
fields. If two separators occur in a row, the field is considered
empty. An empty  _Line_ yields the empty list. As an example,
consider:

~~~~~{.prolog}
?- fields("Hello  I am  free"," *",S).

  S = ["Hello","","I","am","","free"] ?
~~~~~
*/
fields(String, FieldsCodes, Strings) :-
	dofields(FieldsCodes, First, More, String, []),
	(
	  % an empty line has no fields at all, not one empty field
	  First = [], More = []
	->
	  Strings = []
	;
	  Strings = [First|More]
	).

% dofields(+FieldsCodes, -Field, -MoreFields)//
%
% Field is the field being read and MoreFields the ones after it.  A
% separator closes Field and starts a new one, any other code is added
% to Field, and end of input closes the last field.
dofields(FieldsCodes, [], [New|More]) -->
	[C],
	{ member(C, FieldsCodes) }, !,
	dofields(FieldsCodes, New, More).
dofields(FieldsCodes, [C|New], Set) -->
	[C], !,
	dofields(FieldsCodes, New, Set).
dofields(_, [], []) --> [].

/** @pred glue(+ _Words_,+ _Separator_,- _Line_)

Unify  _Line_ with the string obtained by joining the strings in
_Words_, placing a separator code between consecutive ones.
_Separator_ is a list of codes of which only the first one is used.
*/
glue([], _, []).
glue([Only], _, Only) :- !.
glue([Word|Words], [Sep|_], Line) :-
	% Line = Word ++ [Sep] ++ Tail, where Tail glues the other words
	append(Word, [Sep|Tail], Line),
	glue(Words, [Sep], Tail).

/** @pred copy_line(+ _StreamInput_,+ _StreamOutput_)

Read one line from  _StreamInput_ and write it, followed by a
newline, to  _StreamOutput_.
*/
copy_line(Source, Sink) :-
	read_line_to_codes(Source, Codes),
	format(Sink, '~s~n', [Codes]).


/** @pred filter(+ _StreamInp_, + _StreamOut_, + _Goal_)

For every line  _LineIn_ in stream  _StreamInp_, execute
`call(Goal,LineIn,LineOut)`, and output  _LineOut_ to
stream  _StreamOut_. If `call(Goal,LineIn,LineOut)` fails, or if
_LineOut_ is not ground, nothing is output for that solution but
execution continues. Every solution of the goal is output, not just
the first one. As an example, consider a procedure to select the
second and fifth field of a CSV table :
~~~~~{.prolog}
select(Sep, In, Out) :-
	fields(In, Sep, [_,F2,_,_,F5|_]),
	glue([F2,F5], Sep, Out).

select :-
	filter(user_input, user_output, select(",")).
~~~~~

*/
filter(StreamInp, StreamOut, Command) :-
	repeat,
	read_line_to_codes(StreamInp, LineIn),
	(
	 LineIn \== end_of_file
	->
	 % failure-driven: emit each solution, then go back for more
	 call(Command, LineIn, LineOut),
	 ground(LineOut),
	 format(StreamOut, '~s~n', [LineOut]),
	 fail
	;
	 true
	),
	% end of file reached: drop the repeat/0 choice point
	!.

/** @pred process(+ _StreamInp_, + _Goal_) is meta

Read stream  _StreamInp_ line by line up to end of file and run
`call(Goal,LineIn)` on every line  _LineIn_. The goal is executed
for its side effects only: all its solutions are tried and its
bindings are discarded.
*/
process(StreamInp, Command) :-
	repeat,
	read_line_to_codes(StreamInp, LineIn),
	(
	 LineIn \== end_of_file
	->
	 % failure-driven loop over the solutions and then the lines
	 call(Command, LineIn),
	 fail
	;
	 true
	),
	% end of file reached: drop the repeat/0 choice point
	!.

/**
  * @pred file_filter(+ _FileIn_, + _FileOut_, + _Goal_)  is meta
  *
  * @param _FileIn_  File to process
  * @param _FileOut_ Output file
  * @param _Goal_ to be metacalled, receives the input line and the
  * output line as extra arguments
  *
  * @return succeeds

  For every line  _LineIn_ in file  _FileIn_, execute
  `call(Goal,LineIn,LineOut)`, and output  _LineOut_ to file
  _FileOut_.

  The input stream is accessible through the alias `filter_input`, and
  the output stream is accessible through `filter_output`.

  Both streams are closed when the filter ends, including when it
  ends with an exception.
*/
file_filter(Inp, Out, Command) :-
	open(Inp, read, StreamInp, [alias(filter_input)]),
	% do not leave the input stream (and its alias) behind if the
	% output file cannot be opened
	catch(open(Out, write, StreamOut, [alias(filter_output)]),
	      OpenError,
	      ( close(StreamInp), throw(OpenError) )),
	% release both streams before passing an error on to the caller
	catch(filter(StreamInp, StreamOut, Command),
	      FilterError,
	      ( close(StreamInp), close(StreamOut), throw(FilterError) )),
	close(StreamInp),
	close(StreamOut).

/** @pred file_filter_with_initialization(+ _FileIn_, + _FileOut_, + _Goal_, + _FormatCommand_,   + _Arguments_)

Same as file_filter/3, but before starting the filter execute
`format/3` on the output stream, using  _FormatCommand_ and
 _Arguments_.

The input stream is accessible through the alias `filter_input`, and
the output stream through `filter_output`. Both streams are closed
when the filter ends, including when it ends with an exception.
*/
file_filter_with_initialization(Inp, Out, Command, FormatString, Parameters) :-
	open(Inp, read, StreamInp, [alias(filter_input)]),
	% do not leave the input stream (and its alias) behind if the
	% output file cannot be opened
	catch(open(Out, write, StreamOut, [alias(filter_output)]),
	      OpenError,
	      ( close(StreamInp), throw(OpenError) )),
	% release both streams before passing an error on to the caller
	catch(( format(StreamOut, FormatString, Parameters),
		filter(StreamInp, StreamOut, Command)
	      ),
	      RunError,
	      ( close(StreamInp), close(StreamOut), throw(RunError) )),
	close(StreamInp),
	close(StreamOut).


/** @pred file_filter_with_start_end(+ FileIn, + FileOut, + Goal, + StartGoal,   + EndGoal)

Same as file_filter/3, but before starting the filter execute
`call(StartGoal,StreamIn,StreamOut)`, and after the filter execute
`call(EndGoal,StreamIn,StreamOut)` as an epilog.

The streams are always accessible through the aliases `filter_input`
and `filter_output`. Both are closed when the predicate ends, whether
it succeeds, fails (because  _StartGoal_ or  _EndGoal_ failed) or
raises an exception. The predicate succeeds at most once.
*/
file_filter_with_start_end(Inp, Out, Command, StartGoal, EndGoal) :-
	open(Inp, read, StreamInp, [alias(filter_input)]),
	% do not leave the input stream (and its alias) behind if the
	% output file cannot be opened
	catch(open(Out, write, StreamOut, [alias(filter_output)]),
	      OpenError,
	      ( close(StreamInp), throw(OpenError) )),
	(   % release both streams before passing an error on
	    catch(( call( StartGoal, StreamInp, StreamOut ),
		    filter(StreamInp, StreamOut, Command),
		    call( EndGoal, StreamInp, StreamOut )
		  ),
		  RunError,
		  ( close(StreamInp), close(StreamOut), throw(RunError) ))
	->  close(StreamInp),
	    close(StreamOut)
	;   % a start or end goal failed: still release the streams
	    close(StreamInp),
	    close(StreamOut),
	    fail
	).


/**
  * @pred file_select(+ _FileIn_, + _Goal_)  is meta
  *
  * @param _FileIn_  File to process
  * @param _Goal_ to be metacalled, receives the current line as
  * extra argument
  *
  * @return  bindings to arguments of _Goal_.

  For every line  _LineIn_ in file  _FileIn_, execute
  `call(Goal,LineIn)`. The predicate succeeds once for each solution
  of the goal; backtracking into it moves on to the next solution and
  then to the next lines. When the end of the file is reached the
  stream is closed and the predicate fails.

  The input stream is accessible through the alias `filter_input`. If
  an earlier call is still registered (its file was not read to the
  end), one `_` is appended to the alias for each such call, giving
  `filter_input_`, `filter_input__`, and so on.
*/
file_select(Inp, Command) :-
	% alias/1 holds the suffix the next stream alias must use
	( retract(alias(F)) -> true ; F = '' ),
	atom_concat(filter_input, F, Alias),
	% the alias has to be given as an alias/1 stream option
	open(Inp, read, StreamInp, [alias(Alias)]),
	atom_concat('_', F, NF),
	assert( alias(NF) ),
	repeat,
	read_line_to_codes(StreamInp, Line),
	(
	 Line == end_of_file
	->
	 close(StreamInp),
	 % give the suffix back so that the alias can be reused
	 retract(alias(NF)),
	 assert(alias(F)),
	 !,
	 fail
	;
	 call(Command, Line)
	).

/**
@}
*/