2014-09-11 20:06:57 +01:00
|
|
|
|
2008-01-14 16:34:06 +00:00
|
|
|
:- module(line_utils,
|
2008-09-01 04:44:54 +01:00
|
|
|
[search_for/2,
|
|
|
|
search_for/3,
|
2008-01-14 16:34:06 +00:00
|
|
|
scan_natural/3,
|
2008-01-23 22:22:42 +00:00
|
|
|
scan_integer/3,
|
2014-11-25 12:03:48 +00:00
|
|
|
natural/3,
|
|
|
|
integer/3,
|
|
|
|
blank/3,
|
2008-12-17 14:46:39 +00:00
|
|
|
split/2,
|
2008-02-25 10:15:31 +00:00
|
|
|
split/3,
|
2009-07-15 23:30:27 +01:00
|
|
|
fields/2,
|
|
|
|
fields/3,
|
2008-02-25 10:15:31 +00:00
|
|
|
glue/3,
|
2008-09-01 04:44:54 +01:00
|
|
|
copy_line/2,
|
2008-02-25 10:15:31 +00:00
|
|
|
filter/3,
|
2008-02-28 00:33:52 +00:00
|
|
|
file_filter/3,
|
2014-09-10 05:55:47 +01:00
|
|
|
file_filter_with_initialization/5,
|
2008-09-04 16:58:45 +01:00
|
|
|
process/2
|
2008-01-14 16:34:06 +00:00
|
|
|
]).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @defgroup LineUtilities Line Manipulation Utilities
|
2015-01-04 23:58:23 +00:00
|
|
|
@ingroup library
|
2014-09-13 06:26:46 +01:00
|
|
|
@{
|
|
|
|
|
|
|
|
This package provides a set of useful predicates to manipulate
|
|
|
|
sequences of characters codes, usually first read in as a line. It is
|
2014-09-15 09:13:50 +01:00
|
|
|
available by loading the
|
|
|
|
~~~~
|
|
|
|
:- use_module(library(lineutils)).
|
|
|
|
~~~~
|
2014-09-13 06:26:46 +01:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
2010-04-22 20:45:57 +01:00
|
|
|
:- meta_predicate
|
|
|
|
filter(+,+,2),
|
|
|
|
file_filter(+,+,2),
|
2014-10-02 14:20:28 +01:00
|
|
|
file_filter_with_initialization(+,+,2,+,:),
|
2010-04-22 20:45:57 +01:00
|
|
|
process(+,1).
|
2008-02-25 10:15:31 +00:00
|
|
|
|
2008-01-14 16:34:06 +00:00
|
|
|
:- use_module(library(lists),
|
2008-02-25 10:15:31 +00:00
|
|
|
[member/2,
|
|
|
|
append/3]).
|
|
|
|
|
|
|
|
:- use_module(library(readutil),
|
|
|
|
[read_line_to_codes/2]).
|
2008-01-14 16:34:06 +00:00
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/**
|
|
|
|
@pred search_for(+ _Char_,+ _Line_)
|
|
|
|
Search for a character _Char_ in the list of codes _Line_.
|
|
|
|
*/
|
2008-09-01 04:44:54 +01:00
|
|
|
search_for(C,L) :-
|
|
|
|
search_for(C, L, []).
|
2008-01-14 16:34:06 +00:00
|
|
|
|
|
|
|
search_for(C) --> [C], !.
|
|
|
|
search_for(C) --> [_],
|
|
|
|
search_for(C).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred scan_integer(? _Int_,+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
Scan the list of codes _Line_ for an integer _Nat_, either a
|
|
|
|
positive, zero, or negative integer, and unify _RestOfLine_ with
|
|
|
|
the remainder of the line.
|
|
|
|
*/
|
2008-01-23 22:22:42 +00:00
|
|
|
scan_integer(N) -->
|
|
|
|
"-", !,
|
|
|
|
scan_natural(0, N0),
|
|
|
|
N is -N0.
|
|
|
|
scan_integer(N) -->
|
2008-01-14 16:34:06 +00:00
|
|
|
scan_natural(0, N).
|
|
|
|
|
2014-11-25 12:03:48 +00:00
|
|
|
/** @pred integer(? _Int_,+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
Scan the list of codes _Line_ for an integer _Nat_, either a
|
|
|
|
positive, zero, or negative integer, and unify _RestOfLine_ with
|
|
|
|
the remainder of the line.
|
|
|
|
*/
|
|
|
|
integer(N) -->
|
|
|
|
"-", !,
|
|
|
|
natural(0, N0),
|
|
|
|
N is -N0.
|
|
|
|
integer(N) -->
|
|
|
|
natural(0, N).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred scan_natural(? _Nat_,+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
Scan the list of codes _Line_ for a natural number _Nat_, zero
|
|
|
|
or a positive integer, and unify _RestOfLine_ with the remainder
|
|
|
|
of the line.
|
|
|
|
*/
|
2008-09-01 04:44:54 +01:00
|
|
|
scan_natural(N) -->
|
|
|
|
scan_natural(0, N).
|
|
|
|
|
2008-01-14 16:34:06 +00:00
|
|
|
scan_natural(N0,N) -->
|
|
|
|
[C],
|
|
|
|
{C >= 0'0, C =< 0'9 }, !,
|
2008-09-01 04:44:54 +01:00
|
|
|
{ N1 is N0*10+(C-0'0) }, %'
|
2008-01-14 16:34:06 +00:00
|
|
|
get_natural(N1,N).
|
|
|
|
scan_natural(N,N) --> [].
|
|
|
|
|
2014-11-25 12:03:48 +00:00
|
|
|
/** @pred natural(? _Nat_,+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
Scan the list of codes _Line_ for a natural number _Nat_, zero
|
|
|
|
or a positive integer, and unify _RestOfLine_ with the remainder
|
|
|
|
of the line.
|
|
|
|
*/
|
|
|
|
natural(N) -->
|
|
|
|
natural(0, N).
|
|
|
|
|
|
|
|
natural(N0,N) -->
|
|
|
|
[C],
|
|
|
|
{C >= 0'0, C =< 0'9 }, !,
|
|
|
|
{ N1 is N0*10+(C-0'0) }, %'
|
|
|
|
get_natural(N1,N).
|
|
|
|
natural(N,N) --> [].
|
|
|
|
|
|
|
|
/** @pred skip_whitespace(+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
Scan the list of codes _Line_ for white space, namely for tabbing and space characters.
|
|
|
|
*/
|
|
|
|
skip_whitespace([0' |Blanks]) -->
|
|
|
|
" ",
|
|
|
|
skip_whitespace( Blanks ).
|
|
|
|
skip_whitespace([0' |Blanks]) -->
|
|
|
|
" ",
|
|
|
|
skip_whitespace( Blanks ).
|
|
|
|
skip_whitespace( [] ) -->
|
|
|
|
!.
|
|
|
|
|
|
|
|
/** @pred blank(+ _Line_,+ _RestOfLine_)
|
|
|
|
|
|
|
|
The list of codes _Line_ is formed by white space, namely by tabbing and space characters.
|
|
|
|
*/
|
|
|
|
blank([0' |Blanks]) -->
|
|
|
|
" ",
|
|
|
|
blank( Blanks ).
|
|
|
|
blank([0' |Blanks]) -->
|
|
|
|
" ",
|
|
|
|
blank( Blanks ).
|
|
|
|
blank( [] ) -->
|
|
|
|
[].
|
|
|
|
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred split(+ _Line_,- _Split_)
|
|
|
|
|
|
|
|
Unify _Words_ with a set of strings obtained from _Line_ by
|
|
|
|
using the blank characters as separators.
|
|
|
|
*/
|
2008-12-17 14:46:39 +00:00
|
|
|
split(String, Strings) :-
|
|
|
|
split_at_blank(" ", Strings, String, []).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred split(+ _Line_,+ _Separators_,- _Split_)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Unify _Words_ with a set of strings obtained from _Line_ by
|
|
|
|
using the character codes in _Separators_ as separators. As an
|
|
|
|
example, consider:
|
|
|
|
|
|
|
|
~~~~~{.prolog}
|
|
|
|
?- split("Hello * I am free"," *",S).
|
|
|
|
|
|
|
|
S = ["Hello","I","am","free"] ?
|
|
|
|
|
|
|
|
no
|
|
|
|
~~~~~
|
|
|
|
|
|
|
|
*/
|
2008-02-25 10:15:31 +00:00
|
|
|
split(String, SplitCodes, Strings) :-
|
|
|
|
split_at_blank(SplitCodes, Strings, String, []).
|
2008-01-14 16:34:06 +00:00
|
|
|
|
2008-02-25 10:15:31 +00:00
|
|
|
split_at_blank(SplitCodes, More) -->
|
2008-01-14 16:34:06 +00:00
|
|
|
[C],
|
|
|
|
{ member(C, SplitCodes) }, !,
|
2008-02-25 10:15:31 +00:00
|
|
|
split_at_blank(SplitCodes, More).
|
|
|
|
split_at_blank(SplitCodes, [[C|New]| More]) -->
|
|
|
|
[C], !,
|
|
|
|
split(SplitCodes, New, More).
|
|
|
|
split_at_blank(_, []) --> [].
|
|
|
|
|
|
|
|
split(SplitCodes, [], More) -->
|
|
|
|
[C],
|
|
|
|
{ member(C, SplitCodes) }, !,
|
|
|
|
split_at_blank(SplitCodes, More).
|
2008-01-14 16:34:06 +00:00
|
|
|
split(SplitCodes, [C|New], Set) -->
|
|
|
|
[C], !,
|
|
|
|
split(SplitCodes, New, Set).
|
|
|
|
split(_, [], []) --> [].
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred fields(+ _Line_,- _Split_)
|
|
|
|
|
|
|
|
Unify _Words_ with a set of strings obtained from _Line_ by
|
|
|
|
using the blank characters as field separators.
|
|
|
|
|
|
|
|
*/
|
2009-07-15 23:30:27 +01:00
|
|
|
fields(String, Strings) :-
|
|
|
|
fields(" ", Strings, String, []).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred fields(+ _Line_,+ _Separators_,- _Split_)
|
|
|
|
|
|
|
|
Unify _Words_ with a set of strings obtained from _Line_ by
|
|
|
|
using the character codes in _Separators_ as separators for
|
|
|
|
fields. If two separators occur in a row, the field is considered
|
|
|
|
empty. As an example, consider:
|
|
|
|
|
|
|
|
~~~~~{.prolog}
|
|
|
|
?- fields("Hello I am free"," *",S).
|
|
|
|
|
|
|
|
S = ["Hello","","I","am","","free"] ?
|
|
|
|
~~~~~
|
|
|
|
*/
|
2009-07-15 23:30:27 +01:00
|
|
|
fields(String, FieldsCodes, Strings) :-
|
|
|
|
dofields(FieldsCodes, First, More, String, []),
|
|
|
|
(
|
|
|
|
First = [], More = []
|
|
|
|
->
|
|
|
|
Strings = []
|
|
|
|
;
|
|
|
|
Strings = [First|More]
|
|
|
|
).
|
|
|
|
|
|
|
|
dofields(FieldsCodes, [], New.More) -->
|
|
|
|
[C],
|
|
|
|
{ member(C, FieldsCodes) }, !,
|
|
|
|
dofields(FieldsCodes, New, More).
|
|
|
|
dofields(FieldsCodes, [C|New], Set) -->
|
|
|
|
[C], !,
|
|
|
|
dofields(FieldsCodes, New, Set).
|
|
|
|
dofields(_, [], []) --> [].
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred glue(+ _Words_,+ _Separator_,- _Line_)
|
|
|
|
|
|
|
|
Unify _Line_ with string obtained by glueing _Words_ with
|
|
|
|
the character code _Separator_.
|
|
|
|
*/
|
2008-02-25 10:15:31 +00:00
|
|
|
glue([], _, []).
|
2008-04-14 18:30:18 +01:00
|
|
|
glue([A], _, A) :- !.
|
2008-02-25 10:15:31 +00:00
|
|
|
glue([H|T], [B|_], Merged) :-
|
|
|
|
append(H, [B|Rest], Merged),
|
|
|
|
glue(T, [B], Rest).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred copy_line(+ _StreamInput_,+ _StreamOutput_)
|
|
|
|
|
|
|
|
Copy a line from _StreamInput_ to _StreamOutput_.
|
|
|
|
*/
|
2008-02-25 10:15:31 +00:00
|
|
|
copy_line(StreamInp, StreamOut) :-
|
|
|
|
read_line_to_codes(StreamInp, Line),
|
|
|
|
format(StreamOut, '~s~n', [Line]).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
|
|
|
|
/** @pred filter(+ _StreamInp_, + _StreamOut_, + _Goal_)
|
|
|
|
|
|
|
|
For every line _LineIn_ in stream _StreamInp_, execute
|
|
|
|
`call(Goal,LineIn,LineOut)`, and output _LineOut_ to
|
2014-09-15 09:13:50 +01:00
|
|
|
stream _StreamOut_. If `call(Goal,LineIn,LineOut)` fails,
|
|
|
|
nothing will be output but execution continues with the next
|
|
|
|
line. As an example, consider a procedure to select the second and
|
|
|
|
fifth field of a CSV table :
|
|
|
|
~~~~~{.prolog}
|
|
|
|
select(Sep, In, Out) :-
|
|
|
|
fields(In, Sep, [_,F2,_,_,F5|_]),
|
|
|
|
fields(Out,Sep, [F2,F5]).
|
|
|
|
|
|
|
|
select :-
|
|
|
|
filter(",",
|
|
|
|
~~~~~
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
*/
|
2008-02-25 10:15:31 +00:00
|
|
|
filter(StreamInp, StreamOut, Command) :-
|
|
|
|
repeat,
|
|
|
|
read_line_to_codes(StreamInp, Line),
|
|
|
|
(
|
|
|
|
Line == end_of_file
|
|
|
|
->
|
2009-08-20 02:23:20 +01:00
|
|
|
!
|
2008-02-25 10:15:31 +00:00
|
|
|
;
|
|
|
|
call(Command, Line, NewLine),
|
|
|
|
format(StreamOut, '~s~n', [NewLine]),
|
|
|
|
fail
|
|
|
|
).
|
|
|
|
|
2014-09-15 09:13:50 +01:00
|
|
|
/** @pred process(+ _StreamInp_, + _Goal_) is meta
|
2008-02-25 10:15:31 +00:00
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
For every line _LineIn_ in stream _StreamInp_, call
|
|
|
|
`call(Goal,LineIn)`.
|
|
|
|
*/
|
2008-02-28 00:33:52 +00:00
|
|
|
process(StreamInp, Command) :-
|
|
|
|
repeat,
|
|
|
|
read_line_to_codes(StreamInp, Line),
|
|
|
|
(
|
|
|
|
Line == end_of_file
|
|
|
|
->
|
2009-08-20 02:23:20 +01:00
|
|
|
!
|
2008-02-28 00:33:52 +00:00
|
|
|
;
|
|
|
|
call(Command, Line),
|
|
|
|
fail
|
|
|
|
).
|
|
|
|
|
|
|
|
|
2014-09-15 09:13:50 +01:00
|
|
|
/** @pred file_filter(+ _FileIn_, + _FileOut_, + _Goal_) is meta
|
2014-09-13 06:26:46 +01:00
|
|
|
|
|
|
|
For every line _LineIn_ in file _FileIn_, execute
|
|
|
|
`call(Goal,LineIn,LineOut)`, and output _LineOut_ to file
|
|
|
|
_FileOut_.
|
2014-11-25 12:03:48 +00:00
|
|
|
|
|
|
|
The input stream is accessible through the alias `filter_input`, and the output
|
|
|
|
stream is accessible through `filter_output`.
|
2014-09-13 06:26:46 +01:00
|
|
|
*/
|
2008-02-25 10:15:31 +00:00
|
|
|
file_filter(Inp, Out, Command) :-
|
2014-11-25 12:03:48 +00:00
|
|
|
open(Inp, read, StreamInp, [alias(filter_input)]),
|
2008-02-25 10:15:31 +00:00
|
|
|
open(Out, write, StreamOut),
|
|
|
|
filter(StreamInp, StreamOut, Command),
|
|
|
|
close(StreamInp),
|
|
|
|
close(StreamOut).
|
|
|
|
|
2014-09-13 06:26:46 +01:00
|
|
|
/** @pred file_filter_with_initialization(+ _FileIn_, + _FileOut_, + _Goal_, + _FormatCommand_, + _Arguments_)
|
|
|
|
|
|
|
|
Same as file_filter/3, but before starting the filter execute
|
|
|
|
`format/3` on the output stream, using _FormatCommand_ and
|
|
|
|
_Arguments_.
|
|
|
|
*/
|
2014-09-10 05:55:47 +01:00
|
|
|
file_filter_with_initialization(Inp, Out, Command, FormatString, Parameters) :-
|
2014-11-25 12:03:48 +00:00
|
|
|
open(Inp, read, StreamInp, [alias(filter_input)]),
|
|
|
|
open(Out, write, StreamOut, [alias(filter_output)]),
|
2010-04-18 21:48:00 +01:00
|
|
|
format(StreamOut, FormatString, Parameters),
|
|
|
|
filter(StreamInp, StreamOut, Command),
|
|
|
|
close(StreamInp),
|
|
|
|
close(StreamOut).
|
2014-09-13 06:26:46 +01:00
|
|
|
/**
|
|
|
|
@}
|
|
|
|
*/
|