This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/library/lineutils.yap

485 lines
12 KiB
Plaintext
Raw Normal View History

2015-11-18 15:06:25 +00:00
/**
* @file lineutils.yap
* @author VITOR SANTOS COSTA <vsc@VITORs-MBP.lan>
* @date Tue Nov 17 22:02:22 2015
*
* @brief line text processing.
*
*
*/
2014-09-11 20:06:57 +01:00
2016-01-31 19:41:10 +00:00
:- module(lineutils,
2008-09-01 04:44:54 +01:00
[search_for/2,
search_for/3,
scan_natural/3,
scan_integer/3,
2014-11-25 12:03:48 +00:00
natural/3,
integer/3,
blank/3,
split/2,
split/3,
2016-08-16 15:49:43 +01:00
split/4,
split/5,
split_unquoted/3,
fields/2,
fields/3,
glue/3,
2008-09-01 04:44:54 +01:00
copy_line/2,
filter/3,
file_filter/3,
2015-07-06 12:04:42 +01:00
file_select/2,
file_filter_with_initialization/5,
2016-08-16 15:49:43 +01:00
file_filter_with_start_end/5,
2015-07-06 12:04:42 +01:00
file_filter_with_initialization/5 as file_filter_with_init,
2008-09-04 16:58:45 +01:00
process/2
]).
2015-11-18 15:06:25 +00:00
/** @defgroup line_utils Line Manipulation Utilities
2015-01-04 23:58:23 +00:00
@ingroup library
2014-09-13 06:26:46 +01:00
@{
This package provides a set of useful predicates to manipulate
sequences of characters codes, usually first read in as a line. It is
2015-07-06 12:04:42 +01:00
available by loading the
2014-09-15 09:13:50 +01:00
~~~~
:- use_module(library(lineutils)).
~~~~
2014-09-13 06:26:46 +01:00
2015-11-18 15:06:25 +00:00
2014-09-13 06:26:46 +01:00
*/
:- meta_predicate
filter(+,+,2),
file_filter(+,+,2),
2014-10-02 14:20:28 +01:00
file_filter_with_initialization(+,+,2,+,:),
2016-08-16 15:49:43 +01:00
file_filter_with_start_end(+,+,2,2,2),
process(+,1).
:- use_module(library(lists),
[member/2,
append/3]).
:- use_module(library(readutil),
[read_line_to_codes/2]).
2014-09-13 06:26:46 +01:00
/**
2015-07-06 12:04:42 +01:00
@pred search_for(+ _Char_,+ _Line_)
2014-09-13 06:26:46 +01:00
Search for a character _Char_ in the list of codes _Line_.
*/
2008-09-01 04:44:54 +01:00
search_for(C,L) :-
search_for(C, L, []).
search_for(C) --> [C], !.
search_for(C) --> [_],
search_for(C).
2015-07-06 12:04:42 +01:00
/** @pred scan_integer(? _Int_,+ _Line_,+ _RestOfLine_)
2014-09-13 06:26:46 +01:00
Scan the list of codes _Line_ for an integer _Nat_, either a
positive, zero, or negative integer, and unify _RestOfLine_ with
2015-07-06 12:04:42 +01:00
the remainder of the line.
2014-09-13 06:26:46 +01:00
*/
scan_integer(N) -->
"-", !,
scan_natural(0, N0),
N is -N0.
scan_integer(N) -->
scan_natural(0, N).
2015-07-06 12:04:42 +01:00
/** @pred integer(? _Int_,+ _Line_,+ _RestOfLine_)
2014-11-25 12:03:48 +00:00
Scan the list of codes _Line_ for an integer _Nat_, either a
positive, zero, or negative integer, and unify _RestOfLine_ with
2015-07-06 12:04:42 +01:00
the remainder of the line.
2014-11-25 12:03:48 +00:00
*/
integer(N) -->
"-", !,
natural(0, N0),
N is -N0.
integer(N) -->
natural(0, N).
2015-07-06 12:04:42 +01:00
/** @pred scan_natural(? _Nat_,+ _Line_,+ _RestOfLine_)
2014-09-13 06:26:46 +01:00
Scan the list of codes _Line_ for a natural number _Nat_, zero
or a positive integer, and unify _RestOfLine_ with the remainder
of the line.
*/
2008-09-01 04:44:54 +01:00
scan_natural(N) -->
scan_natural(0, N).
scan_natural(N0,N) -->
[C],
{C >= 0'0, C =< 0'9 }, !,
2008-09-01 04:44:54 +01:00
{ N1 is N0*10+(C-0'0) }, %'
get_natural(N1,N).
scan_natural(N,N) --> [].
2015-07-06 12:04:42 +01:00
/** @pred natural(? _Nat_,+ _Line_,+ _RestOfLine_)
2014-11-25 12:03:48 +00:00
Scan the list of codes _Line_ for a natural number _Nat_, zero
or a positive integer, and unify _RestOfLine_ with the remainder
of the line.
*/
natural(N) -->
natural(0, N).
natural(N0,N) -->
[C],
{C >= 0'0, C =< 0'9 }, !,
{ N1 is N0*10+(C-0'0) }, %'
get_natural(N1,N).
natural(N,N) --> [].
2015-07-06 12:04:42 +01:00
/** @pred skip_whitespace(+ _Line_,+ _RestOfLine_)
2014-11-25 12:03:48 +00:00
Scan the list of codes _Line_ for white space, namely for tabbing and space characters.
*/
skip_whitespace([0' |Blanks]) -->
" ",
skip_whitespace( Blanks ).
skip_whitespace([0' |Blanks]) -->
" ",
skip_whitespace( Blanks ).
skip_whitespace( [] ) -->
!.
2015-07-06 12:04:42 +01:00
/** @pred blank(+ _Line_,+ _RestOfLine_)
2014-11-25 12:03:48 +00:00
The list of codes _Line_ is formed by white space, namely by tabbing and space characters.
*/
blank([0' |Blanks]) -->
" ",
blank( Blanks ).
blank([0' |Blanks]) -->
" ",
blank( Blanks ).
blank( [] ) -->
[].
2014-09-13 06:26:46 +01:00
/** @pred split(+ _Line_,- _Split_)
Unify _Words_ with a set of strings obtained from _Line_ by
2015-07-06 12:04:42 +01:00
using the blank characters as separators.
2014-09-13 06:26:46 +01:00
*/
split(String, Strings) :-
split_at_blank(" ", Strings, String, []).
2015-07-06 12:04:42 +01:00
/** @pred split(+ _Line_,+ _Separators_,- _Split_)
2014-09-13 06:26:46 +01:00
Unify _Words_ with a set of strings obtained from _Line_ by
using the character codes in _Separators_ as separators. As an
example, consider:
~~~~~{.prolog}
?- split("Hello * I am free"," *",S).
S = ["Hello","I","am","free"] ?
no
~~~~~
2015-07-06 12:04:42 +01:00
2014-09-13 06:26:46 +01:00
*/
split(String, SplitCodes, Strings) :-
split_at_blank(SplitCodes, Strings, String, []).
split_at_blank(SplitCodes, More) -->
[C],
{ member(C, SplitCodes) }, !,
split_at_blank(SplitCodes, More).
split_at_blank(SplitCodes, [[C|New]| More]) -->
[C], !,
2016-08-16 15:49:43 +01:00
split_(SplitCodes, New, More).
split_at_blank(_, []) --> [].
2016-08-16 15:49:43 +01:00
split_(SplitCodes, [], More) -->
[C],
{ member(C, SplitCodes) }, !,
split_at_blank(SplitCodes, More).
2016-08-16 15:49:43 +01:00
split_(SplitCodes, [C|New], Set) -->
[C], !,
2016-08-16 15:49:43 +01:00
split_(SplitCodes, New, Set).
split_(_, [], []) --> [].
2016-08-16 15:49:43 +01:00
split(Text, SplitCodes, DoubleQs, SingleQs, Strings) :-
split_element(SplitCodes, DoubleQs, SingleQs, Strings, Text, []).
split_element(SplitCodes, DoubleQs, SingleQs, Strings) -->
[C],
!,
split_element(SplitCodes, DoubleQs, SingleQs, Strings, C).
split_element(_SplitCodes, _DoubleQs, _SingleQs, []) --> !.
split_element(_SplitCodes, _DoubleQs, _SingleQs, [[]]) --> [].
split_element(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
{ member( C, SingleQs ) },
!,
[C2],
{ Strings = [[C2|String]|More] },
split_element(SplitCodes, DoubleQs, SingleQs, [String| More]).
split_element(SplitCodes, DoubleQs, SingleQs, [[]|Strings], C) -->
{ member( C, SplitCodes ) },
!,
split_element(SplitCodes, DoubleQs, SingleQs, Strings).
split_element(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
{ member( C, DoubleQs ) } ,
!,
split_within(SplitCodes, C-DoubleQs, SingleQs, Strings).
split_element(SplitCodes, DoubleQs, SingleQs, [[C|String]|Strings], C) -->
split_element(SplitCodes, DoubleQs, SingleQs, [String|Strings]).
split_within(SplitCodes, DoubleQs, SingleQs, Strings) -->
[C],
split_within(SplitCodes, DoubleQs, SingleQs, Strings, C).
split_within(SplitCodes, DoubleQs, SingleQs, Strings, C) -->
{ member( C, SingleQs ) },
!,
[C2],
{ Strings = [[C2|String]|More] },
split_within(SplitCodes, DoubleQs, SingleQs, [String| More]).
split_within(SplitCodes, DoubleQs, C-SingleQs, Strings, C) -->
!,
split_element(SplitCodes, DoubleQs, SingleQs, Strings).
split_within(SplitCodes, DoubleQs, SingleQs, [[C|String]|Strings], C) -->
split_within(SplitCodes, DoubleQs, SingleQs, [String|Strings]).
/** @pred split_quoted(+ _Line_,+ _Separators_, GroupQuotes, SingleQuotes, - _Split_)
Unify _Words_ with a set of strings obtained from _Line_ by
2016-08-16 15:49:43 +01:00
using the character codes in _Separators_ as separators, but treat text within quotes as a single unit. As an
example, consider:
~~~~~{.prolog}
2016-08-16 15:49:43 +01:00
?- split_quoted("Hello * I \"am free\""," *",S).
S = ["Hello","I","am free"] ?
no
~~~~~
*/
2016-02-20 20:40:23 +00:00
split_quoted( [0'"], _More) --> %0'"
"\"".
2016-01-31 19:41:10 +00:00
split_quoted( [0'\\ ,C|New], More) -->
%0'"
"\\",
[C],
split_quoted(New, More).
split_quoted( [C|New], More) --> %0'"
[C],
split_quoted(New, More).
2014-09-13 06:26:46 +01:00
/** @pred fields(+ _Line_,- _Split_)
Unify _Words_ with a set of strings obtained from _Line_ by
using the blank characters as field separators.
*/
fields(String, Strings) :-
fields(" ", Strings, String, []).
2015-07-06 12:04:42 +01:00
/** @pred fields(+ _Line_,+ _Separators_,- _Split_)
2014-09-13 06:26:46 +01:00
Unify _Words_ with a set of strings obtained from _Line_ by
using the character codes in _Separators_ as separators for
fields. If two separators occur in a row, the field is considered
empty. As an example, consider:
~~~~~{.prolog}
?- fields("Hello I am free"," *",S).
S = ["Hello","","I","am","","free"] ?
2014-09-13 06:26:46 +01:00
~~~~~
*/
fields(String, FieldsCodes, Strings) :-
dofields(FieldsCodes, First, More, String, []),
(
First = [], More = []
->
Strings = []
;
Strings = [First|More]
).
dofields(FieldsCodes, [], New.More) -->
[C],
{ member(C, FieldsCodes) }, !,
dofields(FieldsCodes, New, More).
dofields(FieldsCodes, [C|New], Set) -->
[C], !,
dofields(FieldsCodes, New, Set).
dofields(_, [], []) --> [].
2015-07-06 12:04:42 +01:00
/** @pred glue(+ _Words_,+ _Separator_,- _Line_)
2014-09-13 06:26:46 +01:00
Unify _Line_ with string obtained by glueing _Words_ with
the character code _Separator_.
*/
glue([], _, []).
glue([A], _, A) :- !.
glue([H|T], [B|_], Merged) :-
append(H, [B|Rest], Merged),
glue(T, [B], Rest).
2015-07-06 12:04:42 +01:00
/** @pred copy_line(+ _StreamInput_,+ _StreamOutput_)
2014-09-13 06:26:46 +01:00
Copy a line from _StreamInput_ to _StreamOutput_.
*/
copy_line(StreamInp, StreamOut) :-
read_line_to_codes(StreamInp, Line),
format(StreamOut, '~s~n', [Line]).
2014-09-13 06:26:46 +01:00
2015-07-06 12:04:42 +01:00
/** @pred filter(+ _StreamInp_, + _StreamOut_, + _Goal_)
2014-09-13 06:26:46 +01:00
For every line _LineIn_ in stream _StreamInp_, execute
`call(Goal,LineIn,LineOut)`, and output _LineOut_ to
2015-07-06 12:04:42 +01:00
stream _StreamOut_. If `call(Goal,LineIn,LineOut)` fails,
2014-09-15 09:13:50 +01:00
nothing will be output but execution continues with the next
line. As an example, consider a procedure to select the second and
fifth field of a CSV table :
~~~~~{.prolog}
2015-07-06 12:04:42 +01:00
select(Sep, In, Out) :-
2014-09-15 09:13:50 +01:00
fields(In, Sep, [_,F2,_,_,F5|_]),
fields(Out,Sep, [F2,F5]).
select :-
filter(",",
~~~~~
2014-09-13 06:26:46 +01:00
*/
filter(StreamInp, StreamOut, Command) :-
repeat,
read_line_to_codes(StreamInp, Line),
(
Line == end_of_file
->
2009-08-20 02:23:20 +01:00
!
;
call(Command, Line, NewLine),
ground(NewLine),
format(StreamOut, '~s~n', [NewLine]),
fail
).
2014-09-15 09:13:50 +01:00
/** @pred process(+ _StreamInp_, + _Goal_) is meta
2014-09-13 06:26:46 +01:00
For every line _LineIn_ in stream _StreamInp_, call
2015-07-06 12:04:42 +01:00
`call(Goal,LineIn)`.
2014-09-13 06:26:46 +01:00
*/
process(StreamInp, Command) :-
repeat,
read_line_to_codes(StreamInp, Line),
(
Line == end_of_file
->
2009-08-20 02:23:20 +01:00
!
;
call(Command, Line),
fail
).
2015-07-06 12:04:42 +01:00
/**
2015-03-16 17:25:09 +00:00
* @pred file_filter(+ _FileIn_, + _FileOut_, + _Goal_) is meta
2015-07-06 12:04:42 +01:00
*
2015-03-16 17:25:09 +00:00
* @param _FileIn_ File to process
* @param _FileOut_ Output file, often user_error
* @param _Goal_ to be metacalled, receives FileIn and FileOut as
* extra arguments
2015-07-06 12:04:42 +01:00
*
2015-03-16 17:25:09 +00:00
* @return succeeds
For every line _LineIn_ in file _FileIn_, execute
`call(Goal,LineIn,LineOut)`, and output _LineOut_ to file
_FileOut_.
The input stream is accessible through the alias `filter_input`, and
the output stream is accessible through `filter_output`.
2014-09-13 06:26:46 +01:00
*/
file_filter(Inp, Out, Command) :-
2014-11-25 12:03:48 +00:00
open(Inp, read, StreamInp, [alias(filter_input)]),
open(Out, write, StreamOut),
filter(StreamInp, StreamOut, Command),
close(StreamInp),
close(StreamOut).
2014-09-13 06:26:46 +01:00
/** @pred file_filter_with_initialization(+ _FileIn_, + _FileOut_, + _Goal_, + _FormatCommand_, + _Arguments_)
Same as file_filter/3, but before starting the filter execute
`format/3` on the output stream, using _FormatCommand_ and
_Arguments_.
*/
2014-09-10 05:55:47 +01:00
file_filter_with_initialization(Inp, Out, Command, FormatString, Parameters) :-
2014-11-25 12:03:48 +00:00
open(Inp, read, StreamInp, [alias(filter_input)]),
open(Out, write, StreamOut, [alias(filter_output)]),
format(StreamOut, FormatString, Parameters),
filter(StreamInp, StreamOut, Command),
close(StreamInp),
close(StreamOut).
2015-03-16 17:25:09 +00:00
2016-08-16 15:49:43 +01:00
/** @pred file_filter_with_start_end(+ FileIn, + FileOut, + Goal, + StartGoal, + EndGoal)
Same as file_filter/3, but before starting the filter execute
_StartGoal_, and call _ENdGoal_ as an epilog.
The input stream are always accessible through `filter_output` and `filter_input`.
*/
file_filter_with_start_end(Inp, Out, Command, StartGoal, EndGoal) :-
open(Inp, read, StreamInp, [alias(filter_input)]),
open(Out, write, StreamOut, [alias(filter_output)]),
call( StartGoal, StreamInp, StreamOut ),
filter(StreamInp, StreamOut, Command),
call( EndGoal, StreamInp, StreamOut ),
close(StreamInp),
close(StreamOut).
2015-07-06 12:04:42 +01:00
/**
2015-03-16 17:25:09 +00:00
* @pred file_select(+ _FileIn_, + _Goal_) is meta
2015-07-06 12:04:42 +01:00
*
2015-03-16 17:25:09 +00:00
* @param _FileIn_ File to process
* @param _Goal_ to be metacalled, receives FileIn as
* extra arguments
2015-07-06 12:04:42 +01:00
*
2015-03-16 17:25:09 +00:00
* @return bindings to arguments of _Goal_.
For every line _LineIn_ in file _FileIn_, execute
`call(`Goal,LineIn)`.
The input stream is accessible through the alias `filter_input`, and
the output stream is accessible through `filter_output`.
*/
file_select(Inp, Command) :-
2015-03-20 12:50:06 +00:00
( retract(alias(F)) -> true ; F = '' ),
atom_concat(filter_input, F, Alias),
open(Inp, read, StreamInp, [Alias]),
atom_concat('_', F, NF),
assert( alias(NF) ),
2015-03-16 17:25:09 +00:00
repeat,
read_line_to_codes(StreamInp, Line),
(
Line == end_of_file
->
2015-03-20 12:50:06 +00:00
close(StreamInp),
retract(alias(NF)),
assert(alias(F)),
2015-03-16 17:25:09 +00:00
!,
2015-03-20 12:50:06 +00:00
atom_concat(filter_input, F, Alias),
2015-03-16 17:25:09 +00:00
fail
;
call(Command, Line)
).
2014-09-13 06:26:46 +01:00
/**
@}
*/