247 lines
8.3 KiB
Perl
247 lines
8.3 KiB
Perl
|
/* Part of SWI-Prolog
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: J.Wielemaker@vu.nl
|
||
|
WWW: http://www.swi-prolog.org
|
||
|
Copyright (C): 2008-2013, University of Amsterdam
|
||
|
VU University Amsterdam
|
||
|
Vienna University of Technology
|
||
|
|
||
|
This program is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU General Public License
|
||
|
as published by the Free Software Foundation; either version 2
|
||
|
of the License, or (at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
|
|
||
|
As a special exception, if you link this library with other files,
|
||
|
compiled with a Free Software compiler, to produce an executable, this
|
||
|
library does not by itself cause the resulting executable to be covered
|
||
|
by the GNU General Public License. This exception does not however
|
||
|
invalidate any other reasons why the executable file might be covered by
|
||
|
the GNU General Public License.
|
||
|
*/
|
||
|
|
||
|
% Module header. The first three exports form the high level interface;
% the remaining ones are the low level building blocks.
:- module(pure_input,
          [ phrase_from_file/2,             % :Grammar, +File
            phrase_from_file/3,             % :Grammar, +File, +Options
            syntax_error//1,                % +ErrorTerm
                                            % Low level interface
            lazy_list_location//1,          % -Location
            lazy_list_character_count//1,   % -CharacterCount
            phrase_from_stream/2,           % :Grammar, +Stream
            stream_to_lazy_list/2           % +Stream, -List
          ]).
|
||
|
:- use_module(library(option)).
|
||
|
:- use_module(library(error)).
|
||
|
|
||
|
/** <module> Pure Input from files
|
||
|
|
||
|
This module is part of pio.pl, dealing with _pure_ _input_: processing
|
||
|
input streams from the outside world using pure predicates, notably
|
||
|
grammar rules (DCG). Using pure predicates makes non-deterministic
|
||
|
processing of input much simpler.
|
||
|
|
||
|
Pure input uses coroutining (freeze/2) to read input from the external
|
||
|
source into a list _|on demand|_. The overhead of lazy reading is more
|
||
|
than compensated for by using block reads based on read_pending_input/3.
|
||
|
|
||
|
@tbd Provide support for alternative input readers, e.g. reading
|
||
|
terms, tokens, etc.
|
||
|
@tbd Support non-repositioning streams, such as sockets and pipes.
|
||
|
@author Ulrich Neumerkel
|
||
|
@author Jan Wielemaker
|
||
|
*/
|
||
|
|
||
|
% Option declaration for the 3rd argument of phrase_from_file/3:
% buffer_size(+PositiveInteger) is handled by this library, all other
% options are handed to the 4th argument of system:open/4.
:- predicate_options(phrase_from_file/3, 3,
                     [ buffer_size(positive_integer),
                       pass_to(system:open/4, 4)
                     ]).
|
||
|
|
||
|
%%  phrase_from_file(:Grammar, +File) is nondet.
%
%   Parse the content of File with the DCG rule Grammar. The file is
%   presented to Grammar as a list that is extended on demand. Space
%   usage depends on the length of the part of the list that Grammar
%   has not yet committed to: committed parts of the temporary list
%   are reclaimed by the garbage collector.
%
%   Below is a very simple definition for searching a string in a
%   file:
%
%   ==
%   ... --> []|[_],... .
%
%   file_contains(File, Pattern) :-
%       phrase_from_file((..., Pattern, ...), File).
%
%   match_count(File, Pattern, Count) :-
%       findall(x, file_contains(File, Pattern), Xs),
%       length(Xs, Count).
%   ==
%
%   This can be called as shown below. Note that the pattern must be
%   a string (code list):
%
%   ==
%   ?- match_count('pure_input.pl', "file", Count).
%   ==
%
%   Equivalent to phrase_from_file/3 with an empty option list.

:- meta_predicate
    phrase_from_file(//, +),
    phrase_from_file(//, +, +),
    phrase_from_stream(//, +).

phrase_from_file(Grammar, File) :-
    NoOptions = [],
    phrase_from_file(Grammar, File, NoOptions).
|
||
|
|
||
|
%%  phrase_from_file(:Grammar, +File, +Options) is nondet.
%
%   As phrase_from_file/2, with additional Options. The option
%   =buffer_size= is removed from Options and applied to the stream
%   using set_stream/2; it defaults to 512 if not given. All
%   remaining options are passed to open/4. Of particular importance
%   are the open/4 options =type= and =encoding=.
%
%   The file is opened for reading and closed again when Grammar
%   completes, fails or raises an exception.

phrase_from_file(Grammar, File, Options) :-
    pio_split_buffer_size(Options, BufferSize, OpenOptions),
    setup_call_cleanup(
        open(File, read, Stream, OpenOptions),
        phrase_stream(Grammar, Stream, BufferSize),
        close(Stream)).

%   pio_split_buffer_size(+Options, -BufferSize, -OpenOptions) is det.
%
%   Separate the buffer_size option from the options meant for
%   open/4. If Options holds no buffer_size, BufferSize is 512 and
%   OpenOptions is Options.

pio_split_buffer_size(Options, BufferSize, OpenOptions) :-
    select_option(buffer_size(BufferSize), Options, OpenOptions),
    !.
pio_split_buffer_size(Options, 512, Options).
|
||
|
|
||
|
%   phrase_stream(:Grammar, +Stream, +BufferSize)
%
%   Set the buffer size of Stream to BufferSize and then run Grammar
%   on the content of Stream through phrase_from_stream/2.

phrase_stream(Grammar, Stream, BufferSize) :-
    BufferProperty = buffer_size(BufferSize),
    set_stream(Stream, BufferProperty),
    phrase_from_stream(Grammar, Stream).
|
||
|
|
||
|
|
||
|
%%  phrase_from_stream(:Grammar, +Stream)
%
%   Helper for phrase_from_file/3: turn Stream into a lazy list and
%   require Grammar to parse that list completely. This predicate
%   cooperates with syntax_error//1 to generate syntax error
%   locations for grammars.

phrase_from_stream(Grammar, Stream) :-
    stream_to_lazy_list(Stream, Codes),
    phrase(Grammar, Codes, []).
|
||
|
|
||
|
%%  syntax_error(+Error)//
%
%   Throw the syntax error Error, annotated with the current
%   location in the input. This non-terminal is designed to be
%   called from a grammar that is run through phrase_from_file/3.
%
%   Written as a plain clause with the two DCG list arguments made
%   explicit; the second is never bound because the clause always
%   ends in throw/1.
%
%   @throws error(syntax_error(Error), Location), where Location
%           is computed as by lazy_list_location//1.

syntax_error(Error, Here, _Rest) :-
    lazy_list_location(Here, Location),
    throw(error(syntax_error(Error), Location)).
|
||
|
|
||
|
%%  lazy_list_location(-Location)// is det.
%
%   True when Location is an (error) location term that represents
%   the current location in the DCG list. No input is consumed.
%
%   @arg Location is one of the following terms:
%
%        - file(Name, Line, LinePos, CharNo)
%          if the stream behind the lazy list has a file name.
%        - stream(Stream, Line, LinePos, CharNo)
%          if no file is associated with the stream.
%        - `end_of_file-CharCount`
%          if the lazy list is fully materialized (ends in =|[]|=).
%
%   @see lazy_list_character_count//1 only provides the character
%        count.

lazy_list_location(Location, List, Rest) :-
    Rest = List,
    lazy_list_location(List, Location).
|
||
|
|
||
|
%   lazy_list_location(+Here, -Location) is det.
%
%   Location describes the position of the (lazy) list Here in its
%   input. '$skip_list'/3 walks to the end of the part of the list
%   that has been materialized, counting Skipped elements:
%
%     - If the tail is an attributed variable, the arguments of its
%       frozen read_to_input_stream/4 goal provide the stream, the
%       position at which the last block started (PrevPos) and the
%       position at which the next block starts (Pos). The character
%       offset of Here is the character count of Pos minus Skipped.
%       Line and line position are recovered by repositioning the
%       stream at PrevPos and re-reading up to that offset.
%     - If nothing has been read yet, PrevPos is the atom `-`. There
%       is no block to re-read, and Here is at Pos, so line and line
%       position are taken from Pos directly. (Passing `-` to
%       set_stream_position/2 would raise an error.)
%     - If the tail is `[]`, Location is `end_of_file-Skipped`.
%
%   Note that the re-reading branch moves the read pointer of the
%   stream.
%
%   @error type_error(lazy_list, Here) if Here ends in neither a
%          frozen tail nor `[]`.

lazy_list_location(Here, Location) :-
    '$skip_list'(Skipped, Here, Tail),
    (   attvar(Tail)
    ->  frozen(Tail,
               pure_input:read_to_input_stream(Stream, PrevPos, Pos, _List)),
        Details = [Line, LinePos, CharNo],
        (   stream_property(Stream, file_name(File))
        ->  PosParts = [file, File|Details]
        ;   PosParts = [stream, Stream|Details]
        ),
        Location =.. PosParts,
        stream_position_data(char_count, Pos, EndRecordCharNo),
        CharNo is EndRecordCharNo - Skipped,
        (   PrevPos == (-)
        ->  % Nothing was read: Pos is where the lazy list starts.
            stream_position_data(line_count, Pos, Line),
            stream_position_data(line_position, Pos, LinePos)
        ;   % Re-read the last block up to CharNo to find the line
            % and line position.
            set_stream_position(Stream, PrevPos),
            stream_position_data(char_count, PrevPos, StartRecordCharNo),
            Skip is CharNo-StartRecordCharNo,
            forall(between(1, Skip, _), get_code(Stream, _)),
            stream_property(Stream, position(ErrorPos)),
            stream_position_data(line_count, ErrorPos, Line),
            stream_position_data(line_position, ErrorPos, LinePos)
        )
    ;   Tail == []
    ->  Location = end_of_file-Skipped
    ;   type_error(lazy_list, Here)
    ).
|
||
|
|
||
|
|
||
|
%%  lazy_list_character_count(-CharCount)//
%
%   True when CharCount is the current character count in the lazy
%   list. The count is computed by finding the distance to the next
%   frozen tail of the lazy list. No input is consumed. CharCount is
%   one of:
%
%     - An integer
%     - A term end_of_file-Count
%
%   @see lazy_list_location//1 provides full details of the location
%        for error reporting.

lazy_list_character_count(CharCount, List, Rest) :-
    Rest = List,
    lazy_list_character_count(List, CharCount).
|
||
|
|
||
|
%   lazy_list_character_count(+Here, -CharNo) is det.
%
%   CharNo is the character offset of the (lazy) list Here. The
%   materialized part of Here is skipped using '$skip_list'/3 and
%   the result is derived from the tail that is found.
%
%   @error type_error(lazy_list, Here) if Here ends in neither a
%          frozen tail nor `[]`.

lazy_list_character_count(Here, CharNo) :-
    '$skip_list'(Skipped, Here, Tail),
    pio_tail_char_count(Tail, Skipped, Here, CharNo).

%   pio_tail_char_count(+Tail, +Skipped, +Here, -CharNo)
%
%   Clause 1: Tail is frozen on read_to_input_stream/4. The third
%   argument of that goal is the stream position at which the next
%   block starts; Here is Skipped characters before it.
%   Clause 2: the list is fully materialized.
%   Clause 3: anything else is not a lazy list.

pio_tail_char_count(Tail, Skipped, _Here, CharNo) :-
    attvar(Tail),
    !,
    frozen(Tail,
           pure_input:read_to_input_stream(_Stream, _PrevPos, NextPos, _List)),
    stream_position_data(char_count, NextPos, EndRecordCharNo),
    CharNo is EndRecordCharNo - Skipped.
pio_tail_char_count(Tail, Skipped, _Here, CharNo) :-
    Tail == [],
    !,
    CharNo = end_of_file-Skipped.
pio_tail_char_count(_Tail, _Skipped, Here, _CharNo) :-
    type_error(lazy_list, Here).
|
||
|
|
||
|
|
||
|
%%  stream_to_lazy_list(+Stream, -List) is det.
%
%   Create a lazy list representing the character codes in Stream.
%   It must be possible to reposition Stream. List is a list that
%   ends in a delayed goal. List can be unified completely
%   transparently to a (partial) list and processed transparently
%   using DCGs, but please be aware that a lazy list is not the same
%   as a materialized list in all respects.
%
%   Typically, this predicate is used as a building block for more
%   high level safe predicates such as phrase_from_file/2.
%
%   The atom `-` is passed as the initial "previous position",
%   marking that no block has been read yet.
%
%   @tbd Enhance support for lazy lists throughout the system.

stream_to_lazy_list(Stream, List) :-
    NoPrevPos = (-),
    stream_to_lazy_list(Stream, NoPrevPos, List).
|
||
|
|
||
|
%   stream_to_lazy_list(+Stream, +PrevPos, -List)
%
%   Suspend List on a read_to_input_stream/4 goal that records the
%   current position of Stream (Pos) together with PrevPos, the
%   position handed in by the caller. The goal runs when List is
%   bound, e.g. when a grammar inspects it.

stream_to_lazy_list(Stream, PrevPos, List) :-
    stream_property(Stream, position(Pos)),
    Reader = read_to_input_stream(Stream, PrevPos, Pos, List),
    freeze(List, Reader).
|
||
|
|
||
|
%   read_to_input_stream(+Stream, +PrevPos, +Pos, -List)
%
%   Delayed goal that materializes the next part of a lazy list.
%   Stream is repositioned at Pos. At end of stream List becomes
%   `[]`; otherwise List is the pending input of Stream, ending in a
%   new lazy tail whose previous position is Pos.
%
%   PrevPos is not used here. The argument order matters:
%   lazy_list_location//1 and lazy_list_character_count//1 retrieve
%   this goal with frozen/2 and read its arguments.

read_to_input_stream(Stream, _PrevPos, Pos, List) :-
    set_stream_position(Stream, Pos),
    pio_fill_from(Stream, Pos, List).

%   pio_fill_from(+Stream, +Pos, -List)
%
%   Close List at end of stream; otherwise extend it with the
%   pending input and a fresh lazy tail.

pio_fill_from(Stream, _Pos, List) :-
    at_end_of_stream(Stream),
    !,
    List = [].
pio_fill_from(Stream, Pos, List) :-
    read_pending_input(Stream, List, Tail),
    stream_to_lazy_list(Stream, Pos, Tail).
|