This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/swi/library/pure_input.pl

248 lines
8.3 KiB
Prolog

/* Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: J.Wielemaker@vu.nl
WWW: http://www.swi-prolog.org
Copyright (C): 2008-2013, University of Amsterdam
VU University Amsterdam
Vienna University of Technology
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, if you link this library with other files,
compiled with a Free Software compiler, to produce an executable, this
library does not by itself cause the resulting executable to be covered
by the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
% Exports the file-level entry points, the syntax_error//1 helper and
% the low-level lazy-list primitives they are built on.
:- module(pure_input,
[ phrase_from_file/2, % :Grammar, +File
phrase_from_file/3, % :Grammar, +File, +Options
syntax_error//1, % +ErrorTerm
% Low level interface
lazy_list_location//1, % -Location
lazy_list_character_count//1, % -CharacterCount
phrase_from_stream/2, % :Grammar, +Stream
stream_to_lazy_list/2 % +Stream, -List
]).
:- use_module(library(option)).
:- use_module(library(error)).
/** <module> Pure Input from files
@ingroup swi
This module is part of pio.pl, dealing with _pure_ _input_: processing
input streams from the outside world using pure predicates, notably
grammar rules (DCG). Using pure predicates makes non-deterministic
processing of input much simpler.
Pure input uses coroutining (freeze/2) to read input from the external
source into a list _|on demand|_. The overhead of lazy reading is more
than compensated for by using block reads based on read_pending_input/3.
@tbd Provide support for alternative input readers, e.g. reading
terms, tokens, etc.
@tbd Support non-repositioning streams, such as sockets and pipes.
@author Ulrich Neumerkel
@author Jan Wielemaker
*/
% Declare the options accepted in the 3rd argument of
% phrase_from_file/3: buffer_size is consumed by phrase_from_file/3
% itself, all other options are those of the 4th argument of open/4.
:- predicate_options(phrase_from_file/3, 3,
[ buffer_size(positive_integer),
pass_to(system:open/4, 4)
]).
%% phrase_from_file(:Grammar, +File) is nondet.
%
% Process the content of File using the DCG rule Grammar. The
% space usage of this mechanism depends on the length of the not
% committed part of Grammar. Committed parts of the temporary list
% are reclaimed by the garbage collector, while the list is
% extended on demand. Here is a very simple definition for
% searching a string in a file:
%
% ==
% ... --> []|[_],... .
%
% file_contains(File, Pattern) :-
% phrase_from_file((..., Pattern, ...), File).
%
% match_count(File, Pattern, Count) :-
% findall(x, file_contains(File, Pattern), Xs),
% length(Xs, Count).
% ==
%
% This can be called as (note that the pattern must be a string
% (code list)):
%
% ==
% ?- match_count('pure_input.pl', "file", Count).
% ==
:- meta_predicate
phrase_from_file(//, +), % `//` marks the argument as a DCG body
phrase_from_file(//, +, +),
phrase_from_stream(//, +).
phrase_from_file(Grammar, File) :-
    % Delegate to phrase_from_file/3 with an empty option list.
    NoOptions = [],
    phrase_from_file(Grammar, File, NoOptions).
%% phrase_from_file(:Grammar, +File, +Options) is nondet.
%
% As phrase_from_file/2, providing additional Options. Options are
% passed to open/4, except for =buffer_size=, which is passed to
% set_stream/2. If not specified, the default buffer size is 512
% bytes. Of particular importance are the open/4 options =type=
% and =encoding=.
phrase_from_file(Grammar, File, Options) :-
    % Split off buffer_size (default 512); whatever remains is handed
    % to open/4 unchanged.
    select_option(buffer_size(BufferSize), Options, OpenOptions, 512),
    % The stream is closed however the grammar terminates
    % (exit, failure or exception).
    setup_call_cleanup(
        open(File, read, Stream, OpenOptions),
        phrase_stream(Grammar, Stream, BufferSize),
        close(Stream)).
% phrase_stream(Grammar, In, BufferSize)
%
% Helper for phrase_from_file/3: apply BufferSize to the stream In and
% then run Grammar on its content.
phrase_stream(Grammar, In, BufferSize) :-
    set_stream(In, buffer_size(BufferSize)),
    phrase_from_stream(Grammar, In).
%% phrase_from_stream(:Grammar, +Stream)
%
% Helper for phrase_from_file/3. This predicate cooperates with
% syntax_error//1 to generate syntax error locations for grammars.
phrase_from_stream(Grammar, In) :-
    % LazyCodes is extended from In on demand while Grammar runs.
    stream_to_lazy_list(In, LazyCodes),
    phrase(Grammar, LazyCodes).
%% syntax_error(+Error)//
%
% Throw the syntax error Error at the current location of the
% input. This predicate is designed to be called from the handler
% of phrase_from_file/3.
%
% @throws error(syntax_error(Error), Location)
syntax_error(Error) -->
    % Resolve the current input location first, then raise.
    lazy_list_location(Where),
    { Exception = error(syntax_error(Error), Where),
      throw(Exception)
    }.
%% lazy_list_location(-Location)// is det.
%
% True when Location is an (error) location term that represents
% the current location in the DCG list.
%
% @arg Location is a term file(Name, Line, LinePos, CharNo) or
% stream(Stream, Line, LinePos, CharNo) if no file is
% associated with the stream behind the lazy list. If nothing
% has been read from the stream yet, Line and LinePos are those
% of the position at which the lazy list was created. Finally,
% if the lazy list is fully materialized (ends in =|[]|=),
% Location is unified with `end_of_file-CharCount`.
% @error type_error(lazy_list, List) if the list ends neither in a
% frozen tail nor in =|[]|=.
% @see lazy_list_character_count//1 only provides the character
% count.

lazy_list_location(Location, Here, Here) :-
    % DCG wrapper: inspects the list without consuming input.
    lazy_list_location(Here, Location).

lazy_list_location(Here, Location) :-
    % Skipped is the number of already materialized elements between
    % Here and the tail of the list.
    '$skip_list'(Skipped, Here, Tail),
    (   attvar(Tail)
    ->  % The delayed reader goal on the tail records the stream, the
        % start position of the last block read (PrevPos) and the
        % position at which the next block starts (Pos).
        frozen(Tail,
               pure_input:read_to_input_stream(Stream, PrevPos, Pos, _List)),
        Details = [Line, LinePos, CharNo],
        (   stream_property(Stream, file_name(File))
        ->  PosParts = [file, File|Details]
        ;   PosParts = [stream, Stream|Details]
        ),
        Location =.. PosParts,
        stream_position_data(char_count, Pos, EndRecordCharNo),
        CharNo is EndRecordCharNo - Skipped,
        (   PrevPos == (-)
        ->  % Nothing has been read yet: stream_to_lazy_list/2 passes
            % `-` as PrevPos, which is not a stream position and can
            % neither be seeked to nor inspected. The location is the
            % one recorded in Pos.
            stream_position_data(line_count, Pos, Line),
            stream_position_data(line_position, Pos, LinePos)
        ;   % Re-read the last block from its start up to the current
            % character so the stream computes line and line position.
            set_stream_position(Stream, PrevPos),
            stream_position_data(char_count, PrevPos, StartRecordCharNo),
            Skip is CharNo-StartRecordCharNo,
            forall(between(1, Skip, _), get_code(Stream, _)),
            stream_property(Stream, position(ErrorPos)),
            stream_position_data(line_count, ErrorPos, Line),
            stream_position_data(line_position, ErrorPos, LinePos)
        )
    ;   Tail == []
    ->  Location = end_of_file-Skipped
    ;   type_error(lazy_list, Here)
    ).
%% lazy_list_character_count(-CharCount)//
%
% True when CharCount is the current character count in the Lazy
% list. The character count is computed by finding the distance to
% the next frozen tail of the lazy list. CharCount is one of:
%
% - An integer
% - A term end_of_file-Count
%
% @see lazy_list_location//1 provides full details of the location
% for error reporting.
lazy_list_character_count(CharCount, List, List) :-
    % DCG wrapper: inspects the list without consuming input.
    lazy_list_character_count(List, CharCount).

lazy_list_character_count(List, CharCount) :-
    % Length is the number of materialized elements between List and
    % its tail End.
    '$skip_list'(Length, List, End),
    (   attvar(End)
    ->  % Pos is the stream position stored in the delayed reader goal,
        % i.e. where the next block would be read from.
        frozen(End,
               pure_input:read_to_input_stream(_, _, Pos, _)),
        stream_position_data(char_count, Pos, EndCharNo),
        CharCount is EndCharNo - Length
    ;   End == []
    ->  CharCount = end_of_file-Length
    ;   type_error(lazy_list, List)
    ).
%% stream_to_lazy_list(+Stream, -List) is det.
%
% Create a lazy list representing the character codes in Stream.
% It must be possible to reposition Stream. List is a list that
% ends in a delayed goal. List can be unified completely
% transparently with a (partial) list and processed transparently
% using DCGs, but please be aware that a lazy list is not the same
% as a materialized list in all respects.
%
% Typically, this predicate is used as a building block for more
% high level safe predicates such as phrase_from_file/2.
%
% @tbd Enhance support for lazy lists throughout the system.
stream_to_lazy_list(Stream, List) :-
    % `-` marks that no block has been read from Stream yet.
    NoPrevPos = (-),
    stream_to_lazy_list(Stream, NoPrevPos, List).

% stream_to_lazy_list(+Stream, +PrevPos, -List)
%
% List is a fresh variable carrying a delayed read_to_input_stream/4
% goal. The goal records PrevPos together with the current position
% of Stream.
stream_to_lazy_list(Stream, PrevPos, List) :-
    stream_property(Stream, position(StartPos)),
    freeze(List, read_to_input_stream(Stream, PrevPos, StartPos, List)).
% read_to_input_stream(+Stream, +PrevPos, +Pos, ?List)
%
% Delayed goal installed by stream_to_lazy_list/3; it runs when List
% gets bound. PrevPos is not used here, it is only recorded for
% lazy_list_location/2.
read_to_input_stream(Stream, _PrevPos, Pos, List) :-
    % Always reposition: other code (e.g. lazy_list_location/2) may
    % have moved the stream since this goal was created.
    set_stream_position(Stream, Pos),
    (   \+ at_end_of_stream(Stream)
    ->  % Materialize one block and hang a new lazy tail behind it.
        read_pending_input(Stream, List, Rest),
        stream_to_lazy_list(Stream, Pos, Rest)
    ;   List = []
    ).