/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 2008-2013, University of Amsterdam
                              VU University Amsterdam
                              Vienna University of Technology

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
    MA 02110-1301 USA

    As a special exception, if you link this library with other files,
    compiled with a Free Software compiler, to produce an executable, this
    library does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

:- module(pure_input,
          [ phrase_from_file/2,           % :Grammar, +File
            phrase_from_file/3,           % :Grammar, +File, +Options
            syntax_error//1,              % +ErrorTerm
                                          % Low level interface
            lazy_list_location//1,        % -Location
            lazy_list_character_count//1, % -CharacterCount
            phrase_from_stream/2,         % :Grammar, +Stream
            stream_to_lazy_list/2         % +Stream, -List
          ]).
:- use_module(library(option)).
:- use_module(library(error)).

/** <module> Pure Input from files

This module is part of pio.pl and deals with _pure_ _input_: input
streams from the outside world are processed with pure predicates,
notably grammar rules (DCG).  Staying pure makes non-deterministic
processing of input much simpler.

Pure input relies on coroutining (freeze/2) to read data from the
external source into a list _|on demand|_.  The overhead of lazy
reading is more than compensated for by using block reads based on
read_pending_input/3.

@tbd    Provide support for alternative input readers, e.g. reading
        terms, tokens, etc.
@tbd    Support non-repositioning streams, such as sockets and pipes.
@author Ulrich Neumerkel
@author Jan Wielemaker
*/

:- predicate_options(phrase_from_file/3, 3,
                     [ buffer_size(positive_integer),
                       pass_to(system:open/4, 4)
                     ]).

%%  phrase_from_file(:Grammar, +File) is nondet.
%
%   Run the DCG rule Grammar over the content of File.  Equivalent
%   to phrase_from_file/3 with an empty option list.
%
%   Space usage depends on the length of the part of the input that
%   Grammar has not yet committed to: committed parts of the
%   temporary list are reclaimed by the garbage collector, while
%   the list is extended on demand.  A very simple definition for
%   searching a string in a file:
%
%     ==
%     ... --> []|[_],... .
%
%     file_contains(File, Pattern) :-
%             phrase_from_file((..., Pattern, ...), File).
%
%     match_count(File, Pattern, Count) :-
%             findall(x, file_contains(File, Pattern), Xs),
%             length(Xs, Count).
%     ==
%
%   This can be called as follows (note that the pattern must be a
%   string, i.e. a code list):
%
%     ==
%     ?- match_count('pure_input.pl', "file", Count).
%     ==

:- meta_predicate
    phrase_from_file(//, +),
    phrase_from_file(//, +, +),
    phrase_from_stream(//, +).

phrase_from_file(Rule, Path) :-
    phrase_from_file(Rule, Path, []).

%%  phrase_from_file(:Grammar, +File, +Options) is nondet.
%
%   As phrase_from_file/2, providing additional Options.  Options
%   are passed to open/4, except for =buffer_size=, which is passed
%   to set_stream/2.  If not specified, the default buffer size is
%   512 bytes.  Of particular importance are the open/4 options
%   =type= and =encoding=.

phrase_from_file(Grammar, File, Options) :-
    % Split off our own buffer_size option; everything else goes to open/4.
    (   select_option(buffer_size(BS), Options, OpenOptions)
    ->  true
    ;   BS = 512,
        OpenOptions = Options
    ),
    setup_call_cleanup(
        open(File, read, In, OpenOptions),
        phrase_stream(Grammar, In, BS),
        close(In)).

%   phrase_stream(:Grammar, +In, +BufferSize)
%
%   Set the buffer size of In to BufferSize and run Grammar over
%   the stream content using phrase_from_stream/2.

phrase_stream(Grammar, In, BufferSize) :-
    set_stream(In, buffer_size(BufferSize)),
    phrase_from_stream(Grammar, In).

%%  phrase_from_stream(:Grammar, +Stream)
%
%   Helper for phrase_from_file/3.  Turns Stream into a lazy list
%   and runs Grammar over it.  This predicate cooperates with
%   syntax_error//1 to generate syntax error locations for grammars.

phrase_from_stream(Grammar, In) :-
    stream_to_lazy_list(In, List),
    phrase(Grammar, List).

%%  syntax_error(+Error)//
%
%   Throw the syntax error Error at the current location of the
%   input.  This predicate is designed to be called from the handler
%   of phrase_from_file/3.
%
%   @throws error(syntax_error(Error), Location)

syntax_error(Error) -->
    lazy_list_location(Location),
    { throw(error(syntax_error(Error), Location))
    }.

%%  lazy_list_location(-Location)// is det.
%
%   True when Location is an (error) location term that represents
%   the current location in the DCG list.  Does not consume input.
%
%   @arg    Location is a term file(Name, Line, LinePos, CharNo) or
%           stream(Stream, Line, LinePos, CharNo) if no file is
%           associated to the stream RestLazyList.  Finally, if the
%           lazy list is fully materialized (ends in =|[]|=), Location
%           is unified with `end_of_file-CharCount`.
%   @throws type_error(lazy_list, List) if the list ends in neither
%           an attributed variable nor =|[]|=.
%   @see    lazy_list_character_count//1 only provides the character
%           count.

lazy_list_location(Location, Here, Here) :-
    lazy_list_location(Here, Location).

%   lazy_list_location(+Here, -Location)
%
%   Worker for lazy_list_location//1.  Walks from Here to the frozen
%   tail of the list and inspects the goal suspended on that tail to
%   find the stream and the stream positions recorded there.
%
%   Side effect: unless nothing has been read yet, Stream is
%   repositioned to compute the line information.

lazy_list_location(Here, Location) :-
    '$skip_list'(Skipped, Here, Tail),
    (   attvar(Tail)
    ->  frozen(Tail,
               pure_input:read_to_input_stream(Stream, PrevPos, Pos, _List)),
        Details = [Line, LinePos, CharNo],
        (   stream_property(Stream, file_name(File))
        ->  PosParts = [file, File|Details]
        ;   PosParts = [stream, Stream|Details]
        ),
        Location =.. PosParts,
        % Pos is the position recorded in the suspended reader; Here is
        % Skipped codes before the frozen tail.
        stream_position_data(char_count, Pos, EndRecordCharNo),
        CharNo is EndRecordCharNo - Skipped,
        (   PrevPos == (-)
        ->  % Nothing has been read yet: stream_to_lazy_list/2 seeds
            % PrevPos with `-`, which is not a stream position and must
            % not be passed to set_stream_position/2.  The location is
            % simply the recorded position Pos.
            stream_position_data(line_count, Pos, Line),
            stream_position_data(line_position, Pos, LinePos)
        ;   % Re-read from PrevPos up to CharNo so the stream itself
            % computes line number and line position.
            % NOTE(review): if Here precedes PrevPos, Skip is negative,
            % nothing is skipped and Line/LinePos describe PrevPos.
            set_stream_position(Stream, PrevPos),
            stream_position_data(char_count, PrevPos, StartRecordCharNo),
            Skip is CharNo-StartRecordCharNo,
            forall(between(1, Skip, _), get_code(Stream, _)),
            stream_property(Stream, position(ErrorPos)),
            stream_position_data(line_count, ErrorPos, Line),
            stream_position_data(line_position, ErrorPos, LinePos)
        )
    ;   Tail == []
    ->  Location = end_of_file-Skipped
    ;   type_error(lazy_list, Here)
    ).

%%  lazy_list_character_count(-CharCount)//
%
%   True when CharCount is the current character count in the lazy
%   list.  The character count is computed by finding the distance
%   to the next frozen tail of the lazy list.  CharCount is one of:
%
%     - An integer
%     - A term end_of_file-Count
%
%   @throws type_error(lazy_list, List) if the list ends in neither
%           an attributed variable nor =|[]|=.
%   @see    lazy_list_location//1 provides full details of the
%           location for error reporting.

lazy_list_character_count(CharCount, Here, Here) :-
    lazy_list_character_count(Here, CharCount).

%   lazy_list_character_count(+Here, -CharNo)
%
%   Worker for lazy_list_character_count//1.  Unlike
%   lazy_list_location/2 it only reads the recorded position and
%   does not reposition the stream.

lazy_list_character_count(Here, CharNo) :-
    '$skip_list'(Skipped, Here, Tail),
    (   attvar(Tail)
    ->  frozen(Tail,
               pure_input:read_to_input_stream(_Stream, _PrevPos, Pos, _List)),
        stream_position_data(char_count, Pos, EndRecordCharNo),
        CharNo is EndRecordCharNo - Skipped
    ;   Tail == []
    ->  CharNo = end_of_file-Skipped
    ;   type_error(lazy_list, Here)
    ).

%%  stream_to_lazy_list(+Stream, -List) is det.
%
%   Create a lazy list representing the character codes in Stream.
%   It must be possible to reposition Stream.  List is a list that
%   ends in a delayed goal.  List can be unified completely
%   transparently to a (partial) list and processed transparently
%   using DCGs, but please be aware that a lazy list is not the same
%   as a materialized list in all respects.
%
%   Typically, this predicate is used as a building block for more
%   high level safe predicates such as phrase_from_file/2.
%
%   @tbd    Enhance support for lazy lists throughout the system.

stream_to_lazy_list(Stream, List) :-
    % `-` marks "no previous position": nothing has been read yet.
    stream_to_lazy_list(Stream, -, List).

%   stream_to_lazy_list(+Stream, +PrevPos, -List)
%
%   Record the current position of Stream and suspend a reader goal
%   on List.  The reader runs as soon as List is bound.  PrevPos is
%   carried along unchanged in the suspended goal.

stream_to_lazy_list(Stream, PrevPos, List) :-
    stream_property(Stream, position(Here)),
    freeze(List, read_to_input_stream(Stream, PrevPos, Here, List)).

%   read_to_input_stream(+Stream, +PrevPos, +Pos, ?List)
%
%   Goal suspended on the tail of a lazy list.  Repositions Stream
%   to Pos and then either closes the list or extends it with the
%   pending input.  PrevPos is not used here; it is only stored in
%   the suspended goal.

read_to_input_stream(Stream, _PrevPos, Pos, List) :-
    set_stream_position(Stream, Pos),
    extend_lazy_list(Stream, Pos, List).

%   extend_lazy_list(+Stream, +Pos, ?List)
%
%   Unify List with [] at end of input.  Otherwise unify it with the
%   pending input of Stream, ending in a new lazy tail whose
%   previous position is Pos.

extend_lazy_list(Stream, _Pos, List) :-
    at_end_of_stream(Stream),
    !,
    List = [].
extend_lazy_list(Stream, Pos, List) :-
    read_pending_input(Stream, List, Rest),
    stream_to_lazy_list(Stream, Pos, Rest).