418 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			418 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
/*  Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        J.Wielemaker@vu.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 2002-2013, University of Amsterdam
 | 
						|
			      VU University Amsterdam
 | 
						|
 | 
						|
    This program is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU General Public License
 | 
						|
    as published by the Free Software Foundation; either version 2
 | 
						|
    of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 | 
						|
 | 
						|
    As a special exception, if you link this library with other files,
 | 
						|
    compiled with a Free Software compiler, to produce an executable, this
 | 
						|
    library does not by itself cause the resulting executable to be covered
 | 
						|
    by the GNU General Public License. This exception does not however
 | 
						|
    invalidate any other reasons why the executable file might be covered by
 | 
						|
    the GNU General Public License.
 | 
						|
*/
 | 
						|
 | 
						|
:- module(dcg_basics,
 | 
						|
	  [ white//0,			% <white inside line>
 | 
						|
	    whites//0,			% <white inside line>*
 | 
						|
	    blank//0,			% <blank>
 | 
						|
	    blanks//0,			% <blank>*
 | 
						|
	    nonblank//1,		% <nonblank>
 | 
						|
	    nonblanks//1,		% <nonblank>* --> chars		(long)
 | 
						|
	    blanks_to_nl//0,		% [space,tab,ret]*nl
 | 
						|
	    string//1,			% <any>* -->chars		(short)
 | 
						|
	    string_without//2,		% Exclude, -->chars		(long)
 | 
						|
					% Characters
 | 
						|
	    alpha_to_lower//1,		% Get lower|upper, return lower
 | 
						|
					% Decimal numbers
 | 
						|
	    digits//1,			% [0-9]* -->chars
 | 
						|
	    digit//1,			% [0-9] --> char
 | 
						|
	    integer//1,			% [+-][0-9]+ --> integer
 | 
						|
	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
 | 
						|
	    number//1,			% integer | float
 | 
						|
					% Hexadecimal numbers
 | 
						|
	    xdigits//1,			% [0-9a-f]* --> 0-15*
 | 
						|
	    xdigit//1,			% [0-9a-f] --> 0-15
 | 
						|
	    xinteger//1,		% [0-9a-f]+ --> integer
 | 
						|
 | 
						|
	    prolog_var_name//1,		% Read a Prolog variable name
 | 
						|
 | 
						|
	    eos//0,			% Test end of input.
 | 
						|
 | 
						|
					% generation (TBD)
 | 
						|
	    atom//1			% generate atom
 | 
						|
	  ]).
 | 
						|
:- use_module(library(lists)).
 | 
						|
 | 
						|
 | 
						|
/** <module> Various general DCG utilities
 | 
						|
@ingroup SWILibrary
 | 
						|
 | 
						|
This library provides various commonly  used   DCG  primitives acting on
 | 
						|
list  of  character  *codes*.  Character   classification  is  based  on
 | 
						|
code_type/2.
 | 
						|
 | 
						|
This module started its life as  library(http/dcg_basics) to support the
 | 
						|
HTTP protocol. Since then, it was increasingly  used in code that has no
 | 
						|
relation to HTTP and therefore  this  library   was  moved  to  the core
 | 
						|
library.
 | 
						|
 | 
						|
@tbd	This is just a starting point. We need a comprehensive set of
 | 
						|
	generally useful DCG primitives.
 | 
						|
*/
 | 
						|
 | 
						|
%%	string_without(+End, -Codes)// is det.
 | 
						|
%
 | 
						|
%	Take as many tokens from the input until the next character code
 | 
						|
%	appears in the list End. The terminating  code itself is left on
 | 
						|
%	the input. Typical use is to read  upto a defined delimiter such
 | 
						|
%	as a newline or other reserved character.  For example:
 | 
						|
%
 | 
						|
%	    ==
 | 
						|
%	        ...,
 | 
						|
%	        string_without("\n", RestOfLine)
 | 
						|
%	    ==
 | 
						|
%
 | 
						|
%	@arg End is a list of character codes.
 | 
						|
%	@see string//1.
 | 
						|
 | 
						|
string_without(End, Codes) -->
 | 
						|
	{ string(End), !,
 | 
						|
	  string_codes(End, EndCodes)
 | 
						|
	},
 | 
						|
	list_string_without(EndCodes, Codes).
 | 
						|
string_without(End, Codes) -->
 | 
						|
	list_string_without(End, Codes).
 | 
						|
 | 
						|
list_string_without(Not, [C|T]) -->
 | 
						|
	[C],
 | 
						|
	{ \+ memberchk(C, Not)
 | 
						|
	}, !,
 | 
						|
	list_string_without(Not, T).
 | 
						|
list_string_without(_, []) -->
 | 
						|
	[].
 | 
						|
 | 
						|
%%	string(-Codes)// is nondet.
 | 
						|
%
 | 
						|
%	Take as few as possible tokens from the input, taking one more
 | 
						|
%	each time on backtracking. This code is normally followed by a
 | 
						|
%	test for a delimiter.  For example:
 | 
						|
%
 | 
						|
%	==
 | 
						|
%	upto_colon(Atom) -->
 | 
						|
%		string(Codes), ":", !,
 | 
						|
%		{ atom_codes(Atom, Codes) }.
 | 
						|
%	==
 | 
						|
%
 | 
						|
%	@see string_without//2.
 | 
						|
 | 
						|
string([]) -->
 | 
						|
	[].
 | 
						|
string([H|T]) -->
 | 
						|
	[H],
 | 
						|
	string(T).
 | 
						|
 | 
						|
%%	blanks// is det.
 | 
						|
%
 | 
						|
%	Skip zero or more white-space characters.
 | 
						|
 | 
						|
blanks -->
 | 
						|
	blank, !,
 | 
						|
	blanks.
 | 
						|
blanks -->
 | 
						|
	[].
 | 
						|
 | 
						|
%%	blank// is semidet.
 | 
						|
%
 | 
						|
%	Take next =space= character from input. Space characters include
 | 
						|
%	newline.
 | 
						|
%
 | 
						|
%	@see white//0
 | 
						|
 | 
						|
blank -->
 | 
						|
	[C],
 | 
						|
	{ nonvar(C),
 | 
						|
	  code_type(C, space)
 | 
						|
	}.
 | 
						|
 | 
						|
%%	nonblanks(-Codes)// is det.
 | 
						|
%
 | 
						|
%	Take all =graph= characters
 | 
						|
 | 
						|
nonblanks([H|T]) -->
 | 
						|
	[H],
 | 
						|
	{ code_type(H, graph)
 | 
						|
	}, !,
 | 
						|
	nonblanks(T).
 | 
						|
nonblanks([]) -->
 | 
						|
	[].
 | 
						|
 | 
						|
%%	nonblank(-Code)// is semidet.
 | 
						|
%
 | 
						|
%	Code is the next non-blank (=graph=) character.
 | 
						|
 | 
						|
nonblank(H) -->
 | 
						|
	[H],
 | 
						|
	{ code_type(H, graph)
 | 
						|
	}.
 | 
						|
 | 
						|
%%	blanks_to_nl// is semidet.
 | 
						|
%
 | 
						|
%	Take a sequence of blank//0 codes if banks are followed by a
 | 
						|
%	newline or end of the input.
 | 
						|
 | 
						|
blanks_to_nl -->
 | 
						|
	"\n", !.
 | 
						|
blanks_to_nl -->
 | 
						|
	blank, !,
 | 
						|
	blanks_to_nl.
 | 
						|
blanks_to_nl -->
 | 
						|
	eos.
 | 
						|
 | 
						|
%%	whites// is det.
 | 
						|
%
 | 
						|
%	Skip white space _inside_ a line.
 | 
						|
%
 | 
						|
%	@see blanks//0 also skips newlines.
 | 
						|
 | 
						|
whites -->
 | 
						|
	white, !,
 | 
						|
	whites.
 | 
						|
whites -->
 | 
						|
	[].
 | 
						|
 | 
						|
%%	white// is semidet.
 | 
						|
%
 | 
						|
%	Take next =white= character from input. White characters do
 | 
						|
%	_not_ include newline.
 | 
						|
 | 
						|
white -->
 | 
						|
	[C],
 | 
						|
	{ nonvar(C),
 | 
						|
	  code_type(C, white)
 | 
						|
	}.
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	 CHARACTER STUFF	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	alpha_to_lower(+C)// is det.
 | 
						|
%%	alpha_to_lower(-C)// is semidet.
 | 
						|
%
 | 
						|
%	Read a letter (class  =alpha=)  and   return  it  as a lowercase
 | 
						|
%	letter. In output mode this simply emits the character.
 | 
						|
 | 
						|
alpha_to_lower(L) -->
 | 
						|
	{ integer(L) }, !,
 | 
						|
	[L].
 | 
						|
alpha_to_lower(L) -->
 | 
						|
	[C],
 | 
						|
	{ code_type(C, alpha),
 | 
						|
	  code_type(C, to_upper(L))
 | 
						|
	}.
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      NUMBERS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	digits(?Chars)// is det.
 | 
						|
%%	digit(?Char)// is det.
 | 
						|
%%	integer(?Integer)// is det.
 | 
						|
%
 | 
						|
%	Number processing. The predicate  digits//1   matches  a posibly
 | 
						|
%	empty set of digits,  digit//1  processes   a  single  digit and
 | 
						|
%	integer processes an  optional  sign   followed  by  a non-empty
 | 
						|
%	sequence of digits into an integer.
 | 
						|
 | 
						|
digits([H|T]) -->
 | 
						|
	digit(H), !,
 | 
						|
	digits(T).
 | 
						|
digits([]) -->
 | 
						|
	[].
 | 
						|
 | 
						|
digit(C) -->
 | 
						|
	[C],
 | 
						|
	{ code_type(C, digit)
 | 
						|
	}.
 | 
						|
 | 
						|
integer(I, Head, Tail) :-
 | 
						|
	integer(I), !,
 | 
						|
	format(codes(Head, Tail), '~w', [I]).
 | 
						|
integer(I) -->
 | 
						|
	int_codes(Codes),
 | 
						|
	{ number_codes(I, Codes)
 | 
						|
	}.
 | 
						|
 | 
						|
int_codes([C,D0|D]) -->
 | 
						|
	sign(C), !,
 | 
						|
	digit(D0),
 | 
						|
	digits(D).
 | 
						|
int_codes([D0|D]) -->
 | 
						|
	digit(D0),
 | 
						|
	digits(D).
 | 
						|
 | 
						|
 | 
						|
%%	float(?Float)// is det.
 | 
						|
%
 | 
						|
%	Process a floating  point  number.   The  actual  conversion  is
 | 
						|
%	controlled by number_codes/2.
 | 
						|
 | 
						|
float(F, Head, Tail) :-
 | 
						|
	float(F), !,
 | 
						|
	with_output_to(codes(Head, Tail), write(F)).
 | 
						|
float(F) -->
 | 
						|
	number(F),
 | 
						|
	{ float(F) }.
 | 
						|
 | 
						|
%%	number(+Number)// is det.
 | 
						|
%%	number(-Number)// is semidet.
 | 
						|
%
 | 
						|
%	Generate extract a number. Handles   both  integers and floating
 | 
						|
%	point numbers.
 | 
						|
 | 
						|
number(N, Head, Tail) :-
 | 
						|
	number(N), !,
 | 
						|
	format(codes(Head, Tail), '~w', N).
 | 
						|
number(N) -->
 | 
						|
	int_codes(I),
 | 
						|
	(   dot,
 | 
						|
	    digit(DF0),
 | 
						|
	    digits(DF)
 | 
						|
	->  {F = [0'., DF0|DF]}
 | 
						|
	;   {F = ""}
 | 
						|
	),
 | 
						|
	(   exp
 | 
						|
	->  int_codes(DI),
 | 
						|
	    {E=[0'e|DI]}
 | 
						|
	;   {E = ""}
 | 
						|
	),
 | 
						|
	{ append([I, F, E], Codes),
 | 
						|
	  number_codes(N, Codes)
 | 
						|
	}.
 | 
						|
 | 
						|
sign(0'-) --> "-".
 | 
						|
sign(0'+) --> "+".
 | 
						|
 | 
						|
dot --> ".".
 | 
						|
 | 
						|
exp --> "e".
 | 
						|
exp --> "E".
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    HEX NUMBERS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	xinteger(+Integer)// is det.
 | 
						|
%%	xinteger(-Integer)// is semidet.
 | 
						|
%
 | 
						|
%	Generate or extract an integer from   a  sequence of hexadecimal
 | 
						|
%	digits.
 | 
						|
 | 
						|
xinteger(Val, Head, Tail) :-
 | 
						|
	integer(Val),
 | 
						|
	format(codes(Head, Tail), '~16r', [Val]).
 | 
						|
xinteger(Val) -->
 | 
						|
	xdigit(D0),
 | 
						|
	xdigits(D),
 | 
						|
	{ mkval([D0|D], 16, Val)
 | 
						|
	}.
 | 
						|
 | 
						|
%%	xdigit(-Weight)// is semidet.
 | 
						|
%
 | 
						|
%	True if the next code is a  hexdecimal digit with Weight. Weight
 | 
						|
%	is between 0 and 15.
 | 
						|
 | 
						|
xdigit(D) -->
 | 
						|
	[C],
 | 
						|
	{ code_type(C, xdigit(D))
 | 
						|
	}.
 | 
						|
 | 
						|
%%	xdigits(-WeightList)// is det.
 | 
						|
%
 | 
						|
%	List of weights of a sequence of hexadecimal codes.  WeightList
 | 
						|
%	may be empty.
 | 
						|
 | 
						|
xdigits([D0|D]) -->
 | 
						|
	xdigit(D0), !,
 | 
						|
	xdigits(D).
 | 
						|
xdigits([]) -->
 | 
						|
	[].
 | 
						|
 | 
						|
mkval([W0|Weights], Base, Val) :-
 | 
						|
	mkval(Weights, Base, W0, Val).
 | 
						|
 | 
						|
mkval([], _, W, W).
 | 
						|
mkval([H|T], Base, W0, W) :-
 | 
						|
	W1 is W0*Base+H,
 | 
						|
	mkval(T, Base, W1, W).
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   END-OF-STRING	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	eos//
 | 
						|
%
 | 
						|
%	Matches  end-of-input.  The  implementation    behaves   as  the
 | 
						|
%	following portable implementation:
 | 
						|
%
 | 
						|
%	  ==
 | 
						|
%	  eos --> call(eos_).
 | 
						|
%	  eos_([], []).
 | 
						|
%	  ==
 | 
						|
%
 | 
						|
%	@tbd	This is a difficult concept and violates the _context free_
 | 
						|
%		property of DCGs.  Explain the exact problems.
 | 
						|
 | 
						|
eos([], []).
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   PROLOG SYNTAX		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	prolog_var_name(-Name:atom)// is semidet.
 | 
						|
%
 | 
						|
%	Matches a Prolog variable name. Primarily  intended to deal with
 | 
						|
%	quasi quotations that embed Prolog variables.
 | 
						|
 | 
						|
prolog_var_name(Name) -->
 | 
						|
	[C0], { code_type(C0, prolog_var_start) }, !,
 | 
						|
	prolog_id_cont(CL),
 | 
						|
	{ atom_codes(Name, [C0|CL]) }.
 | 
						|
 | 
						|
prolog_id_cont([H|T]) -->
 | 
						|
	[H], { code_type(H, prolog_identifier_continue) }, !,
 | 
						|
	prolog_id_cont(T).
 | 
						|
prolog_id_cont([]) --> "".
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	     GENERATION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
%%	atom(+Atom)// is det.
 | 
						|
%
 | 
						|
%	Generate codes of Atom.  Current implementation uses write/1,
 | 
						|
%	dealing with any Prolog term.
 | 
						|
 | 
						|
atom(Atom, Head, Tail) :-
 | 
						|
	format(codes(Head, Tail), '~w', [Atom]).
 |