417 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			417 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | /*  Part of SWI-Prolog | ||
|  | 
 | ||
|  |     Author:        Jan Wielemaker | ||
|  |     E-mail:        J.Wielemaker@vu.nl | ||
|  |     WWW:           http://www.swi-prolog.org | ||
|  |     Copyright (C): 2002-2013, University of Amsterdam | ||
|  | 			      VU University Amsterdam | ||
|  | 
 | ||
|  |     This program is free software; you can redistribute it and/or | ||
|  |     modify it under the terms of the GNU General Public License | ||
|  |     as published by the Free Software Foundation; either version 2 | ||
|  |     of the License, or (at your option) any later version. | ||
|  | 
 | ||
|  |     This program is distributed in the hope that it will be useful, | ||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
|  |     GNU General Public License for more details. | ||
|  | 
 | ||
|  |     You should have received a copy of the GNU Lesser General Public | ||
|  |     License along with this library; if not, write to the Free Software | ||
|  |     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA | ||
|  | 
 | ||
|  |     As a special exception, if you link this library with other files, | ||
|  |     compiled with a Free Software compiler, to produce an executable, this | ||
|  |     library does not by itself cause the resulting executable to be covered | ||
|  |     by the GNU General Public License. This exception does not however | ||
|  |     invalidate any other reasons why the executable file might be covered by | ||
|  |     the GNU General Public License. | ||
|  | */ | ||
|  | 
 | ||
|  | :- module(dcg_basics, | ||
|  | 	  [ white//0,			% <white inside line> | ||
|  | 	    whites//0,			% <white inside line>* | ||
|  | 	    blank//0,			% <blank> | ||
|  | 	    blanks//0,			% <blank>* | ||
|  | 	    nonblank//1,		% <nonblank> | ||
|  | 	    nonblanks//1,		% <nonblank>* --> chars		(long) | ||
|  | 	    blanks_to_nl//0,		% [space,tab,ret]*nl | ||
|  | 	    string//1,			% <any>* -->chars		(short) | ||
|  | 	    string_without//2,		% Exclude, -->chars		(long) | ||
|  | 					% Characters | ||
|  | 	    alpha_to_lower//1,		% Get lower|upper, return lower | ||
|  | 					% Decimal numbers | ||
|  | 	    digits//1,			% [0-9]* -->chars | ||
|  | 	    digit//1,			% [0-9] --> char | ||
|  | 	    integer//1,			% [+-][0-9]+ --> integer | ||
|  | 	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float | ||
|  | 	    number//1,			% integer | float | ||
|  | 					% Hexadecimal numbers | ||
|  | 	    xdigits//1,			% [0-9a-f]* --> 0-15* | ||
|  | 	    xdigit//1,			% [0-9a-f] --> 0-15 | ||
|  | 	    xinteger//1,		% [0-9a-f]+ --> integer | ||
|  | 
 | ||
|  | 	    prolog_var_name//1,		% Read a Prolog variable name | ||
|  | 
 | ||
|  | 	    eos//0,			% Test end of input. | ||
|  | 
 | ||
|  | 					% generation (TBD) | ||
|  | 	    atom//1			% generate atom | ||
|  | 	  ]). | ||
|  | :- use_module(library(lists)). | ||
|  | 
 | ||
|  | 
 | ||
|  | /** <module> Various general DCG utilities | ||
|  | 
 | ||
|  | This library provides various commonly  used   DCG  primitives acting on | ||
|  | list  of  character  *codes*.  Character   classification  is  based  on | ||
|  | code_type/2. | ||
|  | 
 | ||
|  | This module started its life as  library(http/dcg_basics) to support the | ||
|  | HTTP protocol. Since then, it was increasingly  used in code that has no | ||
|  | relation to HTTP and therefore  this  library   was  moved  to  the core | ||
|  | library. | ||
|  | 
 | ||
|  | @tbd	This is just a starting point. We need a comprehensive set of | ||
|  | 	generally useful DCG primitives. | ||
|  | */ | ||
|  | 
 | ||
|  | %%	string_without(+End, -Codes)// is det. | ||
|  | % | ||
|  | %	Take as many tokens from the input until the next character code | ||
|  | %	appears in the list End. The terminating  code itself is left on | ||
|  | %	the input. Typical use is to read  upto a defined delimiter such | ||
|  | %	as a newline or other reserved character.  For example: | ||
|  | % | ||
|  | %	    == | ||
|  | %	        ..., | ||
|  | %	        string_without("\n", RestOfLine) | ||
|  | %	    == | ||
|  | % | ||
|  | %	@arg End is a list of character codes. | ||
|  | %	@see string//1. | ||
|  | 
 | ||
|  | string_without(End, Codes) --> | ||
|  | 	{ string(End), !, | ||
|  | 	  string_codes(End, EndCodes) | ||
|  | 	}, | ||
|  | 	list_string_without(EndCodes, Codes). | ||
|  | string_without(End, Codes) --> | ||
|  | 	list_string_without(End, Codes). | ||
|  | 
 | ||
|  | list_string_without(Not, [C|T]) --> | ||
|  | 	[C], | ||
|  | 	{ \+ memberchk(C, Not) | ||
|  | 	}, !, | ||
|  | 	list_string_without(Not, T). | ||
|  | list_string_without(_, []) --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | %%	string(-Codes)// is nondet. | ||
|  | % | ||
|  | %	Take as few as possible tokens from the input, taking one more | ||
|  | %	each time on backtracking. This code is normally followed by a | ||
|  | %	test for a delimiter.  For example: | ||
|  | % | ||
|  | %	== | ||
|  | %	upto_colon(Atom) --> | ||
|  | %		string(Codes), ":", !, | ||
|  | %		{ atom_codes(Atom, Codes) }. | ||
|  | %	== | ||
|  | % | ||
|  | %	@see string_without//2. | ||
|  | 
 | ||
|  | string([]) --> | ||
|  | 	[]. | ||
|  | string([H|T]) --> | ||
|  | 	[H], | ||
|  | 	string(T). | ||
|  | 
 | ||
|  | %%	blanks// is det. | ||
|  | % | ||
|  | %	Skip zero or more white-space characters. | ||
|  | 
 | ||
|  | blanks --> | ||
|  | 	blank, !, | ||
|  | 	blanks. | ||
|  | blanks --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | %%	blank// is semidet. | ||
|  | % | ||
|  | %	Take next =space= character from input. Space characters include | ||
|  | %	newline. | ||
|  | % | ||
|  | %	@see white//0 | ||
|  | 
 | ||
|  | blank --> | ||
|  | 	[C], | ||
|  | 	{ nonvar(C), | ||
|  | 	  code_type(C, space) | ||
|  | 	}. | ||
|  | 
 | ||
|  | %%	nonblanks(-Codes)// is det. | ||
|  | % | ||
|  | %	Take all =graph= characters | ||
|  | 
 | ||
|  | nonblanks([H|T]) --> | ||
|  | 	[H], | ||
|  | 	{ code_type(H, graph) | ||
|  | 	}, !, | ||
|  | 	nonblanks(T). | ||
|  | nonblanks([]) --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | %%	nonblank(-Code)// is semidet. | ||
|  | % | ||
|  | %	Code is the next non-blank (=graph=) character. | ||
|  | 
 | ||
|  | nonblank(H) --> | ||
|  | 	[H], | ||
|  | 	{ code_type(H, graph) | ||
|  | 	}. | ||
|  | 
 | ||
|  | %%	blanks_to_nl// is semidet. | ||
|  | % | ||
|  | %	Take a sequence of blank//0 codes if banks are followed by a | ||
|  | %	newline or end of the input. | ||
|  | 
 | ||
|  | blanks_to_nl --> | ||
|  | 	"\n", !. | ||
|  | blanks_to_nl --> | ||
|  | 	blank, !, | ||
|  | 	blanks_to_nl. | ||
|  | blanks_to_nl --> | ||
|  | 	eos. | ||
|  | 
 | ||
|  | %%	whites// is det. | ||
|  | % | ||
|  | %	Skip white space _inside_ a line. | ||
|  | % | ||
|  | %	@see blanks//0 also skips newlines. | ||
|  | 
 | ||
|  | whites --> | ||
|  | 	white, !, | ||
|  | 	whites. | ||
|  | whites --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | %%	white// is semidet. | ||
|  | % | ||
|  | %	Take next =white= character from input. White characters do | ||
|  | %	_not_ include newline. | ||
|  | 
 | ||
|  | white --> | ||
|  | 	[C], | ||
|  | 	{ nonvar(C), | ||
|  | 	  code_type(C, white) | ||
|  | 	}. | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	 CHARACTER STUFF	* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	alpha_to_lower(+C)// is det. | ||
|  | %%	alpha_to_lower(-C)// is semidet. | ||
|  | % | ||
|  | %	Read a letter (class  =alpha=)  and   return  it  as a lowercase | ||
|  | %	letter. In output mode this simply emits the character. | ||
|  | 
 | ||
|  | alpha_to_lower(L) --> | ||
|  | 	{ integer(L) }, !, | ||
|  | 	[L]. | ||
|  | alpha_to_lower(L) --> | ||
|  | 	[C], | ||
|  | 	{ code_type(C, alpha), | ||
|  | 	  code_type(C, to_upper(L)) | ||
|  | 	}. | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	      NUMBERS		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	digits(?Chars)// is det. | ||
|  | %%	digit(?Char)// is det. | ||
|  | %%	integer(?Integer)// is det. | ||
|  | % | ||
|  | %	Number processing. The predicate  digits//1   matches  a posibly | ||
|  | %	empty set of digits,  digit//1  processes   a  single  digit and | ||
|  | %	integer processes an  optional  sign   followed  by  a non-empty | ||
|  | %	sequence of digits into an integer. | ||
|  | 
 | ||
|  | digits([H|T]) --> | ||
|  | 	digit(H), !, | ||
|  | 	digits(T). | ||
|  | digits([]) --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | digit(C) --> | ||
|  | 	[C], | ||
|  | 	{ code_type(C, digit) | ||
|  | 	}. | ||
|  | 
 | ||
|  | integer(I, Head, Tail) :- | ||
|  | 	integer(I), !, | ||
|  | 	format(codes(Head, Tail), '~w', [I]). | ||
|  | integer(I) --> | ||
|  | 	int_codes(Codes), | ||
|  | 	{ number_codes(I, Codes) | ||
|  | 	}. | ||
|  | 
 | ||
|  | int_codes([C,D0|D]) --> | ||
|  | 	sign(C), !, | ||
|  | 	digit(D0), | ||
|  | 	digits(D). | ||
|  | int_codes([D0|D]) --> | ||
|  | 	digit(D0), | ||
|  | 	digits(D). | ||
|  | 
 | ||
|  | 
 | ||
|  | %%	float(?Float)// is det. | ||
|  | % | ||
|  | %	Process a floating  point  number.   The  actual  conversion  is | ||
|  | %	controlled by number_codes/2. | ||
|  | 
 | ||
|  | float(F, Head, Tail) :- | ||
|  | 	float(F), !, | ||
|  | 	with_output_to(codes(Head, Tail), write(F)). | ||
|  | float(F) --> | ||
|  | 	number(F), | ||
|  | 	{ float(F) }. | ||
|  | 
 | ||
|  | %%	number(+Number)// is det. | ||
|  | %%	number(-Number)// is semidet. | ||
|  | % | ||
|  | %	Generate extract a number. Handles   both  integers and floating | ||
|  | %	point numbers. | ||
|  | 
 | ||
|  | number(N, Head, Tail) :- | ||
|  | 	number(N), !, | ||
|  | 	format(codes(Head, Tail), '~w', N). | ||
|  | number(N) --> | ||
|  | 	int_codes(I), | ||
|  | 	(   dot, | ||
|  | 	    digit(DF0), | ||
|  | 	    digits(DF) | ||
|  | 	->  {F = [0'., DF0|DF]} | ||
|  | 	;   {F = ""} | ||
|  | 	), | ||
|  | 	(   exp | ||
|  | 	->  int_codes(DI), | ||
|  | 	    {E=[0'e|DI]} | ||
|  | 	;   {E = ""} | ||
|  | 	), | ||
|  | 	{ append([I, F, E], Codes), | ||
|  | 	  number_codes(N, Codes) | ||
|  | 	}. | ||
|  | 
 | ||
|  | sign(0'-) --> "-". | ||
|  | sign(0'+) --> "+". | ||
|  | 
 | ||
|  | dot --> ".". | ||
|  | 
 | ||
|  | exp --> "e". | ||
|  | exp --> "E". | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	    HEX NUMBERS		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	xinteger(+Integer)// is det. | ||
|  | %%	xinteger(-Integer)// is semidet. | ||
|  | % | ||
|  | %	Generate or extract an integer from   a  sequence of hexadecimal | ||
|  | %	digits. | ||
|  | 
 | ||
|  | xinteger(Val, Head, Tail) :- | ||
|  | 	integer(Val), | ||
|  | 	format(codes(Head, Tail), '~16r', [Val]). | ||
|  | xinteger(Val) --> | ||
|  | 	xdigit(D0), | ||
|  | 	xdigits(D), | ||
|  | 	{ mkval([D0|D], 16, Val) | ||
|  | 	}. | ||
|  | 
 | ||
|  | %%	xdigit(-Weight)// is semidet. | ||
|  | % | ||
|  | %	True if the next code is a  hexdecimal digit with Weight. Weight | ||
|  | %	is between 0 and 15. | ||
|  | 
 | ||
|  | xdigit(D) --> | ||
|  | 	[C], | ||
|  | 	{ code_type(C, xdigit(D)) | ||
|  | 	}. | ||
|  | 
 | ||
|  | %%	xdigits(-WeightList)// is det. | ||
|  | % | ||
|  | %	List of weights of a sequence of hexadecimal codes.  WeightList | ||
|  | %	may be empty. | ||
|  | 
 | ||
|  | xdigits([D0|D]) --> | ||
|  | 	xdigit(D0), !, | ||
|  | 	xdigits(D). | ||
|  | xdigits([]) --> | ||
|  | 	[]. | ||
|  | 
 | ||
|  | mkval([W0|Weights], Base, Val) :- | ||
|  | 	mkval(Weights, Base, W0, Val). | ||
|  | 
 | ||
|  | mkval([], _, W, W). | ||
|  | mkval([H|T], Base, W0, W) :- | ||
|  | 	W1 is W0*Base+H, | ||
|  | 	mkval(T, Base, W1, W). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   END-OF-STRING	* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	eos// | ||
|  | % | ||
|  | %	Matches  end-of-input.  The  implementation    behaves   as  the | ||
|  | %	following portable implementation: | ||
|  | % | ||
|  | %	  == | ||
|  | %	  eos --> call(eos_). | ||
|  | %	  eos_([], []). | ||
|  | %	  == | ||
|  | % | ||
|  | %	@tbd	This is a difficult concept and violates the _context free_ | ||
|  | %		property of DCGs.  Explain the exact problems. | ||
|  | 
 | ||
|  | eos([], []). | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   PROLOG SYNTAX		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	prolog_var_name(-Name:atom)// is semidet. | ||
|  | % | ||
|  | %	Matches a Prolog variable name. Primarily  intended to deal with | ||
|  | %	quasi quotations that embed Prolog variables. | ||
|  | 
 | ||
|  | prolog_var_name(Name) --> | ||
|  | 	[C0], { code_type(C0, prolog_var_start) }, !, | ||
|  | 	prolog_id_cont(CL), | ||
|  | 	{ atom_codes(Name, [C0|CL]) }. | ||
|  | 
 | ||
|  | prolog_id_cont([H|T]) --> | ||
|  | 	[H], { code_type(H, prolog_identifier_continue) }, !, | ||
|  | 	prolog_id_cont(T). | ||
|  | prolog_id_cont([]) --> "". | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	     GENERATION		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	atom(+Atom)// is det. | ||
|  | % | ||
|  | %	Generate codes of Atom.  Current implementation uses write/1, | ||
|  | %	dealing with any Prolog term. | ||
|  | 
 | ||
|  | atom(Atom, Head, Tail) :- | ||
|  | 	format(codes(Head, Tail), '~w', [Atom]). |