435 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			435 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | /*  $Id$ | ||
|  | 
 | ||
|  |     Part of SWI-Prolog | ||
|  | 
 | ||
|  |     Author:        Jan Wielemaker | ||
|  |     E-mail:        wielemak@science.uva.nl | ||
|  |     WWW:           http://www.swi-prolog.org | ||
|  |     Copyright (C): 1985-2005, University of Amsterdam | ||
|  | 
 | ||
|  |     This program is free software; you can redistribute it and/or | ||
|  |     modify it under the terms of the GNU General Public License | ||
|  |     as published by the Free Software Foundation; either version 2 | ||
|  |     of the License, or (at your option) any later version. | ||
|  | 
 | ||
|  |     This program is distributed in the hope that it will be useful, | ||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
|  |     GNU General Public License for more details. | ||
|  | 
 | ||
|  |     You should have received a copy of the GNU Lesser General Public | ||
|  |     License along with this library; if not, write to the Free Software | ||
|  |     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | ||
|  | 
 | ||
|  |     As a special exception, if you link this library with other files, | ||
|  |     compiled with a Free Software compiler, to produce an executable, this | ||
|  |     library does not by itself cause the resulting executable to be covered | ||
|  |     by the GNU General Public License. This exception does not however | ||
|  |     invalidate any other reasons why the executable file might be covered by | ||
|  |     the GNU General Public License. | ||
|  | */ | ||
|  | 
 | ||
|  | :- module(sgml, | ||
|  | 	  [ load_sgml_file/2,		% +File, -ListOfContent | ||
|  | 	    load_xml_file/2,		% +File, -ListOfContent | ||
|  | 	    load_html_file/2,		% +File, -Document | ||
|  | 
 | ||
|  | 	    load_structure/3,		% +File, -Term, +Options | ||
|  | 
 | ||
|  | 	    load_dtd/2,			% +DTD, +File | ||
|  | 	    load_dtd/3,			% +DTD, +File, +Options | ||
|  | 	    dtd/2,			% +Type, -DTD | ||
|  | 	    dtd_property/2,		% +DTD, ?Property | ||
|  | 
 | ||
|  | 	    new_dtd/2,			% +Doctype, -DTD | ||
|  | 	    free_dtd/1,			% +DTD | ||
|  | 	    open_dtd/3,			% +DTD, +Options, -Stream | ||
|  | 
 | ||
|  | 	    new_sgml_parser/2,		% -Parser, +Options | ||
|  | 	    free_sgml_parser/1,		% +Parser | ||
|  | 	    set_sgml_parser/2,		% +Parser, +Options | ||
|  | 	    get_sgml_parser/2,		% +Parser, +Options | ||
|  | 	    sgml_parse/2,		% +Parser, +Options | ||
|  | 
 | ||
|  | 	    sgml_register_catalog_file/2, % +File, +StartOrEnd | ||
|  | 
 | ||
|  | 	    xml_quote_attribute/3,	% +In, -Quoted, +Encoding | ||
|  | 	    xml_quote_cdata/3,		% +In, -Quoted, +Encoding | ||
|  | 	    xml_quote_attribute/2,	% +In, -Quoted | ||
|  | 	    xml_quote_cdata/2,		% +In, -Quoted | ||
|  | 	    xml_name/1,			% +In | ||
|  | 	    xml_is_dom/1		% +Term | ||
|  | 	  ]). | ||
|  | 
 | ||
|  | :- expects_dialect(swi). | ||
|  | 
 | ||
|  | :- use_module(library(lists)). | ||
|  | :- use_module(library(option)). | ||
|  | 
 | ||
|  | :- multifile user:file_search_path/2. | ||
|  | :- dynamic   user:file_search_path/2. | ||
|  | 
 | ||
|  | user:file_search_path(dtd, '.'). | ||
|  | :- if(current_prolog_flag(version_data, swi(_,_,_,_))). | ||
|  | user:file_search_path(dtd, swi('library/DTD')). | ||
|  | :- else. | ||
|  | user:file_search_path(dtd, library('DTD')). | ||
|  | :- endif. | ||
|  | 
 | ||
|  | sgml_register_catalog_file(File, Location) :- | ||
|  | 	prolog_to_os_filename(File, OsFile), | ||
|  | 	'_sgml_register_catalog_file'(OsFile, Location). | ||
|  | 
 | ||
|  | load_foreign :- | ||
|  | 	current_predicate(_, _:sgml_parse(_,_)), !. | ||
|  | load_foreign :- | ||
|  | 	load_foreign_library(foreign(sgml2pl)). | ||
|  | 
 | ||
|  | register_catalog(Base) :- | ||
|  | 	absolute_file_name(dtd(Base), | ||
|  | 			       [ extensions([soc]), | ||
|  | 				 access(read), | ||
|  | 				 file_errors(fail) | ||
|  | 			       ], | ||
|  | 			       SocFile), | ||
|  | 	sgml_register_catalog_file(SocFile, end). | ||
|  | 
 | ||
|  | init :- | ||
|  | 	load_foreign, | ||
|  | 	ignore(register_catalog('HTML4')). | ||
|  | 
 | ||
|  | :- initialization | ||
|  | 	init. | ||
|  | 	 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   DTD HANDLING		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
|  | Note that concurrent access to DTD objects  is not allowed, and hence we | ||
|  | will allocate and destroy them in each   thread.  Possibibly it would be | ||
|  | nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is | ||
|  | diagnosed to mess with the entity resolution by Fabien Todescato. | ||
|  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | ||
|  | 
 | ||
|  | :- thread_local | ||
|  | 	current_dtd/2. | ||
|  | :- volatile | ||
|  | 	current_dtd/2. | ||
|  | :- thread_local | ||
|  | 	registered_cleanup/0. | ||
|  | :- volatile | ||
|  | 	registered_cleanup/0. | ||
|  | 
 | ||
|  | :- multifile | ||
|  | 	dtd_alias/2. | ||
|  | 
 | ||
|  | dtd_alias(html, 'HTML4'). | ||
|  | 
 | ||
|  | dtd(Type, DTD) :- | ||
|  | 	current_dtd(Type, DTD), !. | ||
|  | dtd(Type, DTD) :- | ||
|  | 	new_dtd(Type, DTD), | ||
|  | 	(   dtd_alias(Type, Base) | ||
|  | 	->  true | ||
|  | 	;   Base = Type | ||
|  | 	), | ||
|  | 	absolute_file_name(dtd(Base), | ||
|  | 			   [ extensions([dtd]), | ||
|  | 			     access(read) | ||
|  | 			   ], DtdFile), | ||
|  | 	load_dtd(DTD, DtdFile), | ||
|  | 	register_cleanup, | ||
|  | 	asserta(current_dtd(Type, DTD)). | ||
|  | 
 | ||
|  | %%	load_dtd(+DTD, +DtdFile, +Options) | ||
|  | %	 | ||
|  | %	Load file into a DTD.  Defined options are: | ||
|  | %	 | ||
|  | %		* dialect(+Dialect) | ||
|  | %		Dialect to use (xml, xmlns, sgml) | ||
|  | % | ||
|  | %		* encoding(+Encoding) | ||
|  | %		Encoding of DTD file | ||
|  | 
 | ||
|  | load_dtd(DTD, DtdFile) :- | ||
|  | 	load_dtd(DTD, DtdFile, []). | ||
|  | load_dtd(DTD, DtdFile, Options) :- | ||
|  | 	split_dtd_options(Options, DTDOptions, FileOptions), | ||
|  | 	open_dtd(DTD, DTDOptions, DtdOut), | ||
|  | 	swi:swi_open(DtdFile, read, DtdIn, FileOptions), | ||
|  | 	swi:swi_copy_stream_data(DtdIn, DtdOut), | ||
|  | 	swi:swi_close(DtdIn), | ||
|  | 	swi:swi_close(DtdOut). | ||
|  | 
 | ||
|  | split_dtd_options([], [], []). | ||
|  | split_dtd_options([H|T], [H|TD], S) :- | ||
|  | 	dtd_option(H), !, | ||
|  | 	split_dtd_options(T, TD, S). | ||
|  | split_dtd_options([H|T], TD, [H|S]) :- | ||
|  | 	split_dtd_options(T, TD, S). | ||
|  | 
 | ||
|  | dtd_option(dialect(_)). | ||
|  | 
 | ||
|  | 
 | ||
|  | %%	destroy_dtds | ||
|  | %	 | ||
|  | %	Destroy  DTDs  cached  by  this  thread   as  they  will  become | ||
|  | %	unreachable anyway. | ||
|  | 
 | ||
|  | destroy_dtds :- | ||
|  | 	(   current_dtd(_Type, DTD), | ||
|  | 	    free_dtd(DTD), | ||
|  | 	    fail | ||
|  | 	;   true | ||
|  | 	). | ||
|  | 
 | ||
|  | %%	register_cleanup | ||
|  | % | ||
|  | %	Register cleanup of DTDs created for this thread. | ||
|  | 
 | ||
|  | register_cleanup :- | ||
|  | 	registered_cleanup, !. | ||
|  | register_cleanup :- | ||
|  | 	catch(thread_at_exit(destroy_dtds), _, true), | ||
|  | 	assert(registered_cleanup). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	    EXAMINE DTD		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | prop(doctype(_), _). | ||
|  | prop(elements(_), _). | ||
|  | prop(entities(_), _). | ||
|  | prop(notations(_), _). | ||
|  | prop(entity(E, _), DTD) :- | ||
|  | 	(   nonvar(E) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, entities(EL)), | ||
|  | 	    member(E, EL) | ||
|  | 	). | ||
|  | prop(element(E, _, _), DTD) :- | ||
|  | 	(   nonvar(E) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, elements(EL)), | ||
|  | 	    member(E, EL) | ||
|  | 	). | ||
|  | prop(attributes(E, _), DTD) :- | ||
|  | 	(   nonvar(E) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, elements(EL)), | ||
|  | 	    member(E, EL) | ||
|  | 	). | ||
|  | prop(attribute(E, A, _, _), DTD) :- | ||
|  | 	(   nonvar(E) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, elements(EL)), | ||
|  | 	    member(E, EL) | ||
|  | 	), | ||
|  | 	(   nonvar(A) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, attributes(E, AL)), | ||
|  | 	    member(A, AL) | ||
|  | 	). | ||
|  | prop(notation(N, _), DTD) :- | ||
|  | 	(   nonvar(N) | ||
|  | 	->  true | ||
|  | 	;   '$dtd_property'(DTD, notations(NL)), | ||
|  | 	    member(N, NL) | ||
|  | 	). | ||
|  | 
 | ||
|  | dtd_property(DTD, Prop) :- | ||
|  | 	prop(Prop, DTD), | ||
|  | 	'$dtd_property'(DTD, Prop). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	       SGML		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | parser_option(dialect(_)). | ||
|  | parser_option(shorttag(_)). | ||
|  | parser_option(file(_)). | ||
|  | parser_option(line(_)). | ||
|  | parser_option(space(_)). | ||
|  | parser_option(number(_)). | ||
|  | parser_option(defaults(_)). | ||
|  | parser_option(doctype(_)). | ||
|  | parser_option(qualify_attributes(_)). | ||
|  | parser_option(encoding(_)). | ||
|  | 
 | ||
|  | set_parser_options(Parser, Options, RestOptions) :- | ||
|  | 	parser_option(Option), | ||
|  | 	select_option(Option, Options, RestOptions0), !, | ||
|  | 	set_sgml_parser(Parser, Option), | ||
|  | 	set_parser_options(Parser, RestOptions0, RestOptions). | ||
|  | set_parser_options(_, Options, Options). | ||
|  | 
 | ||
|  | 
 | ||
|  | load_structure(stream(In), Term, Options) :- !, | ||
|  | 	(   select_option(offset(Offset), Options, Options1) | ||
|  | 	->  seek(In, Offset, bof, _) | ||
|  | 	;   Options1 = Options | ||
|  | 	), | ||
|  | 	(   select_option(dtd(DTD), Options1, Options2) | ||
|  | 	->  ExplicitDTD = true | ||
|  | 	;   ExplicitDTD = false, | ||
|  | 	    Options2 = Options1 | ||
|  | 	), | ||
|  | 	new_sgml_parser(Parser, | ||
|  | 			[ dtd(DTD) | ||
|  | 			]), | ||
|  | 	def_entities(Options2, DTD, Options3), | ||
|  | 	call_cleanup(parse(Parser, Options3, TermRead, In), | ||
|  | 		     free_sgml_parser(Parser)), | ||
|  | 	(   ExplicitDTD == true | ||
|  | 	->  (   DTD = dtd(_, DocType), | ||
|  | 	        dtd_property(DTD, doctype(DocType)) | ||
|  | 	    ->	true | ||
|  | 	    ;	true | ||
|  | 	    ) | ||
|  | 	;   free_dtd(DTD) | ||
|  | 	), | ||
|  | 	Term = TermRead. | ||
|  | load_structure(Stream, Term, Options) :- | ||
|  | 	swi:swi_is_stream(Stream), !, | ||
|  | 	load_structure(stream(Stream), Term, Options). | ||
|  | load_structure(File, Term, Options) :- | ||
|  | 	swi:swi_open(File, read, In, [type(binary)]), | ||
|  | 	load_structure(stream(In), Term, [file(File)|Options]), | ||
|  | 	swi:swi_close(In). | ||
|  | 
 | ||
|  | parse(Parser, Options, Document, In) :- | ||
|  | 	set_parser_options(Parser, Options, Options1), | ||
|  | 	sgml_parse(Parser, | ||
|  | 		   [ document(Document), | ||
|  | 		     source(In) | ||
|  | 		   | Options1 | ||
|  | 		   ]). | ||
|  | 
 | ||
|  | def_entities([], _, []). | ||
|  | def_entities([entity(Name, Value)|T], DTD, Opts) :- !, | ||
|  | 	def_entity(DTD, Name, Value), | ||
|  | 	def_entities(T, DTD, Opts). | ||
|  | def_entities([H|T0], DTD, [H|T]) :- | ||
|  | 	def_entities(T0, DTD, T). | ||
|  | 
 | ||
|  | def_entity(DTD, Name, Value) :- | ||
|  | 	open_dtd(DTD, [], Stream), | ||
|  | 	xml_quote_attribute(Value, QValue), | ||
|  | 	swi:swi_format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]), | ||
|  | 	swi:close(Stream). | ||
|  | 	 | ||
|  | 	 | ||
|  | 		 /******************************* | ||
|  | 		 *	     UTILITIES		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | load_sgml_file(File, Term) :- | ||
|  | 	load_structure(File, Term, [dialect(sgml)]). | ||
|  | 
 | ||
|  | load_xml_file(File, Term) :- | ||
|  | 	load_structure(File, Term, [dialect(xml)]). | ||
|  | 
 | ||
|  | load_html_file(File, Term) :- | ||
|  | 	dtd(html, DTD), | ||
|  | 	load_structure(File, Term, | ||
|  | 		       [ dtd(DTD), | ||
|  | 			 dialect(sgml), | ||
|  | 			 shorttag(false) | ||
|  | 		       ]). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	      ENCODING		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %	xml_quote_attribute(+In, -Quoted) | ||
|  | %	xml_quote_cdata(+In, -Quoted) | ||
|  | %	 | ||
|  | %	Backward  compatibility  for  versions  that  allow  to  specify | ||
|  | %	encoding. All characters that cannot fit the encoding are mapped | ||
|  | %	to XML character entities (&#dd;).  Using   ASCII  is the safest | ||
|  | %	value. | ||
|  | 
 | ||
|  | xml_quote_attribute(In, Quoted) :- | ||
|  | 	xml_quote_attribute(In, Quoted, ascii). | ||
|  | 
 | ||
|  | xml_quote_cdata(In, Quoted) :- | ||
|  | 	xml_quote_cdata(In, Quoted, ascii). | ||
|  | 
 | ||
|  | xml_name(In) :- | ||
|  | 	xml_name(In, ascii). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   TYPE CHECKING	* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %	xml_is_dome(@Term) | ||
|  | %	 | ||
|  | %	True  if  term  statisfies   the    structure   as  returned  by | ||
|  | %	load_structure/3 and friends. | ||
|  | 
 | ||
|  | xml_is_dom(0) :- !, fail.		% catch variables | ||
|  | xml_is_dom([]) :- !. | ||
|  | xml_is_dom([H|T]) :- !, | ||
|  | 	xml_is_dom(H), | ||
|  | 	xml_is_dom(T). | ||
|  | xml_is_dom(element(Name, Attributes, Content)) :- !, | ||
|  | 	dom_name(Name), | ||
|  | 	dom_attributes(Attributes), | ||
|  | 	xml_is_dom(Content). | ||
|  | xml_is_dom(pi(Pi)) :- !, | ||
|  | 	atom(Pi). | ||
|  | xml_is_dom(CDATA) :- | ||
|  | 	atom(CDATA). | ||
|  | 
 | ||
|  | dom_name(NS:Local) :- | ||
|  | 	atom(NS), | ||
|  | 	atom(Local), !. | ||
|  | dom_name(Local) :- | ||
|  | 	atom(Local). | ||
|  | 
 | ||
|  | dom_attributes(0) :- !, fail. | ||
|  | dom_attributes([]). | ||
|  | dom_attributes([H|T]) :- | ||
|  | 	dom_attribute(H), | ||
|  | 	dom_attributes(T). | ||
|  | 
 | ||
|  | dom_attribute(Name=Value) :- | ||
|  | 	dom_name(Name), | ||
|  | 	atomic(Value). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	      MESSAGES		* | ||
|  | 		 *******************************/ | ||
|  | :- multifile | ||
|  | 	prolog:message/3. | ||
|  | 
 | ||
|  | %	Catch messages.  sgml/4 is generated by the SGML2PL binding. | ||
|  | 
 | ||
|  | prolog:message(sgml(Parser, File, Line, Message)) --> | ||
|  | 	{ get_sgml_parser(Parser, dialect(Dialect)) | ||
|  | 	}, | ||
|  | 	[ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ]. | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   XREF SUPPORT		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | :- multifile | ||
|  | 	prolog:called_by/2. | ||
|  | 
 | ||
|  | prolog:called_by(sgml_parse(_, Options), Called) :- | ||
|  | 	is_list(Options), | ||
|  | 	findall(G+3, | ||
|  | 		(   member(call(_, G), Options), | ||
|  | 		    callable(G) | ||
|  | 		), | ||
|  | 		Called). |