435 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			435 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog
 | |
| 
 | |
|     Author:        Jan Wielemaker
 | |
|     E-mail:        wielemak@science.uva.nl
 | |
|     WWW:           http://www.swi-prolog.org
 | |
|     Copyright (C): 1985-2005, University of Amsterdam
 | |
| 
 | |
|     This program is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU General Public License
 | |
|     as published by the Free Software Foundation; either version 2
 | |
|     of the License, or (at your option) any later version.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|     GNU General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| 
 | |
|     As a special exception, if you link this library with other files,
 | |
|     compiled with a Free Software compiler, to produce an executable, this
 | |
|     library does not by itself cause the resulting executable to be covered
 | |
|     by the GNU General Public License. This exception does not however
 | |
|     invalidate any other reasons why the executable file might be covered by
 | |
|     the GNU General Public License.
 | |
| */
 | |
| 
 | |
| :- module(sgml,
 | |
| 	  [ load_sgml_file/2,		% +File, -ListOfContent
 | |
| 	    load_xml_file/2,		% +File, -ListOfContent
 | |
| 	    load_html_file/2,		% +File, -Document
 | |
| 
 | |
| 	    load_structure/3,		% +File, -Term, +Options
 | |
| 
 | |
| 	    load_dtd/2,			% +DTD, +File
 | |
| 	    load_dtd/3,			% +DTD, +File, +Options
 | |
| 	    dtd/2,			% +Type, -DTD
 | |
| 	    dtd_property/2,		% +DTD, ?Property
 | |
| 
 | |
| 	    new_dtd/2,			% +Doctype, -DTD
 | |
| 	    free_dtd/1,			% +DTD
 | |
| 	    open_dtd/3,			% +DTD, +Options, -Stream
 | |
| 
 | |
| 	    new_sgml_parser/2,		% -Parser, +Options
 | |
| 	    free_sgml_parser/1,		% +Parser
 | |
| 	    set_sgml_parser/2,		% +Parser, +Options
 | |
| 	    get_sgml_parser/2,		% +Parser, +Options
 | |
| 	    sgml_parse/2,		% +Parser, +Options
 | |
| 
 | |
| 	    sgml_register_catalog_file/2, % +File, +StartOrEnd
 | |
| 
 | |
| 	    xml_quote_attribute/3,	% +In, -Quoted, +Encoding
 | |
| 	    xml_quote_cdata/3,		% +In, -Quoted, +Encoding
 | |
| 	    xml_quote_attribute/2,	% +In, -Quoted
 | |
| 	    xml_quote_cdata/2,		% +In, -Quoted
 | |
| 	    xml_name/1,			% +In
 | |
| 	    xml_is_dom/1		% +Term
 | |
| 	  ]).
 | |
| 
 | |
| :- expects_dialect(swi).
 | |
| 
 | |
| :- use_module(library(lists)).
 | |
| :- use_module(library(option)).
 | |
| 
 | |
| :- multifile user:file_search_path/2.
 | |
| :- dynamic   user:file_search_path/2.
 | |
| 
 | |
| user:file_search_path(dtd, '.').
 | |
| :- if(current_prolog_flag(version_data, swi(_,_,_,_))).
 | |
| user:file_search_path(dtd, swi('library/DTD')).
 | |
| :- else.
 | |
| user:file_search_path(dtd, library('DTD')).
 | |
| :- endif.
 | |
| 
 | |
| sgml_register_catalog_file(File, Location) :-
 | |
| 	prolog_to_os_filename(File, OsFile),
 | |
| 	'_sgml_register_catalog_file'(OsFile, Location).
 | |
| 
 | |
| load_foreign :-
 | |
| 	current_predicate(_, _:sgml_parse(_,_)), !.
 | |
| load_foreign :-
 | |
| 	load_foreign_library(foreign(sgml2pl)).
 | |
| 
 | |
| register_catalog(Base) :-
 | |
| 	absolute_file_name(dtd(Base),
 | |
| 			       [ extensions([soc]),
 | |
| 				 access(read),
 | |
| 				 file_errors(fail)
 | |
| 			       ],
 | |
| 			       SocFile),
 | |
| 	sgml_register_catalog_file(SocFile, end).
 | |
| 
 | |
| init :-
 | |
| 	load_foreign,
 | |
| 	ignore(register_catalog('HTML4')).
 | |
| 
 | |
| :- initialization
 | |
| 	init.
 | |
| 	
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	   DTD HANDLING		*
 | |
| 		 *******************************/
 | |
| 
 | |
| /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | |
| Note that concurrent access to DTD objects  is not allowed, and hence we
 | |
| will allocate and destroy them in each   thread.  Possibibly it would be
 | |
| nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
 | |
| diagnosed to mess with the entity resolution by Fabien Todescato.
 | |
| - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | |
| 
 | |
| :- thread_local
 | |
| 	current_dtd/2.
 | |
| :- volatile
 | |
| 	current_dtd/2.
 | |
| :- thread_local
 | |
| 	registered_cleanup/0.
 | |
| :- volatile
 | |
| 	registered_cleanup/0.
 | |
| 
 | |
| :- multifile
 | |
| 	dtd_alias/2.
 | |
| 
 | |
| dtd_alias(html, 'HTML4').
 | |
| 
 | |
| dtd(Type, DTD) :-
 | |
| 	current_dtd(Type, DTD), !.
 | |
| dtd(Type, DTD) :-
 | |
| 	new_dtd(Type, DTD),
 | |
| 	(   dtd_alias(Type, Base)
 | |
| 	->  true
 | |
| 	;   Base = Type
 | |
| 	),
 | |
| 	absolute_file_name(dtd(Base),
 | |
| 			   [ extensions([dtd]),
 | |
| 			     access(read)
 | |
| 			   ], DtdFile),
 | |
| 	load_dtd(DTD, DtdFile),
 | |
| 	register_cleanup,
 | |
| 	asserta(current_dtd(Type, DTD)).
 | |
| 
 | |
| %%	load_dtd(+DTD, +DtdFile, +Options)
 | |
| %	
 | |
| %	Load file into a DTD.  Defined options are:
 | |
| %	
 | |
| %		* dialect(+Dialect)
 | |
| %		Dialect to use (xml, xmlns, sgml)
 | |
| %
 | |
| %		* encoding(+Encoding)
 | |
| %		Encoding of DTD file
 | |
| 
 | |
| load_dtd(DTD, DtdFile) :-
 | |
| 	load_dtd(DTD, DtdFile, []).
 | |
| load_dtd(DTD, DtdFile, Options) :-
 | |
| 	split_dtd_options(Options, DTDOptions, FileOptions),
 | |
| 	open_dtd(DTD, DTDOptions, DtdOut),
 | |
| 	swi:swi_open(DtdFile, read, DtdIn, FileOptions),
 | |
| 	swi:swi_copy_stream_data(DtdIn, DtdOut),
 | |
| 	swi:swi_close(DtdIn),
 | |
| 	swi:swi_close(DtdOut).
 | |
| 
 | |
| split_dtd_options([], [], []).
 | |
| split_dtd_options([H|T], [H|TD], S) :-
 | |
| 	dtd_option(H), !,
 | |
| 	split_dtd_options(T, TD, S).
 | |
| split_dtd_options([H|T], TD, [H|S]) :-
 | |
| 	split_dtd_options(T, TD, S).
 | |
| 
 | |
| dtd_option(dialect(_)).
 | |
| 
 | |
| 
 | |
| %%	destroy_dtds
 | |
| %	
 | |
| %	Destroy  DTDs  cached  by  this  thread   as  they  will  become
 | |
| %	unreachable anyway.
 | |
| 
 | |
| destroy_dtds :-
 | |
| 	(   current_dtd(_Type, DTD),
 | |
| 	    free_dtd(DTD),
 | |
| 	    fail
 | |
| 	;   true
 | |
| 	).
 | |
| 
 | |
| %%	register_cleanup
 | |
| %
 | |
| %	Register cleanup of DTDs created for this thread.
 | |
| 
 | |
| register_cleanup :-
 | |
| 	registered_cleanup, !.
 | |
| register_cleanup :-
 | |
| 	catch(thread_at_exit(destroy_dtds), _, true),
 | |
| 	assert(registered_cleanup).
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    EXAMINE DTD		*
 | |
| 		 *******************************/
 | |
| 
 | |
| prop(doctype(_), _).
 | |
| prop(elements(_), _).
 | |
| prop(entities(_), _).
 | |
| prop(notations(_), _).
 | |
| prop(entity(E, _), DTD) :-
 | |
| 	(   nonvar(E)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, entities(EL)),
 | |
| 	    member(E, EL)
 | |
| 	).
 | |
| prop(element(E, _, _), DTD) :-
 | |
| 	(   nonvar(E)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, elements(EL)),
 | |
| 	    member(E, EL)
 | |
| 	).
 | |
| prop(attributes(E, _), DTD) :-
 | |
| 	(   nonvar(E)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, elements(EL)),
 | |
| 	    member(E, EL)
 | |
| 	).
 | |
| prop(attribute(E, A, _, _), DTD) :-
 | |
| 	(   nonvar(E)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, elements(EL)),
 | |
| 	    member(E, EL)
 | |
| 	),
 | |
| 	(   nonvar(A)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, attributes(E, AL)),
 | |
| 	    member(A, AL)
 | |
| 	).
 | |
| prop(notation(N, _), DTD) :-
 | |
| 	(   nonvar(N)
 | |
| 	->  true
 | |
| 	;   '$dtd_property'(DTD, notations(NL)),
 | |
| 	    member(N, NL)
 | |
| 	).
 | |
| 
 | |
| dtd_property(DTD, Prop) :-
 | |
| 	prop(Prop, DTD),
 | |
| 	'$dtd_property'(DTD, Prop).
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       SGML		*
 | |
| 		 *******************************/
 | |
| 
 | |
| parser_option(dialect(_)).
 | |
| parser_option(shorttag(_)).
 | |
| parser_option(file(_)).
 | |
| parser_option(line(_)).
 | |
| parser_option(space(_)).
 | |
| parser_option(number(_)).
 | |
| parser_option(defaults(_)).
 | |
| parser_option(doctype(_)).
 | |
| parser_option(qualify_attributes(_)).
 | |
| parser_option(encoding(_)).
 | |
| 
 | |
| set_parser_options(Parser, Options, RestOptions) :-
 | |
| 	parser_option(Option),
 | |
| 	select_option(Option, Options, RestOptions0), !,
 | |
| 	set_sgml_parser(Parser, Option),
 | |
| 	set_parser_options(Parser, RestOptions0, RestOptions).
 | |
| set_parser_options(_, Options, Options).
 | |
| 
 | |
| 
 | |
| load_structure(stream(In), Term, Options) :- !,
 | |
| 	(   select_option(offset(Offset), Options, Options1)
 | |
| 	->  seek(In, Offset, bof, _)
 | |
| 	;   Options1 = Options
 | |
| 	),
 | |
| 	(   select_option(dtd(DTD), Options1, Options2)
 | |
| 	->  ExplicitDTD = true
 | |
| 	;   ExplicitDTD = false,
 | |
| 	    Options2 = Options1
 | |
| 	),
 | |
| 	new_sgml_parser(Parser,
 | |
| 			[ dtd(DTD)
 | |
| 			]),
 | |
| 	def_entities(Options2, DTD, Options3),
 | |
| 	call_cleanup(parse(Parser, Options3, TermRead, In),
 | |
| 		     free_sgml_parser(Parser)),
 | |
| 	(   ExplicitDTD == true
 | |
| 	->  (   DTD = dtd(_, DocType),
 | |
| 	        dtd_property(DTD, doctype(DocType))
 | |
| 	    ->	true
 | |
| 	    ;	true
 | |
| 	    )
 | |
| 	;   free_dtd(DTD)
 | |
| 	),
 | |
| 	Term = TermRead.
 | |
| load_structure(Stream, Term, Options) :-
 | |
| 	swi:swi_is_stream(Stream), !,
 | |
| 	load_structure(stream(Stream), Term, Options).
 | |
| load_structure(File, Term, Options) :-
 | |
| 	swi:swi_open(File, read, In, [type(binary)]),
 | |
| 	load_structure(stream(In), Term, [file(File)|Options]),
 | |
| 	swi:swi_close(In).
 | |
| 
 | |
| parse(Parser, Options, Document, In) :-
 | |
| 	set_parser_options(Parser, Options, Options1),
 | |
| 	sgml_parse(Parser,
 | |
| 		   [ document(Document),
 | |
| 		     source(In)
 | |
| 		   | Options1
 | |
| 		   ]).
 | |
| 
 | |
| def_entities([], _, []).
 | |
| def_entities([entity(Name, Value)|T], DTD, Opts) :- !,
 | |
| 	def_entity(DTD, Name, Value),
 | |
| 	def_entities(T, DTD, Opts).
 | |
| def_entities([H|T0], DTD, [H|T]) :-
 | |
| 	def_entities(T0, DTD, T).
 | |
| 
 | |
| def_entity(DTD, Name, Value) :-
 | |
| 	open_dtd(DTD, [], Stream),
 | |
| 	xml_quote_attribute(Value, QValue),
 | |
| 	swi:swi_format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
 | |
| 	swi:close(Stream).
 | |
| 	
 | |
| 	
 | |
| 		 /*******************************
 | |
| 		 *	     UTILITIES		*
 | |
| 		 *******************************/
 | |
| 
 | |
| load_sgml_file(File, Term) :-
 | |
| 	load_structure(File, Term, [dialect(sgml)]).
 | |
| 
 | |
| load_xml_file(File, Term) :-
 | |
| 	load_structure(File, Term, [dialect(xml)]).
 | |
| 
 | |
| load_html_file(File, Term) :-
 | |
| 	dtd(html, DTD),
 | |
| 	load_structure(File, Term,
 | |
| 		       [ dtd(DTD),
 | |
| 			 dialect(sgml),
 | |
| 			 shorttag(false)
 | |
| 		       ]).
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      ENCODING		*
 | |
| 		 *******************************/
 | |
| 
 | |
| %	xml_quote_attribute(+In, -Quoted)
 | |
| %	xml_quote_cdata(+In, -Quoted)
 | |
| %	
 | |
| %	Backward  compatibility  for  versions  that  allow  to  specify
 | |
| %	encoding. All characters that cannot fit the encoding are mapped
 | |
| %	to XML character entities (&#dd;).  Using   ASCII  is the safest
 | |
| %	value.
 | |
| 
 | |
| xml_quote_attribute(In, Quoted) :-
 | |
| 	xml_quote_attribute(In, Quoted, ascii).
 | |
| 
 | |
| xml_quote_cdata(In, Quoted) :-
 | |
| 	xml_quote_cdata(In, Quoted, ascii).
 | |
| 
 | |
| xml_name(In) :-
 | |
| 	xml_name(In, ascii).
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	   TYPE CHECKING	*
 | |
| 		 *******************************/
 | |
| 
 | |
| %	xml_is_dome(@Term)
 | |
| %	
 | |
| %	True  if  term  statisfies   the    structure   as  returned  by
 | |
| %	load_structure/3 and friends.
 | |
| 
 | |
| xml_is_dom(0) :- !, fail.		% catch variables
 | |
| xml_is_dom([]) :- !.
 | |
| xml_is_dom([H|T]) :- !,
 | |
| 	xml_is_dom(H),
 | |
| 	xml_is_dom(T).
 | |
| xml_is_dom(element(Name, Attributes, Content)) :- !,
 | |
| 	dom_name(Name),
 | |
| 	dom_attributes(Attributes),
 | |
| 	xml_is_dom(Content).
 | |
| xml_is_dom(pi(Pi)) :- !,
 | |
| 	atom(Pi).
 | |
| xml_is_dom(CDATA) :-
 | |
| 	atom(CDATA).
 | |
| 
 | |
| dom_name(NS:Local) :-
 | |
| 	atom(NS),
 | |
| 	atom(Local), !.
 | |
| dom_name(Local) :-
 | |
| 	atom(Local).
 | |
| 
 | |
| dom_attributes(0) :- !, fail.
 | |
| dom_attributes([]).
 | |
| dom_attributes([H|T]) :-
 | |
| 	dom_attribute(H),
 | |
| 	dom_attributes(T).
 | |
| 
 | |
| dom_attribute(Name=Value) :-
 | |
| 	dom_name(Name),
 | |
| 	atomic(Value).
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	      MESSAGES		*
 | |
| 		 *******************************/
 | |
| :- multifile
 | |
| 	prolog:message/3.
 | |
| 
 | |
| %	Catch messages.  sgml/4 is generated by the SGML2PL binding.
 | |
| 
 | |
| prolog:message(sgml(Parser, File, Line, Message)) -->
 | |
| 	{ get_sgml_parser(Parser, dialect(Dialect))
 | |
| 	},
 | |
| 	[ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	   XREF SUPPORT		*
 | |
| 		 *******************************/
 | |
| 
 | |
| :- multifile
 | |
| 	prolog:called_by/2.
 | |
| 
 | |
| prolog:called_by(sgml_parse(_, Options), Called) :-
 | |
| 	is_list(Options),
 | |
| 	findall(G+3,
 | |
| 		(   member(call(_, G), Options),
 | |
| 		    callable(G)
 | |
| 		),
 | |
| 		Called).
 |