242 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			242 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| 
								 | 
							
								/*  $Id$
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Part of SWI-Prolog SGML/XML parser
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Author:  Jan Wielemaker
							 | 
						||
| 
								 | 
							
								    E-mail:  jan@swi.psy.uva.nl
							 | 
						||
| 
								 | 
							
								    WWW:     http://www.swi.psy.uva.nl/projects/SWI-Prolog/
							 | 
						||
| 
								 | 
							
								    Copying: LGPL-2.  See the file COPYING or http://www.gnu.org
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- prolog_load_context(directory, CWD),
							 | 
						||
| 
								 | 
							
								   working_directory(_, CWD).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- asserta(file_search_path(foreign, '..')).
							 | 
						||
| 
								 | 
							
								:- asserta(file_search_path(library, '..')).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- use_module(library(sgml)).
							 | 
						||
| 
								 | 
							
								:- use_module(library(sgml_write)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								test :-					% default test
							 | 
						||
| 
								 | 
							
									fp('.').
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								test(File) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, xml, File), !,
							 | 
						||
| 
								 | 
							
									load_xml_file(File, Term),
							 | 
						||
| 
								 | 
							
									xml_write(user_output, Term, []).
							 | 
						||
| 
								 | 
							
								test(File) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, sgml, File), !,
							 | 
						||
| 
								 | 
							
									load_sgml_file(File, Term),
							 | 
						||
| 
								 | 
							
									sgml_write(user_output, Term, []).
							 | 
						||
| 
								 | 
							
								test(File) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, html, File), !,
							 | 
						||
| 
								 | 
							
									load_html_file(File, Term),
							 | 
						||
| 
								 | 
							
									html_write(user_output, Term, []).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								test(File, Into, Encoding) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, xml, File), !,
							 | 
						||
| 
								 | 
							
									load_xml_file(File, Term),
							 | 
						||
| 
								 | 
							
									open(Into, write, Out, [encoding(Encoding)]),
							 | 
						||
| 
								 | 
							
									xml_write(Out, Term, []),
							 | 
						||
| 
								 | 
							
									close(Out).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								fp(Dir) :-
							 | 
						||
| 
								 | 
							
									atom_concat(Dir, '/*', Pattern),
							 | 
						||
| 
								 | 
							
									expand_file_name(Pattern, Files),
							 | 
						||
| 
								 | 
							
									(   member(File, Files),
							 | 
						||
| 
								 | 
							
									    file_name_extension(_, Ext, File),
							 | 
						||
| 
								 | 
							
									    ml_file(Ext),
							 | 
						||
| 
								 | 
							
									    file_base_name(File, Base),
							 | 
						||
| 
								 | 
							
									    \+ blocked(Base),
							 | 
						||
| 
								 | 
							
									    format(user_error, '~w ... ', [Base]),
							 | 
						||
| 
								 | 
							
									    (	\+ utf8(Base)
							 | 
						||
| 
								 | 
							
									    ->  format(user_error, ' (ISO Latin-1) ... ', []),
							 | 
						||
| 
								 | 
							
										fixed_point(File, iso_latin_1)
							 | 
						||
| 
								 | 
							
									    ;	true
							 | 
						||
| 
								 | 
							
									    ),
							 | 
						||
| 
								 | 
							
									    format(user_error, ' (UTF-8) ... ', []),
							 | 
						||
| 
								 | 
							
									    fixed_point(File, utf8),
							 | 
						||
| 
								 | 
							
									    format(user_error, ' done~n', []),
							 | 
						||
| 
								 | 
							
									    fail
							 | 
						||
| 
								 | 
							
									;   true
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ml_file(xml).
							 | 
						||
| 
								 | 
							
								ml_file(sgml).
							 | 
						||
| 
								 | 
							
								ml_file(html).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	blocked(+File)
							 | 
						||
| 
								 | 
							
								%	
							 | 
						||
| 
								 | 
							
								%	List of test-files that are blocked.  These are either negative
							 | 
						||
| 
								 | 
							
								%	tests or tests involving SDATA.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								blocked('bat.sgml').
							 | 
						||
| 
								 | 
							
								blocked('i.sgml').
							 | 
						||
| 
								 | 
							
								blocked('sdata.sgml').
							 | 
						||
| 
								 | 
							
								blocked('cent-nul.xml').
							 | 
						||
| 
								 | 
							
								blocked('defent.sgml').
							 | 
						||
| 
								 | 
							
								blocked('comment.xml').
							 | 
						||
| 
								 | 
							
								blocked('badxmlent.xml').
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	utf8(+File)
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	File requires UTF-8.  These are files that have UTF-8 characters
							 | 
						||
| 
								 | 
							
								%	in element or attribute names.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								utf8('utf8-ru.xml').
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	fixed_point(+File, +Encoding)
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Perform write/read round-trip and  validate   the  data  has not
							 | 
						||
| 
								 | 
							
								%	changed.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								fixed_point(File, Encoding) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, xml, File), !,
							 | 
						||
| 
								 | 
							
									fp(File, Encoding, load_xml_file, xml_write).
							 | 
						||
| 
								 | 
							
								fixed_point(File, Encoding) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, sgml, File), !,
							 | 
						||
| 
								 | 
							
									fp(File, Encoding, load_sgml_file, sgml_write).
							 | 
						||
| 
								 | 
							
								fixed_point(File, Encoding) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, html, File), !,
							 | 
						||
| 
								 | 
							
									fp(File, Encoding, load_html_file, html_write).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								fp(File, Encoding, Load, Write) :-
							 | 
						||
| 
								 | 
							
									put_char(user_error, r),
							 | 
						||
| 
								 | 
							
									call(Load, File, Term),
							 | 
						||
| 
								 | 
							
									tmp_file(xml, TmpFile),
							 | 
						||
| 
								 | 
							
									open(TmpFile, write, TmpOut, [encoding(Encoding)]),
							 | 
						||
| 
								 | 
							
									put_char(user_error, w),
							 | 
						||
| 
								 | 
							
									call(Write, TmpOut, Term, []),
							 | 
						||
| 
								 | 
							
									close(TmpOut),
							 | 
						||
| 
								 | 
							
								%	cat(TmpFile, Encoding),
							 | 
						||
| 
								 | 
							
									put_char(user_error, r),
							 | 
						||
| 
								 | 
							
									call(Load, TmpFile, Term2),
							 | 
						||
| 
								 | 
							
									delete_file(TmpFile),
							 | 
						||
| 
								 | 
							
									(   eq(Term, Term2)
							 | 
						||
| 
								 | 
							
									->  true
							 | 
						||
| 
								 | 
							
									;   format(user_error, 'First file:~n', []),
							 | 
						||
| 
								 | 
							
									    %pp(Term),
							 | 
						||
| 
								 | 
							
									    save_in_file(f1, Term),
							 | 
						||
| 
								 | 
							
									    format(user_error, 'Second file:~n', []),
							 | 
						||
| 
								 | 
							
									    %pp(Term2),
							 | 
						||
| 
								 | 
							
									    save_in_file(f2, Term2),
							 | 
						||
| 
								 | 
							
									    fail
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								save_in_file(File, Term) :-
							 | 
						||
| 
								 | 
							
									open(File, write, Out, [encoding(iso_latin_1)]),
							 | 
						||
| 
								 | 
							
									current_output(C0),
							 | 
						||
| 
								 | 
							
									set_output(Out),
							 | 
						||
| 
								 | 
							
									pp(Term),
							 | 
						||
| 
								 | 
							
									set_output(C0),
							 | 
						||
| 
								 | 
							
									close(Out).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								cat(File, Encoding) :-
							 | 
						||
| 
								 | 
							
									open(File, read, In, [encoding(Encoding)]),
							 | 
						||
| 
								 | 
							
									copy_stream_data(In, current_output),
							 | 
						||
| 
								 | 
							
									close(In).
							 | 
						||
| 
								 | 
							
									
							 | 
						||
| 
								 | 
							
								%	eq(M1, M2)
							 | 
						||
| 
								 | 
							
								%	
							 | 
						||
| 
								 | 
							
								%	Test two terms for equivalence.  The following mismatches are
							 | 
						||
| 
								 | 
							
								%	allowed:
							 | 
						||
| 
								 | 
							
								%	
							 | 
						||
| 
								 | 
							
								%		* Order of attributes
							 | 
						||
| 
								 | 
							
								%		* Layout in `element-only' content
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								eq(X, X) :- !.
							 | 
						||
| 
								 | 
							
								eq([], []) :- !.
							 | 
						||
| 
								 | 
							
								eq([B|T], L) :-				% delete blanks
							 | 
						||
| 
								 | 
							
									blank_atom(B), !,
							 | 
						||
| 
								 | 
							
									eq(T, L).
							 | 
						||
| 
								 | 
							
								eq(L, [B|T]) :-
							 | 
						||
| 
								 | 
							
									blank_atom(B), !,
							 | 
						||
| 
								 | 
							
									eq(T, L).
							 | 
						||
| 
								 | 
							
								eq([H1|T1], [H2|T2]) :- !,
							 | 
						||
| 
								 | 
							
									eq(H1, H2),
							 | 
						||
| 
								 | 
							
									eq(T1, T2).
							 | 
						||
| 
								 | 
							
								eq(element(Name, A1, C1), element(Name, A2, C2)) :-
							 | 
						||
| 
								 | 
							
									att_eq(A1, A2),
							 | 
						||
| 
								 | 
							
									ceq(C1, C2).
							 | 
						||
| 
								 | 
							
								eq(A1, A2) :-
							 | 
						||
| 
								 | 
							
									atom(A1),
							 | 
						||
| 
								 | 
							
									atom(A2), !,
							 | 
						||
| 
								 | 
							
									normalise_blanks(A1, B1),
							 | 
						||
| 
								 | 
							
									normalise_blanks(A2, B2),
							 | 
						||
| 
								 | 
							
									(   B1 == B2
							 | 
						||
| 
								 | 
							
									->  true
							 | 
						||
| 
								 | 
							
									;   format(user_error,
							 | 
						||
| 
								 | 
							
										   'ERROR: CDATA differs:~n\
							 | 
						||
| 
								 | 
							
										   \t~p~n\
							 | 
						||
| 
								 | 
							
										   \t~p~n',
							 | 
						||
| 
								 | 
							
										   [B1, B2])
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								eq(X, Y) :-
							 | 
						||
| 
								 | 
							
									format(user_error,
							 | 
						||
| 
								 | 
							
									       'ERROR: Content differs:~n\
							 | 
						||
| 
								 | 
							
									       \t~p~n\
							 | 
						||
| 
								 | 
							
									       \t~p~n',
							 | 
						||
| 
								 | 
							
									       [X, Y]).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								att_eq(A1, A2) :-			% ordering is unimportant
							 | 
						||
| 
								 | 
							
									sort(A1, S),
							 | 
						||
| 
								 | 
							
									sort(A2, S), !.
							 | 
						||
| 
								 | 
							
								att_eq(A1, A2) :-
							 | 
						||
| 
								 | 
							
									format(user_error,
							 | 
						||
| 
								 | 
							
									       'ERROR: Attribute lists differ:~n\
							 | 
						||
| 
								 | 
							
									       \t~p~n\
							 | 
						||
| 
								 | 
							
									       \t~p~n',
							 | 
						||
| 
								 | 
							
									       [A1, A2]).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ceq(C1, C2) :-
							 | 
						||
| 
								 | 
							
									element_content(C1, E1),
							 | 
						||
| 
								 | 
							
									element_content(C2, E2), !,
							 | 
						||
| 
								 | 
							
									eq(E1, E2).
							 | 
						||
| 
								 | 
							
								ceq(C1, C2) :-
							 | 
						||
| 
								 | 
							
									eq(C1, C2).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								element_content([], []).
							 | 
						||
| 
								 | 
							
								element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !,
							 | 
						||
| 
								 | 
							
									element_content(T0, T).
							 | 
						||
| 
								 | 
							
								element_content([Blank|T0], T) :-
							 | 
						||
| 
								 | 
							
									blank_atom(Blank),
							 | 
						||
| 
								 | 
							
									element_content(T0, T).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								blank_atom(Atom) :-
							 | 
						||
| 
								 | 
							
									atom(Atom),
							 | 
						||
| 
								 | 
							
									atom_codes(Atom, Codes),
							 | 
						||
| 
								 | 
							
									all_blanks(Codes).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								all_blanks([]).
							 | 
						||
| 
								 | 
							
								all_blanks([H|T]) :-
							 | 
						||
| 
								 | 
							
									code_type(H, space),
							 | 
						||
| 
								 | 
							
									all_blanks(T).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								normalise_blanks(Atom, Normalised) :-
							 | 
						||
| 
								 | 
							
									atom_codes(Atom, Codes),
							 | 
						||
| 
								 | 
							
									eat_blanks(Codes, Codes1),
							 | 
						||
| 
								 | 
							
									normalise_blanks2(Codes1, N),
							 | 
						||
| 
								 | 
							
									atom_codes(Normalised, N).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								normalise_blanks2([], []).
							 | 
						||
| 
								 | 
							
								normalise_blanks2([H|T0], T) :-
							 | 
						||
| 
								 | 
							
									code_type(H, space), !,
							 | 
						||
| 
								 | 
							
									eat_blanks(T0, T1),
							 | 
						||
| 
								 | 
							
									(   T1 == []
							 | 
						||
| 
								 | 
							
									->  T = []
							 | 
						||
| 
								 | 
							
									;   T = [32|T2],
							 | 
						||
| 
								 | 
							
									    normalise_blanks2(T1, T2)
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								normalise_blanks2([H|T0], [H|T]) :-
							 | 
						||
| 
								 | 
							
									normalise_blanks2(T0, T).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								eat_blanks([H|T0], T) :-
							 | 
						||
| 
								 | 
							
									code_type(H, space), !,
							 | 
						||
| 
								 | 
							
									eat_blanks(T0, T).
							 | 
						||
| 
								 | 
							
								eat_blanks(L, L).
							 |