242 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			242 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog SGML/XML parser
 | |
| 
 | |
|     Author:  Jan Wielemaker
 | |
|     E-mail:  jan@swi.psy.uva.nl
 | |
|     WWW:     http://www.swi.psy.uva.nl/projects/SWI-Prolog/
 | |
|     Copying: LGPL-2.  See the file COPYING or http://www.gnu.org
 | |
| 
 | |
|     Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
 | |
| */
 | |
| 
 | |
| :- prolog_load_context(directory, CWD),
 | |
|    working_directory(_, CWD).
 | |
| 
 | |
| :- asserta(file_search_path(foreign, '..')).
 | |
| :- asserta(file_search_path(library, '..')).
 | |
| 
 | |
| :- use_module(library(sgml)).
 | |
| :- use_module(library(sgml_write)).
 | |
| 
 | |
| test :-					% default test
 | |
| 	fp('.').
 | |
| 
 | |
| test(File) :-
 | |
| 	file_name_extension(_, xml, File), !,
 | |
| 	load_xml_file(File, Term),
 | |
| 	xml_write(user_output, Term, []).
 | |
| test(File) :-
 | |
| 	file_name_extension(_, sgml, File), !,
 | |
| 	load_sgml_file(File, Term),
 | |
| 	sgml_write(user_output, Term, []).
 | |
| test(File) :-
 | |
| 	file_name_extension(_, html, File), !,
 | |
| 	load_html_file(File, Term),
 | |
| 	html_write(user_output, Term, []).
 | |
| 
 | |
| test(File, Into, Encoding) :-
 | |
| 	file_name_extension(_, xml, File), !,
 | |
| 	load_xml_file(File, Term),
 | |
| 	open(Into, write, Out, [encoding(Encoding)]),
 | |
| 	xml_write(Out, Term, []),
 | |
| 	close(Out).
 | |
| 
 | |
| fp(Dir) :-
 | |
| 	atom_concat(Dir, '/*', Pattern),
 | |
| 	expand_file_name(Pattern, Files),
 | |
| 	(   member(File, Files),
 | |
| 	    file_name_extension(_, Ext, File),
 | |
| 	    ml_file(Ext),
 | |
| 	    file_base_name(File, Base),
 | |
| 	    \+ blocked(Base),
 | |
| 	    format(user_error, '~w ... ', [Base]),
 | |
| 	    (	\+ utf8(Base)
 | |
| 	    ->  format(user_error, ' (ISO Latin-1) ... ', []),
 | |
| 		fixed_point(File, iso_latin_1)
 | |
| 	    ;	true
 | |
| 	    ),
 | |
| 	    format(user_error, ' (UTF-8) ... ', []),
 | |
| 	    fixed_point(File, utf8),
 | |
| 	    format(user_error, ' done~n', []),
 | |
| 	    fail
 | |
| 	;   true
 | |
| 	).
 | |
| 
 | |
| ml_file(xml).
 | |
| ml_file(sgml).
 | |
| ml_file(html).
 | |
| 
 | |
| %%	blocked(+File)
 | |
| %	
 | |
| %	List of test-files that are blocked.  These are either negative
 | |
| %	tests or tests involving SDATA.
 | |
| 
 | |
| blocked('bat.sgml').
 | |
| blocked('i.sgml').
 | |
| blocked('sdata.sgml').
 | |
| blocked('cent-nul.xml').
 | |
| blocked('defent.sgml').
 | |
| blocked('comment.xml').
 | |
| blocked('badxmlent.xml').
 | |
| 
 | |
| 
 | |
| %%	utf8(+File)
 | |
| %
 | |
| %	File requires UTF-8.  These are files that have UTF-8 characters
 | |
| %	in element or attribute names.
 | |
| 
 | |
| utf8('utf8-ru.xml').
 | |
| 
 | |
| 
 | |
| %%	fixed_point(+File, +Encoding)
 | |
| %
 | |
| %	Perform write/read round-trip and  validate   the  data  has not
 | |
| %	changed.
 | |
| 
 | |
| fixed_point(File, Encoding) :-
 | |
| 	file_name_extension(_, xml, File), !,
 | |
| 	fp(File, Encoding, load_xml_file, xml_write).
 | |
| fixed_point(File, Encoding) :-
 | |
| 	file_name_extension(_, sgml, File), !,
 | |
| 	fp(File, Encoding, load_sgml_file, sgml_write).
 | |
| fixed_point(File, Encoding) :-
 | |
| 	file_name_extension(_, html, File), !,
 | |
| 	fp(File, Encoding, load_html_file, html_write).
 | |
| 
 | |
| fp(File, Encoding, Load, Write) :-
 | |
| 	put_char(user_error, r),
 | |
| 	call(Load, File, Term),
 | |
| 	tmp_file(xml, TmpFile),
 | |
| 	open(TmpFile, write, TmpOut, [encoding(Encoding)]),
 | |
| 	put_char(user_error, w),
 | |
| 	call(Write, TmpOut, Term, []),
 | |
| 	close(TmpOut),
 | |
| %	cat(TmpFile, Encoding),
 | |
| 	put_char(user_error, r),
 | |
| 	call(Load, TmpFile, Term2),
 | |
| 	delete_file(TmpFile),
 | |
| 	(   eq(Term, Term2)
 | |
| 	->  true
 | |
| 	;   format(user_error, 'First file:~n', []),
 | |
| 	    %pp(Term),
 | |
| 	    save_in_file(f1, Term),
 | |
| 	    format(user_error, 'Second file:~n', []),
 | |
| 	    %pp(Term2),
 | |
| 	    save_in_file(f2, Term2),
 | |
| 	    fail
 | |
| 	).
 | |
| 
 | |
| save_in_file(File, Term) :-
 | |
| 	open(File, write, Out, [encoding(iso_latin_1)]),
 | |
| 	current_output(C0),
 | |
| 	set_output(Out),
 | |
| 	pp(Term),
 | |
| 	set_output(C0),
 | |
| 	close(Out).
 | |
| 
 | |
| 
 | |
| cat(File, Encoding) :-
 | |
| 	open(File, read, In, [encoding(Encoding)]),
 | |
| 	copy_stream_data(In, current_output),
 | |
| 	close(In).
 | |
| 	
 | |
| %	eq(M1, M2)
 | |
| %	
 | |
| %	Test two terms for equivalence.  The following mismatches are
 | |
| %	allowed:
 | |
| %	
 | |
| %		* Order of attributes
 | |
| %		* Layout in `element-only' content
 | |
| 
 | |
| eq(X, X) :- !.
 | |
| eq([], []) :- !.
 | |
| eq([B|T], L) :-				% delete blanks
 | |
| 	blank_atom(B), !,
 | |
| 	eq(T, L).
 | |
| eq(L, [B|T]) :-
 | |
| 	blank_atom(B), !,
 | |
| 	eq(T, L).
 | |
| eq([H1|T1], [H2|T2]) :- !,
 | |
| 	eq(H1, H2),
 | |
| 	eq(T1, T2).
 | |
| eq(element(Name, A1, C1), element(Name, A2, C2)) :-
 | |
| 	att_eq(A1, A2),
 | |
| 	ceq(C1, C2).
 | |
| eq(A1, A2) :-
 | |
| 	atom(A1),
 | |
| 	atom(A2), !,
 | |
| 	normalise_blanks(A1, B1),
 | |
| 	normalise_blanks(A2, B2),
 | |
| 	(   B1 == B2
 | |
| 	->  true
 | |
| 	;   format(user_error,
 | |
| 		   'ERROR: CDATA differs:~n\
 | |
| 		   \t~p~n\
 | |
| 		   \t~p~n',
 | |
| 		   [B1, B2])
 | |
| 	).
 | |
| eq(X, Y) :-
 | |
| 	format(user_error,
 | |
| 	       'ERROR: Content differs:~n\
 | |
| 	       \t~p~n\
 | |
| 	       \t~p~n',
 | |
| 	       [X, Y]).
 | |
| 
 | |
| att_eq(A1, A2) :-			% ordering is unimportant
 | |
| 	sort(A1, S),
 | |
| 	sort(A2, S), !.
 | |
| att_eq(A1, A2) :-
 | |
| 	format(user_error,
 | |
| 	       'ERROR: Attribute lists differ:~n\
 | |
| 	       \t~p~n\
 | |
| 	       \t~p~n',
 | |
| 	       [A1, A2]).
 | |
| 
 | |
| ceq(C1, C2) :-
 | |
| 	element_content(C1, E1),
 | |
| 	element_content(C2, E2), !,
 | |
| 	eq(E1, E2).
 | |
| ceq(C1, C2) :-
 | |
| 	eq(C1, C2).
 | |
| 
 | |
| element_content([], []).
 | |
| element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !,
 | |
| 	element_content(T0, T).
 | |
| element_content([Blank|T0], T) :-
 | |
| 	blank_atom(Blank),
 | |
| 	element_content(T0, T).
 | |
| 
 | |
| blank_atom(Atom) :-
 | |
| 	atom(Atom),
 | |
| 	atom_codes(Atom, Codes),
 | |
| 	all_blanks(Codes).
 | |
| 
 | |
| all_blanks([]).
 | |
| all_blanks([H|T]) :-
 | |
| 	code_type(H, space),
 | |
| 	all_blanks(T).
 | |
| 
 | |
| normalise_blanks(Atom, Normalised) :-
 | |
| 	atom_codes(Atom, Codes),
 | |
| 	eat_blanks(Codes, Codes1),
 | |
| 	normalise_blanks2(Codes1, N),
 | |
| 	atom_codes(Normalised, N).
 | |
| 
 | |
| normalise_blanks2([], []).
 | |
| normalise_blanks2([H|T0], T) :-
 | |
| 	code_type(H, space), !,
 | |
| 	eat_blanks(T0, T1),
 | |
| 	(   T1 == []
 | |
| 	->  T = []
 | |
| 	;   T = [32|T2],
 | |
| 	    normalise_blanks2(T1, T2)
 | |
| 	).
 | |
| normalise_blanks2([H|T0], [H|T]) :-
 | |
| 	normalise_blanks2(T0, T).
 | |
| 
 | |
| eat_blanks([H|T0], T) :-
 | |
| 	code_type(H, space), !,
 | |
| 	eat_blanks(T0, T).
 | |
| eat_blanks(L, L).
 |