diff --git a/packages/sgml/iso_639.pl b/packages/sgml/iso_639.pl index 09c91a9d1..1fe62c435 100644 --- a/packages/sgml/iso_639.pl +++ b/packages/sgml/iso_639.pl @@ -55,7 +55,7 @@ iso_639(Code, Lang) :- % l3(?Code, ?Lang) -% +% % ISO-639 3-letter codes l3(abk, 'Abkhazian'). @@ -465,7 +465,7 @@ l3(uzb, 'Uzbek'). l3(vai, 'Vai'). l3(ven, 'Venda'). l3(vie, 'Vietnamese'). -l3(vol, 'Volapük'). +l3(vol, 'Volap\u00fck'). % Use \uxxxx for portability. (= \"u) l3(vot, 'Votic'). l3(wak, 'Wakashan languages'). l3(wal, 'Walamo'). @@ -487,7 +487,7 @@ l3(zul, 'Zulu'). l3(zun, 'Zuni'). % l2(?Code, ?Lang) -% +% % ISO-639 2 letter codes l2(aa, 'Afar'). diff --git a/packages/sgml/sgml.pl b/packages/sgml/sgml.pl index 46f3750ad..549f8cc46 100644 --- a/packages/sgml/sgml.pl +++ b/packages/sgml/sgml.pl @@ -3,9 +3,9 @@ Part of SWI-Prolog Author: Jan Wielemaker - E-mail: wielemak@science.uva.nl + E-mail: J.Wielemaker@cs.vu.nl WWW: http://www.swi-prolog.org - Copyright (C): 1985-2005, University of Amsterdam + Copyright (C): 1985-2009, University of Amsterdam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -58,10 +58,13 @@ xml_quote_attribute/2, % +In, -Quoted xml_quote_cdata/2, % +In, -Quoted xml_name/1, % +In + xml_name/2, % +In, +Encoding + iri_xml_namespace/2, % +IRI, -Namespace + iri_xml_namespace/3, % +IRI, -Namespace, -LocalName xml_is_dom/1 % +Term ]). - :- expects_dialect(swi). +:- assert(system:swi_io). :- use_module(library(lists)). :- use_module(library(option)). @@ -73,17 +76,15 @@ user:file_search_path(dtd, '.'). :- if(current_prolog_flag(version_data, swi(_,_,_,_))). user:file_search_path(dtd, swi('library/DTD')). :- else. -user:file_search_path(dtd, library('DTD')). +user:file_search_path(dtd, swi('DTD')). :- endif. + sgml_register_catalog_file(File, Location) :- prolog_to_os_filename(File, OsFile), '_sgml_register_catalog_file'(OsFile, Location). -load_foreign :- - current_predicate(_, _:sgml_parse(_,_)), !. -load_foreign :- - load_foreign_library(foreign(sgml2pl)). +:- use_foreign_library(foreign(sgml2pl)). register_catalog(Base) :- absolute_file_name(dtd(Base), @@ -94,15 +95,9 @@ register_catalog(Base) :- SocFile), sgml_register_catalog_file(SocFile, end). -% make sure this is loaded in the current module, not user. -:- load_foreign. - -init :- +:- initialization ignore(register_catalog('HTML4')). -:- initialization - init. - /******************************* * DTD HANDLING * @@ -146,9 +141,9 @@ dtd(Type, DTD) :- asserta(current_dtd(Type, DTD)). %% load_dtd(+DTD, +DtdFile, +Options) -% +% % Load file into a DTD. Defined options are: -% +% % * dialect(+Dialect) % Dialect to use (xml, xmlns, sgml) % @@ -160,10 +155,10 @@ load_dtd(DTD, DtdFile) :- load_dtd(DTD, DtdFile, Options) :- split_dtd_options(Options, DTDOptions, FileOptions), open_dtd(DTD, DTDOptions, DtdOut), - system:swi_open(DtdFile, read, DtdIn, FileOptions), - system:swi_copy_stream_data(DtdIn, DtdOut), - system:swi_close(DtdIn), - system:swi_close(DtdOut). + open(DtdFile, read, DtdIn, FileOptions), + copy_stream_data(DtdIn, DtdOut), + close(DtdIn), + close(DtdOut). split_dtd_options([], [], []). split_dtd_options([H|T], [H|TD], S) :- @@ -176,7 +171,7 @@ dtd_option(dialect(_)). %% destroy_dtds -% +% % Destroy DTDs cached by this thread as they will become % unreachable anyway. @@ -270,7 +265,10 @@ set_parser_options(Parser, Options, RestOptions) :- set_parser_options(_, Options, Options). -load_structure(stream(In), Term, Options) :- !, +:- meta_predicate + load_structure(+, -, :). + +load_structure(stream(In), Term, M:Options) :- !, ( select_option(offset(Offset), Options, Options1) -> seek(In, Offset, bof, _) ; Options1 = Options @@ -283,8 +281,8 @@ load_structure(stream(In), Term, Options) :- !, new_sgml_parser(Parser, [ dtd(DTD) ]), - def_entities(Options2, DTD, Options3), - call_cleanup(parse(Parser, Options3, TermRead, In), + def_entities(Options2, Parser, Options3), + call_cleanup(parse(Parser, M:Options3, TermRead, In), free_sgml_parser(Parser)), ( ExplicitDTD == true -> ( DTD = dtd(_, DocType), @@ -296,35 +294,49 @@ load_structure(stream(In), Term, Options) :- !, ), Term = TermRead. load_structure(Stream, Term, Options) :- - system:swi_is_stream(Stream), !, + is_stream(Stream), !, load_structure(stream(Stream), Term, Options). -load_structure(File, Term, Options) :- - system:swi_open(File, read, In, [type(binary)]), - load_structure(stream(In), Term, [file(File)|Options]), - system:swi_close(In). +load_structure(File, Term, M:Options) :- + open(File, read, In, [type(binary)]), + load_structure(stream(In), Term, M:[file(File)|Options]), + close(In). -parse(Parser, Options, Document, In) :- +parse(Parser, M:Options, Document, In) :- set_parser_options(Parser, Options, Options1), + parser_meta_options(Options1, M, Options2), sgml_parse(Parser, [ document(Document), source(In) - | Options1 + | Options2 ]). -def_entities([], _, []). -def_entities([entity(Name, Value)|T], DTD, Opts) :- !, - def_entity(DTD, Name, Value), - def_entities(T, DTD, Opts). -def_entities([H|T0], DTD, [H|T]) :- - def_entities(T0, DTD, T). +parser_meta_options([], _, []). +parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :- !, + parser_meta_options(T0, M, T). +parser_meta_options([H|T0], M, [H|T]) :- + parser_meta_options(T0, M, T). -def_entity(DTD, Name, Value) :- - open_dtd(DTD, [], Stream), + +def_entities([], _, []). +def_entities([H|T], Parser, Opts) :- + def_entity(H, Parser), !, + def_entities(T, Parser, Opts). +def_entities([H|T0], Parser, [H|T]) :- + def_entities(T0, Parser, T). + +def_entity(entity(Name, Value), Parser) :- + get_sgml_parser(Parser, dtd(DTD)), xml_quote_attribute(Value, QValue), - system:swi_format(Stream, '~n', [Name, QValue]), - system:close(Stream). - - + setup_call_cleanup(open_dtd(DTD, [], Stream), + format(Stream, '~n', + [Name, QValue]), + close(Stream)). +def_entity(xmlns(URI), Parser) :- + set_sgml_parser(Parser, xmlns(URI)). +def_entity(xmlns(NS, URI), Parser) :- + set_sgml_parser(Parser, xmlns(NS, URI)). + + /******************************* * UTILITIES * *******************************/ @@ -350,7 +362,7 @@ load_html_file(File, Term) :- % xml_quote_attribute(+In, -Quoted) % xml_quote_cdata(+In, -Quoted) -% +% % Backward compatibility for versions that allow to specify % encoding. All characters that cannot fit the encoding are mapped % to XML character entities (&#dd;). Using ASCII is the safest @@ -371,7 +383,7 @@ xml_name(In) :- *******************************/ % xml_is_dome(@Term) -% +% % True if term statisfies the structure as returned by % load_structure/3 and friends. @@ -434,3 +446,5 @@ prolog:called_by(sgml_parse(_, Options), Called) :- callable(G) ), Called). + +:- retract(system:swi_io). diff --git a/packages/sgml/sgml_mode.pl b/packages/sgml/sgml_mode.pl index 55c3a5f3b..02bd703a5 100644 --- a/packages/sgml/sgml_mode.pl +++ b/packages/sgml/sgml_mode.pl @@ -422,7 +422,7 @@ colourise(M, Parser, Options) :- E, show_message(M, E)), erase(Ref). - + on_begin(_Tag, _Attributes, Parser) :- get_sgml_parser(Parser, file(File)), current_tb(TB, File), @@ -503,7 +503,7 @@ colour_item(Class, TB, From, To, Fragment) :- Len > 0, new(Fragment, sgml_mode_fragment(TB, From, Len, Name)). colour_item(_, _, _, _, @nil). - + /******************************* * STYLES * @@ -680,7 +680,7 @@ fill_attribute_menu([A|T], DTD, E, P, Mode) :- dtd_property(DTD, attribute(E, A, Type, Default)), add_attribute_menu(Type, Default, A, P, Mode), fill_attribute_menu(T, DTD, E, P, Mode). - + add_attribute_menu(nameof(List), Def, A, P, Mode) :- !, send(P, append, new(P2, popup(A, message(Mode, insert_attribute, A, @arg1)))), @@ -797,7 +797,7 @@ tag_region(M, Tag:[name], From:int, To:int, -> send(M, tag_region, Tag, From, To, block) ; send(M, tag_region, Tag, From, To, inline) ). - + insert_element(M, Tag:element=name, Style:[{inline,shorttag,block}]) :-> "Insert a new empty element":: @@ -1002,7 +1002,7 @@ feed(In, Len, Parser) :- report_allowed(M) :-> % DEBUGGING "Report allowed elements at point":: get(M, allowed_elements, Allowed), - concat_atom(Allowed, ', ', Atom), + atomic_list_concat(Allowed, ', ', Atom), send(M, report, status, 'Allowed: %s', Atom). show_message(M, E) :- diff --git a/packages/sgml/sgml_write.pl b/packages/sgml/sgml_write.pl index 0517be1d8..d89736759 100644 --- a/packages/sgml/sgml_write.pl +++ b/packages/sgml/sgml_write.pl @@ -70,10 +70,10 @@ stage. %% xml_write(+Stream, +Data, +Options) is det. %% sgml_write(+Stream, +Data, +Options) is det. %% html_write(+Stream, +Data, +Options) is det. -% +% % Write a term as created by the SGML/XML parser to a stream in % SGML or XML format. Options: -% +% % * dtd(DTD) % The DTD. This is needed for SGML documents that contain % elements with content model EMPTY. Characters which may @@ -96,35 +96,35 @@ stage. % The public identifier to be written in the line. % % * system(SysId) -% The system identifier to be written in the line. -% +% The system identifier to be written in the line. +% % * header(Bool) % If Bool is 'false', do not emit the header % line. (xml_write/3 only) -% +% % * nsmap(Map:list(Id=URI)) % When emitting embedded XML, assume these namespaces % are already defined from the environment. (xml_write/3 % only). -% +% % * indent(Indent) % Indentation of the document (for embedding) -% +% % * layout(Bool) % Emit/do not emit layout characters to make output % readable. -% +% % * net(Bool) % Use/do not use Null End Tags. % For XML, this applies only to empty elements, so you get -% +% % == % (default, net(true)) % (net(false)) % == -% +% % For SGML, this applies to empty elements, so you get -% +% % == % (if foo is declared to be EMPTY in the DTD) % (default, net(false)) @@ -132,7 +132,7 @@ stage. % == % % and also to elements with character content not containing / -% +% % == % xxx (default, net(false)) % format(Out, '~n~n', []) ; Encoding == iso_latin_1 -> format(Out, '~n~n', []) @@ -248,7 +250,7 @@ emit_xml_encoding(_, _). %% emit_doctype(+Options, +Data, +Stream) -% +% % Emit the document-type declaration. % There is a problem with the first clause if we are emitting SGML: % the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version' @@ -291,13 +293,16 @@ write_doctype(Out, DocType, PubId, SysId) :- %% emit(+Element, +Out, +State, +Options) -% +% % Emit a single element emit([], _, _) :- !. emit([H|T], Out, State) :- !, emit(H, Out, State), emit(T, Out, State). +emit(CDATA, Out, State) :- + atom(CDATA), !, + sgml_write_content(Out, CDATA, State). emit(Element, Out, State) :- \+ \+ emit_element(Element, Out, State). @@ -399,7 +404,7 @@ emit_name(Term, Out, _) :- write(Out, Term). %% update_nsmap(+Attributes, !State) -% +% % Modify the nsmap of State to reflect modifications due to xmlns % arguments. @@ -425,7 +430,7 @@ set_nsmap(NS, URI, Map, [NS=URI|Map]). %% content(+Content, +Out, +Element, +State, +Options) -% +% % Emit the content part of a structure as well as the termination % for the content. For empty content we have three versions: XML % style '/>', SGML declared EMPTY element (nothing) or normal SGML @@ -535,7 +540,7 @@ write_element_content([H|T], Out, State) :- *******************************/ %% add_missing_namespaces(+DOM0, +NsMap, -DOM) -% +% % Add xmlns:NS=URI definitions to the toplevel element(s) to % deal with missing namespaces. @@ -571,11 +576,12 @@ generate_ns(_, NS) :- rdf_db:ns/2. default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi). +default_ns('http://www.w3.org/1999/xhtml', xhtml). default_ns(URI, NS) :- rdf_db:ns(NS, URI). %% missing_namespaces(+DOM, +NSMap, -Missing) -% +% % Return a list of URIs appearing in DOM that are not covered % by xmlns definitions. @@ -650,7 +656,8 @@ writeq([H|T], Out, Escape, EntityMap) :- ( memberchk(H, Escape) -> write_entity(H, Out, EntityMap) ; H >= 256 - -> ( stream_property(Out, encoding(utf8)) + -> ( stream_property(Out, encoding(Enc)), + unicode_encoding(Enc) -> put_code(Out, H) ; write_entity(H, Out, EntityMap) ) @@ -658,6 +665,10 @@ writeq([H|T], Out, Escape, EntityMap) :- ), writeq(T, Out, Escape, EntityMap). +unicode_encoding(utf8). +unicode_encoding(wchar_t). +unicode_encoding(unicode_le). +unicode_encoding(unicode_be). write_entity(Code, Out, EntityMap) :- ( get_assoc(Code, EntityMap, EntityName) @@ -689,7 +700,7 @@ emit_indent(Indent, Out) :- format(Out, '~N', []), write_n(Tabs, '\t', Out), write_n(Spaces, ' ', Out). - + write_n(N, Char, Out) :- ( N > 0 -> put_char(Out, Char), @@ -697,7 +708,7 @@ write_n(N, Char, Out) :- write_n(N2, Char, Out) ; true ). - + inc_indent(State) :- inc_indent(State, 2). @@ -713,7 +724,7 @@ inc_indent(State, Inc) :- *******************************/ %% empty_element(+State, +Element) -% +% % True if Element is declared with EMPTY content in the (SGML) % DTD. @@ -721,9 +732,9 @@ empty_element(State, Element) :- get_state(State, dtd, DTD), DTD \== (-), dtd_property(DTD, element(Element, _, empty)). - + %% dtd_character_entities(+DTD, -Map) -% +% % Return an assoc mapping character entities to their name. Note % that the entity representation is a bit dubious. Entities should % allow for a wide-character version and avoid the &#..; trick. diff --git a/packages/sgml/xml_unicode.pl b/packages/sgml/xml_unicode.pl index 029b6064c..245071c1a 100644 --- a/packages/sgml/xml_unicode.pl +++ b/packages/sgml/xml_unicode.pl @@ -74,7 +74,7 @@ mkswitch(List, Indent) :- mkswitch(High, NextIndent), indent(Indent), format('}~n'). - + end(List, Max) :- last(List, Last), ( Last = _-Max diff --git a/packages/sgml/xsdp_types.pl b/packages/sgml/xsdp_types.pl index dc07eb361..ba9a373cd 100644 --- a/packages/sgml/xsdp_types.pl +++ b/packages/sgml/xsdp_types.pl @@ -31,6 +31,7 @@ :- module(xsdp_type, [ xsdp_type/1, % ?Type + xsdp_uri_type/2, % ?URI, ?Type xsdp_numeric_uri/2, % ?URI, ?Primary xsdp_subtype_of/2, % ?Type, ?Super xsdp_convert/3 % +Type, +Content, -Value @@ -64,14 +65,22 @@ ns('http://www.w3.org/2001/XMLSchema#'). *******************************/ %% xsdp_type(?Type) -% +% % Test/generate the names for the XML schema primitive types xsdp_type(Type) :- subtype_of(Type, _). +%% xsdp_uri_type(?URI, ?Type) +% +% True if URI is the URI for the the XML-Schema primitive Type. + +xsdp_uri_type(URI, Type) :- + xsd_local_id(URI, Type), + subtype_of(Type, _). + %% xsdp_subtype_of(?Type, ?Super) -% +% % True if Type is a (transitive) subtype of Super. xsdp_subtype_of(Type, Type). @@ -167,7 +176,7 @@ xsd_local_ids. numeric_uirs. %% xsdp_convert(+Type, +Content, -Value) -% +% % Convert the content model Content to an object of the given XSD % type and return the Prolog value in Value.