Experiment with porting SGML to YAP, trying to preserve the SWI code as much as possible.
This commit is contained in:
Vitor Santos Costa
2009-03-13 19:39:06 +00:00
parent 754f00d311
commit d6a06fe092
260 changed files with 31894 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
<!DOCTYPE test [
<!ELEMENT test o o (p)*>
<!ELEMENT p o o (#PCDATA)>
<!ENTITY amp CDATA "&#38;">
]>
&amp;

View File

@@ -0,0 +1,3 @@
<?xml version="1.0"?>
<foo bar=10/>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0">
<test a="John & Mary">
John & Mary
</test>

View File

@@ -0,0 +1,8 @@
<!DOCTYPE bar [
<!ELEMENT bar - O EMPTY>
<!ATTLIST bar a CDATA #REQUIRED b NAME #REQUIRED c NUMBER #REQUIRED
d CDATA #REQUIRED>
]>
<bar a="Major Mynah" b="&#65;&#66;&#67;" c="&#49;&#50;" d="foo
bar ugh">

View File

@@ -0,0 +1,68 @@
<!DOCTYPE bat [
<!-- The purpose of this is to test bad attribute values -->
<!NOTATION no SYSTEM "http://www.example.com/notations/no.xml">
<!ENTITY en SYSTEM "http://www.example.com/entities/en.not" NDATA no>
<!ELEMENT bat O O (x+)>
<!ELEMENT x - O EMPTY>
<!ATTLIST x
a CDATA #IMPLIED
b ENTITY #IMPLIED
c ENTITIES #IMPLIED
d ID #IMPLIED
e IDREF #IMPLIED
f IDREFS #IMPLIED
g NAME #IMPLIED
h NAMES #IMPLIED
i NMTOKEN #IMPLIED
j NMTOKENS #IMPLIED
k NUMBER #IMPLIED
l NUMBERS #IMPLIED
m NUTOKEN #IMPLIED
n NUTOKENS #IMPLIED
o NOTATION (no) #IMPLIED
>
]>
<x a=foo>
<x a=bar//
<x a=foo&bar>
<x a=file.cgi?y=1>
<x b=en>
<x b=en en>
<x b="en en">
<x c=en>
<x c=en en>
<x c="en en">
<x c=un>
<x c=12>
<x d=an-id>
<x d=an*id>
<x d=*id*>
<x d="an id">
<x e=an-id>
<x e=un-id>
<x f="">
<x f=an-id>
<x f=" an-id an-id ">
<x g=1>
<x g=''>
<x g=a-rather-long-name>
<x g=a%name%with%percents>
<x g=" a name ">
<x g=" a-name ">
<x h="">
<x h=a>
<x h="NAME">
<x h="A NAME">
<x k=1>
<x k=999999999999999999999999999999999999999999999>
<x k=1.2>
<x k="1.2">
<x k="-1.2">
<x n="one two">
<x n="1a 2a">
<x n="1*ft 2*in">
<x o=no>
<x o=un>
<x p=--a-->
<x p=--b-->
<x p=" --a-- ">

View File

@@ -0,0 +1,8 @@
<!DOCTYPE test [
<!ELEMENT test - - (#PCDATA)>
]>
<test>
<![CDATA[[Ora Lassila]]]>
</test>

View File

@@ -0,0 +1,7 @@
<!DOCTYPE test [
<!ELEMENT test - - (#PCDATA)>
]>
<test>
Test for handling character entities: &#65; &#RS;
</test>

View File

@@ -0,0 +1 @@
<test>This content holds a &#0; byte that should be skipped</test>

View File

@@ -0,0 +1,19 @@
<!DOCTYPE testdoc [
<!--* test swi: check to see how SWI handles non-ASCII characters
* in Unicode. Named entities, numeric character references,
* and native UTF8 may all be different.
*-->
<!ENTITY ntilde "&#241;" ><!-- small n, tilde -->
<!ENTITY lsquo "&#x2018;" ><!--=single quotation mark, left-->
<!ENTITY rsquo "&#8217;" ><!-- = x2019 single quotation mark, right-->
<!ENTITY townname "Espa&ntilde;ola">
<!ENTITY townnum "Espa&#241;ola">
<!ENTITY scarequote1 "a &lsquo;test&rsquo; for you">
<!ENTITY scarequote2 "a &#x2018;test&#x2019; for you">
]>
<testdoc id="t7-20020923" resp="MSM">
<names>From Espa&ntilde;ola -- a &lsquo;test&rsquo; for you.</names>
<nums>From Espa&#241;ola -- a &#x2018;test&#x2019; for you.</nums>
<names>From &townname; -- &scarequote1;.</names>
<nums>From &townname; -- &scarequote2;.</nums>
</testdoc>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE a
[ <!ENTITY %attrs
"id ID #implied -- document-wide unique id --
class CDATA #IMPLIED -- comment --">
<!ELEMENT a - - (#PCDATA) -- foo-bar -->
<!ATTLIST a %attrs;>
]
>
<!-- x -->
<a></a>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0"?>
<test>
<!-- comment -->
<!--- bad comment -->
<!-- bad -- comment -->
<!-- ok-comment -->
<!-- bad comment end --->
</test>

View File

@@ -0,0 +1,8 @@
<!DOCTYPE con [
<!ELEMENT con - - (foo,bar)>
<!ATTLIST con missing (missing) #CONREF>
<!ELEMENT (foo|bar) - O (#PCDATA)>
]>
<con missing>

View File

@@ -0,0 +1,10 @@
<!DOCTYPE conref [
<!ELEMENT conref O O (b+)>
<!ELEMENT b - O RCDATA>
<!ATTLIST b x CDATA #CONREF>
]>
<b>1.</b>
<b x=2>
<b>3.</b>
<b x=4>
<b>5.</b>

View File

@@ -0,0 +1,11 @@
<!DOCTYPE text [
<!ENTITY new.p STARTTAG "p">
<!SHORTREF for.text
"&#RS;&#RE;" new.p>
<!ELEMENT text O O (p+)>
<!USEMAP for.text text>
<!ELEMENT p O O (#PCDATA)>
]>
par 1
par 2

View File

@@ -0,0 +1,10 @@
<!DOCTYPE def [
<!ENTITY #DEFAULT SDATA "?">
<!ELEMENT def - - RCDATA>
]>
<def>&#65
&#66 &#67@ &#68
&b
&#66
&c</def>

View File

@@ -0,0 +1,10 @@
<!DOCTYPE oops [
<!ENTITY s CDATA "attr">
<!ENTITY o STARTTAG "snag &#38;s;=2">
<!ELEMENT snag - O EMPTY>
<!ATTLIST snag attr NUMBER #REQUIRED>
<!ELEMENT oops - O RCDATA>
]>
<oops>
&o;

View File

@@ -0,0 +1,10 @@
<!DOCTYPE oops [
<!ENTITY o STARTTAG "snag attr=2">
<!ELEMENT snag - O EMPTY>
<!ATTLIST snag attr NUMBER #REQUIRED>
<!ELEMENT oops - O RCDATA>
]>
<oops>
&o;

View File

@@ -0,0 +1,7 @@
<!DOCTYPE foo [
<!ELEMENT foo O O (name+)>
<!ELEMENT name - O (#PCDATA)>
<!ATTLIST name title cdata #implied sex cdata #implied>
]>
<name title=Dr sex=M>Neddie Seagoon
<name title=Miss sex=F>Minnie Bannister

12
packages/sgml/Test/i.sgml Normal file
View File

@@ -0,0 +1,12 @@
<!DOCTYPE test [
<!ENTITY QS "<q>">
<!ENTITY QE ENDTAG "q">
<!ENTITY Q2 CDATA "<q>">
<!ENTITY S SDATA "special">
<!ELEMENT test - - (#PCDATA|q)*>
<!ELEMENT q - - (#PCDATA)>
]>
<test>
&Q2;! Hello &QS;you there&QE;, I'm &S;
</test>

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<document name="value">
<ul>
<li>Line one</li>
<li>Line with <em>emphasised</em> text</li>
</ul>
<p>This is a nice paragraph with some <b>bold</b> text</p>
</document>

View File

@@ -0,0 +1,41 @@
<!DOCTYPE D [
<!ELEMENT D O O (P+)>
<!ELEMENT P O O (#PCDATA|Q|M)*>
<!ELEMENT Q - - (#PCDATA|M)*>
<!ELEMENT M - - (#PCDATA)>
<!ENTITY P STARTTAG "P">
<!ENTITY beg.q STARTTAG "Q">
<!ENTITY end.q ENDTAG "Q">
<!ENTITY err.q STARTTAG "MISS-Q">
<!ENTITY beg.m STARTTAG "M">
<!ENTITY end.m ENDTAG "M">
<!ENTITY err.m STARTTAG "MISS-PCT">
<!SHORTREF For.D
"%" beg.m
"&#RS;&#RE;" P
'"' beg.q
>
<!USEMAP For.D (D|P)>
<!SHORTREF For.Q
"%" beg.m
"&#RS;&#RE;" err.q
'"' end.q
>
<!USEMAP For.Q (Q)>
<!SHORTREF For.M
"%" end.m
"&#RS;&#RE;" err.m
>
<!USEMAP For.M (M)>
]>
The "first" paragraph.
The "%2%nd" paragraph.
The %3%rd paragraph.

View File

@@ -0,0 +1,7 @@
<!DOCTYPE ent [
<!ENTITY #DEFAULT CDATA "[missing]">
<!ELEMENT ent O O (#PCDATA)>
]>
One: &one;.
Two: &two;.
Three: &three;.

View File

@@ -0,0 +1,3 @@
<?xml version="1.0"?>
<x--y/>

View File

@@ -0,0 +1,12 @@
<!DOCTYPE shref [
<!ELEMENT shref - - (netc|twit|twat)*>
<!ELEMENT netc - - (frob+)>
<!ELEMENT frob O O (twit)>
<!ELEMENT twit - O (#PCDATA)>
<!ELEMENT twat - - CDATA>
]>
<shref>
<netc/<frob><twit>This slash should end netc:/
<twat/This is a twat/
<twit/This is a simple shortref/
</shref>

View File

@@ -0,0 +1,10 @@
<!DOCTYPE test
[ <!ELEMENT test - - EMPTY>
<!ATTLIST test
or (a|b|c) #IMPLIED
and (d&e&f) #IMPLIED
seq (g,h,i) #IMPLIED>
]>
<test or=a and=e seq=i>

View File

@@ -0,0 +1,14 @@
<!DOCTYPE tiny-htm [
<!ELEMENT tiny-htm - - (p|pre)+>
<!ELEMENT p - O (#PCDATA|i|b|var)*>
<!ELEMENT pre - O (#PCDATA|i|b|var)*>
<!ELEMENT (i|b|var) - - (#PCDATA|i|b|var)*>
]>
<tiny-htm>
<p>This demonstrates the problem:
<pre>
<var>status</var> = DOM_<var>operation</var
>, &<var>result</var>);
</pre>
<p>I hope this helps.
</tiny-htm>

View File

@@ -0,0 +1,13 @@
<!DOCTYPE not [
<!ELEMENT not - - (Formula*)>
<!NOTATION TeX SYSTEM "">
<!NOTATION Eqn SYSTEM "">
<!ELEMENT formula - O CDATA>
<!ATTLIST formula notation NOTATION (TeX|Eqn) #CURRENT>
]>
<not>
<formula notation=TeX>
$$E = MC^2$$
</formula>
</not>

View File

@@ -0,0 +1 @@
[element(test, [], [element(p, [], [&])])].

View File

@@ -0,0 +1,2 @@
[element(foo, [bar='10'], [])].
[].

View File

@@ -0,0 +1,2 @@
[element(test, [a='John & Mary'], ['\n John & Mary\n'])].
[sgml(sgml_parser(1949540), 'badxmlent.xml', 3, 'Syntax error: Illegal entity, found "& Mary""'), sgml(sgml_parser(1949540), 'badxmlent.xml', 4, 'Syntax error: Illegal entity, found "& "')].

View File

@@ -0,0 +1 @@
[element(bar, [a='Major Mynah', b=abc, c='12', d='foo bar ugh'], [])].

View File

@@ -0,0 +1,2 @@
[element(bat, [], [element(x, [a=foo], []), element(x, [a=bar], []), element(x, [a='foo&bar'], []), element(x, [a='file.cgi?y=1'], []), element(x, [b=en], []), element(x, [b=en], []), element(x, [b='en en'], []), element(x, [c=[en]], []), element(x, [c=[en]], []), element(x, [c=[en, en]], []), element(x, [c=[un]], []), element(x, [c=['12']], []), element(x, [d='an-id'], []), element(x, [d='an*id'], []), element(x, [d='*id*'], []), element(x, [d='an id'], []), element(x, [e='an-id'], []), element(x, [e='un-id'], []), element(x, [f=['']], []), element(x, [f=['an-id']], []), element(x, [f=['an-id', 'an-id']], []), element(x, [g='1'], []), element(x, [g=''], []), element(x, [g='a-rather-long-name'], []), element(x, [g='a%name%with%percents'], []), element(x, [g='a name'], []), element(x, [g='a-name'], []), element(x, [h=['']], []), element(x, [h=[a]], []), element(x, [h=[name]], []), element(x, [h=[a, name]], []), element(x, [k='1'], []), element(x, [k='999999999999999999999999999999999999999999999'], []), element(x, [k=0], []), element(x, [k=0], []), element(x, [k=0], []), element(x, [n=[one, two]], []), element(x, [n=['1a', '2a']], []), element(x, [n=['1*ft', '2*in']], []), element(x, [o=no], []), element(x, [o=un], []), element(x, [p='--a--'], []), element(x, [p='--b--'], []), element(x, [p=' --a-- '], [])])].
[sgml(sgml_parser(423746), 'bat.sgml', 27, 'Syntax error: Attribute value requires quotes, found "foo&bar"'), sgml(sgml_parser(423746), 'bat.sgml', 28, 'Syntax error: Attribute value requires quotes, found "file.cgi?y=1"'), sgml(sgml_parser(423746), 'bat.sgml', 30, 'Element "x" has no attribute with value "en"'), sgml(sgml_parser(423746), 'bat.sgml', 30, 'Syntax error: Bad attribute list, found "b=en en"'), sgml(sgml_parser(423746), 'bat.sgml', 33, 'Element "x" has no attribute with value "en"'), sgml(sgml_parser(423746), 'bat.sgml', 33, 'Syntax error: Bad attribute list, found "c=en en"'), sgml(sgml_parser(423746), 'bat.sgml', 36, 'Syntax error: entity NAMES expected, found "12"'), sgml(sgml_parser(423746), 'bat.sgml', 38, 'Syntax error: Attribute value requires quotes, found "an*id"'), sgml(sgml_parser(423746), 'bat.sgml', 38, 'Syntax error: NAME expected, found "an*id"'), sgml(sgml_parser(423746), 'bat.sgml', 39, 'Syntax error: Attribute value requires quotes, found "*id*"'), sgml(sgml_parser(423746), 'bat.sgml', 39, 'Syntax error: NAME expected, found "*id*"'), sgml(sgml_parser(423746), 'bat.sgml', 43, 'Syntax error: NAMES expected, found """"'), sgml(sgml_parser(423746), 'bat.sgml', 46, 'Syntax error: NAME expected, found "1"'), sgml(sgml_parser(423746), 'bat.sgml', 47, 'Syntax error: NAME expected, found "\'\'"'), sgml(sgml_parser(423746), 'bat.sgml', 49, 'Syntax error: Attribute value requires quotes, found "a%name%with%percents"'), sgml(sgml_parser(423746), 'bat.sgml', 49, 'Syntax error: NAME expected, found "a%name%with%percents"'), sgml(sgml_parser(423746), 'bat.sgml', 52, 'Syntax error: NAMES expected, found """"'), sgml(sgml_parser(423746), 'bat.sgml', 58, 'Syntax error: NUMBER expected, found "1.2"'), sgml(sgml_parser(423746), 'bat.sgml', 59, 'Syntax error: NUMBER expected, found ""1.2""'), sgml(sgml_parser(423746), 'bat.sgml', 60, 'Syntax error: NUMBER expected, found ""-1.2""'), sgml(sgml_parser(423746), 'bat.sgml', 61, 'Syntax error: NUTOKENS 
expected, found ""one two""'), sgml(sgml_parser(423746), 'bat.sgml', 63, 'Syntax error: NUTOKENS expected, found ""1*ft 2*in""'), sgml(sgml_parser(423746), 'bat.sgml', 66, 'Element "x" has no attribute "p"')].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['[Ora Lassila]'])].
[].

View File

@@ -0,0 +1 @@
[element(test, [], ['Test for handling character entities: A \n'])].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['This content holds a byte that should be skipped'])].
[sgml(sgml_parser(482992), 'cent-nul.xml', 1, 'Syntax error: Bad character entity, found "#0"')].

View File

@@ -0,0 +1,2 @@
[element(testdoc, [id='t7-20020923', resp='MSM'], ['\n', element(names, [], ['From Española -- a test for you.']), '\n', element(nums, [], ['From Española -- a test for you.']), '\n', element(names, [], ['From Española -- a test for you.']), '\n', element(nums, [], ['From Española -- a test for you.']), '\n'])].
[].

View File

@@ -0,0 +1 @@
[element(a, [], [])].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['\n \n \n \n \n \n'])].
[sgml(sgml_parser(1951880), 'comment.xml', 5, 'Syntax error: Illegal comment, found "<!---"'), sgml(sgml_parser(1951880), 'comment.xml', 6, 'Syntax error: Illegal comment'), sgml(sgml_parser(1951880), 'comment.xml', 8, 'Syntax error: Illegal comment')].

View File

@@ -0,0 +1 @@
[element(con, [missing=missing], [])].

View File

@@ -0,0 +1,2 @@
[element(conref, [], [element(b, [], ['1.']), element(b, [x='2'], []), element(b, [], ['3.']), element(b, [x='4'], []), element(b, [], ['5.'])])].
[].

View File

@@ -0,0 +1,2 @@
[element(text, [], [element(p, [], ['par 1']), element(p, [], ['par 2'])])].
[].

View File

@@ -0,0 +1,2 @@
[element(def, [], ['AB C@ D', sdata(?), 'B', sdata(?)])].
[].

View File

@@ -0,0 +1 @@
[element(oops, [], ['<snag attr=2>'])].

View File

@@ -0,0 +1,2 @@
[element(oops, [], ['<snag attr=2>'])].
[].

View File

@@ -0,0 +1 @@
[element(foo, [], [element(name, [title='Dr', sex='M'], ['Neddie Seagoon']), element(name, [title='Miss', sex='F'], ['Minnie Bannister'])])].

View File

@@ -0,0 +1 @@
[element(test, [], ['<q>! Hello ', element(q, [], ['you there']), ', I\'m ', sdata(special)])].

View File

@@ -0,0 +1,2 @@
[element(document, [name=value], ['\n ', element(ul, [], ['\n ', element(li, [], ['Line one']), '\n ', element(li, [], ['Line with ', element(em, [], [emphasised]), ' text']), '\n ']), '\n\n', element(p, [], ['This is a nice paragraph with some ', element(b, [], [bold]), ' text']), '\n\n'])].
[].

View File

@@ -0,0 +1,2 @@
[element(d, [], [element(p, [], ['The ', element(q, [], [first]), ' paragraph.']), element(p, [], ['The ', element(q, [], [element(m, [], ['2']), nd]), ' paragraph.']), element(p, [], ['The ', element(m, [], ['3']), 'rd paragraph.']), element(p, [], [])])].
[].

View File

@@ -0,0 +1,2 @@
[element(ent, [], ['One: [missing].\nTwo: [missing].\nThree: [missing].'])].
[].

View File

@@ -0,0 +1,2 @@
[element('x--y', [], [])].
[].

View File

@@ -0,0 +1,2 @@
[element(shref, [], [element(netc, [], [element(frob, [], [element(twit, [], ['This slash should end netc:'])])]), element(twat, [], ['This is a twat']), element(twit, [], ['This is a simple shortref'])])].
[].

View File

@@ -0,0 +1,2 @@
[element(test, [or=a, and=e, seq=i], [])].
[].

View File

@@ -0,0 +1 @@
[element('tiny-htm', [], [element(p, [], ['This demonstrates the problem:']), element(pre, [], [' ', element(var, [], [status]), ' = DOM_', element(var, [], [operation]), ', &', element(var, [], [result]), ');']), element(p, [], ['I hope this helps.'])])].

View File

@@ -0,0 +1 @@
[element(not, [], [element(formula, [notation=tex], ['$$E = MC^2$$'])])].

View File

@@ -0,0 +1,2 @@
[element(oma, [foo=foo], [element(obo, [bar=bar], [element(p, [], ['Hello world.']), element(p, [], ['Where did the attributes go?'])])])].
[].

View File

@@ -0,0 +1 @@
[element(bar, [], [element(foo, [], [element(a, [], [element(b, [], [x]), element(c, [], [y])]), element(a, [], [element(b, [], [x]), element(c, [], [y])]), element(a, [], [element(b, [], [x]), element(c, [], [y])]), element(a, [], [element(b, [], [x]), element(c, [], [y])])])])].

View File

@@ -0,0 +1 @@
[element(per, [], [element(jim, [], [snark])])].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['\n', pi('this is a pi'), '\n'])].
[].

View File

@@ -0,0 +1 @@
[element(doc, [], [element(t, [], ['This is rcdata Hello & World Hello']), element(a, [], ['This is an a'])])].

View File

@@ -0,0 +1,2 @@
[element(foo, [], ['_'])].
[].

View File

@@ -0,0 +1,2 @@
[element(text, [], [element(page, [], [element(line, [], ['SGML does NOT add record start characters at the beginning or']), element(line, [], ['record end characters at the end of entities unless they are']), element(line, [], ['already there. In this document, there IS a record start']), element(line, [], ['character for the first line of text and there IS a record end']), element(line, [], ['character for the last line of text. The #RS character reference']), element(line, [], ['should match at the beginning of each line except possibly the']), element(line, [], ['first one, so we expect SEVEN (LINE) elements for this file.'])])])].
[].

View File

@@ -0,0 +1 @@
[element(text, [], ['Van Alpha tot ', sdata('\\Omega')])].

View File

@@ -0,0 +1 @@
[element(num, [val='2'], [])].

View File

@@ -0,0 +1,2 @@
[element(test, [name=value], ['Some content'])].
[].

View File

@@ -0,0 +1 @@
[element(test, [], [element(p, [], ['Peter said: ', element(q, [], ['He, this is a nice program']), '.']), element(p, [], ['Bob said: ', element(q, [], ['Yes, it is'])])])].

View File

@@ -0,0 +1 @@
[element(test, [], [element(t, [], [element(name, [], [n]), element(arg, [], [a])])])].

View File

@@ -0,0 +1 @@
[element(ugh, [], [element(extension, [missing=missing, exchange='479'], []), element(extension, [exchange='479'], ['8494']), element(extension, [exchange='555'], ['1234']), element(extension, [exchange='03-555'], ['1234'])])].

View File

@@ -0,0 +1,2 @@
[element(testdoc, [id='t7-20020923', resp='MSM'], ['\n', element(names, [], ['From Española -- a test for you.']), '\n', element(nums, [], ['From Española -- a test for you.']), '\n', element(names, [], ['From Española -- a test for you.']), '\n', element(nums, [], ['From Española -- a test for you.']), '\n'])].
[].

View File

@@ -0,0 +1,2 @@
[element('Человек', [язык=мова], ['Borys'])].
[].

View File

@@ -0,0 +1,2 @@
[element(utf8, [], ['\n', element(name, [], ['Dürst']), '\n', element(name, [name='Dürst'], []), '\n'])].
[].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['\n ', element(li, [], ['Some cyrillic chars: ий']), '\n ', element(li, [], ['This a an sizzors symbol: ✄']), '\n ', element(li, [], ['OK, entered via entity: ✓']), '\n ', element(a, [att='Cirle with 1: ➀', ok='✓'], []), '\n'])].
[].

View File

@@ -0,0 +1,2 @@
[element(test, [], ['\n', element(p, [], ['\nThis is a long test holding, with the intention to switch to wide character\nencoding from the local buffer to malloc\'ed buffer. This (✌) is a\npiece symbol.\n']), '\n', element(p, [], ['\nThis is a long test holding, with the intention to switch to wide character\nencoding after the output buffer has been switched to malloc\'ed mode. For this\nreason, our buffer should should hold more than 256 character, as defined in\nutil.h in the structure ocharbuf. This (➽) is a fat arrow.\n']), '\n'])].
[].

View File

@@ -0,0 +1,8 @@
<!DOCTYPE oma [ <!ELEMENT oma O O (obo)>
<!ATTLIST oma foo CDATA #FIXED "foo">
<!ELEMENT obo O O (p+)>
<!ATTLIST obo bar CDATA #FIXED "bar">
<!ELEMENT p - O (#PCDATA)>
]>
<p>Hello world.
<p>Where did the attributes go?

View File

@@ -0,0 +1,14 @@
<!DOCTYPE bar [
<!ELEMENT bar - - (foo)>
<!ELEMENT foo O O (a+)>
<!ELEMENT a O O (b,c)>
<!ELEMENT b - O (#PCDATA)>
<!ELEMENT c - O (#PCDATA)>
]>
<bar>
<b>x<c>y
<b>x<c>y
<b>x<c>y
<b>x<c>y
</bar>

View File

@@ -0,0 +1,9 @@
<!DOCTYPE per [
<!ENTITY % fred "jim">
<!ELEMENT per - - (%fred;)>
<!ELEMENT %fred - - (#PCDATA)>
<!ATTLIST %fred tom (dick|harry) #IMPLIED>
<!USEMAP #EMPTY %fred>
]>
<per><jim>snark</jim></per>

View File

@@ -0,0 +1,3 @@
<test>
<?this is a pi>
</test>

View File

@@ -0,0 +1,19 @@
<!DOCTYPE doc
[ <!ELEMENT doc - - (t|a)*>
<!ELEMENT a - - (#PCDATA)>
<!ELEMENT t - - rcdata>
<!ENTITY hello "Hello">
<!ENTITY world "World &hello;">
]
>
<!-- coment -->
<doc>
<t>
This is rcdata &hello; & &world
</t>
<a>This is an a</a>
</doc>

View File

@@ -0,0 +1,9 @@
<!DOCTYPE foo [
<!-- Test silently ignoring second definition of an entity -->
<!ENTITY nbsp "_">
<!ENTITY %latin SYSTEM "../DTD/HTMLlat1.ent">
%latin;
<!ELEMENT foo O O (#PCDATA)>
]>
&nbsp;

View File

@@ -0,0 +1,18 @@
<!DOCTYPE text [
<!ENTITY new.page STARTTAG "page">
<!ENTITY new.line STARTTAG "line">
<!SHORTREF for.text
"&#RS;" new.line>
<!ELEMENT text O O (page+)>
<!ATTLIST text original CDATA #IMPLIED>
<!USEMAP for.text text>
<!ELEMENT page O O (line+)>
<!ELEMENT line O O (#PCDATA)>
]>
SGML does NOT add record start characters at the beginning or
record end characters at the end of entities unless they are
already there. In this document, there IS a record start
character for the first line of text and there IS a record end
character for the last line of text. The #RS character reference
should match at the beginning of each line except possibly the
first one, so we expect SEVEN (LINE) elements for this file.

View File

@@ -0,0 +1,8 @@
<!DOCTYPE text [
<!ENTITY Omega SDATA "\Omega">
<!ELEMENT text - - (#PCDATA)>
]>
<text>
Van Alpha tot &Omega;
</text>

View File

@@ -0,0 +1,6 @@
<!DOCTYPE num [
<!ELEMENT num - O EMPTY>
<!ATTLIST num val (1|2|3) #REQUIRED>
]>
<num 2>

View File

@@ -0,0 +1 @@
<test name="value">Some content</test>

View File

@@ -0,0 +1,20 @@
<!DOCTYPE test [
<!ELEMENT test - - (p)*>
<!ELEMENT p o o (#PCDATA|q)*>
<!ELEMENT q - - (#PCDATA)>
<!ENTITY beg..q "<q><!USEMAP in..q>">
<!ENTITY end..q ENDTAG "q">
<!ENTITY beg..p STARTTAG "p">
<!SHORTREF in..p '"' beg..q
'&#RS;B&#RE;' beg..p>
<!SHORTREF in..q '"' end..q>
<!USEMAP in..p p>
]>
<test>
Peter said: "He, this is a nice program".
Bob said: "Yes, it is"
</test>

View File

@@ -0,0 +1,25 @@
<!DOCTYPE test
[ <!ELEMENT test - - (t*)>
<!ELEMENT t - - ((name|op),arg)>
<!ELEMENT name o o (#PCDATA)>
<!ELEMENT op - - (#PCDATA)>
<!ELEMENT arg - - (#PCDATA)>
<!ELEMENT desc o o (#PCDATA)>
<!ENTITY end-t ENDTAG "t">
<!ENTITY beg-arg STARTTAG "arg">
<!ENTITY end-arg ENDTAG "arg">
<!SHORTREF in.t
'&#RE' end-t
'(' beg-arg>
<!SHORTREF in.arg
')' end-arg>
<!USEMAP in.t t>
<!USEMAP in.arg arg>
]>
<test>
<t>n(a)
</test>

163
packages/sgml/Test/test.pl Normal file
View File

@@ -0,0 +1,163 @@
/* $Id$
Part of SWI-Prolog SGML/XML parser
Author: Jan Wielemaker
E-mail: jan@swi.psy.uva.nl
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
*/
% Test driver for the SGML/XML parser.  The module exports the entry
% points used to run, inspect and (re)record the parser tests.
:- module(sgml_test,
[ test/1, % +File
testdir/1, % +Dir
pass/1, % +File
show/1, % +File
test/0
]).
% Make the directory this file is loaded from the working directory.
:- prolog_load_context(directory, CWD),
working_directory(_, CWD).
% Put '..' at the front of the `library' and `foreign' search paths
% before library(sgml) is loaded below.
:- asserta(user:file_search_path(library, '..')).
:- asserta(user:file_search_path(foreign, '..')).
:- use_module(library(sgml)).
%% test
%
% Run every test found in the current working directory.
%
% The directory is passed as the quoted atom '.'.  A bare `.' is not
% read as an atom by every Prolog system, and quoting it is harmless
% where it is.
test :-
    testdir('.').
%% testdir(+Dir)
%
% Expand the pattern Dir/* and hand every resulting entry to
% dotest/1, which decides whether the entry is a test document.
testdir(Dir) :-
    atom_concat(Dir, '/*', Glob),
    expand_file_name(Glob, Entries),
    maplist(dotest, Entries).
%% dotest(+File)
%
% Run test/1 on File if its extension is sgml, xml or html; succeed
% silently for any other file.  Once File is recognised as a test
% document, a failing test/1 makes dotest/1 fail as well.
dotest(File) :-
    (   file_name_extension(_, Ext, File),
        memberchk(Ext, [sgml, xml, html])
    ->  test(File)
    ;   true
    ).
%% test(+File)
%
% Parse File and compare the result with the recorded answer in its
% ok file (see okfile/2).  Both the parsed term and the list of
% collected parser messages are compared; every mismatch is printed
% together with the expected and the actual value.  If there is no ok
% file, the collected messages and the parsed term are just shown.
%
% NOTE(review): pp/1 is not defined in this file; it is assumed to be
% available from elsewhere -- confirm.
test(File) :-
    format('~NTest ~w ... ', [File]),
    flush_output,
    load_file(File, DOM),
    ground(DOM),                        % make sure
    okfile(File, OkFile),
    (   exists_file(OkFile)
    ->  check_against_ok(OkFile, DOM)
    ;   show_errors,
        format('Loaded, no validating data~n'),
        pp(DOM)
    ).

% check_against_ok(+OkFile, +DOM)
%
% Compare DOM and the currently collected messages with the content
% of OkFile and report the outcome.
check_against_ok(OkFile, DOM) :-
    load_prolog_file(OkFile, DOMOk, ErrorsOk),
    report_dom(DOM, DOMOk),
    error_terms(Errors),
    report_errors(Errors, ErrorsOk),
    nl.

% report_dom(+DOM, +DOMOk)
%
% Print `ok' if the two terms are equivalent, else print both.
report_dom(DOM, DOMOk) :-
    (   compare_dom(DOM, DOMOk)
    ->  format('ok')
    ;   format('WRONG'),
        format('~NOK:~n'),
        pp(DOMOk),
        format('~NANSWER:~n'),
        pp(DOM)
    ).

% report_errors(+Errors, +ErrorsOk)
%
% Stay silent if the message lists agree, else print both.
report_errors(Errors, ErrorsOk) :-
    (   compare_errors(Errors, ErrorsOk)
    ->  true
    ;   format(' [Different errors]~nOK:~n'),
        pp(ErrorsOk),
        format('~NANSWER:~n'),
        pp(Errors)
    ).
%% show(+File)
%
% Parse File with load_file/2 and print the resulting term with pp/1.
show(File) :-
    load_file(File, DOM),
    pp(DOM).
%% pass(+File)
%
% Accept the current parser output for File as correct: parse File
% and write the resulting term, followed by the list of collected
% parser messages, to the ok file of File (see okfile/2).
%
% The ok file is written as UTF-8 because load_prolog_file/3 reads it
% back with encoding(utf8); writing it in the default encoding would
% corrupt non-ASCII answers.  The stream is closed even if writing
% fails or raises.
pass(File) :-
    load_file(File, Term),
    okfile(File, OkFile),
    error_terms(Errors),
    open(OkFile, write, Fd, [encoding(utf8)]),
    call_cleanup(( format(Fd, '~q.~n', [Term]),
                   format(Fd, '~q.~n', [Errors])
                 ),
                 close(Fd)).
% error(Term, Kind, Lines)
%
% Store of the parser messages intercepted by the hook below.
:- dynamic error/3.
:- multifile user:message_hook/3.

% Intercept every message of the form sgml(_,_,_,_) and record it in
% error/3.  The hook succeeds for these messages and fails for all
% others.
user:message_hook(sgml(Parser, File, Line, Msg), Kind, Lines) :-
    assertz(error(sgml(Parser, File, Line, Msg), Kind, Lines)).
%% show_errors
%
% Print all messages recorded in error/3 on user_error, each one
% prefixed with its kind followed by ': '.  Always succeeds.
show_errors :-
    forall(error(_Term, Kind, Lines),
           ignore(( atom_concat(Kind, ': ', Prefix),
                    print_message_lines(user_error, Prefix, Lines)
                  ))).
%% error_terms(-Errors)
%
% Errors is the list of message terms currently recorded in error/3,
% in the order in which they are stored.
error_terms(Errors) :-
    findall(Message, error(Message, _Kind, _Lines), Errors).
%% compare_errors(+Errors, +ErrorsOk)
%
% True if both lists have the same length and corresponding messages
% agree on line number and message text.  The parser handle and the
% file name are ignored.
compare_errors(Errors, ErrorsOk) :-
    maplist(same_error, Errors, ErrorsOk).

% same_error(+Error, +ErrorOk)
%
% Two sgml/4 messages that share line number and text.
same_error(sgml(_Parser1, _File1, Line, Msg),
           sgml(_Parser2, _File2, Line, Msg)).
%% load_file(+Spec, -Term)
%
% Parse a document and unify Term with the result.  Spec is either a
% file name that carries one of the extensions listed by load_pred/2,
% or a base name to which one of these extensions is added, provided
% the resulting file exists.  Messages recorded in error/3 by an
% earlier run are discarded before the parser is called.
load_file(Spec, Term) :-
    document_loader(Spec, File, Loader), !,
    retractall(error(_,_,_)),
    call(Loader, File, Term).

% document_loader(+Spec, -File, -Loader)
%
% Find the File to read and the predicate to read it with.  A Spec
% that already has a known extension is tried first.
document_loader(File, File, Loader) :-
    load_pred(Ext, Loader),
    file_name_extension(_, Ext, File).
document_loader(Base, File, Loader) :-
    load_pred(Ext, Loader),
    file_name_extension(Base, Ext, File),
    exists_file(File).

% load_pred(?Ext, ?Pred)
%
% Pred is the predicate that is called as call(Pred, File, Term) to
% load a file with extension Ext.
load_pred(sgml, load_sgml_file).
load_pred(xml,  load_xml_file).
load_pred(html, load_html_file).
%% okfile(+File, -OkFile)
%
% OkFile is the file holding the recorded answer for File: the file
% <name>.ok in the `ok' subdirectory of the directory of File, where
% <name> is the base name of File without its extension.
%
% Only the local name is appended after `/ok/'.  Appending the whole
% extension-less path, directory included, would turn sub/x.sgml into
% sub/ok/sub/x.ok.
okfile(File, OkFile) :-
    file_name_extension(Base, _, File),
    file_directory_name(Base, Dir),
    file_base_name(Base, Name),
    concat_atom([Dir, '/ok/', Name, '.ok'], OkFile).
%% load_prolog_file(+File, -Term, -Errors)
%
% Read the recorded answer from File, which is read as UTF-8.  Term
% is the first term in the file.  Errors is the second term, or [] if
% the file holds only one term.
%
% The stream is closed by call_cleanup/2, so it is also released when
% read/2 raises (e.g. a syntax error in the ok file) or fails.
load_prolog_file(File, Term, Errors) :-
    open(File, read, Fd,
         [ encoding(utf8)
         ]),
    call_cleanup(read_answer(Fd, Term, Errors),
                 close(Fd)).

% read_answer(+Stream, -Term, -Errors)
%
% Read the answer term and the optional list of expected messages.
read_answer(Fd, Term, Errors) :-
    read(Fd, Term),
    read(Fd, Second),
    (   Second == end_of_file           % old ok file: no message list
    ->  Errors = []
    ;   Errors = Second
    ).
%% compare_dom(+DOM1, +DOM2)
%
% True if the two parsed documents are equivalent.  Lists are
% compared element by element, element/3 terms must have the same
% name, equivalent attributes and equivalent content, and anything
% else must unify.
compare_dom(Dom1, Dom2) :-
    (   Dom1 = [], Dom2 = []
    ->  true
    ;   Dom1 = [H1|T1], Dom2 = [H2|T2]
    ->  compare_dom(H1, H2),
        compare_dom(T1, T2)
    ;   Dom1 = Dom2
    ->  true
    ;   Dom1 = element(Name, Atts1, Content1),
        Dom2 = element(Name, Atts2, Content2),
        compare_attributes(Atts1, Atts2),
        compare_dom(Content1, Content2)
    ).

%% compare_attributes(+Atts1, +Atts2)
%
% True if both attribute lists are identical after sorting, i.e. the
% order in which attributes appear is irrelevant.
compare_attributes(Atts1, Atts2) :-
    sort(Atts1, Sorted1),
    sort(Atts2, Sorted2),
    Sorted1 == Sorted2.

View File

@@ -0,0 +1,9 @@
<!DOCTYPE ugh [
<!ELEMENT ugh O O (extension+)>
<!ELEMENT extension - O (#PCDATA)>
<!ATTLIST extension exchange NUTOKEN 479 missing (missing) #CONREF>
]>
<extension missing>
<extension>8494
<extension exchange=555>1234
<extension exchange=03-555>1234

View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<testdoc id="t7-20020923" resp="MSM">
<names>From Española -- a test for you.</names>
<nums>From Española -- a test for you.</nums>
<names>From Española -- a test for you.</names>
<nums>From Española -- a test for you.</nums>
</testdoc>

View File

@@ -0,0 +1,3 @@
<?xml version="1.0" encoding="utf-8"?>
<Человек язык="мова">Borys</Человек>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- The ü below is a single character #xFC in NFC
(encoded as two UTF-8 octets #xC3 #xBC) -->
<utf8>
<name>Dürst</name>
<name name="Dürst"/>
</utf8>

View File

@@ -0,0 +1,11 @@
<?xml version="1.0"?>
<!DOCTYPE test [
<!ENTITY ok CDATA "&#10003">
]>
<test>
<li>Some cyrillic chars: &#1080;&#1081;</li>
<li>This a an sizzors symbol: &#9988;</li>
<li>OK, entered via entity: &ok;</li>
<a att="Cirle with 1: &#10112;" ok="&ok;"/>
</test>

View File

@@ -0,0 +1,16 @@
<?xml version="1.0"?>
<test>
<p>
This is a long test holding, with the intention to switch to wide character
encoding from the local buffer to malloc'ed buffer. This (&#9996;) is a
piece symbol.
</p>
<p>
This is a long test holding, with the intention to switch to wide character
encoding after the output buffer has been switched to malloc'ed mode. For this
reason, our buffer should should hold more than 256 character, as defined in
util.h in the structure ocharbuf. This (&#10173;) is a fat arrow.
</p>
</test>

View File

@@ -0,0 +1,241 @@
/* $Id$
Part of SWI-Prolog SGML/XML parser
Author: Jan Wielemaker
E-mail: jan@swi.psy.uva.nl
WWW: http://www.swi.psy.uva.nl/projects/SWI-Prolog/
Copying: LGPL-2. See the file COPYING or http://www.gnu.org
Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
*/
% Make the directory this file is loaded from the working directory.
:- prolog_load_context(directory, CWD),
working_directory(_, CWD).
% Put '..' at the front of the `foreign' and `library' search paths
% before the libraries below are loaded.
:- asserta(file_search_path(foreign, '..')).
:- asserta(file_search_path(library, '..')).
% Parser and writer under test.
:- use_module(library(sgml)).
:- use_module(library(sgml_write)).
%% test
%
% Default test: run the round-trip check fp/1 on the current
% directory.
test :-                                 % default test
    fp('.').

%% test(+File)
%
% Load File with the parser that belongs to its extension (xml, sgml
% or html) and write the resulting term to user_output with the
% matching writer.  Fails for any other extension.
test(File) :-
    test_handlers(Ext, Load, Write),
    file_name_extension(_, Ext, File), !,
    call(Load, File, Term),
    call(Write, user_output, Term, []).

% test_handlers(?Ext, ?Load, ?Write)
%
% Loader and writer used by test/1 for files with extension Ext.
test_handlers(xml,  load_xml_file,  xml_write).
test_handlers(sgml, load_sgml_file, sgml_write).
test_handlers(html, load_html_file, html_write).
%% test(+File, +Into, +Encoding)
%
% Load the XML file File and write the resulting term to the file
% Into, which is opened with the given Encoding.  Fails if File does
% not have the extension xml.
%
% The output stream is closed by call_cleanup/2, so it is released as
% well when the writer fails or raises.
test(File, Into, Encoding) :-
    file_name_extension(_, xml, File), !,
    load_xml_file(File, Term),
    open(Into, write, Out, [encoding(Encoding)]),
    call_cleanup(xml_write(Out, Term, []),
                 close(Out)).
%% fp(+Dir)
%
% Run the write/read round-trip check on every markup file in Dir
% that is not blocked/1.  Progress is reported on user_error.  A file
% for which the check fails is simply left without the trailing
% ` done'; fp/1 itself always succeeds.
fp(Dir) :-
    atom_concat(Dir, '/*', Pattern),
    expand_file_name(Pattern, Files),
    forall(( member(File, Files),
             fp_candidate(File, Base)
           ),
           ignore(fp_file(File, Base))).

% fp_candidate(+File, -Base)
%
% File is a markup file whose base name Base is not blocked.
fp_candidate(File, Base) :-
    file_name_extension(_, Ext, File),
    ml_file(Ext),
    file_base_name(File, Base),
    \+ blocked(Base).

% fp_file(+File, +Base)
%
% Check File using ISO Latin-1 (unless it requires UTF-8) and then
% using UTF-8.
fp_file(File, Base) :-
    format(user_error, '~w ... ', [Base]),
    (   utf8(Base)
    ->  true
    ;   format(user_error, ' (ISO Latin-1) ... ', []),
        fixed_point(File, iso_latin_1)
    ),
    format(user_error, ' (UTF-8) ... ', []),
    fixed_point(File, utf8),
    format(user_error, ' done~n', []).

% ml_file(?Ext)
%
% Ext is the extension of a markup file handled by fp/1.
ml_file(xml).
ml_file(sgml).
ml_file(html).
%% blocked(+File)
%
% True if File, the base name of a test file, must be skipped by the
% round-trip check fp/1.  These are either negative tests or tests
% involving SDATA.
blocked('bat.sgml').
blocked('i.sgml').
blocked('sdata.sgml').
blocked('cent-nul.xml').
blocked('defent.sgml').
blocked('comment.xml').
blocked('badxmlent.xml').
%% utf8(+File)
%
% True if File, the base name of a test file, requires UTF-8.  These
% are files that have UTF-8 characters in element or attribute names.
% fp/1 skips the ISO Latin-1 round trip for them.
utf8('utf8-ru.xml').
%% fixed_point(+File, +Encoding)
%
% Perform a write/read round trip on File, writing the intermediate
% file in Encoding, and validate that the data has not changed.  The
% loader and writer are selected from the extension of File; the
% predicate fails for an extension other than xml, sgml or html.
fixed_point(File, Encoding) :-
    fixed_point_handlers(Ext, Load, Write),
    file_name_extension(_, Ext, File), !,
    fp(File, Encoding, Load, Write).

% fixed_point_handlers(?Ext, ?Load, ?Write)
%
% Loader and writer used for the round trip of files with extension
% Ext.
fixed_point_handlers(xml,  load_xml_file,  xml_write).
fixed_point_handlers(sgml, load_sgml_file, sgml_write).
fixed_point_handlers(html, load_html_file, html_write).
%% fp(+File, +Encoding, :Load, :Write)
%
% Load File with Load, write the term to a temporary file in Encoding
% using Write, load the temporary file again and compare both terms
% with eq/2.  The letters r, w and r are written to user_error as the
% three steps start.  If eq/2 does not hold, the two terms are saved
% in the files f1 and f2 and the predicate fails.
fp(File, Encoding, Load, Write) :-
    put_char(user_error, r),
    call(Load, File, Original),
    tmp_file(xml, TmpFile),
    fp_write(TmpFile, Encoding, Write, Original),
    % For debugging, the temporary file can be shown here using
    % cat(TmpFile, Encoding).
    put_char(user_error, r),
    call(Load, TmpFile, Reloaded),
    delete_file(TmpFile),
    fp_verdict(Original, Reloaded).

% fp_write(+TmpFile, +Encoding, :Write, +Term)
%
% Write Term to TmpFile in Encoding.
fp_write(TmpFile, Encoding, Write, Term) :-
    open(TmpFile, write, TmpOut, [encoding(Encoding)]),
    put_char(user_error, w),
    call(Write, TmpOut, Term, []),
    close(TmpOut).

% fp_verdict(+Term, +Term2)
%
% Succeed if both terms are equivalent; otherwise save them for
% inspection and fail.
fp_verdict(Term, Term2) :-
    eq(Term, Term2), !.
fp_verdict(Term, Term2) :-
    format(user_error, 'First file:~n', []),
    save_in_file(f1, Term),
    format(user_error, 'Second file:~n', []),
    save_in_file(f2, Term2),
    fail.
%% save_in_file(+File, +Term)
%
% Print Term with pp/1 into File, which is written as ISO Latin-1.
% pp/1 prints on the current output, so that is temporarily
% redirected to the file.
%
% The cleanup handler restores the previous current output and closes
% the file even if pp/1 fails or raises; otherwise all later output
% would silently end up in File.
%
% NOTE(review): pp/1 is not defined in this file; it is assumed to be
% available from elsewhere -- confirm.
save_in_file(File, Term) :-
    open(File, write, Out, [encoding(iso_latin_1)]),
    current_output(C0),
    call_cleanup(( set_output(Out),
                   once(pp(Term))
                 ),
                 ( set_output(C0),
                   close(Out)
                 )).
%% cat(+File, +Encoding)
%
% Copy the content of File, read using Encoding, to the current
% output.  Debugging aid for fp/4.
%
% The current output stream is obtained with current_output/1:
% `current_output' is the name of that predicate, not a stream alias
% that can be handed to copy_stream_data/2.  The input stream is
% closed even if copying raises.
cat(File, Encoding) :-
    open(File, read, In, [encoding(Encoding)]),
    current_output(Out),
    call_cleanup(copy_stream_data(In, Out),
                 close(In)).
%% eq(+M1, +M2)
%
% Test two terms for equivalence.  The following mismatches are
% allowed:
%
%   * Order of attributes
%   * Layout in `element-only' content
%   * Blank atoms in lists and runs of blank space inside atoms
%
% A remaining difference is reported on user_error, with M1 shown
% first and M2 second.  NOTE: after reporting, the predicate still
% succeeds.
eq(X, X) :- !.
eq([], []) :- !.
eq([B|T], L) :-                         % delete blanks on the left
    blank_atom(B), !,
    eq(T, L).
eq(L, [B|T]) :-                         % delete blanks on the right
    blank_atom(B), !,
    eq(L, T),                           % keep the argument order so
    true.                               % reports show M1 before M2
eq([H1|T1], [H2|T2]) :- !,
    eq(H1, H2),
    eq(T1, T2).
eq(element(Name, A1, C1), element(Name, A2, C2)) :-
    att_eq(A1, A2),
    ceq(C1, C2).
eq(A1, A2) :-                           % text: compare modulo blanks
    atom(A1),
    atom(A2), !,
    normalise_blanks(A1, B1),
    normalise_blanks(A2, B2),
    (   B1 == B2
    ->  true
    ;   format(user_error,
               'ERROR: CDATA differs:~n\t~p~n\t~p~n',
               [B1, B2])
    ).
eq(X, Y) :-                             % anything else is a mismatch
    format(user_error,
           'ERROR: Content differs:~n\t~p~n\t~p~n',
           [X, Y]).
%% att_eq(+A1, +A2)
%
% Compare two attribute lists, ignoring the order of the attributes.
% A difference is reported on user_error; the predicate succeeds in
% either case.
att_eq(A1, A2) :-
    (   sort(A1, Sorted),               % ordering is unimportant
        sort(A2, Sorted)
    ->  true
    ;   format(user_error,
               'ERROR: Attribute lists differ:~n\t~p~n\t~p~n',
               [A1, A2])
    ).
%% ceq(+C1, +C2)
%
% Compare the content of two elements.  If both content lists consist
% of elements and blank atoms only, the blank atoms are removed
% before comparing with eq/2; otherwise the lists are compared as
% they are.
ceq(C1, C2) :-
    (   element_content(C1, E1),
        element_content(C2, E2)
    ->  eq(E1, E2)
    ;   eq(C1, C2)
    ).
%% element_content(+Content, -Elements)
%
% True if Content holds only element/3 terms and blank atoms.
% Elements is Content with the blank atoms removed.  Fails if Content
% holds anything else.
element_content([], []).
element_content([Node|Nodes], Elements) :-
    (   Node = element(_, _, _)
    ->  Elements = [Node|Rest],
        element_content(Nodes, Rest)
    ;   blank_atom(Node),
        element_content(Nodes, Elements)
    ).
%% blank_atom(+Term)
%
% True if Term is an atom that consists of white space only.  The
% empty atom qualifies.
blank_atom(Term) :-
    atom(Term),
    atom_codes(Term, Codes),
    all_blanks(Codes).

%% all_blanks(+Codes)
%
% True if every character code in Codes is white space.
all_blanks(Codes) :-
    forall(member(Code, Codes),
           code_type(Code, space)).
%% normalise_blanks(+Atom, -Normalised)
%
% Normalised is Atom with leading and trailing white space removed
% and every other run of white space replaced by a single space.
normalise_blanks(Atom, Normalised) :-
    atom_codes(Atom, Codes),
    eat_blanks(Codes, Trimmed),
    normalise_blanks2(Trimmed, NormalisedCodes),
    atom_codes(Normalised, NormalisedCodes).

% normalise_blanks2(+Codes, -Normalised)
%
% Collapse each run of white space into one space (code 32); a run at
% the very end is dropped.
normalise_blanks2([], []).
normalise_blanks2([Code|Codes], Normalised) :-
    (   code_type(Code, space)
    ->  eat_blanks(Codes, Rest),
        (   Rest == []
        ->  Normalised = []
        ;   Normalised = [32|Tail],
            normalise_blanks2(Rest, Tail)
        )
    ;   Normalised = [Code|Tail],
        normalise_blanks2(Codes, Tail)
    ).

% eat_blanks(+Codes, -Rest)
%
% Rest is Codes without its leading white space.
eat_blanks(Codes, Rest) :-
    (   Codes = [Code|Codes1],
        code_type(Code, space)
    ->  eat_blanks(Codes1, Rest)
    ;   Rest = Codes
    ).