fix some encoding stuff and add some documentation.
git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1863 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
This commit is contained in:
parent
917c777381
commit
35174e0901
90
C/iopreds.c
90
C/iopreds.c
@ -1740,6 +1740,40 @@ get_wchar(int sno)
|
|||||||
#define MB_LEN_MAX 6
|
#define MB_LEN_MAX 6
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
handle_write_encoding_error(int sno, wchar_t ch)
|
||||||
|
{
|
||||||
|
if (Stream[sno].status & RepError_Xml_f) {
|
||||||
|
/* use HTML/XML encoding in ASCII */
|
||||||
|
int i = ch, digits = 1;
|
||||||
|
Stream[sno].stream_putc(sno, '&');
|
||||||
|
Stream[sno].stream_putc(sno, '#');
|
||||||
|
while (digits < i)
|
||||||
|
digits *= 10;
|
||||||
|
if (digits > i)
|
||||||
|
digits /= 10;
|
||||||
|
while (i) {
|
||||||
|
Stream[sno].stream_putc(sno, i/digits);
|
||||||
|
i %= 10;
|
||||||
|
digits /= 10;
|
||||||
|
}
|
||||||
|
Stream[sno].stream_putc(sno, ';');
|
||||||
|
return ch;
|
||||||
|
} else if (Stream[sno].status & RepError_Prolog_f) {
|
||||||
|
/* write quoted */
|
||||||
|
Stream[sno].stream_putc(sno, '\\');
|
||||||
|
Stream[sno].stream_putc(sno, 'u');
|
||||||
|
Stream[sno].stream_putc(sno, ch>>24);
|
||||||
|
Stream[sno].stream_putc(sno, 256&(ch>>16));
|
||||||
|
Stream[sno].stream_putc(sno, 256&(ch>>8));
|
||||||
|
Stream[sno].stream_putc(sno, 256&ch);
|
||||||
|
return ch;
|
||||||
|
} else {
|
||||||
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, MkIntegerTerm(ch),"charater %ld cannot be encoded in stream %d",(unsigned long int)ch,sno);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
put_wchar(int sno, wchar_t ch)
|
put_wchar(int sno, wchar_t ch)
|
||||||
{
|
{
|
||||||
@ -1750,12 +1784,12 @@ put_wchar(int sno, wchar_t ch)
|
|||||||
return Stream[sno].stream_putc(sno, ch);
|
return Stream[sno].stream_putc(sno, ch);
|
||||||
case ENC_ISO_LATIN1:
|
case ENC_ISO_LATIN1:
|
||||||
if (ch >= 0xff) {
|
if (ch >= 0xff) {
|
||||||
/* error */
|
return handle_write_encoding_error(sno,ch);
|
||||||
}
|
}
|
||||||
return Stream[sno].stream_putc(sno, ch);
|
return Stream[sno].stream_putc(sno, ch);
|
||||||
case ENC_ISO_ASCII:
|
case ENC_ISO_ASCII:
|
||||||
if (ch >= 0x80) {
|
if (ch >= 0x80) {
|
||||||
/* error */
|
return handle_write_encoding_error(sno,ch);
|
||||||
}
|
}
|
||||||
return Stream[sno].stream_putc(sno, ch);
|
return Stream[sno].stream_putc(sno, ch);
|
||||||
case ENC_ISO_ANSI:
|
case ENC_ISO_ANSI:
|
||||||
@ -2264,6 +2298,12 @@ p_open (void)
|
|||||||
if (opts & 256) {
|
if (opts & 256) {
|
||||||
avoid_bom = TRUE;
|
avoid_bom = TRUE;
|
||||||
}
|
}
|
||||||
|
if (opts & 512) {
|
||||||
|
st->status |= RepError_Prolog_f;
|
||||||
|
}
|
||||||
|
if (opts & 1024) {
|
||||||
|
st->status |= RepError_Xml_f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
st->stream_wgetc = get_wchar;
|
st->stream_wgetc = get_wchar;
|
||||||
if (CharConversionTable != NULL)
|
if (CharConversionTable != NULL)
|
||||||
@ -3308,12 +3348,45 @@ p_set_output (void)
|
|||||||
static Int
|
static Int
|
||||||
p_has_bom (void)
|
p_has_bom (void)
|
||||||
{ /* '$set_output'(+Stream,-ErrorMessage) */
|
{ /* '$set_output'(+Stream,-ErrorMessage) */
|
||||||
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has?bom/1");
|
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "has_bom/1");
|
||||||
if (sno < 0)
|
if (sno < 0)
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
return ((Stream[sno].status & HAS_BOM_f));
|
return ((Stream[sno].status & HAS_BOM_f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Int
|
||||||
|
p_representation_error (void)
|
||||||
|
{ /* '$set_output'(+Stream,-ErrorMessage) */
|
||||||
|
Int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "representation_errors/1");
|
||||||
|
if (sno < 0)
|
||||||
|
return (FALSE);
|
||||||
|
Term t = Deref(ARG2);
|
||||||
|
|
||||||
|
if (IsVarTerm(t)) {
|
||||||
|
if (Stream[sno].status & RepError_Prolog_f) {
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(512));
|
||||||
|
}
|
||||||
|
if (Stream[sno].status & RepError_Xml_f) {
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(1024));
|
||||||
|
}
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(0));
|
||||||
|
} else {
|
||||||
|
Int i = IntegerOfTerm(t);
|
||||||
|
switch (i) {
|
||||||
|
case 512:
|
||||||
|
Stream[sno].status &= ~RepError_Xml_f;
|
||||||
|
Stream[sno].status |= RepError_Prolog_f;
|
||||||
|
break;
|
||||||
|
case 1024:
|
||||||
|
Stream[sno].status &= ~RepError_Prolog_f;
|
||||||
|
Stream[sno].status |= RepError_Xml_f;
|
||||||
|
default:
|
||||||
|
Stream[sno].status &= ~(RepError_Prolog_f|RepError_Xml_f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
static Int
|
static Int
|
||||||
p_current_input (void)
|
p_current_input (void)
|
||||||
{ /* current_input(?Stream) */
|
{ /* current_input(?Stream) */
|
||||||
@ -5698,11 +5771,15 @@ p_get_default_encoding(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static Int
|
static Int
|
||||||
p_set_encoding (void)
|
p_encoding (void)
|
||||||
{ /* '$set_encoding'(Stream,N) */
|
{ /* '$encoding'(Stream,N) */
|
||||||
int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "encoding/2");
|
int sno = CheckStream (ARG1, Input_Stream_f|Output_Stream_f, "encoding/2");
|
||||||
|
Term t = Deref(ARG2);
|
||||||
if (sno < 0)
|
if (sno < 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
if (IsVarTerm(t)) {
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(Stream[sno].encoding));
|
||||||
|
}
|
||||||
Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
|
Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
|
||||||
UNLOCK(Stream[sno].streamlock);
|
UNLOCK(Stream[sno].streamlock);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
@ -5829,6 +5906,7 @@ Yap_InitIOPreds(void)
|
|||||||
Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag),
|
Yap_InitCPred ("$peek", 2, p_peek, SafePredFlag|SyncPredFlag),
|
||||||
Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag),
|
Yap_InitCPred ("$peek_byte", 2, p_peek_byte, SafePredFlag|SyncPredFlag),
|
||||||
Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag);
|
Yap_InitCPred ("$has_bom", 1, p_has_bom, SafePredFlag);
|
||||||
|
Yap_InitCPred ("$stream_representation_error", 2, p_representation_error, SafePredFlag|SyncPredFlag);
|
||||||
Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag);
|
Yap_InitCPred ("current_input", 1, p_current_input, SafePredFlag|SyncPredFlag);
|
||||||
Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag);
|
Yap_InitCPred ("current_output", 1, p_current_output, SafePredFlag|SyncPredFlag);
|
||||||
Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag);
|
Yap_InitCPred ("prompt", 1, p_setprompt, SafePredFlag|SyncPredFlag);
|
||||||
@ -5849,7 +5927,7 @@ Yap_InitIOPreds(void)
|
|||||||
Yap_InitCPred ("$fetch_stream_alias", 2, p_fetch_stream_alias, SafePredFlag|SyncPredFlag|HiddenPredFlag);
|
Yap_InitCPred ("$fetch_stream_alias", 2, p_fetch_stream_alias, SafePredFlag|SyncPredFlag|HiddenPredFlag);
|
||||||
Yap_InitCPred ("$stream", 1, p_stream, SafePredFlag|TestPredFlag);
|
Yap_InitCPred ("$stream", 1, p_stream, SafePredFlag|TestPredFlag);
|
||||||
Yap_InitCPred ("$get_default_encoding", 1, p_get_default_encoding, SafePredFlag|TestPredFlag);
|
Yap_InitCPred ("$get_default_encoding", 1, p_get_default_encoding, SafePredFlag|TestPredFlag);
|
||||||
Yap_InitCPred ("$set_encoding", 2, p_set_encoding, SafePredFlag|TestPredFlag),
|
Yap_InitCPred ("$encoding", 2, p_encoding, SafePredFlag|SyncPredFlag),
|
||||||
#if HAVE_SELECT
|
#if HAVE_SELECT
|
||||||
Yap_InitCPred ("stream_select", 3, p_stream_select, SafePredFlag|SyncPredFlag);
|
Yap_InitCPred ("stream_select", 3, p_stream_select, SafePredFlag|SyncPredFlag);
|
||||||
#endif
|
#endif
|
||||||
|
@ -123,6 +123,8 @@ StreamDesc;
|
|||||||
#define Popen_Stream_f 0x080000
|
#define Popen_Stream_f 0x080000
|
||||||
#define User_Stream_f 0x100000
|
#define User_Stream_f 0x100000
|
||||||
#define HAS_BOM_f 0x200000
|
#define HAS_BOM_f 0x200000
|
||||||
|
#define RepError_Prolog_f 0x400000
|
||||||
|
#define RepError_Xml_f 0x800000
|
||||||
|
|
||||||
#define StdInStream 0
|
#define StdInStream 0
|
||||||
#define StdOutStream 1
|
#define StdOutStream 1
|
||||||
|
197
docs/yap.tex
197
docs/yap.tex
@ -138,6 +138,7 @@ Subnodes of Running
|
|||||||
Subnodes of Syntax
|
Subnodes of Syntax
|
||||||
* Formal Syntax:: Syntax of Terms
|
* Formal Syntax:: Syntax of Terms
|
||||||
* Tokens:: Syntax of Prolog tokens
|
* Tokens:: Syntax of Prolog tokens
|
||||||
|
* Encoding:: How characters are encoded and Wide Character Support
|
||||||
|
|
||||||
Subnodes of Tokens
|
Subnodes of Tokens
|
||||||
* Numbers:: Integer and Floating-Point Numbers
|
* Numbers:: Integer and Floating-Point Numbers
|
||||||
@ -151,6 +152,10 @@ Subnodes of Numbers
|
|||||||
* Integers:: How Integers are read and represented
|
* Integers:: How Integers are read and represented
|
||||||
* Floats:: Floating Point Numbers
|
* Floats:: Floating Point Numbers
|
||||||
|
|
||||||
|
Subnodes of Encoding
|
||||||
|
* Stream Encoding:: How Prolog Streams can be coded
|
||||||
|
* BOM:: The Byte Order Mark
|
||||||
|
|
||||||
Subnodes of Loading Programs
|
Subnodes of Loading Programs
|
||||||
* Compiling:: Program Loading and Updating
|
* Compiling:: Program Loading and Updating
|
||||||
* Setting the Compiler:: Changing the compiler's parameters
|
* Setting the Compiler:: Changing the compiler's parameters
|
||||||
@ -1029,6 +1034,7 @@ built.
|
|||||||
@menu
|
@menu
|
||||||
* Formal Syntax:: Syntax of terms
|
* Formal Syntax:: Syntax of terms
|
||||||
* Tokens:: Syntax of Prolog tokens
|
* Tokens:: Syntax of Prolog tokens
|
||||||
|
* Encoding:: How characters are encoded and Wide Character Support
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
@node Formal Syntax, Tokens, ,Syntax
|
@node Formal Syntax, Tokens, ,Syntax
|
||||||
@ -1116,7 +1122,7 @@ dot with single quotes.
|
|||||||
|
|
||||||
@end itemize
|
@end itemize
|
||||||
|
|
||||||
@node Tokens, , Formal Syntax, Syntax
|
@node Tokens, Encoding, Formal Syntax, Syntax
|
||||||
@section Prolog Tokens
|
@section Prolog Tokens
|
||||||
@cindex token
|
@cindex token
|
||||||
|
|
||||||
@ -1362,6 +1368,159 @@ layout characters, the YAP parser behaves as if it had found a
|
|||||||
single blank character. The end of a file also counts as a blank
|
single blank character. The end of a file also counts as a blank
|
||||||
character for this purpose.
|
character for this purpose.
|
||||||
|
|
||||||
|
@node Encoding, , Tokens, Syntax
|
||||||
|
@section Wide Character Support
|
||||||
|
@cindex encodings
|
||||||
|
|
||||||
|
@menu
|
||||||
|
* Stream Encoding:: How Prolog Streams can be coded
|
||||||
|
* BOM:: The Byte Order Mark
|
||||||
|
@end menu
|
||||||
|
|
||||||
|
@cindex UTF-8
|
||||||
|
@cindex Unicode
|
||||||
|
@cindex UCS
|
||||||
|
@cindex internationalization
|
||||||
|
YAP now implements a SWI-Prolog compatible interface to wide
|
||||||
|
characters and the Universal Character Set (UCS). The following text
|
||||||
|
was adapted from the SWI-Prolog manual.
|
||||||
|
|
||||||
|
YAP now supports wide characters, characters with character
|
||||||
|
codes above 255 that cannot be represented in a single byte.
|
||||||
|
@emph{Universal Character Set} (UCS) is the ISO/IEC 10646 standard
|
||||||
|
that specifies a unique 31-bit unsigned integer for any character in
|
||||||
|
any language. It is a superset of 16-bit Unicode, which in turn is
|
||||||
|
a superset of ISO 8859-1 (ISO Latin-1), a superset of US-ASCII. UCS
|
||||||
|
can handle strings holding characters from multiple languages and
|
||||||
|
character classification (uppercase, lowercase, digit, etc.) and
|
||||||
|
operations such as case-conversion are unambiguously defined.
|
||||||
|
|
||||||
|
For this reason YAP, following SWI-Prolog, has two representations for
|
||||||
|
atoms. If the text fits in ISO Latin-1, it is represented as an array
|
||||||
|
of 8-bit characters. Otherwise the text is represented as an array of
|
||||||
|
wide chars, which may take 16 or 32 bits. This representational issue
|
||||||
|
is completely transparent to the Prolog user. Users of the foreign
|
||||||
|
language interface sometimes need to be aware of these issues though.
|
||||||
|
|
||||||
|
Character coding comes into view when characters of strings need to be
|
||||||
|
read from or written to file or when they have to be communicated to
|
||||||
|
other software components using the foreign language interface. In this
|
||||||
|
section we only deal with I/O through streams, which includes file I/O
|
||||||
|
as well as I/O through network sockets.
|
||||||
|
|
||||||
|
|
||||||
|
@node Stream Encoding, BOM, , Encoding
|
||||||
|
@subsection Wide character encodings on streams
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Although characters are uniquely coded using the UCS standard
|
||||||
|
internally, streams and files are byte (8-bit) oriented and there are a
|
||||||
|
variety of ways to represent the larger UCS codes in an 8-bit octet
|
||||||
|
stream. The most popular one, especially in the context of the web, is
|
||||||
|
UTF-8. Bytes 0...127 represent simply the corresponding US-ASCII
|
||||||
|
character, while bytes 128...255 are used for multi-byte
|
||||||
|
encoding of characters placed higher in the UCS space. Especially on
|
||||||
|
MS-Windows the 16-bit Unicode standard, represented by pairs of bytes, is
|
||||||
|
also popular.
|
||||||
|
|
||||||
|
Prolog I/O streams have a property called @emph{encoding} which
|
||||||
|
specifies the encoding used, which influences @code{get_code/2} and
|
||||||
|
@code{put_code/2} as well as all the other text I/O predicates.
|
||||||
|
|
||||||
|
The default encoding for files is derived from the Prolog flag
|
||||||
|
@code{encoding}, which is initialised from the environment. If the
|
||||||
|
environment variable @env{LANG} ends in "UTF-8", this encoding is
|
||||||
|
assumed. Otherwise the default is @code{text} and the translation is
|
||||||
|
left to the wide-character functions of the C-library (note that the
|
||||||
|
Prolog native UTF-8 mode is considerably faster than the generic
|
||||||
|
mbrtowc() one). The encoding can be specified explicitly in
|
||||||
|
@code{load_files/2} for loading Prolog source with an alternative
|
||||||
|
encoding, @code{open/4} when opening files or using set_stream/2 on
|
||||||
|
any open stream (not yet implemented). For Prolog source files we also
|
||||||
|
provide the @code{encoding/1} directive that can be used to switch
|
||||||
|
between encodings that are compatible to US-ASCII (@code{ascii},
|
||||||
|
@code{iso_latin_1}, @code{utf8} and many locales).
|
||||||
|
@c See also
|
||||||
|
@c \secref{intsrcfile} for writing Prolog files with non-US-ASCII
|
||||||
|
@c characters and \secref{unicodesyntax} for syntax issues.
|
||||||
|
For
|
||||||
|
additional information and Unicode resources, please visit
|
||||||
|
@uref{http://www.unicode.org/}.
|
||||||
|
|
||||||
|
YAP currently defines and supports the following encodings:
|
||||||
|
|
||||||
|
@table @code
|
||||||
|
@item octet
|
||||||
|
Default encoding for @emph{binary} streams. This causes
|
||||||
|
the stream to be read and written fully untranslated.
|
||||||
|
|
||||||
|
@item ascii
|
||||||
|
7-bit encoding in 8-bit bytes. Equivalent to @code{iso_latin_1},
|
||||||
|
but generates errors and warnings on encountering values above
|
||||||
|
127.
|
||||||
|
|
||||||
|
@item iso_latin_1
|
||||||
|
8-bit encoding supporting many western languages. This causes
|
||||||
|
the stream to be read and written fully untranslated.
|
||||||
|
|
||||||
|
@item text
|
||||||
|
C-library default locale encoding for text files. Files are read and
|
||||||
|
written using the C-library functions @code{mbrtowc()} and
|
||||||
|
@code{wcrtomb()}. This may be the same as one of the other locales,
|
||||||
|
notably it may be the same as @code{iso_latin_1} for western
|
||||||
|
languages and @code{utf8} in a UTF-8 context.
|
||||||
|
|
||||||
|
@item utf8
|
||||||
|
Multi-byte encoding of full UCS, compatible to @code{ascii}.
|
||||||
|
See above.
|
||||||
|
|
||||||
|
@item unicode_be
|
||||||
|
Unicode Big Endian. Reads input in pairs of bytes, most
|
||||||
|
significant byte first. Can only represent 16-bit characters.
|
||||||
|
|
||||||
|
@item unicode_le
|
||||||
|
Unicode Little Endian. Reads input in pairs of bytes, least
|
||||||
|
significant byte first. Can only represent 16-bit characters.
|
||||||
|
@end table
|
||||||
|
|
||||||
|
Note that not all encodings can represent all characters. This implies
|
||||||
|
that writing text to a stream may cause errors because the stream
|
||||||
|
cannot represent these characters. The behaviour of a stream on these
|
||||||
|
errors can be controlled using @code{open/4} or @code{set_stream/2} (not
|
||||||
|
implemented). Initially the terminal streams write the characters using
|
||||||
|
Prolog escape sequences while other streams generate an I/O exception.
|
||||||
|
|
||||||
|
|
||||||
|
@node BOM, , Stream Encoding, Encoding
|
||||||
|
@subsection BOM: Byte Order Mark
|
||||||
|
|
||||||
|
@cindex BOM
|
||||||
|
@cindex Byte Order Mark
|
||||||
|
From @ref{Stream Encoding}, you may have got the impression text-files are
|
||||||
|
complicated. This section deals with a related topic, making life often
|
||||||
|
easier for the user, but providing another worry to the programmer.
|
||||||
|
@strong{BOM} or @emph{Byte Order Marker} is a technique for
|
||||||
|
identifying Unicode text-files as well as the encoding they use. Such
|
||||||
|
files start with the Unicode character @code{0xFEFF}, a non-breaking,
|
||||||
|
zero-width space character. This is a pretty unique sequence that is not
|
||||||
|
likely to be the start of a non-Unicode file and uniquely distinguishes
|
||||||
|
the various Unicode file formats. As it is a zero-width blank, it even
|
||||||
|
doesn't produce any output. This solves all problems, or ...
|
||||||
|
|
||||||
|
Some formats start off as US-ASCII and may contain some encoding mark to
|
||||||
|
switch to UTF-8, such as the @code{encoding="UTF-8"} in an XML header.
|
||||||
|
Such formats often explicitly forbid the use of a UTF-8 BOM. In
|
||||||
|
other cases there is additional information telling the encoding making
|
||||||
|
the use of a BOM redundant or even illegal.
|
||||||
|
|
||||||
|
The BOM is handled by the @code{open/4} predicate. By default, text-files are
|
||||||
|
probed for the BOM when opened for reading. If a BOM is found, the
|
||||||
|
encoding is set accordingly and the property @code{bom(true)} is
|
||||||
|
available through @code{stream_property/2}. When opening a file for
|
||||||
|
writing, writing a BOM can be requested using the option
|
||||||
|
@code{bom(true)} with @code{open/4}.
|
||||||
|
|
||||||
@node Loading Programs, Modules, Syntax, Top
|
@node Loading Programs, Modules, Syntax, Top
|
||||||
@chapter Loading Programs
|
@chapter Loading Programs
|
||||||
|
|
||||||
@ -3381,6 +3540,24 @@ concerning the stream.
|
|||||||
The operation will fail and give an error if the alias name is already
|
The operation will fail and give an error if the alias name is already
|
||||||
in use. YAP allows several aliases for the same file, but only
|
in use. YAP allows several aliases for the same file, but only
|
||||||
one is returned by @code{stream_property/2}
|
one is returned by @code{stream_property/2}
|
||||||
|
|
||||||
|
@item bom(+@var{Bool})
|
||||||
|
If present and @code{true}, a BOM (@emph{Byte Order Mark}) was
|
||||||
|
detected while opening the file for reading or a BOM was written while
|
||||||
|
opening the stream. See @ref{BOM} for details.
|
||||||
|
|
||||||
|
@item encoding(+@var{Encoding})
|
||||||
|
Set the encoding used for text. See @ref{Encoding} for an overview of
|
||||||
|
wide character and encoding issues.
|
||||||
|
|
||||||
|
@item representation_errors(+@var{Mode})
|
||||||
|
Change the behaviour when writing characters to the stream that cannot
|
||||||
|
be represented by the encoding. The behaviour is one of @code{error}
|
||||||
|
(throw an I/O error exception), @code{prolog} (write @code{\u...\}
|
||||||
|
escape code) or @code{xml} (write @code{&#...;} XML character entity).
|
||||||
|
The initial mode is @code{prolog} for the user streams and
|
||||||
|
@code{error} for all other streams. See also @ref{Encoding}.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@item close(+@var{S}) [ISO]
|
@item close(+@var{S}) [ISO]
|
||||||
@ -3550,6 +3727,24 @@ seekable.
|
|||||||
@item type(@var{T})
|
@item type(@var{T})
|
||||||
Whether the stream is a @code{text} stream or a @code{binary} stream.
|
Whether the stream is a @code{text} stream or a @code{binary} stream.
|
||||||
|
|
||||||
|
@item bom(+@var{Bool})
|
||||||
|
If present and @code{true}, a BOM (@emph{Byte Order Mark}) was
|
||||||
|
detected while opening the file for reading or a BOM was written while
|
||||||
|
opening the stream. See @ref{BOM} for details.
|
||||||
|
|
||||||
|
@item encoding(+@var{Encoding})
|
||||||
|
Query the encoding used for text. See @ref{Encoding} for an
|
||||||
|
overview of wide character and encoding issues in YAP.
|
||||||
|
|
||||||
|
@item representation_errors(+@var{Mode})
|
||||||
|
Behaviour when writing characters to the stream that cannot be
|
||||||
|
represented by the encoding. The behaviour is one of @code{error}
|
||||||
|
(throw an I/O error exception), @code{prolog} (write @code{\u...\}
|
||||||
|
escape code) or @code{xml} (write @code{&#...;} XML character entity).
|
||||||
|
The initial mode is @code{prolog} for the user streams and
|
||||||
|
@code{error} for all other streams. See also @ref{Encoding} and
|
||||||
|
@code{open/4}.
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@end table
|
@end table
|
||||||
|
@ -45,6 +45,9 @@ true :- true.
|
|||||||
;
|
;
|
||||||
true
|
true
|
||||||
),
|
),
|
||||||
|
'$stream_representation_error'(user_input, 512),
|
||||||
|
'$stream_representation_error'(user_output, 512),
|
||||||
|
'$stream_representation_error'(user_error, 512),
|
||||||
'$allocate_default_arena'(1024, 64),
|
'$allocate_default_arena'(1024, 64),
|
||||||
'$enter_system_mode',
|
'$enter_system_mode',
|
||||||
set_value(fileerrors,1),
|
set_value(fileerrors,1),
|
||||||
|
@ -524,16 +524,16 @@ remove_from_path(New) :- '$check_path'(New,Path),
|
|||||||
'$valid_encoding'(iso_latin_1, 1).
|
'$valid_encoding'(iso_latin_1, 1).
|
||||||
% UTF-8: default 8 bits but 80 extends to 16bits
|
% UTF-8: default 8 bits but 80 extends to 16bits
|
||||||
'$valid_encoding'(utf8, 8).
|
'$valid_encoding'(utf8, 8).
|
||||||
% UNICODE: 16 bits throughout, the way Gates does it!
|
% UNICODE: 16 bits throughout, the way it was supposed to be!
|
||||||
'$valid_encoding'(unicode_be, 16).
|
'$valid_encoding'(unicode_be, 16).
|
||||||
'$valid_encoding'(unicode_le, 32).
|
'$valid_encoding'(unicode_le, 32).
|
||||||
% whatever the system tell us to do.
|
% whatever the system tell us to do.
|
||||||
'$valid_encoding'(text, 4).
|
'$valid_encoding'(text, 4).
|
||||||
|
|
||||||
'$default_encoding'(DefCode) :- nonvar(DefCode), !,
|
'$default_encoding'(DefCode) :- nonvar(DefCode), !,
|
||||||
'$set_encoding'('$stream'(0),DefCode),
|
'$encoding'('$stream'(0),DefCode),
|
||||||
'$set_encoding'('$stream'(1),DefCode),
|
'$encoding'('$stream'(1),DefCode),
|
||||||
'$set_encoding'('$stream'(2),DefCode),
|
'$encoding'('$stream'(2),DefCode),
|
||||||
set_value('$default_encoding',DefCode).
|
set_value('$default_encoding',DefCode).
|
||||||
'$default_encoding'(DefCode) :-
|
'$default_encoding'(DefCode) :-
|
||||||
get_value('$default_encoding',DefCode0),
|
get_value('$default_encoding',DefCode0),
|
||||||
|
32
pl/yio.yap
32
pl/yio.yap
@ -92,6 +92,10 @@ open(F,T,S,Opts) :-
|
|||||||
'$process_open_opts'([encoding(Enc)|L], N0, N, Aliases, EncCode) :-
|
'$process_open_opts'([encoding(Enc)|L], N0, N, Aliases, EncCode) :-
|
||||||
'$valid_encoding'(Enc, EncCode),
|
'$valid_encoding'(Enc, EncCode),
|
||||||
'$process_open_opts'(L, N0, N, Aliases, _).
|
'$process_open_opts'(L, N0, N, Aliases, _).
|
||||||
|
'$process_open_opts'([representation_errors(Mode)|L], N0, N, Aliases, EncCode) :-
|
||||||
|
'$valid_reperrorhandler'(Mode, Flag),
|
||||||
|
NI is N0 \/ Flag,
|
||||||
|
'$process_open_opts'(L, NI, N, Aliases, EncCode).
|
||||||
'$process_open_opts'([bom(BOM)|L], N0, N, Aliases, EncCode) :-
|
'$process_open_opts'([bom(BOM)|L], N0, N, Aliases, EncCode) :-
|
||||||
'$valid_bom'(BOM, Flag),
|
'$valid_bom'(BOM, Flag),
|
||||||
NI is N0 \/ Flag,
|
NI is N0 \/ Flag,
|
||||||
@ -114,10 +118,16 @@ open(F,T,S,Opts) :-
|
|||||||
'$value_open_opt'(reset,64, X) :- X is 128-32-16.
|
'$value_open_opt'(reset,64, X) :- X is 128-32-16.
|
||||||
%128 -> use bom
|
%128 -> use bom
|
||||||
%256 -> do not use bom
|
%256 -> do not use bom
|
||||||
|
%512 -> do prolog on unrepresentable char
|
||||||
|
%1024 -> do XML on unrepresentable char
|
||||||
|
|
||||||
'$valid_bom'(true, 128).
|
'$valid_bom'(true, 128).
|
||||||
'$valid_bom'(false, 256).
|
'$valid_bom'(false, 256).
|
||||||
|
|
||||||
|
'$valid_reperrorhandler'(error, 0). % default.
|
||||||
|
'$valid_reperrorhandler'(prolog, 512).
|
||||||
|
'$valid_reperrorhandler'(xml, 1024).
|
||||||
|
|
||||||
/* check whether a list of options is valid */
|
/* check whether a list of options is valid */
|
||||||
'$check_io_opts'(V,G) :- var(V), !,
|
'$check_io_opts'(V,G) :- var(V), !,
|
||||||
'$do_error'(instantiation_error,G).
|
'$do_error'(instantiation_error,G).
|
||||||
@ -157,6 +167,8 @@ open(F,T,S,Opts) :-
|
|||||||
'$check_open_eof_action_arg'(T, G).
|
'$check_open_eof_action_arg'(T, G).
|
||||||
'$check_opt_open'(encoding(T), G) :- !,
|
'$check_opt_open'(encoding(T), G) :- !,
|
||||||
'$check_open_encoding'(T, G).
|
'$check_open_encoding'(T, G).
|
||||||
|
'$check_opt_open'(representation_errors(M), G) :- !,
|
||||||
|
'$check_open_representation_errors'(M, G).
|
||||||
'$check_opt_open'(bom(T), G) :- !,
|
'$check_opt_open'(bom(T), G) :- !,
|
||||||
'$check_open_bom_arg'(T, G).
|
'$check_open_bom_arg'(T, G).
|
||||||
'$check_opt_open'(A, G) :-
|
'$check_opt_open'(A, G) :-
|
||||||
@ -183,6 +195,8 @@ open(F,T,S,Opts) :-
|
|||||||
'$check_opt_sp'(reposition(_), _) :- !.
|
'$check_opt_sp'(reposition(_), _) :- !.
|
||||||
'$check_opt_sp'(type(_), _) :- !.
|
'$check_opt_sp'(type(_), _) :- !.
|
||||||
'$check_opt_sp'(bom(_), _) :- !.
|
'$check_opt_sp'(bom(_), _) :- !.
|
||||||
|
'$check_opt_sp'(encoding(_), _) :- !.
|
||||||
|
'$check_opt_sp'(representation_errors(_), _) :- !.
|
||||||
'$check_opt_sp'(A, G) :-
|
'$check_opt_sp'(A, G) :-
|
||||||
'$do_error'(domain_error(stream_property,A),G).
|
'$do_error'(domain_error(stream_property,A),G).
|
||||||
|
|
||||||
@ -256,6 +270,13 @@ open(F,T,S,Opts) :-
|
|||||||
'$check_open_encoding'(Encoding,G) :-
|
'$check_open_encoding'(Encoding,G) :-
|
||||||
'$do_error'(domain_error(io_mode,encoding(Encoding)),G).
|
'$do_error'(domain_error(io_mode,encoding(Encoding)),G).
|
||||||
|
|
||||||
|
'$check_open_representation_errors'(X, G) :- var(X), !,
|
||||||
|
'$do_error'(instantiation_error,G).
|
||||||
|
'$check_open_representation_errors'(RepErrorHandler,_) :-
|
||||||
|
'$valid_reperrorhandler'(RepErrorHandler,_), !.
|
||||||
|
'$check_open_representation_errors'(Handler,G) :-
|
||||||
|
'$do_error'(domain_error(io_mode,representation_errors(Handler)),G).
|
||||||
|
|
||||||
'$check_read_syntax_errors_arg'(X, G) :- var(X), !,
|
'$check_read_syntax_errors_arg'(X, G) :- var(X), !,
|
||||||
'$do_error'(instantiation_error,G).
|
'$do_error'(instantiation_error,G).
|
||||||
'$check_read_syntax_errors_arg'(dec10,_) :- !.
|
'$check_read_syntax_errors_arg'(dec10,_) :- !.
|
||||||
@ -836,6 +857,8 @@ stream_property(Stream, Props) :-
|
|||||||
'$generate_prop'(type(_T)).
|
'$generate_prop'(type(_T)).
|
||||||
'$generate_prop'(alias(_A)).
|
'$generate_prop'(alias(_A)).
|
||||||
'$generate_prop'(bom(_B)).
|
'$generate_prop'(bom(_B)).
|
||||||
|
'$generate_prop'(encoding(_E)).
|
||||||
|
'$generate_prop'(representation_errors(_E)).
|
||||||
|
|
||||||
'$stream_property'(Stream, Props) :-
|
'$stream_property'(Stream, Props) :-
|
||||||
var(Props), !,
|
var(Props), !,
|
||||||
@ -865,6 +888,11 @@ stream_property(Stream, Props) :-
|
|||||||
'$process_stream_properties'([position(P)|Props], Stream, F, Mode) :-
|
'$process_stream_properties'([position(P)|Props], Stream, F, Mode) :-
|
||||||
'$show_stream_bom'(Stream, P),
|
'$show_stream_bom'(Stream, P),
|
||||||
'$process_stream_properties'(Props, Stream, F, Mode).
|
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||||
|
'$process_stream_properties'([encoding(Enc)|Props], Stream, F, Mode) :-
|
||||||
|
% make sure this runs first, with EncCode unbound.
|
||||||
|
'$encoding'(Stream, EncCode),
|
||||||
|
'$valid_encoding'(Enc, EncCode),
|
||||||
|
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||||
'$process_stream_properties'([bom(B)|Props], Stream, F, Mode) :-
|
'$process_stream_properties'([bom(B)|Props], Stream, F, Mode) :-
|
||||||
'$show_stream_bom'(Stream, B),
|
'$show_stream_bom'(Stream, B),
|
||||||
'$process_stream_properties'(Props, Stream, F, Mode).
|
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||||
@ -879,6 +907,10 @@ stream_property(Stream, Props) :-
|
|||||||
'$show_stream_flags'(Stream, Fl),
|
'$show_stream_flags'(Stream, Fl),
|
||||||
'$show_stream_reposition'(Fl, P),
|
'$show_stream_reposition'(Fl, P),
|
||||||
'$process_stream_properties'(Props, Stream, F, Mode).
|
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||||
|
'$process_stream_properties'([representation_errors(B)|Props], Stream, F, Mode) :-
|
||||||
|
'$stream_representation_error'(Stream, ErrorHandler),
|
||||||
|
'$valid_reperrorhandler'(B, ErrorHandler),
|
||||||
|
'$process_stream_properties'(Props, Stream, F, Mode).
|
||||||
'$process_stream_properties'([type(P)|Props], Stream, F, Mode) :-
|
'$process_stream_properties'([type(P)|Props], Stream, F, Mode) :-
|
||||||
'$show_stream_flags'(Stream, Fl),
|
'$show_stream_flags'(Stream, Fl),
|
||||||
'$show_stream_type'(Fl, P),
|
'$show_stream_type'(Fl, P),
|
||||||
|
Reference in New Issue
Block a user