2001-04-09 20:54:03 +01:00
|
|
|
|
/*************************************************************************
|
2017-09-23 02:17:55 +01:00
|
|
|
|
* *
|
|
|
|
|
* YAP Prolog *
|
|
|
|
|
* *
|
|
|
|
|
* Yap Prolog was developed at NCCUP - Universidade do Porto *
|
|
|
|
|
* *
|
|
|
|
|
* Copyright L.Damas, V.S.Costa and Universidade do Porto 1985-2003 *
|
|
|
|
|
* *
|
|
|
|
|
**************************************************************************
|
|
|
|
|
* *
|
|
|
|
|
* File: %W% %G% *
|
|
|
|
|
* Last rev: 22-1-03 *
|
|
|
|
|
* mods: *
|
|
|
|
|
* comments: Prolog's scanner *
|
|
|
|
|
* *
|
|
|
|
|
*************************************************************************/
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
|
|
|
|
/*
|
2015-02-10 00:03:02 +00:00
|
|
|
|
* Description:
|
2001-04-09 20:54:03 +01:00
|
|
|
|
*
|
|
|
|
|
* This module produces a list of tokens for use by the parser. The calling
|
|
|
|
|
* program should supply a routine int nextch(charpos) int *charpos; which,
|
|
|
|
|
* when called should produce the next char or -1 if none availlable. The
|
|
|
|
|
* scanner will stop producing tokens when it either finds an end of file
|
|
|
|
|
* (-1) or a token consisting of just '.' followed by a blank or control
|
|
|
|
|
* char. Scanner errors will be signalled by the scanner exiting with a non-
|
|
|
|
|
* zero ErrorMsg and ErrorPos. Note that, even in this case, the scanner
|
|
|
|
|
* will try to find the end of the term. A function char
|
|
|
|
|
* *AllocScannerMemory(nbytes) should be supplied for allocating (temporary)
|
|
|
|
|
* space for strings and for the table of prolog variables occurring in the
|
2015-02-10 00:03:02 +00:00
|
|
|
|
* term.
|
2001-04-09 20:54:03 +01:00
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2014-09-15 09:13:50 +01:00
|
|
|
|
/**
|
|
|
|
|
|
|
|
|
|
@defgroup Formal_Syntax Syntax of Terms
|
2017-04-13 21:42:34 +01:00
|
|
|
|
@ingroup YAPSyntax
|
2014-09-15 09:13:50 +01:00
|
|
|
|
@{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Prolog tokens are grouped into the following categories:
|
|
|
|
|
|
|
|
|
|
+ Numbers
|
|
|
|
|
|
|
|
|
|
Numbers can be further subdivided into integer and floating-point numbers.
|
|
|
|
|
|
|
|
|
|
+ Integers
|
|
|
|
|
|
|
|
|
|
Integer numbers
|
|
|
|
|
are described by the following regular expression:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
<integer> := {<digit>+<single-quote>|0{xXo}}<alpha_numeric_char>+
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
where {...} stands for optionality, \a + optional repetition (one or
|
|
|
|
|
more times), \a \\\<digit\\\> denotes one of the characters 0 ... 9, \a |
|
|
|
|
|
denotes or, and \a \\\<single-quote\\\> denotes the character "'". The digits
|
|
|
|
|
before the \a \\\<single-quote\\\> character, when present, form the number
|
|
|
|
|
basis, that can go from 0, 1 and up to 36. Letters from `A` to
|
|
|
|
|
`Z` are used when the basis is larger than 10.
|
|
|
|
|
|
|
|
|
|
Note that if no basis is specified then base 10 is assumed. Note also
|
|
|
|
|
that the last digit of an integer token can not be immediately followed
|
|
|
|
|
by one of the characters 'e', 'E', or '.'.
|
|
|
|
|
|
|
|
|
|
Following the ISO standard, YAP also accepts directives of the
|
|
|
|
|
form `0x` to represent numbers in hexadecimal base and of the form
|
|
|
|
|
`0o` to represent numbers in octal base. For usefulness,
|
|
|
|
|
YAP also accepts directives of the form `0X` to represent
|
|
|
|
|
numbers in hexadecimal base.
|
|
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
the following tokens all denote the same integer
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
10 2'1010 3'101 8'12 16'a 36'a 0xa 0o12
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Numbers of the form `0'a` are used to represent character
|
|
|
|
|
constants. So, the following tokens denote the same integer:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
0'd 100
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
YAP (version 6.3.4) supports integers that can fit
|
|
|
|
|
the word size of the machine. This is 32 bits in most current machines,
|
|
|
|
|
but 64 in some others, such as the Alpha running Linux or Digital
|
|
|
|
|
Unix. The scanner will read larger or smaller integers erroneously.
|
|
|
|
|
|
|
|
|
|
+ Floats
|
|
|
|
|
|
|
|
|
|
Floating-point numbers are described by:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
<float> := <digit>+{<dot><digit>+}
|
|
|
|
|
<exponent-marker>{<sign>}<digit>+
|
|
|
|
|
|<digit>+<dot><digit>+
|
|
|
|
|
{<exponent-marker>{<sign>}<digit>+}
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
where \a \\\<dot\\\> denotes the decimal-point character '.',
|
2015-02-10 00:03:02 +00:00
|
|
|
|
\a \\\<exponent-marker\\\> denotes one of 'e' or 'E', and \a \\\<sign\\\>
|
|
|
|
|
denotes
|
2014-09-15 09:13:50 +01:00
|
|
|
|
one of '+' or '-'.
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
10.0 10e3 10e-3 3.1415e+3
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Floating-point numbers are represented as a double in the target
|
|
|
|
|
machine. This is usually a 64-bit number.
|
|
|
|
|
|
|
|
|
|
+ Strings Character Strings
|
|
|
|
|
|
|
|
|
|
Strings are described by the following rules:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
string --> '"' string_quoted_characters '"'
|
2015-02-10 00:03:02 +00:00
|
|
|
|
string --> '`' string_quoted_characters '`'
|
2014-09-15 09:13:50 +01:00
|
|
|
|
|
|
|
|
|
string_quoted_characters --> '"' '"' string_quoted_characters
|
2015-02-10 00:03:02 +00:00
|
|
|
|
string_quoted_characters --> '`' '`' string_quoted_characters
|
|
|
|
|
string_quoted_characters --> '\'
|
2014-09-15 09:13:50 +01:00
|
|
|
|
escape_sequence string_quoted_characters
|
|
|
|
|
string_quoted_characters -->
|
|
|
|
|
string_character string_quoted_characters
|
|
|
|
|
|
|
|
|
|
escape_sequence --> 'a' | 'b' | 'r' | 'f' | 't' | 'n' | 'v'
|
|
|
|
|
escape_sequence --> '\' | '"' | ''' | '`'
|
|
|
|
|
escape_sequence --> at_most_3_octal_digit_seq_char '\'
|
|
|
|
|
escape_sequence --> 'x' at_most_2_hexa_digit_seq_char '\'
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
where `string_character` in any character except the double quote
|
|
|
|
|
and escape characters.
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
2015-02-10 00:03:02 +00:00
|
|
|
|
"" "a string" "a double-quote:"""
|
2014-09-15 09:13:50 +01:00
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
The first string is an empty string, the last string shows the use of
|
|
|
|
|
double-quoting. The implementation of YAP represents strings as
|
|
|
|
|
lists of integers. Since YAP 4.3.0 there is no static limit on string
|
|
|
|
|
size.
|
|
|
|
|
|
2016-01-31 10:19:08 +00:00
|
|
|
|
Escape sequences can be used anf include the non-printable characters
|
2014-09-15 09:13:50 +01:00
|
|
|
|
`a` (alert), `b` (backspace), `r` (carriage return),
|
|
|
|
|
`f` (form feed), `t` (horizontal tabulation), `n` (new
|
|
|
|
|
line), and `v` (vertical tabulation). Escape sequences also be
|
|
|
|
|
include the meta-characters `\\`, `"`, `'`, and
|
|
|
|
|
```. Last, one can use escape sequences to include the characters
|
|
|
|
|
either as an octal or hexadecimal number.
|
|
|
|
|
|
|
|
|
|
The next examples demonstrates the use of escape sequences in YAP:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
2015-02-10 00:03:02 +00:00
|
|
|
|
"\x0c\" "\01\" "\f" "\\"
|
2014-09-15 09:13:50 +01:00
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
The first three examples return a list including only character 12 (form
|
|
|
|
|
feed). The last example escapes the escape character.
|
|
|
|
|
|
|
|
|
|
Escape sequences were not available in C-Prolog and in original
|
|
|
|
|
versions of YAP up to 4.2.0. Escape sequences can be disable by using:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
:- yap_flag(character_escapes,false).
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
Since 6.3.4 YAP supports compact strings, that are not represented as
|
|
|
|
|
lists of codes, but instead as a sequence of UTF-8 encoded characters
|
|
|
|
|
in the execution stack. These strings do not require allocating a
|
|
|
|
|
symbol, as atoms do, but are much more compact than using lists of
|
|
|
|
|
codes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-09-15 09:13:50 +01:00
|
|
|
|
+ Atoms Atoms
|
|
|
|
|
|
|
|
|
|
Atoms are defined by one of the following rules:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
atom --> solo-character
|
|
|
|
|
atom --> lower-case-letter name-character*
|
|
|
|
|
atom --> symbol-character+
|
|
|
|
|
atom --> single-quote single-quote
|
|
|
|
|
atom --> ''' atom_quoted_characters '''
|
|
|
|
|
|
|
|
|
|
atom_quoted_characters --> ''' ''' atom_quoted_characters
|
|
|
|
|
atom_quoted_characters --> '\' atom_sequence string_quoted_characters
|
|
|
|
|
atom_quoted_characters --> character string_quoted_characters
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
where:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
<solo-character> denotes one of: ! ;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
<symbol-character> denotes one of: # & * + - . / : <
|
2014-09-15 09:13:50 +01:00
|
|
|
|
= > ? @ \ ^ ~ `
|
|
|
|
|
<lower-case-letter> denotes one of: a...z
|
|
|
|
|
<name-character> denotes one of: _ a...z A...Z 0....9
|
|
|
|
|
<single-quote> denotes: '
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
and `string_character` denotes any character except the double quote
|
|
|
|
|
and escape characters. Note that escape sequences in strings and atoms
|
|
|
|
|
follow the same rules.
|
|
|
|
|
|
|
|
|
|
Examples:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
a a12x '$a' ! => '1 2'
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Version `4.2.0` of YAP removed the previous limit of 256
|
|
|
|
|
characters on an atom. Size of an atom is now only limited by the space
|
|
|
|
|
available in the system.
|
|
|
|
|
|
|
|
|
|
+ Variables Variables
|
|
|
|
|
|
|
|
|
|
Variables are described by:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
<variable-starter><variable-character>+
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
where
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
<variable-starter> denotes one of: _ A...Z
|
|
|
|
|
<variable-character> denotes one of: _ a...z A...Z
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
If a variable is referred only once in a term, it needs not to be named
|
|
|
|
|
and one can use the character `_` to represent the variable. These
|
|
|
|
|
variables are known as anonymous variables. Note that different
|
|
|
|
|
occurrences of `_` on the same term represent <em>different</em>
|
2015-02-10 00:03:02 +00:00
|
|
|
|
anonymous variables.
|
2014-09-15 09:13:50 +01:00
|
|
|
|
|
|
|
|
|
+ Punctuation Tokens
|
|
|
|
|
|
|
|
|
|
Punctuation tokens consist of one of the following characters:
|
|
|
|
|
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
( ) , [ ] { } |
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
These characters are used to group terms.
|
|
|
|
|
|
|
|
|
|
@subsection Layout Layout
|
|
|
|
|
Any characters with ASCII code less than or equal to 32 appearing before
|
|
|
|
|
a token are ignored.
|
|
|
|
|
|
|
|
|
|
All the text appearing in a line after the character \a % is taken to
|
|
|
|
|
be a comment and ignored (including \a %). Comments can also be
|
|
|
|
|
inserted by using the sequence `/\*` to start the comment and
|
2015-02-10 00:03:02 +00:00
|
|
|
|
`\*` followed by `/` to finish it. In the presence of any sequence of comments
|
|
|
|
|
or
|
2014-09-15 09:13:50 +01:00
|
|
|
|
layout characters, the YAP parser behaves as if it had found a
|
|
|
|
|
single blank character. The end of a file also counts as a blank
|
|
|
|
|
character for this purpose.
|
|
|
|
|
|
|
|
|
|
+ Encoding Wide Character Support
|
|
|
|
|
|
|
|
|
|
YAP now implements a SWI-Prolog compatible interface to wide
|
|
|
|
|
characters and the Universal Character Set (UCS). The following text
|
|
|
|
|
was adapted from the SWI-Prolog manual.
|
|
|
|
|
|
|
|
|
|
YAP now supports wide characters, characters with character
|
|
|
|
|
codes above 255 that cannot be represented in a single byte.
|
|
|
|
|
<em>Universal Character Set</em> (UCS) is the ISO/IEC 10646 standard
|
|
|
|
|
that specifies a unique 31-bits unsigned integer for any character in
|
|
|
|
|
any language. It is a superset of 16-bit Unicode, which in turn is
|
|
|
|
|
a superset of ISO 8859-1 (ISO Latin-1), a superset of US-ASCII. UCS
|
|
|
|
|
can handle strings holding characters from multiple languages and
|
|
|
|
|
character classification (uppercase, lowercase, digit, etc.) and
|
|
|
|
|
operations such as case-conversion are unambiguously defined.
|
|
|
|
|
|
|
|
|
|
For this reason YAP, following SWI-Prolog, has two representations for
|
|
|
|
|
atoms. If the text fits in ISO Latin-1, it is represented as an array
|
|
|
|
|
of 8-bit characters. Otherwise the text is represented as an array of
|
|
|
|
|
wide chars, which may take 16 or 32 bits. This representational issue
|
|
|
|
|
is completely transparent to the Prolog user. Users of the foreign
|
|
|
|
|
language interface sometimes need to be aware of these issues though.
|
|
|
|
|
|
|
|
|
|
Character coding comes into view when characters of strings need to be
|
|
|
|
|
read from or written to file or when they have to be communicated to
|
|
|
|
|
other software components using the foreign language interface. In this
|
|
|
|
|
section we only deal with I/O through streams, which includes file I/O
|
|
|
|
|
as well as I/O through network sockets.
|
|
|
|
|
|
|
|
|
|
+ Stream_Encoding Wide character encodings on streams
|
|
|
|
|
|
|
|
|
|
Although characters are uniquely coded using the UCS standard
|
|
|
|
|
internally, streams and files are byte (8-bit) oriented and there are a
|
|
|
|
|
variety of ways to represent the larger UCS codes in an 8-bit octet
|
|
|
|
|
stream. The most popular one, especially in the context of the web, is
|
|
|
|
|
UTF-8. Bytes 0...127 represent simply the corresponding US-ASCII
|
|
|
|
|
character, while bytes 128...255 are used for multi-byte
|
|
|
|
|
encoding of characters placed higher in the UCS space. Especially on
|
|
|
|
|
MS-Windows the 16-bit Unicode standard, represented by pairs of bytes is
|
|
|
|
|
also popular.
|
|
|
|
|
|
|
|
|
|
Prolog I/O streams have a property called <em>encoding</em> which
|
|
|
|
|
specifies the used encoding that influence `get_code/2` and
|
|
|
|
|
`put_code/2` as well as all the other text I/O predicates.
|
|
|
|
|
|
|
|
|
|
The default encoding for files is derived from the Prolog flag
|
2015-11-05 15:28:39 +00:00
|
|
|
|
`encoding`, which is initialized from the environment. If the
|
2014-09-15 09:13:50 +01:00
|
|
|
|
environment variable `LANG` ends in "UTF-8", this encoding is
|
|
|
|
|
assumed. Otherwise the default is `text` and the translation is
|
|
|
|
|
left to the wide-character functions of the C-library (note that the
|
|
|
|
|
Prolog native UTF-8 mode is considerably faster than the generic
|
|
|
|
|
`mbrtowc()` one). The encoding can be specified explicitly in
|
|
|
|
|
load_files/2 for loading Prolog source with an alternative
|
|
|
|
|
encoding, `open/4` when opening files or using `set_stream/2` on
|
|
|
|
|
any open stream (not yet implemented). For Prolog source files we also
|
|
|
|
|
provide the `encoding/1` directive that can be used to switch
|
|
|
|
|
between encodings that are compatible to US-ASCII (`ascii`,
|
2015-02-10 00:03:02 +00:00
|
|
|
|
`iso_latin_1`, `utf8` and many locales).
|
2014-09-15 09:13:50 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
For
|
|
|
|
|
additional information and Unicode resources, please visit
|
|
|
|
|
<http://www.unicode.org/>.
|
|
|
|
|
|
|
|
|
|
YAP currently defines and supports the following encodings:
|
|
|
|
|
|
|
|
|
|
+ octet
|
|
|
|
|
Default encoding for <em>binary</em> streams. This causes
|
|
|
|
|
the stream to be read and written fully untranslated.
|
|
|
|
|
|
|
|
|
|
+ ascii
|
|
|
|
|
7-bit encoding in 8-bit bytes. Equivalent to `iso_latin_1`,
|
|
|
|
|
but generates errors and warnings on encountering values above
|
|
|
|
|
127.
|
|
|
|
|
|
|
|
|
|
+ iso_latin_1
|
|
|
|
|
8-bit encoding supporting many western languages. This causes
|
|
|
|
|
the stream to be read and written fully untranslated.
|
|
|
|
|
|
|
|
|
|
+ text
|
|
|
|
|
C-library default locale encoding for text files. Files are read and
|
|
|
|
|
written using the C-library functions `mbrtowc()` and
|
|
|
|
|
`wcrtomb()`. This may be the same as one of the other locales,
|
|
|
|
|
notably it may be the same as `iso_latin_1` for western
|
|
|
|
|
languages and `utf8` in a UTF-8 context.
|
|
|
|
|
|
|
|
|
|
+ utf8
|
|
|
|
|
Multi-byte encoding of full UCS, compatible to `ascii`.
|
|
|
|
|
See above.
|
|
|
|
|
|
|
|
|
|
+ unicode_be
|
|
|
|
|
Unicode Big Endian. Reads input in pairs of bytes, most
|
|
|
|
|
significant byte first. Can only represent 16-bit characters.
|
|
|
|
|
|
|
|
|
|
+ unicode_le
|
|
|
|
|
Unicode Little Endian. Reads input in pairs of bytes, least
|
|
|
|
|
significant byte first. Can only represent 16-bit characters.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Note that not all encodings can represent all characters. This implies
|
|
|
|
|
that writing text to a stream may cause errors because the stream
|
|
|
|
|
cannot represent these characters. The behaviour of a stream on these
|
|
|
|
|
errors can be controlled using `open/4` or `set_stream/2` (not
|
|
|
|
|
implemented). Initially the terminal stream write the characters using
|
|
|
|
|
Prolog escape sequences while other streams generate an I/O exception.
|
|
|
|
|
|
|
|
|
|
+ BOM BOM: Byte Order Mark
|
|
|
|
|
|
|
|
|
|
From Stream Encoding, you may have got the impression that
|
|
|
|
|
text-files are complicated. This section deals with a related topic,
|
|
|
|
|
making live often easier for the user, but providing another worry to
|
|
|
|
|
the programmer. *BOM* or <em>Byte Order Marker</em> is a technique
|
|
|
|
|
for identifying Unicode text-files as well as the encoding they
|
|
|
|
|
use. Such files start with the Unicode character `0xFEFF`, a
|
|
|
|
|
non-breaking, zero-width space character. This is a pretty unique
|
|
|
|
|
sequence that is not likely to be the start of a non-Unicode file and
|
|
|
|
|
uniquely distinguishes the various Unicode file formats. As it is a
|
|
|
|
|
zero-width blank, it even doesn't produce any output. This solves all
|
|
|
|
|
problems, or ...
|
|
|
|
|
|
|
|
|
|
Some formats start of as US-ASCII and may contain some encoding mark to
|
|
|
|
|
switch to UTF-8, such as the `encoding="UTF-8"` in an XML header.
|
|
|
|
|
Such formats often explicitly forbid the the use of a UTF-8 BOM. In
|
|
|
|
|
other cases there is additional information telling the encoding making
|
|
|
|
|
the use of a BOM redundant or even illegal.
|
|
|
|
|
|
|
|
|
|
The BOM is handled by the `open/4` predicate. By default, text-files are
|
|
|
|
|
probed for the BOM when opened for reading. If a BOM is found, the
|
|
|
|
|
encoding is set accordingly and the property `bom(true)` is
|
|
|
|
|
available through stream_property/2. When opening a file for
|
|
|
|
|
writing, writing a BOM can be requested using the option
|
|
|
|
|
`bom(true)` with `open/4`.
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
*/
|
2014-09-15 09:13:50 +01:00
|
|
|
|
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#include "Yap.h"
|
2017-09-23 02:17:55 +01:00
|
|
|
|
#include "YapEval.h"
|
2009-10-23 14:22:17 +01:00
|
|
|
|
#include "YapHeap.h"
|
2016-04-22 18:19:58 +01:00
|
|
|
|
#include "Yatom.h"
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#include "alloc.h"
|
2016-04-22 18:19:58 +01:00
|
|
|
|
#include "yapio.h"
|
2013-11-13 12:57:52 +00:00
|
|
|
|
/* stuff we want to use in standard YAP code */
|
2013-12-08 19:12:24 +00:00
|
|
|
|
#include "YapText.h"
|
2015-02-10 00:03:02 +00:00
|
|
|
|
#if _MSC_VER || defined(__MINGW32__)
|
|
|
|
|
#if HAVE_FINITE == 1
|
2002-08-28 15:02:35 +01:00
|
|
|
|
#undef HAVE_FINITE
|
|
|
|
|
#endif
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
#endif
|
2002-06-17 16:28:01 +01:00
|
|
|
|
#include "iopreds.h"
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#if HAVE_STRING_H
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#endif
|
2010-05-05 12:51:38 +01:00
|
|
|
|
#if HAVE_WCTYPE_H
|
|
|
|
|
#include <wctype.h>
|
|
|
|
|
#endif
|
2013-11-23 12:34:49 +00:00
|
|
|
|
#if O_LOCALE
|
|
|
|
|
#include "locale.h"
|
|
|
|
|
#endif
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
|
|
|
|
/* You just can't trust some machines */
|
2015-02-10 00:03:02 +00:00
|
|
|
|
#define my_isxdigit(C, SU, SL) \
|
|
|
|
|
(chtype(C) == NU || (C >= 'A' && C <= (SU)) || (C >= 'a' && C <= (SL)))
|
|
|
|
|
#define my_isupper(C) (C >= 'A' && C <= 'Z')
|
|
|
|
|
#define my_islower(C) (C >= 'a' && C <= 'z')
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
2013-04-25 23:15:04 +01:00
|
|
|
|
static Term float_send(char *, int);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
static Term get_num(int *, int *, struct stream_desc *, int);
|
2015-06-19 01:30:13 +01:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
static void Yap_setCurrentSourceLocation(struct stream_desc *s) {
|
2015-07-06 11:49:56 +01:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
#if HAVE_SOCKET
|
|
|
|
|
if (s->status & Socket_Stream_f)
|
|
|
|
|
LOCAL_SourceFileName = AtomSocket;
|
2015-06-19 01:30:13 +01:00
|
|
|
|
else
|
|
|
|
|
#endif
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (s->status & Pipe_Stream_f)
|
|
|
|
|
LOCAL_SourceFileName = AtomPipe;
|
2015-06-19 01:30:13 +01:00
|
|
|
|
else if (s->status & InMemory_Stream_f)
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_SourceFileName = AtomCharsio;
|
2015-06-19 01:30:13 +01:00
|
|
|
|
else
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_SourceFileName = s->name;
|
2015-07-06 11:49:56 +01:00
|
|
|
|
LOCAL_SourceFileLineno = s->linecount;
|
|
|
|
|
}
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
|
|
|
|
/* token table with some help from Richard O'Keefe's PD scanner */
|
2015-10-20 08:06:46 +01:00
|
|
|
|
char_kind_t Yap_chtype0[NUMBER_OF_CHARS + 1] = {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
EF,
|
|
|
|
|
/* nul soh stx etx eot enq ack bel bs ht nl vt np cr so si
|
2016-11-08 07:37:36 +00:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*/
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us
|
2017-09-23 02:17:55 +01:00
|
|
|
|
*/
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* sp ! " # $ % & ' ( ) * + , - . / */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, SL, DC, SY, SY, CC, SY, QT, BK, BK, SY, SY, BK, SY, SY, SY,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
NU, NU, NU, NU, NU, NU, NU, NU, NU, NU, SY, SL, SY, SY, SY, SY,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* @ A B C D E F G H I J K L M N O */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* P Q R S T U V W X Y Z [ \ ] ^ _ */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, BK, SY, BK, SY, UL,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* ` a b c d e f g h i j k l m n o */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* p q r s t u v w x y z { | } ~ del */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BK, BK, BK, SY, BS,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
2017-09-23 02:17:55 +01:00
|
|
|
|
*/
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* 144 145 147 148 149 150 151 152 153 154 155 156 157 158 159
|
2017-09-23 02:17:55 +01:00
|
|
|
|
*/
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS, BS,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯ */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
BS, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SY, SY, SY,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
SY, SY, LC, LC, SY, SY, SY, SY, SY, LC, LC, SY, SY, SY, SY, SY,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß */
|
|
|
|
|
#ifdef vms
|
2016-04-22 18:19:58 +01:00
|
|
|
|
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, LC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
#else
|
2016-04-22 18:19:58 +01:00
|
|
|
|
UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
#endif
|
|
|
|
|
/* à á â ã ä å æ ç è é ê ë ì í î ï */
|
2016-04-22 18:19:58 +01:00
|
|
|
|
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
/* ð ñ ò ó ô õ ö ÷ ø ù ú û ü cannot write the last
|
|
|
|
|
* three because of lcc */
|
|
|
|
|
#ifdef vms
|
2016-04-22 18:19:58 +01:00
|
|
|
|
LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
|
2015-10-18 11:40:12 +01:00
|
|
|
|
#else
|
2016-04-22 18:19:58 +01:00
|
|
|
|
LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC
|
2015-10-18 11:40:12 +01:00
|
|
|
|
#endif
|
2001-04-09 20:54:03 +01:00
|
|
|
|
};
|
|
|
|
|
|
2015-09-21 23:05:36 +01:00
|
|
|
|
typedef struct scanner_internals {
|
|
|
|
|
StreamDesc *t;
|
|
|
|
|
TokEntry *ctok;
|
|
|
|
|
char *_ScannerStack; // = (char *)TR;
|
|
|
|
|
char *ScannerExtraBlocks;
|
|
|
|
|
CELL *CommentsTail;
|
|
|
|
|
CELL *Comments;
|
|
|
|
|
CELL *CommentsNextChar;
|
|
|
|
|
wchar_t *CommentsBuff;
|
|
|
|
|
size_t CommentsBuffLim;
|
|
|
|
|
} scanner_internals;
|
|
|
|
|
|
|
|
|
|
// standard get char, uses conversion table
|
|
|
|
|
// and converts to wide
|
2018-01-22 13:53:17 +00:00
|
|
|
|
static inline int getchr(struct stream_desc *inp) {
|
|
|
|
|
/* if (inp != inp0) { fprintf(stderr,"\n %s
|
|
|
|
|
* **********************************\n", AtomName(inp->name)); */
|
|
|
|
|
/* inp0 = inp; */
|
|
|
|
|
/* } */
|
2018-02-14 00:13:13 +00:00
|
|
|
|
int sno = inp - GLOBAL_Stream;
|
|
|
|
|
int ch = inp->stream_wgetc_for_read(sno);
|
2018-01-18 14:47:27 +00:00
|
|
|
|
// fprintf(stderr,"%c", ch);
|
|
|
|
|
return ch;
|
|
|
|
|
}
|
2015-09-21 23:05:36 +01:00
|
|
|
|
// get char for quoted data, eg, quoted atoms and so on
|
|
|
|
|
// converts to wide
|
2018-01-22 13:53:17 +00:00
|
|
|
|
static inline int getchrq(struct stream_desc *inp) {
|
2018-01-18 14:47:27 +00:00
|
|
|
|
int ch = inp->stream_wgetc(inp - GLOBAL_Stream);
|
|
|
|
|
return ch;
|
|
|
|
|
}
|
2011-02-15 07:39:27 +00:00
|
|
|
|
|
2004-12-28 22:20:37 +00:00
|
|
|
|
/* in case there is an overflow */
|
|
|
|
|
typedef struct scanner_extra_alloc {
|
|
|
|
|
struct scanner_extra_alloc *next;
|
|
|
|
|
void *filler;
|
|
|
|
|
} ScannerExtraBlock;
|
|
|
|
|
|
2016-02-22 13:01:09 +00:00
|
|
|
|
#define CodeSpaceError(t, p, l) CodeSpaceError__(t, p, l PASS_REGS)
|
2016-04-22 18:19:58 +01:00
|
|
|
|
static TokEntry *CodeSpaceError__(TokEntry *t, TokEntry *p,
|
|
|
|
|
TokEntry *l USES_REGS) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_HEAP;
|
|
|
|
|
LOCAL_ErrorMessage = "Code Space Overflow";
|
|
|
|
|
if (t) {
|
|
|
|
|
t->Tok = eot_tok;
|
|
|
|
|
t->TokInfo = TermOutOfHeapError;
|
|
|
|
|
}
|
|
|
|
|
/* serious error now */
|
|
|
|
|
return l;
|
|
|
|
|
}
|
2016-01-31 10:19:08 +00:00
|
|
|
|
|
2016-02-22 13:01:09 +00:00
|
|
|
|
#define TrailSpaceError(t, l) TrailSpaceError__(t, l PASS_REGS)
|
|
|
|
|
static TokEntry *TrailSpaceError__(TokEntry *t, TokEntry *l USES_REGS) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
LOCAL_ErrorMessage = "Trail Overflow";
|
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_TRAIL;
|
|
|
|
|
if (t) {
|
|
|
|
|
t->Tok = eot_tok;
|
|
|
|
|
t->TokInfo = TermOutOfTrailError;
|
|
|
|
|
}
|
|
|
|
|
return l;
|
2016-01-31 10:19:08 +00:00
|
|
|
|
}
|
2016-02-14 04:11:55 +00:00
|
|
|
|
|
2016-02-22 13:01:09 +00:00
|
|
|
|
#define AuxSpaceError(p, l, msg) AuxSpaceError__(p, l, msg PASS_REGS)
|
2016-04-22 18:19:58 +01:00
|
|
|
|
static TokEntry *AuxSpaceError__(TokEntry *p, TokEntry *l,
|
|
|
|
|
const char *msg USES_REGS) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
/* huge atom or variable, we are in trouble */
|
|
|
|
|
LOCAL_ErrorMessage = (char *)msg;
|
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_AUXILIARY_STACK;
|
2016-08-15 20:54:46 +01:00
|
|
|
|
Yap_ReleasePreAllocCodeSpace((COBEADDR)TokImage);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
if (p) {
|
|
|
|
|
p->Tok = eot_tok;
|
|
|
|
|
p->TokInfo = TermOutOfAuxspaceError;
|
|
|
|
|
}
|
|
|
|
|
/* serious error now */
|
|
|
|
|
return l;
|
2016-01-31 10:19:08 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-11-02 05:16:36 +00:00
|
|
|
|
static void *InitScannerMemory(void) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
CACHE_REGS
|
|
|
|
|
LOCAL_ErrorMessage = NULL;
|
|
|
|
|
LOCAL_Error_Size = 0;
|
|
|
|
|
LOCAL_ScannerExtraBlocks = NULL;
|
2016-08-15 20:54:46 +01:00
|
|
|
|
LOCAL_ScannerStack = (char *)TR;
|
|
|
|
|
return (char *)TR;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static char *AllocScannerMemory(unsigned int size) {
|
2011-03-07 16:02:55 +00:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
char *AuxSpScan;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
2011-05-04 10:11:41 +01:00
|
|
|
|
AuxSpScan = LOCAL_ScannerStack;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
size = AdjustSize(size);
|
2011-05-04 10:11:41 +01:00
|
|
|
|
if (LOCAL_ScannerExtraBlocks) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
struct scanner_extra_alloc *ptr;
|
|
|
|
|
|
2018-01-22 13:53:17 +00:00
|
|
|
|
if (!(ptr = (struct scanner_extra_alloc *)Malloc(
|
2015-10-18 11:40:12 +01:00
|
|
|
|
size + sizeof(ScannerExtraBlock)))) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
ptr->next = LOCAL_ScannerExtraBlocks;
|
|
|
|
|
LOCAL_ScannerExtraBlocks = ptr;
|
|
|
|
|
return (char *)(ptr + 1);
|
|
|
|
|
} else if (LOCAL_TrailTop <= AuxSpScan + size) {
|
|
|
|
|
UInt alloc_size = sizeof(CELL) * K16;
|
|
|
|
|
|
|
|
|
|
if (size > alloc_size)
|
|
|
|
|
alloc_size = size;
|
|
|
|
|
if (!Yap_growtrail(alloc_size, TRUE)) {
|
2004-12-28 22:20:37 +00:00
|
|
|
|
struct scanner_extra_alloc *ptr;
|
|
|
|
|
|
2018-01-22 13:53:17 +00:00
|
|
|
|
if (!(ptr = (struct scanner_extra_alloc *)Malloc(
|
2015-10-18 11:40:12 +01:00
|
|
|
|
size + sizeof(ScannerExtraBlock)))) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2011-05-04 10:11:41 +01:00
|
|
|
|
ptr->next = LOCAL_ScannerExtraBlocks;
|
|
|
|
|
LOCAL_ScannerExtraBlocks = ptr;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
return (char *)(ptr + 1);
|
2004-12-28 22:20:37 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2015-02-10 00:03:02 +00:00
|
|
|
|
LOCAL_ScannerStack = AuxSpScan + size;
|
2004-10-28 21:12:23 +01:00
|
|
|
|
return AuxSpScan;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
char *Yap_AllocScannerMemory(unsigned int size) {
|
2015-11-05 15:28:39 +00:00
|
|
|
|
/* I assume memory has been initialized */
|
2002-11-11 17:38:10 +00:00
|
|
|
|
return AllocScannerMemory(size);
|
|
|
|
|
}
|
|
|
|
|
|
2001-04-09 20:54:03 +01:00
|
|
|
|
extern double atof(const char *);
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static Term float_send(char *s, int sign) {
|
|
|
|
|
Float f = (Float)(sign * atof(s));
|
2013-11-23 14:51:56 +00:00
|
|
|
|
#if HAVE_ISFINITE || defined(isfinite)
|
2015-06-19 01:30:13 +01:00
|
|
|
|
if (trueGlobalPrologFlag(ISO_FLAG)) { /* iso */
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (!isfinite(f)) {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
LOCAL_ErrorMessage = "Float overflow while scanning";
|
|
|
|
|
return (MkEvalFl(f));
|
2013-11-16 00:27:02 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2013-11-16 00:27:02 +00:00
|
|
|
|
#elif HAVE_FINITE
|
2015-06-19 01:30:13 +01:00
|
|
|
|
if (trueGlobalPrologFlag(ISO_FLAG)) { /* iso */
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (!finite(f)) {
|
|
|
|
|
LOCAL_ErrorMessage = "Float overflow while scanning";
|
|
|
|
|
return (MkEvalFl(f));
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#endif
|
2015-07-06 11:49:56 +01:00
|
|
|
|
{
|
|
|
|
|
CACHE_REGS
|
2018-05-14 16:03:02 +01:00
|
|
|
|
return MkFloatTerm(f);
|
2015-07-06 11:49:56 +01:00
|
|
|
|
}
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* we have an overflow at s */
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static Term read_int_overflow(const char *s, Int base, Int val, int sign) {
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#ifdef USE_GMP
|
|
|
|
|
/* try to scan it as a bignum */
|
2006-01-02 02:16:19 +00:00
|
|
|
|
mpz_t new;
|
|
|
|
|
Term t;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
mpz_init_set_str(new, s, base);
|
2009-01-12 15:08:26 +00:00
|
|
|
|
if (sign < 0)
|
|
|
|
|
mpz_neg(new, new);
|
2006-01-02 02:16:19 +00:00
|
|
|
|
t = Yap_MkBigIntTerm(new);
|
2006-01-18 15:34:54 +00:00
|
|
|
|
mpz_clear(new);
|
2006-01-02 02:16:19 +00:00
|
|
|
|
return t;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
#else
|
2013-09-06 23:03:24 +01:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* try to scan it as a float */
|
|
|
|
|
return MkIntegerTerm(val);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
#endif
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static int send_error_message(char s[]) {
|
2011-03-07 16:02:55 +00:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_ErrorMessage = s;
|
2011-03-07 16:02:55 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
2011-02-15 07:39:27 +00:00
|
|
|
|
|
2018-01-05 16:57:38 +00:00
|
|
|
|
static wchar_t read_quoted_char(int *scan_nextp, struct stream_desc *st) {
|
2015-07-06 11:49:56 +01:00
|
|
|
|
int ch;
|
2003-01-08 16:45:35 +00:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* escape sequence */
|
2015-02-10 00:03:02 +00:00
|
|
|
|
do_switch:
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2003-01-08 16:45:35 +00:00
|
|
|
|
switch (ch) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
case 10:
|
|
|
|
|
return 0;
|
|
|
|
|
case '\\':
|
|
|
|
|
return '\\';
|
|
|
|
|
case 'a':
|
|
|
|
|
return '\a';
|
|
|
|
|
case 'b':
|
|
|
|
|
return '\b';
|
|
|
|
|
case 'c':
|
2017-11-29 13:47:57 +00:00
|
|
|
|
while (chtype((ch = getchrq(st))) == BS)
|
2015-10-18 11:40:12 +01:00
|
|
|
|
;
|
|
|
|
|
{
|
|
|
|
|
if (ch == '\\') {
|
|
|
|
|
goto do_switch;
|
2006-11-27 17:42:03 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return ch;
|
|
|
|
|
}
|
|
|
|
|
case 'd':
|
|
|
|
|
return 127;
|
|
|
|
|
case 'e':
|
|
|
|
|
return '\x1B'; /* <ESC>, a.k.a. \e */
|
|
|
|
|
case 'f':
|
|
|
|
|
return '\f';
|
|
|
|
|
case 'n':
|
|
|
|
|
return '\n';
|
|
|
|
|
case 'r':
|
|
|
|
|
return '\r';
|
|
|
|
|
case 's': /* space */
|
|
|
|
|
return ' ';
|
|
|
|
|
case 't':
|
|
|
|
|
return '\t';
|
|
|
|
|
case 'u': {
|
|
|
|
|
int i;
|
|
|
|
|
wchar_t wc = '\0';
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch >= '0' && ch <= '9') {
|
|
|
|
|
wc += (ch - '0') << ((3 - i) * 4);
|
|
|
|
|
} else if (ch >= 'a' && ch <= 'f') {
|
|
|
|
|
wc += ((ch - 'a') + 10) << ((3 - i) * 4);
|
|
|
|
|
} else if (ch >= 'A' && ch <= 'F') {
|
|
|
|
|
wc += ((ch - 'A') + 10) << ((3 - i) * 4);
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid escape sequence");
|
2006-11-27 17:42:03 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
return wc;
|
|
|
|
|
}
|
|
|
|
|
case 'U': {
|
|
|
|
|
int i;
|
|
|
|
|
wchar_t wc = '\0';
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < 8; i++) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch >= '0' && ch <= '9') {
|
|
|
|
|
wc += (ch - '0') << ((7 - i) * 4);
|
|
|
|
|
} else if (ch >= 'a' && ch <= 'f') {
|
|
|
|
|
wc += ((ch - 'a') + 10) << ((7 - i) * 4);
|
|
|
|
|
} else if (ch >= 'A' && ch <= 'F') {
|
|
|
|
|
wc += ((ch - 'A') + 10) << ((7 - i) * 4);
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid escape sequence");
|
2003-01-08 16:45:35 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
return wc;
|
|
|
|
|
}
|
|
|
|
|
case 'v':
|
|
|
|
|
return '\v';
|
|
|
|
|
case 'z': /* Prolog end-of-file */
|
|
|
|
|
return send_error_message("invalid escape sequence \\z");
|
|
|
|
|
case '\'':
|
|
|
|
|
return '\'';
|
|
|
|
|
case '"':
|
|
|
|
|
return '"';
|
|
|
|
|
case '`':
|
|
|
|
|
return '`';
|
|
|
|
|
case '^':
|
|
|
|
|
if (trueGlobalPrologFlag(ISO_FLAG)) {
|
|
|
|
|
return send_error_message("invalid escape sequence");
|
|
|
|
|
} else {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '?') { /* delete character */
|
|
|
|
|
return 127;
|
|
|
|
|
} else if (ch >= 'a' && ch < 'z') { /* hexa */
|
|
|
|
|
return ch - 'a';
|
|
|
|
|
} else if (ch >= 'A' && ch < 'Z') { /* hexa */
|
|
|
|
|
return ch - 'A';
|
|
|
|
|
} else {
|
|
|
|
|
return '^';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
case '0':
|
|
|
|
|
case '1':
|
|
|
|
|
case '2':
|
|
|
|
|
case '3':
|
|
|
|
|
case '4':
|
|
|
|
|
case '5':
|
|
|
|
|
case '6':
|
|
|
|
|
case '7':
|
|
|
|
|
/* character in octal: maximum of 3 digits, terminates with \ */
|
|
|
|
|
/* follow ISO */
|
|
|
|
|
{
|
|
|
|
|
unsigned char so_far = ch - '0';
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch >= '0' && ch < '8') { /* octal */
|
|
|
|
|
so_far = so_far * 8 + (ch - '0');
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
if (ch >= '0' && ch < '8') { /* octal */
|
2015-10-18 11:40:12 +01:00
|
|
|
|
so_far = so_far * 8 + (ch - '0');
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch != '\\') {
|
2015-02-10 00:03:02 +00:00
|
|
|
|
return send_error_message("invalid octal escape sequence");
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return so_far;
|
|
|
|
|
} else if (ch == '\\') {
|
|
|
|
|
return so_far;
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid octal escape sequence");
|
|
|
|
|
}
|
|
|
|
|
} else if (ch == '\\') {
|
|
|
|
|
return so_far;
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid octal escape sequence");
|
2003-01-08 16:45:35 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
case 'x':
|
|
|
|
|
/* hexadecimal character (YAP allows empty hexadecimal */
|
|
|
|
|
{
|
|
|
|
|
unsigned char so_far = 0;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (my_isxdigit(ch, 'f', 'F')) { /* hexa */
|
|
|
|
|
so_far =
|
|
|
|
|
so_far * 16 + (chtype(ch) == NU
|
2015-07-06 11:49:56 +01:00
|
|
|
|
? ch - '0'
|
|
|
|
|
: (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (my_isxdigit(ch, 'f', 'F')) { /* hexa */
|
|
|
|
|
so_far =
|
|
|
|
|
so_far * 16 + (chtype(ch) == NU
|
|
|
|
|
? ch - '0'
|
|
|
|
|
: (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '\\') {
|
2015-02-10 00:03:02 +00:00
|
|
|
|
return so_far;
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid hexadecimal escape sequence");
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (ch == '\\') {
|
|
|
|
|
return so_far;
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid hexadecimal escape sequence");
|
|
|
|
|
}
|
|
|
|
|
} else if (ch == '\\') {
|
|
|
|
|
return so_far;
|
|
|
|
|
} else {
|
|
|
|
|
return send_error_message("invalid hexadecimal escape sequence");
|
2003-01-08 16:45:35 +00:00
|
|
|
|
}
|
2015-07-06 11:49:56 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
default:
|
|
|
|
|
/* accept sequence. Note that the ISO standard does not
|
|
|
|
|
consider this sequence legal, whereas SICStus would
|
|
|
|
|
eat up the escape sequence. */
|
|
|
|
|
return send_error_message("invalid escape sequence");
|
|
|
|
|
}
|
2003-01-08 16:45:35 +00:00
|
|
|
|
}
|
2015-02-10 00:03:02 +00:00
|
|
|
|
|
|
|
|
|
static int num_send_error_message(char s[]) {
|
2011-03-07 16:02:55 +00:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_ErrorMessage = s;
|
2018-05-16 00:00:12 +01:00
|
|
|
|
return MkStringTerm(s);
|
2011-03-07 16:02:55 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-11-02 05:16:36 +00:00
|
|
|
|
#define number_overflow() \
|
|
|
|
|
{ \
|
2017-09-23 02:17:55 +01:00
|
|
|
|
size_t nsz = Yap_Min(max_size * 2, max_size); \
|
2016-11-02 05:16:36 +00:00
|
|
|
|
char *nbuf; \
|
|
|
|
|
\
|
|
|
|
|
if (buf == buf0) { \
|
2018-01-22 13:53:17 +00:00
|
|
|
|
nbuf = Malloc(nsz); \
|
2016-11-02 05:16:36 +00:00
|
|
|
|
} else { \
|
|
|
|
|
nbuf = realloc(buf, nsz); \
|
|
|
|
|
} \
|
|
|
|
|
if (!nbuf) { \
|
|
|
|
|
return num_send_error_message("Number Too Long"); \
|
|
|
|
|
} else { \
|
|
|
|
|
left = nsz - max_size; \
|
|
|
|
|
max_size = nsz; \
|
|
|
|
|
buf = nbuf; \
|
|
|
|
|
} \
|
|
|
|
|
}
|
|
|
|
|
|
2001-04-09 20:54:03 +01:00
|
|
|
|
/* reads a number, either integer or float */
|
|
|
|
|
|
2017-11-29 13:47:57 +00:00
|
|
|
|
static Term get_num(int *chp, int *chbuffp, StreamDesc *st, int sign) {
|
2003-01-08 16:45:35 +00:00
|
|
|
|
int ch = *chp;
|
2015-01-18 01:32:13 +00:00
|
|
|
|
Int val = 0L, base = ch - '0';
|
|
|
|
|
int might_be_float = TRUE, has_overflow = FALSE;
|
2013-11-23 12:34:49 +00:00
|
|
|
|
const unsigned char *decimalpoint;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
char buf0[256], *sp = buf0, *buf = buf0;
|
|
|
|
|
int max_size = 254, left = 254;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2001-04-09 20:54:03 +01:00
|
|
|
|
/*
|
|
|
|
|
* because of things like 00'2, 03'2 and even better 12'2, I need to
|
2015-02-10 00:03:02 +00:00
|
|
|
|
* do this (have mercy)
|
2001-04-09 20:54:03 +01:00
|
|
|
|
*/
|
2007-12-29 12:26:41 +00:00
|
|
|
|
if (chtype(ch) == NU) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
base = 10 * base + ch - '0';
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2015-07-06 11:49:56 +01:00
|
|
|
|
if (ch == '\'') {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (base > 36) {
|
2018-05-16 00:00:12 +01:00
|
|
|
|
Yap_ThrowError(SYNTAX_ERROR, MkIntegerTerm(base), "Admissible bases are 0..36");
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
might_be_float = FALSE;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (base == 0) {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
wchar_t ascii = ch;
|
|
|
|
|
int scan_extra = TRUE;
|
2015-07-06 11:49:56 +01:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '\\' &&
|
|
|
|
|
Yap_GetModuleEntry(CurrentModule)->flags & M_CHARESCAPE) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ascii = read_quoted_char(&scan_extra, st);
|
2015-07-28 04:22:44 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* a quick way to represent ASCII */
|
|
|
|
|
if (scan_extra)
|
2017-11-29 13:47:57 +00:00
|
|
|
|
*chp = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (sign == -1) {
|
|
|
|
|
return MkIntegerTerm(-ascii);
|
|
|
|
|
}
|
|
|
|
|
return MkIntegerTerm(ascii);
|
|
|
|
|
} else if (base >= 10 && base <= 36) {
|
|
|
|
|
int upper_case = 'A' - 11 + base;
|
|
|
|
|
int lower_case = 'a' - 11 + base;
|
|
|
|
|
|
|
|
|
|
while (my_isxdigit(ch, upper_case, lower_case)) {
|
|
|
|
|
Int oval = val;
|
|
|
|
|
int chval =
|
|
|
|
|
(chtype(ch) == NU ? ch - '0'
|
|
|
|
|
: (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
|
|
|
|
val = oval * base + chval;
|
|
|
|
|
if (oval != (val - chval) / base) /* overflow */
|
|
|
|
|
has_overflow = (has_overflow || TRUE);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (ch == 'x' && base == 0) {
|
|
|
|
|
might_be_float = FALSE;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2018-02-22 08:18:45 +00:00
|
|
|
|
if (!my_isxdigit(ch, 'F', 'f')) {
|
2018-05-16 00:00:12 +01:00
|
|
|
|
Term t = ( Yap_local.ActiveError->errorRawTerm ? Yap_local.ActiveError->errorRawTerm : MkIntegerTerm(ch) );
|
|
|
|
|
Yap_local.ActiveError->errorRawTerm = 0;
|
|
|
|
|
Yap_ThrowError(SYNTAX_ERROR, t, "invalid hexadecimal digit 0x%C",ch) ;
|
2018-02-21 13:02:20 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
while (my_isxdigit(ch, 'F', 'f')) {
|
|
|
|
|
Int oval = val;
|
|
|
|
|
int chval =
|
|
|
|
|
(chtype(ch) == NU ? ch - '0'
|
|
|
|
|
: (my_isupper(ch) ? ch - 'A' : ch - 'a') + 10);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-01-18 01:32:13 +00:00
|
|
|
|
*sp++ = ch;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
val = val * 16 + chval;
|
|
|
|
|
if (oval != (val - chval) / 16) /* overflow */
|
|
|
|
|
has_overflow = TRUE;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2018-02-21 13:02:20 +00:00
|
|
|
|
|
2009-01-12 15:08:26 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*chp = ch;
|
|
|
|
|
} else if (ch == 'o' && base == 0) {
|
2018-02-21 13:02:20 +00:00
|
|
|
|
might_be_float = false;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
base = 8;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2018-02-21 13:02:20 +00:00
|
|
|
|
if (ch < '0' || ch > '7') {
|
2018-05-16 00:00:12 +01:00
|
|
|
|
Term t = ( Yap_local.ActiveError->errorRawTerm ? Yap_local.ActiveError->errorRawTerm : MkIntegerTerm(ch) );
|
|
|
|
|
Yap_local.ActiveError->errorRawTerm = 0;
|
|
|
|
|
Yap_ThrowError(SYNTAX_ERROR, t, "invalid octal digit 0x%C",ch) ;
|
2018-02-21 13:02:20 +00:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (ch == 'b' && base == 0) {
|
2018-02-21 13:02:20 +00:00
|
|
|
|
might_be_float = false;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
base = 2;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2018-05-14 16:03:02 +01:00
|
|
|
|
if (ch < '0' || ch > '1') {
|
2018-05-16 00:00:12 +01:00
|
|
|
|
Term t = ( Yap_local.ActiveError->errorRawTerm ? Yap_local.ActiveError->errorRawTerm : MkIntegerTerm(ch) );
|
|
|
|
|
Yap_local.ActiveError->errorRawTerm = 0;
|
|
|
|
|
Yap_ThrowError(SYNTAX_ERROR, t, "invalid binary digit 0x%C",ch) ;
|
2018-05-14 16:03:02 +01:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
2018-02-21 13:02:20 +00:00
|
|
|
|
|
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
|
|
|
|
val = base;
|
|
|
|
|
base = 10;
|
|
|
|
|
}
|
2015-07-06 11:49:56 +01:00
|
|
|
|
while (chtype(ch) == NU) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
Int oval = val;
|
|
|
|
|
if (!(val == 0 && ch == '0') || has_overflow) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
|
|
|
|
}
|
|
|
|
|
if (ch - '0' >= base) {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
if (sign == -1)
|
|
|
|
|
return MkIntegerTerm(-val);
|
|
|
|
|
return MkIntegerTerm(val);
|
|
|
|
|
}
|
|
|
|
|
val = val * base + ch - '0';
|
|
|
|
|
if (val / base != oval || val - oval * base != ch - '0') /* overflow */
|
2018-02-21 13:02:20 +00:00
|
|
|
|
has_overflow = true;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (might_be_float && (ch == '.' || ch == 'e' || ch == 'E')) {
|
|
|
|
|
int has_dot = (ch == '.');
|
|
|
|
|
if (has_dot) {
|
|
|
|
|
unsigned char *dp;
|
|
|
|
|
int dc;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if (chtype(ch = getchr(st)) != NU) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == 'e' || ch == 'E') {
|
|
|
|
|
if (trueGlobalPrologFlag(ISO_FLAG))
|
|
|
|
|
return num_send_error_message(
|
|
|
|
|
"Float format not allowed in ISO mode");
|
|
|
|
|
} else { /* followed by a letter, end of term? */
|
2015-07-06 11:49:56 +01:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
sp[0] = '\0';
|
|
|
|
|
*chbuffp = '.';
|
|
|
|
|
*chp = ch;
|
|
|
|
|
if (has_overflow)
|
2016-11-02 05:16:36 +00:00
|
|
|
|
return read_int_overflow(buf, base, val, sign);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (sign == -1)
|
|
|
|
|
return MkIntegerTerm(-val);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
return MkIntegerTerm(val);
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2013-11-23 12:34:49 +00:00
|
|
|
|
#if O_LOCALE
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if ((decimalpoint = (unsigned char *)(localeconv()->decimal_point)) ==
|
|
|
|
|
NULL)
|
2013-11-23 12:34:49 +00:00
|
|
|
|
#endif
|
2015-10-18 11:40:12 +01:00
|
|
|
|
decimalpoint = (const unsigned char *)".";
|
|
|
|
|
dp = (unsigned char *)decimalpoint;
|
|
|
|
|
/* translate . to current locale */
|
|
|
|
|
while ((dc = *dp++) != '\0') {
|
|
|
|
|
*sp++ = dc;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
/* numbers after . */
|
|
|
|
|
if (chtype(ch) == NU) {
|
|
|
|
|
do {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-02-10 00:03:02 +00:00
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
} while (chtype(ch = getchr(st)) == NU);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (ch == 'e' || ch == 'E') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '-') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = '-';
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (ch == '+') {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (chtype(ch) != NU) {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
if (has_dot)
|
2016-11-02 05:16:36 +00:00
|
|
|
|
return float_send(buf, sign);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return MkIntegerTerm(sign * val);
|
|
|
|
|
}
|
|
|
|
|
do {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (--left == 0)
|
|
|
|
|
number_overflow();
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp++ = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
} while (chtype(ch = getchr(st)) == NU);
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
*sp = '\0';
|
|
|
|
|
*chp = ch;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
return float_send(buf, sign);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (has_overflow) {
|
|
|
|
|
*sp = '\0';
|
|
|
|
|
/* skip base */
|
|
|
|
|
*chp = ch;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (buf[0] == '0' && buf[1] == 'x')
|
|
|
|
|
return read_int_overflow(buf + 2, 16, val, sign);
|
|
|
|
|
else if (buf[0] == '0' && buf[1] == 'o')
|
|
|
|
|
return read_int_overflow(buf + 2, 8, val, sign);
|
|
|
|
|
else if (buf[0] == '0' && buf[1] == 'b')
|
|
|
|
|
return read_int_overflow(buf + 2, 2, val, sign);
|
|
|
|
|
if (buf[1] == '\'')
|
|
|
|
|
return read_int_overflow(buf + 2, base, val, sign);
|
|
|
|
|
if (buf[2] == '\'')
|
|
|
|
|
return read_int_overflow(buf + 3, base, val, sign);
|
|
|
|
|
return read_int_overflow(buf, base, val, sign);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
*chp = ch;
|
|
|
|
|
return MkIntegerTerm(val * sign);
|
|
|
|
|
}
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
|
|
|
|
|
2011-02-15 07:39:27 +00:00
|
|
|
|
/* given a function getchr scan until we either find the number
|
2001-04-09 20:54:03 +01:00
|
|
|
|
or end of file */
|
2016-11-08 07:37:36 +00:00
|
|
|
|
Term Yap_scan_num(StreamDesc *inp, bool error_on) {
|
2011-03-07 16:02:55 +00:00
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
Term out;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
int sign = 1;
|
2017-09-23 02:17:55 +01:00
|
|
|
|
int ch, cherr = 0;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
char *ptr;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
void *old_tr = TR;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
InitScannerMemory();
|
|
|
|
|
LOCAL_VarTable = LOCAL_AnonVarTable = NULL;
|
2004-12-28 22:20:37 +00:00
|
|
|
|
if (!(ptr = AllocScannerMemory(4096))) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_ErrorMessage = "Trail Overflow";
|
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_TRAIL;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2016-11-08 07:37:36 +00:00
|
|
|
|
#if HAVE_ISWSPACE
|
|
|
|
|
while (iswspace(ch = getchr(inp)))
|
2016-11-02 05:16:36 +00:00
|
|
|
|
;
|
2016-08-23 23:28:23 +01:00
|
|
|
|
#else
|
2016-11-02 05:16:36 +00:00
|
|
|
|
while (isspace(ch = getchr(inp)))
|
|
|
|
|
;
|
2016-08-23 23:28:23 +01:00
|
|
|
|
#endif
|
2015-10-18 11:40:12 +01:00
|
|
|
|
TokEntry *tokptr = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
2018-01-22 13:53:17 +00:00
|
|
|
|
tokptr->TokLine = GetCurInpLine(inp);
|
|
|
|
|
tokptr->TokPos = GetCurInpPos(inp);
|
2001-04-09 20:54:03 +01:00
|
|
|
|
if (ch == '-') {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
sign = -1;
|
|
|
|
|
ch = getchr(inp);
|
|
|
|
|
} else if (ch == '+') {
|
|
|
|
|
ch = getchr(inp);
|
|
|
|
|
}
|
|
|
|
|
if (chtype(ch) == NU) {
|
|
|
|
|
cherr = '\0';
|
|
|
|
|
if (ASP - HR < 1024) {
|
|
|
|
|
Yap_clean_tokenizer(old_tr, NULL, NULL);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
LOCAL_ErrorMessage = "Stack Overflow";
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_STACK;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
out = get_num(&ch, &cherr, inp, sign); /* */
|
2016-11-08 07:37:36 +00:00
|
|
|
|
} else {
|
|
|
|
|
out = 0;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2016-11-08 07:37:36 +00:00
|
|
|
|
#if HAVE_ISWSPACE
|
2016-12-05 20:50:04 +00:00
|
|
|
|
while (iswspace(ch = getchr(inp)))
|
|
|
|
|
;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
#else
|
2016-12-05 20:50:04 +00:00
|
|
|
|
while (isspace(ch = getchr(inp)))
|
|
|
|
|
;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
#endif
|
2018-05-16 00:00:12 +01:00
|
|
|
|
if (ch == EOF)
|
2007-01-28 14:26:37 +00:00
|
|
|
|
return out;
|
2018-05-16 00:00:12 +01:00
|
|
|
|
return 0;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
#define CHECK_SPACE() \
|
|
|
|
|
if (ASP - HR < 1024) { \
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_ErrorMessage = "Stack Overflow"; \
|
|
|
|
|
LOCAL_Error_TYPE = RESOURCE_ERROR_STACK; \
|
|
|
|
|
LOCAL_Error_Size = 0L; \
|
2016-02-14 04:11:55 +00:00
|
|
|
|
if (p) { \
|
2015-10-18 11:40:12 +01:00
|
|
|
|
p->Tok = Ord(kind = eot_tok); \
|
2016-01-31 10:19:08 +00:00
|
|
|
|
p->TokInfo = TermOutOfStackError; \
|
2016-02-14 04:11:55 +00:00
|
|
|
|
} \
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* serious error now */ \
|
|
|
|
|
return l; \
|
2015-02-10 00:03:02 +00:00
|
|
|
|
}
|
2006-11-27 17:42:03 +00:00
|
|
|
|
|
2016-11-08 07:37:36 +00:00
|
|
|
|
Term Yap_tokRep(void *tokptre) {
|
2016-12-05 20:50:04 +00:00
|
|
|
|
CACHE_REGS
|
|
|
|
|
TokEntry *tokptr = tokptre;
|
|
|
|
|
Term info = tokptr->TokInfo;
|
|
|
|
|
|
|
|
|
|
switch (tokptr->Tok) {
|
|
|
|
|
case Name_tok:
|
|
|
|
|
if (!info) {
|
|
|
|
|
info = TermNil;
|
2017-08-27 22:17:00 +01:00
|
|
|
|
} else {
|
|
|
|
|
info = MkAtomTerm((Atom)info);
|
2016-11-08 07:37:36 +00:00
|
|
|
|
}
|
2016-12-05 20:50:04 +00:00
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomAtom, 1), 1, &info);
|
|
|
|
|
case QuasiQuotes_tok:
|
|
|
|
|
info = MkAtomTerm(Yap_LookupAtom("<QQ>"));
|
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomAtom, 1), 1, &info);
|
|
|
|
|
case Number_tok:
|
2017-08-27 22:17:00 +01:00
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomNumber, 1), 1, &info);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
break;
|
|
|
|
|
case Var_tok: {
|
|
|
|
|
Term t[2];
|
|
|
|
|
VarEntry *varinfo = (VarEntry *)info;
|
|
|
|
|
if ((t[0] = varinfo->VarAdr) == TermNil) {
|
|
|
|
|
t[0] = varinfo->VarAdr = MkVarTerm();
|
|
|
|
|
}
|
|
|
|
|
t[1] = MkAtomTerm((Atom)(varinfo->VarRep));
|
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomGVar, 2), 2, t);
|
|
|
|
|
}
|
|
|
|
|
case String_tok:
|
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomString, 1), 1, &info);
|
|
|
|
|
case BQString_tok:
|
|
|
|
|
return Yap_MkApplTerm(Yap_MkFunctor(AtomString, 1), 1, &info);
|
|
|
|
|
case Error_tok:
|
|
|
|
|
return MkAtomTerm(AtomError);
|
|
|
|
|
case eot_tok:
|
|
|
|
|
return MkAtomTerm(Yap_LookupAtom("EOT"));
|
|
|
|
|
case Ponctuation_tok:
|
2016-12-16 09:40:37 +00:00
|
|
|
|
if (info == Terml)
|
|
|
|
|
return TermBeginBracket;
|
|
|
|
|
else
|
|
|
|
|
return info;
|
2016-12-05 20:50:04 +00:00
|
|
|
|
}
|
|
|
|
|
return TermDot;
|
2015-08-18 20:59:24 +01:00
|
|
|
|
}
|
|
|
|
|
|
2016-12-05 20:50:04 +00:00
|
|
|
|
const char *Yap_tokText(void *tokptre) {
|
|
|
|
|
CACHE_REGS
|
|
|
|
|
TokEntry *tokptr = tokptre;
|
|
|
|
|
Term info = tokptr->TokInfo;
|
|
|
|
|
|
|
|
|
|
switch (tokptr->Tok) {
|
|
|
|
|
case eot_tok:
|
|
|
|
|
return "EOT";
|
|
|
|
|
case Ponctuation_tok:
|
2017-09-23 02:17:55 +01:00
|
|
|
|
if (info == Terml)
|
|
|
|
|
return "(";
|
2016-12-05 20:50:04 +00:00
|
|
|
|
case Error_tok:
|
|
|
|
|
case BQString_tok:
|
|
|
|
|
case String_tok:
|
|
|
|
|
case Name_tok:
|
|
|
|
|
return AtomOfTerm(info)->StrOfAE;
|
|
|
|
|
case QuasiQuotes_tok:
|
|
|
|
|
return "<QQ>";
|
|
|
|
|
case Number_tok:
|
|
|
|
|
if (IsIntegerTerm(info)) {
|
2018-01-22 13:53:17 +00:00
|
|
|
|
char *s = Malloc(36);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
snprintf(s, 35, Int_FORMAT, IntegerOfTerm(info));
|
|
|
|
|
return s;
|
|
|
|
|
} else if (IsFloatTerm(info)) {
|
2018-01-22 13:53:17 +00:00
|
|
|
|
char *s = Malloc(64);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
snprintf(s, 63, "%6g", FloatOfTerm(info));
|
|
|
|
|
return s;
|
|
|
|
|
} else {
|
|
|
|
|
size_t len = Yap_gmp_to_size(info, 10);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
char *s = Malloc(len + 2);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
return Yap_gmp_to_string(info, s, len + 1, 10);
|
2016-11-08 07:37:36 +00:00
|
|
|
|
}
|
2016-12-05 20:50:04 +00:00
|
|
|
|
break;
|
|
|
|
|
case Var_tok:
|
|
|
|
|
if (info == 0)
|
|
|
|
|
return "[]";
|
|
|
|
|
return ((Atom)info)->StrOfAE;
|
|
|
|
|
}
|
|
|
|
|
return ".";
|
2016-11-08 07:37:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-11-29 13:47:57 +00:00
|
|
|
|
static void open_comment(int ch, StreamDesc *st USES_REGS) {
|
2014-01-19 21:15:05 +00:00
|
|
|
|
CELL *h0 = HR;
|
|
|
|
|
HR += 5;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
h0[0] = AbsAppl(h0 + 2);
|
2011-06-12 17:23:10 +01:00
|
|
|
|
h0[1] = TermNil;
|
|
|
|
|
if (!LOCAL_CommentsTail) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* first comment */
|
|
|
|
|
LOCAL_Comments = AbsPair(h0);
|
|
|
|
|
} else {
|
|
|
|
|
/* extra comment */
|
|
|
|
|
*LOCAL_CommentsTail = AbsPair(h0);
|
|
|
|
|
}
|
2015-02-10 00:03:02 +00:00
|
|
|
|
LOCAL_CommentsTail = h0 + 1;
|
2011-06-12 17:23:10 +01:00
|
|
|
|
h0 += 2;
|
|
|
|
|
h0[0] = (CELL)FunctorMinus;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
h0[1] = Yap_StreamPosition(st - GLOBAL_Stream);
|
2011-06-12 17:23:10 +01:00
|
|
|
|
h0[2] = TermNil;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
LOCAL_CommentsNextChar = h0 + 2;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
LOCAL_CommentsBuff = (wchar_t *)Malloc(1024 * sizeof(wchar_t));
|
2011-06-12 17:23:10 +01:00
|
|
|
|
LOCAL_CommentsBuffLim = 1024;
|
|
|
|
|
LOCAL_CommentsBuff[0] = ch;
|
|
|
|
|
LOCAL_CommentsBuffPos = 1;
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static void extend_comment(int ch USES_REGS) {
|
2011-06-12 17:23:10 +01:00
|
|
|
|
LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = ch;
|
|
|
|
|
LOCAL_CommentsBuffPos++;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
if (LOCAL_CommentsBuffPos == LOCAL_CommentsBuffLim - 1) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_CommentsBuff = (wchar_t *)realloc(
|
|
|
|
|
LOCAL_CommentsBuff, sizeof(wchar_t) * (LOCAL_CommentsBuffLim + 4096));
|
|
|
|
|
LOCAL_CommentsBuffLim += 4096;
|
|
|
|
|
}
|
2011-06-12 17:23:10 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
static void close_comment(USES_REGS1) {
|
2011-06-12 17:23:10 +01:00
|
|
|
|
LOCAL_CommentsBuff[LOCAL_CommentsBuffPos] = '\0';
|
2013-12-02 14:49:41 +00:00
|
|
|
|
*LOCAL_CommentsNextChar = Yap_WCharsToString(LOCAL_CommentsBuff PASS_REGS);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(LOCAL_CommentsBuff);
|
2011-06-21 15:11:07 +01:00
|
|
|
|
LOCAL_CommentsBuff = NULL;
|
2011-06-14 08:58:51 +01:00
|
|
|
|
LOCAL_CommentsBuffLim = 0;
|
2011-06-12 17:23:10 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-07-06 11:49:56 +01:00
|
|
|
|
// mark that we reached EOF,
|
|
|
|
|
// next token will be end_of_file)
|
2017-11-29 13:47:57 +00:00
|
|
|
|
static void mark_eof(struct stream_desc *st) {
|
|
|
|
|
st->status |= Push_Eof_Stream_f;
|
2015-07-06 11:49:56 +01:00
|
|
|
|
}
|
|
|
|
|
|
2015-02-10 00:03:02 +00:00
|
|
|
|
#define add_ch_to_buff(ch) \
|
2016-11-02 05:16:36 +00:00
|
|
|
|
{ charp += put_utf8(charp, ch); }
|
2013-11-25 10:24:13 +00:00
|
|
|
|
|
2017-11-29 13:47:57 +00:00
|
|
|
|
TokEntry *Yap_tokenizer(struct stream_desc *st, bool store_comments,
|
2015-10-18 11:40:12 +01:00
|
|
|
|
Term *tposp) {
|
2015-07-06 11:49:56 +01:00
|
|
|
|
|
|
|
|
|
CACHE_REGS
|
2015-10-18 11:40:12 +01:00
|
|
|
|
TokEntry *t, *l, *p;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
enum TokenKinds kind;
|
|
|
|
|
int solo_flag = TRUE;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
int32_t ch, och;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
struct qq_struct_t *cur_qq = NULL;
|
2016-12-05 20:50:04 +00:00
|
|
|
|
int sign = 1;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
InitScannerMemory();
|
2011-05-23 16:19:47 +01:00
|
|
|
|
LOCAL_VarTable = NULL;
|
|
|
|
|
LOCAL_AnonVarTable = NULL;
|
2004-11-22 22:28:06 +00:00
|
|
|
|
l = NULL;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
p = NULL; /* Just to make lint happy */
|
2018-01-05 16:57:38 +00:00
|
|
|
|
ch = getchr(st);
|
2008-10-23 22:17:45 +01:00
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2017-11-29 13:47:57 +00:00
|
|
|
|
*tposp = Yap_StreamPosition(st - GLOBAL_Stream);
|
|
|
|
|
Yap_setCurrentSourceLocation(st);
|
|
|
|
|
LOCAL_StartLineCount = st->linecount;
|
|
|
|
|
LOCAL_StartLinePos = st->linepos;
|
2001-04-09 20:54:03 +01:00
|
|
|
|
do {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
int quote, isvar;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
unsigned char *charp, *mp;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
size_t len;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
unsigned char *TokImage = NULL;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
t = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
|
|
|
|
t->TokNext = NULL;
|
|
|
|
|
if (t == NULL) {
|
2016-01-31 10:19:08 +00:00
|
|
|
|
return TrailSpaceError(p, l);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (!l)
|
|
|
|
|
l = t;
|
|
|
|
|
else
|
|
|
|
|
p->TokNext = t;
|
|
|
|
|
p = t;
|
|
|
|
|
restart:
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2018-01-22 13:53:17 +00:00
|
|
|
|
t->TokPos = GetCurInpPos(st);
|
|
|
|
|
t->TokLine = GetCurInpLine(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
switch (chtype(ch)) {
|
|
|
|
|
|
|
|
|
|
case CC:
|
|
|
|
|
if (store_comments) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
open_comment(ch, st PASS_REGS);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
continue_comment:
|
2017-11-29 13:47:57 +00:00
|
|
|
|
while ((ch = getchr(st)) != 10 && chtype(ch) != EF) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
extend_comment(ch PASS_REGS);
|
|
|
|
|
}
|
|
|
|
|
extend_comment(ch PASS_REGS);
|
|
|
|
|
if (chtype(ch) != EF) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (chtype(ch) == CC) {
|
|
|
|
|
extend_comment(ch PASS_REGS);
|
|
|
|
|
goto continue_comment;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
close_comment(PASS_REGS1);
|
|
|
|
|
} else {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
while ((ch = getchr(st)) != 10 && chtype(ch) != EF)
|
2015-10-18 11:40:12 +01:00
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
if (chtype(ch) != EF) {
|
|
|
|
|
/* blank space */
|
|
|
|
|
if (t == l) {
|
|
|
|
|
/* we found a comment before reading characters */
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2017-11-29 13:47:57 +00:00
|
|
|
|
*tposp = Yap_StreamPosition(st - GLOBAL_Stream);
|
|
|
|
|
Yap_setCurrentSourceLocation(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
goto restart;
|
|
|
|
|
} else {
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case UC:
|
|
|
|
|
case UL:
|
2016-11-02 05:16:36 +00:00
|
|
|
|
case LC: {
|
|
|
|
|
int32_t och = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
size_t sz = 512;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
TokImage = Malloc(sz PASS_REGS);
|
2016-11-08 07:37:36 +00:00
|
|
|
|
scan_name:
|
2016-11-02 05:16:36 +00:00
|
|
|
|
charp = (unsigned char *)TokImage;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
isvar = (chtype(och) != LC);
|
|
|
|
|
add_ch_to_buff(och);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
for (; chtype(ch) <= NU; ch = getchr(st)) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (charp == TokImage + (sz - 1)) {
|
|
|
|
|
unsigned char *p0 = TokImage;
|
2017-07-25 01:10:44 +01:00
|
|
|
|
sz = Yap_Min(sz * 2, sz + MBYTE);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
TokImage = Realloc(p0, sz);
|
|
|
|
|
if (TokImage == NULL) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
2018-01-22 13:53:17 +00:00
|
|
|
|
charp = TokImage + (charp - p0);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
add_ch_to_buff(ch);
|
|
|
|
|
}
|
|
|
|
|
while (ch == '\'' && isvar &&
|
|
|
|
|
trueGlobalPrologFlag(VARIABLE_NAMES_MAY_END_WITH_QUOTES_FLAG)) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (charp == (unsigned char *)AuxSp - 1024) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
add_ch_to_buff(ch);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
add_ch_to_buff('\0');
|
|
|
|
|
if (!isvar) {
|
|
|
|
|
Atom ae;
|
|
|
|
|
/* don't do this in iso */
|
2016-11-02 05:16:36 +00:00
|
|
|
|
ae = Yap_ULookupAtom(TokImage);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(TokImage);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ae == NIL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return CodeSpaceError(t, p, l);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = MkAtomTerm(ae);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '(')
|
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
|
|
|
|
} else {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
VarEntry *ve = Yap_LookupVar((const char *)TokImage);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(TokImage);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->TokInfo = Unsigned(ve);
|
|
|
|
|
if (cur_qq) {
|
|
|
|
|
ve->refs++;
|
|
|
|
|
}
|
|
|
|
|
t->Tok = Ord(kind = Var_tok);
|
|
|
|
|
}
|
2016-11-04 16:36:48 +00:00
|
|
|
|
|
2016-11-02 05:16:36 +00:00
|
|
|
|
} break;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
2016-12-05 20:50:04 +00:00
|
|
|
|
case NU: {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
int cherr;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
int cha;
|
2016-12-05 20:50:04 +00:00
|
|
|
|
sign = 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
2016-12-05 20:50:04 +00:00
|
|
|
|
scan_number:
|
|
|
|
|
cha = ch;
|
|
|
|
|
cherr = 0;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
CHECK_SPACE();
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if ((t->TokInfo = get_num(&cha, &cherr, st, sign)) == 0L) {
|
2018-02-21 13:02:20 +00:00
|
|
|
|
if (t->TokInfo == 0) {
|
2016-01-31 10:19:08 +00:00
|
|
|
|
p->Tok = eot_tok;
|
|
|
|
|
t->TokInfo = TermError;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* serious error now */
|
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
ch = cha;
|
|
|
|
|
if (cherr) {
|
|
|
|
|
TokEntry *e;
|
|
|
|
|
t->Tok = Number_tok;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
t->TokPos = GetCurInpPos(st);
|
|
|
|
|
t->TokLine = GetCurInpLine(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
|
|
|
|
if (e == NULL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return TrailSpaceError(p, l);
|
2016-01-31 10:19:08 +00:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
|
|
|
|
e->TokNext = NULL;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->TokNext = e;
|
|
|
|
|
t = e;
|
|
|
|
|
p = e;
|
|
|
|
|
switch (cherr) {
|
|
|
|
|
case 'e':
|
|
|
|
|
case 'E':
|
|
|
|
|
och = cherr;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
TokImage = Malloc(1024 PASS_REGS);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
goto scan_name;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
break;
|
|
|
|
|
case '=':
|
|
|
|
|
case '_':
|
|
|
|
|
/* handle error while parsing a float */
|
|
|
|
|
{
|
|
|
|
|
TokEntry *e2;
|
|
|
|
|
|
|
|
|
|
t->Tok = Ord(Var_tok);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = (Term)Yap_LookupVar("E");
|
2018-01-22 13:53:17 +00:00
|
|
|
|
t->TokPos = GetCurInpPos(st);
|
|
|
|
|
t->TokLine = GetCurInpLine(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e2 = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
|
|
|
|
if (e2 == NULL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return TrailSpaceError(p, l);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
} else {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e2->TokNext = NULL;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->TokNext = e2;
|
|
|
|
|
t = e2;
|
|
|
|
|
p = e2;
|
|
|
|
|
if (cherr == '=')
|
|
|
|
|
och = '+';
|
|
|
|
|
else
|
|
|
|
|
och = '-';
|
|
|
|
|
}
|
|
|
|
|
goto enter_symbol;
|
|
|
|
|
case '+':
|
|
|
|
|
case '-':
|
|
|
|
|
/* handle error while parsing a float */
|
|
|
|
|
{
|
|
|
|
|
TokEntry *e2;
|
|
|
|
|
|
2016-12-05 20:50:04 +00:00
|
|
|
|
if (chtype(ch) == NU) {
|
|
|
|
|
if (och == '-')
|
|
|
|
|
sign = -1;
|
|
|
|
|
else
|
|
|
|
|
sign = 1;
|
|
|
|
|
goto scan_number;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Name_tok;
|
|
|
|
|
if (ch == '(')
|
|
|
|
|
solo_flag = FALSE;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = MkAtomTerm(AtomE);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
t->TokLine = GetCurInpLine(st);
|
|
|
|
|
t->TokPos = GetCurInpPos(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e2 = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
|
|
|
|
if (e2 == NULL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return TrailSpaceError(p, l);
|
2015-07-06 11:49:56 +01:00
|
|
|
|
} else {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e2->TokNext = NULL;
|
2015-07-06 11:49:56 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->TokNext = e2;
|
|
|
|
|
t = e2;
|
|
|
|
|
p = e2;
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
och = cherr;
|
|
|
|
|
goto enter_symbol;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
t->Tok = Ord(kind = Number_tok);
|
|
|
|
|
}
|
|
|
|
|
} break;
|
|
|
|
|
|
|
|
|
|
case QT:
|
|
|
|
|
case DC:
|
|
|
|
|
quoted_string:
|
2018-01-22 13:53:17 +00:00
|
|
|
|
TokImage = Malloc(1048);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
charp = TokImage;
|
|
|
|
|
quote = ch;
|
|
|
|
|
len = 0;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
size_t sz = 1024;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
while (TRUE) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (charp > TokImage + (sz - 1)) {
|
2017-07-25 01:10:44 +01:00
|
|
|
|
TokImage = Realloc(TokImage, Yap_Min(sz * 2, sz + MBYTE));
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (TokImage == NULL) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
2015-02-10 00:03:02 +00:00
|
|
|
|
break;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (ch == 10 && trueGlobalPrologFlag(ISO_FLAG)) {
|
|
|
|
|
/* in ISO a new line terminates a string */
|
|
|
|
|
LOCAL_ErrorMessage = "layout character \n inside quotes";
|
|
|
|
|
break;
|
|
|
|
|
}
|
2016-12-05 20:50:04 +00:00
|
|
|
|
if (ch == EOFCHAR) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == quote) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch != quote)
|
|
|
|
|
break;
|
|
|
|
|
add_ch_to_buff(ch);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (ch == '\\' &&
|
|
|
|
|
Yap_GetModuleEntry(CurrentModule)->flags & M_CHARESCAPE) {
|
|
|
|
|
int scan_next = TRUE;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if ((ch = read_quoted_char(&scan_next, st))) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
add_ch_to_buff(ch);
|
|
|
|
|
}
|
|
|
|
|
if (scan_next) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
add_ch_to_buff(ch);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
++len;
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
*charp = '\0';
|
|
|
|
|
if (quote == '"') {
|
|
|
|
|
t->TokInfo = Yap_CharsToTDQ((char *)TokImage, CurrentModule,
|
|
|
|
|
LOCAL_encoding PASS_REGS);
|
|
|
|
|
if (!(t->TokInfo)) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
|
|
|
|
t->Tok = Ord(kind = String_tok);
|
|
|
|
|
} else if (quote == '`') {
|
|
|
|
|
t->TokInfo = Yap_CharsToTBQ((char *)TokImage, CurrentModule,
|
|
|
|
|
LOCAL_encoding PASS_REGS);
|
|
|
|
|
if (!(t->TokInfo)) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
|
|
|
|
t->Tok = Ord(kind = String_tok);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = MkAtomTerm(Yap_ULookupAtom(TokImage));
|
|
|
|
|
if (!(t->TokInfo)) {
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(TokImage);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
|
|
|
|
if (ch == '(')
|
|
|
|
|
solo_flag = false;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
break;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
|
|
|
|
|
case BS:
|
|
|
|
|
if (ch == '\0') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
int pch;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
pch = Yap_peek(st - GLOBAL_Stream);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
if (chtype(pch) == EF) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
|
|
|
|
} else {
|
|
|
|
|
t->TokInfo = TermNewLine;
|
|
|
|
|
}
|
2016-01-31 10:19:08 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
} else
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
break;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
case SY: {
|
|
|
|
|
int pch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if (ch == '.' && (pch = Yap_peek(st - GLOBAL_Stream)) &&
|
2016-12-05 20:50:04 +00:00
|
|
|
|
(chtype(pch) == BS || chtype(pch) == EF || pch == '%')) {
|
2018-04-14 16:25:29 +01:00
|
|
|
|
if (chtype(ch) != EF)
|
|
|
|
|
getchr(st);
|
2016-12-05 20:50:04 +00:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
|
|
|
|
// consume...
|
|
|
|
|
if (pch == '%') {
|
|
|
|
|
t->TokInfo = TermNewLine;
|
|
|
|
|
return l;
|
2016-02-14 04:11:55 +00:00
|
|
|
|
}
|
2016-12-05 20:50:04 +00:00
|
|
|
|
return l;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == '`')
|
|
|
|
|
goto quoted_string;
|
2015-12-15 09:28:43 +00:00
|
|
|
|
och = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-12-15 09:28:43 +00:00
|
|
|
|
if (och == '.') {
|
|
|
|
|
if (chtype(ch) == BS || chtype(ch) == EF || ch == '%') {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-04-28 15:01:16 +01:00
|
|
|
|
if (ch == '%') {
|
|
|
|
|
t->TokInfo = TermNewLine;
|
2016-04-26 15:26:35 +01:00
|
|
|
|
return l;
|
2016-11-02 05:16:36 +00:00
|
|
|
|
}
|
2016-01-31 10:19:08 +00:00
|
|
|
|
if (chtype(ch) == EF) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
|
|
|
|
} else {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermNewLine;
|
2016-01-31 10:19:08 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-12-05 20:50:04 +00:00
|
|
|
|
if (och == '/' && ch == '*') {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (store_comments) {
|
|
|
|
|
CHECK_SPACE();
|
2017-11-29 13:47:57 +00:00
|
|
|
|
open_comment('/', st PASS_REGS);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
while ((och != '*' || ch != '/') && chtype(ch) != EF) {
|
|
|
|
|
och = ch;
|
|
|
|
|
CHECK_SPACE();
|
|
|
|
|
extend_comment(ch PASS_REGS);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (chtype(ch) != EF) {
|
|
|
|
|
CHECK_SPACE();
|
|
|
|
|
extend_comment(ch PASS_REGS);
|
|
|
|
|
}
|
|
|
|
|
close_comment(PASS_REGS1);
|
|
|
|
|
} else {
|
|
|
|
|
while ((och != '*' || ch != '/') && chtype(ch) != EF) {
|
|
|
|
|
och = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (chtype(ch) == EF) {
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-01-31 10:19:08 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
2015-02-10 00:03:02 +00:00
|
|
|
|
break;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
|
|
|
|
/* leave comments */
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (t == l) {
|
|
|
|
|
/* we found a comment before reading characters */
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-07-06 11:49:56 +01:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
CHECK_SPACE();
|
2017-11-29 13:47:57 +00:00
|
|
|
|
*tposp = Yap_StreamPosition(st - GLOBAL_Stream);
|
|
|
|
|
Yap_setCurrentSourceLocation(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
goto restart;
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
enter_symbol:
|
|
|
|
|
if (och == '.' && (chtype(ch) == BS || chtype(ch) == EF || ch == '%')) {
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (ch == '%') {
|
2016-04-28 15:01:16 +01:00
|
|
|
|
t->TokInfo = TermNewLine;
|
|
|
|
|
return l;
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (chtype(ch) == EF) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
|
|
|
|
} else {
|
|
|
|
|
t->TokInfo = TermNl;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
} else {
|
|
|
|
|
Atom ae;
|
2016-11-08 07:37:36 +00:00
|
|
|
|
sz = 1024;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
TokImage = Malloc(sz);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
charp = TokImage;
|
|
|
|
|
add_ch_to_buff(och);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
for (; chtype(ch) == SY; ch = getchr(st)) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (charp >= TokImage + (sz - 10)) {
|
2017-07-25 01:10:44 +01:00
|
|
|
|
sz = Yap_Min(sz * 2, sz + MBYTE);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
TokImage = Realloc(TokImage, sz);
|
|
|
|
|
if (!TokImage)
|
|
|
|
|
return CodeSpaceError(t, p, l);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
add_ch_to_buff(ch);
|
|
|
|
|
}
|
|
|
|
|
add_ch_to_buff('\0');
|
2016-11-02 05:16:36 +00:00
|
|
|
|
ae = Yap_ULookupAtom(TokImage);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ae == NIL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = MkAtomTerm(ae);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (t->TokInfo == (CELL)NIL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return CodeSpaceError(t, p, l);
|
|
|
|
|
}
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(TokImage);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
|
|
|
|
if (ch == '(')
|
2015-12-15 09:28:43 +00:00
|
|
|
|
solo_flag = false;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
else
|
2015-12-15 09:28:43 +00:00
|
|
|
|
solo_flag = true;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
break;
|
2015-07-06 11:49:56 +01:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
case SL: {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
unsigned char chs[2];
|
2015-10-18 11:40:12 +01:00
|
|
|
|
chs[0] = ch;
|
|
|
|
|
chs[1] = '\0';
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = MkAtomTerm(Yap_ULookupAtom(chs));
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
|
|
|
|
if (ch == '(')
|
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
} break;
|
|
|
|
|
|
|
|
|
|
case BK:
|
|
|
|
|
och = ch;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
{
|
|
|
|
|
unsigned char chs[10];
|
|
|
|
|
TokImage = charp = chs;
|
|
|
|
|
add_ch_to_buff(och);
|
|
|
|
|
charp[0] = '\0';
|
|
|
|
|
t->TokInfo = MkAtomTerm(Yap_ULookupAtom(chs));
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (och == '(') {
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch == ')') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = TermEmptyBrackets;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
break;
|
|
|
|
|
} else if (!solo_flag) {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = Terml;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
solo_flag = TRUE;
|
|
|
|
|
}
|
|
|
|
|
} else if (och == '[') {
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
};
|
|
|
|
|
if (ch == ']') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = TermNil;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} else if (och == '{') {
|
|
|
|
|
if (ch == '|') {
|
|
|
|
|
qq_t *qq = (qq_t *)calloc(sizeof(qq_t), 1);
|
|
|
|
|
if (!qq) {
|
|
|
|
|
LOCAL_ErrorMessage = "not enough heap space to read in quasi quote";
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-01-31 10:19:08 +00:00
|
|
|
|
t->TokInfo = TermOutOfHeapError;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
if (cur_qq) {
|
|
|
|
|
LOCAL_ErrorMessage = "quasi quote in quasi quote";
|
|
|
|
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermOutOfHeapError;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(qq);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
} else {
|
|
|
|
|
cur_qq = qq;
|
|
|
|
|
}
|
|
|
|
|
t->TokInfo = (CELL)qq;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if (st->status & Seekable_Stream_f) {
|
|
|
|
|
qq->start.byteno = fseek(st->file, 0, 0);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->start.byteno = st->charcount - 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->start.lineno = st->linecount;
|
|
|
|
|
qq->start.linepos = st->linepos - 1;
|
|
|
|
|
qq->start.charno = st->charcount - 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = QuasiQuotes_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
while (chtype(ch) == BS) {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
};
|
|
|
|
|
if (ch == '}') {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
t->TokInfo = TermBraces;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = Name_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
solo_flag = FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} else if (och == '|' && ch == '|') {
|
|
|
|
|
qq_t *qq = cur_qq;
|
|
|
|
|
if (!qq) {
|
|
|
|
|
LOCAL_ErrorMessage = "quasi quoted's || without {|";
|
|
|
|
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(cur_qq);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
cur_qq = NULL;
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-01-31 10:19:08 +00:00
|
|
|
|
t->TokInfo = TermError;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
cur_qq = NULL;
|
|
|
|
|
t->TokInfo = (CELL)qq;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if (st->status & Seekable_Stream_f) {
|
|
|
|
|
qq->mid.byteno = fseek(st->file, 0, 0);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->mid.byteno = st->charcount - 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->mid.lineno = st->linecount;
|
|
|
|
|
qq->mid.linepos = st->linepos - 1;
|
|
|
|
|
qq->mid.charno = st->charcount - 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = QuasiQuotes_tok);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2016-11-02 05:16:36 +00:00
|
|
|
|
sz = 1024;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
TokImage = Malloc(sz);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (!TokImage) {
|
|
|
|
|
LOCAL_ErrorMessage =
|
|
|
|
|
"not enough heap space to read in a quasi quoted atom";
|
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermError;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
}
|
|
|
|
|
charp = TokImage;
|
|
|
|
|
quote = ch;
|
|
|
|
|
len = 0;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
while (TRUE) {
|
|
|
|
|
if (ch == '|') {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (ch != '}') {
|
|
|
|
|
} else {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
charp += put_utf8((unsigned char *)charp, och);
|
|
|
|
|
charp += put_utf8((unsigned char *)charp, ch);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* we're done */
|
2015-02-10 00:03:02 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else if (chtype(ch) == EF) {
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(TokImage);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermOutOfHeapError;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
break;
|
|
|
|
|
} else {
|
2016-11-02 05:16:36 +00:00
|
|
|
|
charp += put_utf8(charp, ch);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchrq(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
if (charp > (unsigned char *)AuxSp - 1024) {
|
2015-10-18 11:40:12 +01:00
|
|
|
|
/* Not enough space to read in the string. */
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return AuxSpaceError(
|
|
|
|
|
t, l, "not enough space to read in string or quoted atom");
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
len = charp - (unsigned char *)TokImage;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
mp = Malloc(len + 1);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (mp == NULL) {
|
|
|
|
|
LOCAL_ErrorMessage = "not enough heap space to read in quasi quote";
|
|
|
|
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
2015-02-10 00:03:02 +00:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermOutOfHeapError;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
|
|
|
|
}
|
2016-11-02 05:16:36 +00:00
|
|
|
|
strncpy((char *)mp, (const char *)TokImage, len + 1);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
qq->text = (unsigned char *)mp;
|
|
|
|
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
2017-11-29 13:47:57 +00:00
|
|
|
|
if (st->status & Seekable_Stream_f) {
|
|
|
|
|
qq->end.byteno = fseek(st->file, 0, 0);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
} else {
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->end.byteno = st->charcount - 1;
|
2016-12-05 20:50:04 +00:00
|
|
|
|
}
|
2017-11-29 13:47:57 +00:00
|
|
|
|
qq->end.lineno = st->linecount;
|
|
|
|
|
qq->end.linepos = st->linepos - 1;
|
|
|
|
|
qq->end.charno = st->charcount - 1;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
if (!(t->TokInfo)) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return CodeSpaceError(t, p, l);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
Yap_ReleasePreAllocCodeSpace((CODEADDR)TokImage);
|
|
|
|
|
solo_flag = FALSE;
|
2017-11-29 13:47:57 +00:00
|
|
|
|
ch = getchr(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
t->Tok = Ord(kind = Ponctuation_tok);
|
|
|
|
|
break;
|
|
|
|
|
case EF:
|
2017-11-29 13:47:57 +00:00
|
|
|
|
mark_eof(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
return l;
|
2003-01-08 16:45:35 +00:00
|
|
|
|
|
2016-02-14 04:11:55 +00:00
|
|
|
|
default: {
|
|
|
|
|
char err[1024];
|
|
|
|
|
snprintf(err, 1023, "\n++++ token: unrecognised char %c (%d), type %c\n",
|
|
|
|
|
ch, ch, chtype(ch));
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
t->Tok = Ord(kind = eot_tok);
|
2016-02-14 04:11:55 +00:00
|
|
|
|
t->TokInfo = TermEof;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
}
|
|
|
|
|
if (LOCAL_ErrorMessage) {
|
|
|
|
|
/* insert an error token to inform the system of what happened */
|
|
|
|
|
TokEntry *e = (TokEntry *)AllocScannerMemory(sizeof(TokEntry));
|
|
|
|
|
if (e == NULL) {
|
2016-02-14 04:11:55 +00:00
|
|
|
|
return TrailSpaceError(p, l);
|
|
|
|
|
}
|
2015-10-18 11:40:12 +01:00
|
|
|
|
p->TokNext = e;
|
|
|
|
|
e->Tok = Error_tok;
|
|
|
|
|
e->TokInfo = MkAtomTerm(Yap_LookupAtom(LOCAL_ErrorMessage));
|
2018-01-22 13:53:17 +00:00
|
|
|
|
e->TokPos = GetCurInpPos(st);
|
|
|
|
|
e->TokLine = GetCurInpLine(st);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
e->TokNext = NULL;
|
|
|
|
|
LOCAL_ErrorMessage = NULL;
|
|
|
|
|
p = e;
|
|
|
|
|
}
|
|
|
|
|
} while (kind != eot_tok);
|
|
|
|
|
|
2001-04-09 20:54:03 +01:00
|
|
|
|
return (l);
|
|
|
|
|
}
|
2004-02-05 16:57:02 +00:00
|
|
|
|
|
2015-10-18 11:40:12 +01:00
|
|
|
|
void Yap_clean_tokenizer(TokEntry *tokstart, VarEntry *vartable,
|
2015-06-19 01:30:13 +01:00
|
|
|
|
VarEntry *anonvartable) {
|
2011-03-07 16:02:55 +00:00
|
|
|
|
CACHE_REGS
|
2016-11-02 05:16:36 +00:00
|
|
|
|
struct scanner_extra_alloc *ptr = LOCAL_ScannerExtraBlocks;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
while (ptr) {
|
|
|
|
|
struct scanner_extra_alloc *next = ptr->next;
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(ptr);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
ptr = next;
|
|
|
|
|
}
|
|
|
|
|
TR = (tr_fr_ptr)tokstart;
|
|
|
|
|
LOCAL_Comments = TermNil;
|
|
|
|
|
LOCAL_CommentsNextChar = LOCAL_CommentsTail = NULL;
|
|
|
|
|
if (LOCAL_CommentsBuff) {
|
2018-01-22 13:53:17 +00:00
|
|
|
|
Free(LOCAL_CommentsBuff);
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_CommentsBuff = NULL;
|
|
|
|
|
}
|
2016-08-15 20:54:46 +01:00
|
|
|
|
LOCAL_ScannerStack = NULL;
|
2015-10-18 11:40:12 +01:00
|
|
|
|
LOCAL_CommentsBuffLim = 0;
|
2004-02-05 16:57:02 +00:00
|
|
|
|
}
|
2005-11-08 13:51:15 +00:00
|
|
|
|
|
2014-09-15 09:13:50 +01:00
|
|
|
|
/// @}
|