annoying small bugs

This commit is contained in:
Vitor Santos Costa 2016-05-26 22:47:19 +01:00
parent a9fe413dd0
commit cd7571da7a
3 changed files with 358 additions and 341 deletions

View File

@ -29,8 +29,7 @@ static char SccsId[] = "%W% %G%";
///{@
/// @addtogroup CharProps
/**
* @defgroup CharIO Character-Based Input/Output
* @ingroup InputOutput
@ -42,10 +41,10 @@ static char SccsId[] = "%W% %G%";
*/
#include "Yap.h"
#include "Yatom.h"
#include "YapHeap.h"
#include "yapio.h"
#include "YapText.h"
#include "Yatom.h"
#include "yapio.h"
#include <stdlib.h>
#if HAVE_UNISTD_H
#include <unistd.h>
@ -75,14 +74,14 @@ static char SccsId[] = "%W% %G%";
#define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR)
#endif
#endif
#include "iopreds.h"
#include "eval.h"
#include "iopreds.h"
static Int p_change_type_of_char(USES_REGS1);
Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
CACHE_REGS
int sno;
int sno;
Term t;
sno = Yap_open_buf_read_stream(s, strlen(s), encp, MEM_BUF_USER);
@ -92,14 +91,13 @@ Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
GLOBAL_Stream[sno].encoding = *encp;
else
GLOBAL_Stream[sno].encoding = LOCAL_encoding;
#ifdef __ANDROID__
#ifdef __ANDROID__
while (*s && isblank(*s) && Yap_wide_chtype(*s) == BS)
s++
;
#else
s++;
#else
while (*s && iswblank(*s++))
;
#endif
#endif
t = Yap_scan_num(GLOBAL_Stream + sno);
if (LOCAL_Error_TYPE == SYNTAX_ERROR)
LOCAL_Error_TYPE = YAP_NO_ERROR;
@ -117,30 +115,31 @@ typedef struct enc_map {
} enc_map_t;
/*
 * Charset names recognised in locale strings, paired with the encoding each
 * one selects. Yap_SystemEncoding() walks the table in order and stops at the
 * entry whose name is NULL, so that sentinel has to stay last. The encoding
 * stored in the sentinel differs between Windows and every other platform.
 */
static enc_map_t ematches[] = {
    {"UTF-8", ENC_ISO_UTF8},
    {"utf8", ENC_ISO_UTF8},
    {"UTF-16", ENC_UTF16_LE}, // ok, this is a very bad name
    {"UCS-2", ENC_UTF16_LE},  // ok, this is probably gone by now
    {"ISO-LATIN1", ENC_ISO_LATIN1},
    {"ISO-8859-1", ENC_ISO_LATIN1},
    {"Windows-1252", ENC_ISO_LATIN1}, // almost, but not quite
    {"CP-1252", ENC_ISO_LATIN1},
    {"C", ENC_ISO_ASCII},
#ifdef _WIN32
    {NULL, ENC_ISO_ASCII}
#else
    {NULL, ENC_ISO_UTF8}
#endif
};
/**
 * Turn an encoding detected from the environment into the one to use.
 *
 * @param rc encoding found by the caller.
 * @return ENC_ISO_UTF8 when rc is ENC_ISO_ASCII, otherwise rc unchanged.
 */
static encoding_t enc_os_default(encoding_t rc) {
  // by default, return UTF-8
  // note that we match the C locale to UTF8/16, as all Unix machines will work
  // on UNICODE.
  // WIN32 we will rely on BOM
  return rc == ENC_ISO_ASCII ? ENC_ISO_UTF8 : rc;
}
@ -148,31 +147,29 @@ encoding_t Yap_SystemEncoding(void) {
int i = -1;
while (i == -1 || encvs[i]) {
char *v;
if ( i == -1 ) {
if ((v = setlocale(LC_CTYPE, NULL)) == NULL ||
!strcmp(v,"C")) {
if ((v = getenv("LC_CTYPE")))
setlocale(LC_CTYPE, v);
else if ((v = getenv("LANG")))
setlocale(LC_CTYPE, v);
if (i == -1) {
if ((v = setlocale(LC_CTYPE, NULL)) == NULL || !strcmp(v, "C")) {
if ((v = getenv("LC_CTYPE")))
setlocale(LC_CTYPE, v);
else if ((v = getenv("LANG")))
setlocale(LC_CTYPE, v);
}
} else {
v = getenv(encvs[i]);
v = getenv(encvs[i]);
}
if (v) {
int j = 0;
const char *coding;
while ((coding = ematches[j].s) != NULL) {
char *v1;
if ((v1 = strstr(v, coding)) &&
strlen(v1) == strlen(coding)) {
return ematches[j].e;
}
j++;
}
if (v) {
int j = 0;
const char *coding;
while ((coding = ematches[j].s) != NULL) {
char *v1;
if ((v1 = strstr(v, coding)) && strlen(v1) == strlen(coding)) {
return ematches[j].e;
}
i++;
j++;
}
}
i++;
}
return ENC_ISO_ASCII;
}
@ -182,12 +179,12 @@ static encoding_t DefaultEncoding(void) {
/** Return the default encoding currently held in LOCAL_encoding. */
encoding_t Yap_DefaultEncoding(void) {
  CACHE_REGS
  return LOCAL_encoding;
}
/**
 * Replace the default encoding held in LOCAL_encoding.
 *
 * @param new_encoding value stored as the new default.
 */
void Yap_SetDefaultEncoding(encoding_t new_encoding) {
  CACHE_REGS
  LOCAL_encoding = new_encoding;
}
static Int get_default_encoding(USES_REGS1) {
@ -202,9 +199,9 @@ static Int p_encoding(USES_REGS1) { /* '$encoding'(Stream,N) */
if (sno < 0)
return FALSE;
if (IsVarTerm(t)) {
UNLOCK(GLOBAL_Stream[sno].streamlock);
return Yap_unify(ARG2, MkIntegerTerm(GLOBAL_Stream[sno].encoding));
}
UNLOCK(GLOBAL_Stream[sno].streamlock);
return Yap_unify(ARG2, MkIntegerTerm(GLOBAL_Stream[sno].encoding));
}
GLOBAL_Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
UNLOCK(GLOBAL_Stream[sno].streamlock);
return TRUE;
@ -212,81 +209,81 @@ static Int p_encoding(USES_REGS1) { /* '$encoding'(Stream,N) */
/**
 * Extract the character denoted by a one-character atom.
 *
 * @param t term to inspect; it is dereferenced first.
 * @return the character code (never negative for narrow atoms), or 0 after
 *         reporting an error through Yap_Error():
 *         - INSTANTIATION_ERROR when t is unbound;
 *         - REPRESENTATION_ERROR_CHARACTER when t is not an atom, or is an
 *           atom whose text is not exactly one character long.
 */
static int get_char(Term t) {
  if (IsVarTerm(t = Deref(t))) {
    Yap_Error(INSTANTIATION_ERROR, t, NULL);
    return 0;
  }
  if (!IsAtomTerm(t)) {
    Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
    return 0;
  }
  Atom at = AtomOfTerm(t);
  if (IsWideAtom(at)) {
    wchar_t *ws = RepAtom(at)->WStrOfAE;
    /* look at ws[0] first: for the empty atom ws[1] lies past the terminator */
    if (ws[0] == L'\0' || ws[1] != L'\0') {
      Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
      return 0;
    }
    return ws[0];
  }
  /* Use the unsigned-char field (the one char_conversion reads) so that bytes
   * >= 0x80 are not sign-extended into negative codes, which callers would
   * then use as table indices. */
  unsigned char *s = RepAtom(at)->UStrOfAE;
  if (s[0] == '\0' || s[1] != '\0') {
    Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
    return 0;
  }
  return s[0];
}
/**
 * Extract a character code from an integer term.
 *
 * @param t term to inspect; it is dereferenced first.
 * @return the code, where -1 is accepted as well as 0..0x10FFFF, or 0 after
 *         reporting an error through Yap_Error():
 *         - INSTANTIATION_ERROR when t is unbound;
 *         - TYPE_ERROR_CHARACTER_CODE when t is not an integer;
 *         - REPRESENTATION_ERROR_CHARACTER_CODE when the value is out of range.
 */
static int get_code(Term t) {
  if (IsVarTerm(t = Deref(t))) {
    Yap_Error(INSTANTIATION_ERROR, t, NULL);
    return 0;
  }
  if (!IsIntegerTerm(t)) {
    Yap_Error(TYPE_ERROR_CHARACTER_CODE, t, NULL);
    return 0;
  }
  Int ch = IntegerOfTerm(t);
  /* Reject values above the last Unicode code point before narrowing to int;
   * otherwise a large integer could be truncated onto a valid code. */
  if (ch < -1 || ch > 0x10FFFF) {
    Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
    return 0;
  }
  return (int)ch;
}
/**
 * Extract a character from a term that is either a one-character atom or an
 * integer character code.
 *
 * @param t       term to inspect; it is dereferenced first.
 * @param is_char set to true when t was an atom and to false when it was an
 *                integer; left untouched when an error is reported.
 * @return the character code, or 0 after reporting an error through
 *         Yap_Error():
 *         - INSTANTIATION_ERROR when t is unbound;
 *         - TYPE_ERROR_CHARACTER when t is neither an atom nor an integer;
 *         - REPRESENTATION_ERROR_CHARACTER_CODE for an out-of-range integer;
 *         - REPRESENTATION_ERROR_CHARACTER for an atom that is not exactly
 *           one character long.
 */
static int get_char_or_code(Term t, bool *is_char) {
  /* same opening check as get_char() and get_code() */
  if (IsVarTerm(t = Deref(t))) {
    Yap_Error(INSTANTIATION_ERROR, t, NULL);
    return 0;
  }
  if (!IsAtomTerm(t)) {
    if (!IsIntegerTerm(t)) {
      Yap_Error(TYPE_ERROR_CHARACTER, t, NULL);
      return 0;
    }
    Int ch = IntegerOfTerm(t);
    /* bound the value before it is narrowed to int */
    if (ch < -1 || ch > 0x10FFFF) {
      Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
      return 0;
    }
    *is_char = false;
    return (int)ch;
  }
  Atom at = AtomOfTerm(t);
  if (IsWideAtom(at)) {
    wchar_t *ws = RepAtom(at)->WStrOfAE;
    /* look at ws[0] first: for the empty atom ws[1] lies past the terminator */
    if (ws[0] == L'\0' || ws[1] != L'\0') {
      Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
      return 0;
    }
    *is_char = true;
    return ws[0];
  }
  /* unsigned-char field: bytes >= 0x80 must not become negative codes */
  unsigned char *s = RepAtom(at)->UStrOfAE;
  if (s[0] == '\0' || s[1] != '\0') {
    Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
    return 0;
  }
  *is_char = true;
  return s[0];
}
@ -294,35 +291,33 @@ static Int toupper2(USES_REGS1) {
bool is_char = false;
Term t;
if (!IsVarTerm(t = Deref(ARG1))) {
Int out = get_char_or_code(t, &is_char), uout;
if (out < 128)
uout = toupper(out);
Int out = get_char_or_code(t, &is_char), uout;
if (out < 128)
uout = toupper(out);
else
uout = utf8proc_toupper(out);
if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout));
else
return Yap_unify(ARG2, MkIntegerTerm(uout));
} else if (!IsVarTerm(t = Deref(ARG2))) {
Int uout = get_char_or_code(t, &is_char), out;
char_kind_t charp = Yap_wide_chtype(uout);
if (charp == UC) {
if (uout < 128)
out = tolower(uout);
else
uout = towupper(out);
if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout));
else
return Yap_unify(ARG2, MkIntegerTerm(uout));
} else if (!IsVarTerm(t = Deref(ARG2))) {
Int uout = get_char_or_code(t, &is_char), out;
char_kind_t charp = Yap_wide_chtype(uout);
if (charp == UC) {
if (uout < 128)
out = tolower(uout);
else
out = towlower(uout);
} else if (charp == LC) {
return false;
} else {
out = uout;
}
if (is_char)
return Yap_unify(ARG2, MkCharTerm(out));
else
return Yap_unify(ARG2, MkIntegerTerm(out));
out = utf8proc_tolower(uout);
} else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
out = uout;
}
if (is_char)
return Yap_unify(ARG1, MkCharTerm(out));
else
return Yap_unify(ARG1, MkIntegerTerm(out));
} else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
}
return false;
}
@ -330,41 +325,39 @@ static Int tolower2(USES_REGS1) {
bool is_char = false;
Term t;
if (!IsVarTerm(t = Deref(ARG1))) {
bool is_char = false;
Int out = get_char_or_code(ARG1, &is_char), uout;
if (out < 128)
uout = tolower(out);
bool is_char = false;
Int out = get_char_or_code(ARG1, &is_char), uout;
if (out < 128)
uout = tolower(out);
else
uout = utf8proc_tolower(out);
if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout));
else
return Yap_unify(ARG2, MkIntegerTerm(uout));
} else if (IsVarTerm(t = Deref(ARG2))) {
Int uout = get_char_or_code(t, &is_char), out;
char_kind_t charp = Yap_wide_chtype(uout);
if (charp == LC) {
if (uout < 128)
out = toupper(uout);
else
uout = towlower(out);
if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout));
else
return Yap_unify(ARG2, MkIntegerTerm(uout));
} else if (IsVarTerm(t = Deref(ARG2))) {
Int uout = get_char_or_code(t, &is_char), out;
char_kind_t charp = Yap_wide_chtype(uout);
if (charp == LC) {
if (uout < 128)
out = toupper(uout);
else
out = towupper(uout);
} else if (charp == UC) {
return false;
} else {
out = uout;
}
if (is_char)
return Yap_unify(ARG2, MkCharTerm(out));
else
return Yap_unify(ARG2, MkIntegerTerm(out));
out = utf8proc_toupper(uout);
} else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
out = uout;
}
if (is_char)
return Yap_unify(ARG1, MkCharTerm(out));
else
return Yap_unify(ARG1, MkIntegerTerm(out));
} else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
}
return false;
}
static Int
p_change_type_of_char(USES_REGS1) { /* change_type_of_char(+char,+type) */
p_change_type_of_char(USES_REGS1) { /* change_type_of_char(+char,+type) */
Term t1 = Deref(ARG1);
Term t2 = Deref(ARG2);
if (!IsVarTerm(t1) && !IsIntegerTerm(t1))
@ -407,9 +400,9 @@ static Int char_type_ascii(USES_REGS1) {
static Int char_type_white(USES_REGS1) {
int ch = get_char(ARG1);
if (ch < 256) {
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
utf8proc_category_t ct = utf8proc_category(ch);
return ct == UTF8PROC_CATEGORY_ZS;
}
@ -457,18 +450,18 @@ static Int char_type_upper(USES_REGS1) {
/**
 * True when the one-character atom in ARG1 is punctuation.
 *
 * Only characters below 256 can qualify: their Yap_chtype class has to lie in
 * the range QT..BK. Every other character yields false.
 */
static Int char_type_punct(USES_REGS1) {
  int ch = get_char(ARG1);
  if (ch < 256) {
    char_kind_t k = Yap_chtype[ch];
    return k >= QT && k <= BK;
  }
  return false;
}
static Int char_type_space(USES_REGS1) {
int ch = get_char(ARG1);
if (ch < 256) {
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
utf8proc_category_t ct = utf8proc_category(ch);
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
}
@ -481,8 +474,8 @@ static Int char_type_end_of_file(USES_REGS1) {
static Int char_type_end_of_line(USES_REGS1) {
Int ch = get_char(ARG1);
if (ch < 256) {
return ch >= 10 && ch <= 13;
}
return ch >= 10 && ch <= 13;
}
utf8proc_category_t ct = utf8proc_category(ch);
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
}
@ -490,8 +483,8 @@ static Int char_type_end_of_line(USES_REGS1) {
/** True when the one-character atom in ARG1 is the newline character (10). */
static Int char_type_newline(USES_REGS1) {
  Int ch = get_char(ARG1);
  /* the former "ch < 256" guard was redundant: only code 10 can match */
  return ch == 10;
}
@ -568,9 +561,9 @@ static Int code_type_ascii(USES_REGS1) {
static Int code_type_white(USES_REGS1) {
int ch = get_code(ARG1);
if (ch < 256) {
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
utf8proc_category_t ct = utf8proc_category(ch);
return ct == UTF8PROC_CATEGORY_ZS;
}
@ -618,18 +611,18 @@ static Int code_type_upper(USES_REGS1) {
/**
 * True when the character code in ARG1 is punctuation.
 *
 * Only codes below 256 can qualify: their Yap_chtype class has to lie in the
 * range QT..BK. Every other code yields false.
 */
static Int code_type_punct(USES_REGS1) {
  /* ARG1 is a code, so it is read with get_code() like every other
   * code_type_* predicate; get_char() would reject the integer. */
  int ch = get_code(ARG1);
  if (ch < 256) {
    char_kind_t k = Yap_chtype[ch];
    return k >= QT && k <= BK;
  }
  return false;
}
static Int code_type_space(USES_REGS1) {
int ch = get_code(ARG1);
if (ch < 256) {
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
char_kind_t k = Yap_chtype[ch];
return k == BS;
}
utf8proc_category_t ct = utf8proc_category(ch);
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
}
@ -642,8 +635,8 @@ static Int code_type_end_of_file(USES_REGS1) {
static Int code_type_end_of_line(USES_REGS1) {
Int ch = get_code(ARG1);
if (ch < 256) {
return ch >= 10 && ch <= 13;
}
return ch >= 10 && ch <= 13;
}
utf8proc_category_t ct = utf8proc_category(ch);
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
}
@ -651,8 +644,8 @@ static Int code_type_end_of_line(USES_REGS1) {
/** True when the character code in ARG1 is the newline character (10). */
static Int code_type_newline(USES_REGS1) {
  Int ch = get_code(ARG1);
  /* the former "ch < 256" guard was redundant: only code 10 can match */
  return ch == 10;
}
@ -701,11 +694,11 @@ int ISOWGetc(int sno) {
int ch = GLOBAL_Stream[sno].stream_wgetc(sno);
if (ch != EOF && GLOBAL_CharConversionTable != NULL) {
if (ch < NUMBER_OF_CHARS) {
/* only do this in ASCII */
return GLOBAL_CharConversionTable[ch];
}
if (ch < NUMBER_OF_CHARS) {
/* only do this in ASCII */
return GLOBAL_CharConversionTable[ch];
}
}
return ch;
}
@ -716,9 +709,9 @@ static Int p_force_char_conversion(USES_REGS1) {
if (GLOBAL_CharConversionTable2 == NULL)
return (TRUE);
for (i = 0; i < MaxStreams; i++) {
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
GLOBAL_Stream[i].stream_wgetc_for_read = ISOWGetc;
}
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
GLOBAL_Stream[i].stream_wgetc_for_read = ISOWGetc;
}
GLOBAL_CharConversionTable = GLOBAL_CharConversionTable2;
return (TRUE);
}
@ -727,9 +720,9 @@ static Int p_disable_char_conversion(USES_REGS1) {
int i;
for (i = 0; i < MaxStreams; i++) {
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
GLOBAL_Stream[i].stream_wgetc_for_read = GLOBAL_Stream[i].stream_wgetc;
}
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
GLOBAL_Stream[i].stream_wgetc_for_read = GLOBAL_Stream[i].stream_wgetc;
}
GLOBAL_CharConversionTable = NULL;
return (TRUE);
}
@ -739,54 +732,54 @@ static Int char_conversion(USES_REGS1) {
unsigned char *s0, *s1;
if (IsVarTerm(t)) {
Yap_Error(INSTANTIATION_ERROR, t, "char_conversion/2");
return (FALSE);
}
Yap_Error(INSTANTIATION_ERROR, t, "char_conversion/2");
return (FALSE);
}
if (!IsAtomTerm(t)) {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
return (FALSE);
}
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
if (s0[1] != '\0') {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
return (FALSE);
}
if (IsVarTerm(t1)) {
Yap_Error(INSTANTIATION_ERROR, t1, "char_conversion/2");
return (FALSE);
}
Yap_Error(INSTANTIATION_ERROR, t1, "char_conversion/2");
return (FALSE);
}
if (!IsAtomTerm(t1)) {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
return (FALSE);
}
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
if (s1[1] != '\0') {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
return (FALSE);
}
/* check if we do have a table for converting characters */
if (GLOBAL_CharConversionTable2 == NULL) {
int i;
int i;
/* don't create a table if we don't need to */
if (s0[0] == s1[0])
return (TRUE);
GLOBAL_CharConversionTable2 =
Yap_AllocCodeSpace(NUMBER_OF_CHARS * sizeof(char));
while (GLOBAL_CharConversionTable2 == NULL) {
if (!Yap_growheap(FALSE, NUMBER_OF_CHARS * sizeof(char), NULL)) {
Yap_Error(RESOURCE_ERROR_HEAP, TermNil, LOCAL_ErrorMessage);
return (FALSE);
}
}
if (trueGlobalPrologFlag(CHAR_CONVERSION_FLAG)) {
CACHE_REGS
if (p_force_char_conversion(PASS_REGS1) == FALSE)
return (FALSE);
}
for (i = 0; i < NUMBER_OF_CHARS; i++)
GLOBAL_CharConversionTable2[i] = i;
/* don't create a table if we don't need to */
if (s0[0] == s1[0])
return (TRUE);
GLOBAL_CharConversionTable2 =
Yap_AllocCodeSpace(NUMBER_OF_CHARS * sizeof(char));
while (GLOBAL_CharConversionTable2 == NULL) {
if (!Yap_growheap(FALSE, NUMBER_OF_CHARS * sizeof(char), NULL)) {
Yap_Error(RESOURCE_ERROR_HEAP, TermNil, LOCAL_ErrorMessage);
return (FALSE);
}
}
if (trueGlobalPrologFlag(CHAR_CONVERSION_FLAG)) {
CACHE_REGS
if (p_force_char_conversion(PASS_REGS1) == FALSE)
return (FALSE);
}
for (i = 0; i < NUMBER_OF_CHARS; i++)
GLOBAL_CharConversionTable2[i] = i;
}
/* just add the new entry */
GLOBAL_CharConversionTable2[(int)s0[0]] = s1[0];
/* done */
@ -798,43 +791,43 @@ static Int p_current_char_conversion(USES_REGS1) {
unsigned char *s0, *s1;
if (GLOBAL_CharConversionTable == NULL) {
return (FALSE);
}
return (FALSE);
}
t = Deref(ARG1);
if (IsVarTerm(t)) {
Yap_Error(INSTANTIATION_ERROR, t, "current_char_conversion/2");
return (FALSE);
}
Yap_Error(INSTANTIATION_ERROR, t, "current_char_conversion/2");
return (FALSE);
}
if (!IsAtomTerm(t)) {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
return (FALSE);
}
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
if (s0[1] != '\0') {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
return (FALSE);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
return (FALSE);
}
t1 = Deref(ARG2);
if (IsVarTerm(t1)) {
char out[2];
if (GLOBAL_CharConversionTable[(int)s0[0]] == '\0')
return (FALSE);
out[0] = GLOBAL_CharConversionTable[(int)s0[0]];
out[1] = '\0';
return (Yap_unify(ARG2, MkAtomTerm(Yap_LookupAtom(out))));
}
if (!IsAtomTerm(t1)) {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
char out[2];
if (GLOBAL_CharConversionTable[(int)s0[0]] == '\0')
return (FALSE);
}
out[0] = GLOBAL_CharConversionTable[(int)s0[0]];
out[1] = '\0';
return (Yap_unify(ARG2, MkAtomTerm(Yap_LookupAtom(out))));
}
if (!IsAtomTerm(t1)) {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
return (FALSE);
}
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
if (s1[1] != '\0') {
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
return (FALSE);
} else {
return (GLOBAL_CharConversionTable[(int)s0[0]] == '\0' &&
GLOBAL_CharConversionTable[(int)s0[0]] == s1[0]);
}
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
return (FALSE);
} else {
return (GLOBAL_CharConversionTable[(int)s0[0]] == '\0' &&
GLOBAL_CharConversionTable[(int)s0[0]] == s1[0]);
}
}
static Int p_all_char_conversions(USES_REGS1) {
@ -842,28 +835,28 @@ static Int p_all_char_conversions(USES_REGS1) {
int i;
if (GLOBAL_CharConversionTable == NULL) {
return (FALSE);
}
return (FALSE);
}
for (i = NUMBER_OF_CHARS; i > 0;) {
i--;
if (GLOBAL_CharConversionTable[i] != '\0') {
Term t1, t2;
char s[2];
s[1] = '\0';
s[0] = GLOBAL_CharConversionTable[i];
t1 = MkAtomTerm(Yap_LookupAtom(s));
out = MkPairTerm(t1, out);
s[0] = i;
t2 = MkAtomTerm(Yap_LookupAtom(s));
out = MkPairTerm(t2, out);
}
i--;
if (GLOBAL_CharConversionTable[i] != '\0') {
Term t1, t2;
char s[2];
s[1] = '\0';
s[0] = GLOBAL_CharConversionTable[i];
t1 = MkAtomTerm(Yap_LookupAtom(s));
out = MkPairTerm(t1, out);
s[0] = i;
t2 = MkAtomTerm(Yap_LookupAtom(s));
out = MkPairTerm(t2, out);
}
}
return (Yap_unify(ARG1, out));
}
void Yap_InitChtypes(void) {
CACHE_REGS
LOCAL_encoding = DefaultEncoding();
LOCAL_encoding = DefaultEncoding();
Yap_InitCPred("$change_type_of_char", 2, p_change_type_of_char,
SafePredFlag | SyncPredFlag | HiddenPredFlag);
Yap_InitCPred("toupper", 2, toupper2, SafePredFlag);

View File

@ -140,135 +140,159 @@ Char is an uppercase version of Lower. Only true if Char is uppercase and Lower
:- discontiguous digit_weight/2, digit_weight/3.
%  char_type(?Char, ?Type)
%
%  Entry point. If Char is already bound it is checked directly. If Char is
%  unbound but Type is a compound term whose first argument is atomic (for
%  example upper(a)), that argument is left to drive the p_char_type/2 clause.
%  Otherwise every one-character atom for the codes 0..0x10FFFF is enumerated
%  on backtracking. The compound/1 test keeps arg/3 from being called on an
%  atom such as `alpha` or on an unbound Type.
prolog:char_type( CH, TYPE) :-
    (   nonvar( CH )
    ->  true
    ;   compound( TYPE ),
        arg(1, TYPE, A),
        atomic(A)
    ->  true
    ;   between(0, 0x10FFFF, I),
        atom_codes(CH, [I])
    ),
    p_char_type( CH, TYPE).

%  p_char_type(?Char, ?Type)
%
%  One clause per character class; each delegates to the matching
%  char_type_* test or to toupper/2 and tolower/2.
p_char_type( ALNUM, alnum) :-
    char_type_alnum( ALNUM ).
p_char_type( ALPHA, alpha) :-
    char_type_alpha( ALPHA).
p_char_type( CSYM, csym) :-
    char_type_csym( CSYM ).
p_char_type( CSYMF, csymf) :-
    char_type_csymf( CSYMF).
p_char_type( ASCII, ascii ) :-
    char_type_ascii( ASCII ).
p_char_type( WHITE, white) :-
    char_type_white( WHITE ).
p_char_type( CNTRL , cntrl) :-
    char_type_cntrl( CNTRL ).
p_char_type( DIGIT , digit) :-
    char_type_digit( DIGIT ).
p_char_type( DIGIT, digit(Weight) ) :-
    char_type_digit( DIGIT ),
    digit_weight( DIGIT, Weight ).
%  NOTE(review): the char_type_digit/1 guard lets only decimal digits reach
%  xdigit_weight/2 -- confirm whether a..f and A..F are meant to be accepted.
p_char_type( XDIGIT, xdigit(Weight) ) :-
    char_type_digit( XDIGIT ),
    xdigit_weight( XDIGIT, Weight ).
p_char_type( GRAPH , graph) :-
    char_type_graph( GRAPH ).
p_char_type( LOWER , lower) :-
    char_type_lower( LOWER ).
p_char_type( LOWER, lower( Upper)) :-
    toupper( LOWER, Upper),
    char_type_lower( LOWER ).
p_char_type( LOWER, to_lower( Upper)) :-
    toupper( LOWER, Upper).
p_char_type( UPPER, upper ) :-
    char_type_upper( UPPER ).
%  Convert first and test afterwards, exactly as lower(Upper) does, so the
%  class test never sees an unbound character.
p_char_type( UPPER , upper( Lower)) :-
    tolower( UPPER, Lower),
    char_type_upper( UPPER ).
p_char_type( UPPER, to_upper( Lower) ) :-
    tolower( UPPER, Lower).
p_char_type( PUNCT , punct) :-
    char_type_punct( PUNCT ).
p_char_type( SPACE , space) :-
    char_type_space( SPACE ).
p_char_type( END_OF_FILE , end_of_file) :-
    char_type_end_of_file( END_OF_FILE ).
p_char_type( END_OF_LINE , end_of_line) :-
    char_type_end_of_line( END_OF_LINE ).
p_char_type( NEWLINE , newline) :-
    char_type_newline( NEWLINE ).
p_char_type( PERIOD , period) :-
    char_type_period( PERIOD ).
p_char_type( QUOTE , quote) :-
    char_type_quote( QUOTE ).
p_char_type( Parent_Open, paren( PAREN_CLOSE) ) :-
    paren_paren(Parent_Open, PAREN_CLOSE).
p_char_type( PROLOG_VAR_START , prolog_var_start) :-
    char_type_prolog_var_start( PROLOG_VAR_START ).
p_char_type( PROLOG_ATOM_START , prolog_atom_start) :-
    char_type_prolog_atom_start( PROLOG_ATOM_START ).
p_char_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :-
    char_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ).
p_char_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :-
    char_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ).
%  code_type(?Code, ?Type)
%
%  Entry point. If Code is already bound it is checked directly. If Code is
%  unbound but Type is a compound term whose first argument is atomic, that
%  argument is left to drive the p_code_type/2 clause. Otherwise every code in
%  0..0x10FFFF is enumerated on backtracking. The compound/1 test keeps arg/3
%  from being called on an atom such as `alpha` or on an unbound Type.
prolog:code_type(CH, TYPE) :-
    (   nonvar( CH )
    ->  true
    ;   compound( TYPE ),
        arg(1, TYPE, A),
        atomic(A)
    ->  true
    ;   between(0, 0x10FFFF, CH)
    ),
    p_code_type( CH, TYPE).

%  p_code_type(?Code, ?Type)
%
%  One clause per character class; each delegates to the matching
%  code_type_* test or to toupper/2 and tolower/2.
p_code_type( ALNUM, alnum) :-
    code_type_alnum( ALNUM ).
p_code_type( ALPHA, alpha) :-
    code_type_alpha( ALPHA).
p_code_type( CSYM, csym) :-
    code_type_csym( CSYM ).
p_code_type( CSYMF, csymf) :-
    code_type_csymf( CSYMF).
p_code_type( ASCII, ascii ) :-
    code_type_ascii( ASCII ).
p_code_type( WHITE, white) :-
    code_type_white( WHITE ).
p_code_type( CNTRL , cntrl) :-
    code_type_cntrl( CNTRL ).
p_code_type( DIGIT , digit) :-
    code_type_digit( DIGIT ).
p_code_type( DIGIT, digit(Weight) ) :-
    code_type_digit( DIGIT ),
    digit_weight( DIGIT, Weight ).
%  NOTE(review): the code_type_digit/1 guard lets only decimal digits reach
%  xdigit_weight/2 -- confirm whether a..f and A..F are meant to be accepted.
p_code_type( XDIGIT, xdigit(Weight) ) :-
    code_type_digit( XDIGIT ),
    xdigit_weight( XDIGIT, Weight ).
p_code_type( GRAPH , graph) :-
    code_type_graph( GRAPH ).
p_code_type( LOWER , lower) :-
    code_type_lower( LOWER ).
p_code_type( LOWER, lower( Upper)) :-
    toupper( LOWER, Upper),
    code_type_lower( LOWER ).
p_code_type( LOWER, to_lower( Upper)) :-
    toupper( LOWER, Upper).
p_code_type( UPPER, upper ) :-
    code_type_upper( UPPER ).
%  upper(Lower) is the clause that requires an uppercase code. The test used
%  to sit in to_upper/1 and called char_type_upper/1, which expects a
%  character atom rather than a code.
p_code_type( UPPER , upper( Lower)) :-
    tolower( UPPER, Lower),
    code_type_upper( UPPER ).
p_code_type( UPPER, to_upper( Lower) ) :-
    tolower( UPPER, Lower).
p_code_type( PUNCT , punct) :-
    code_type_punct( PUNCT ).
p_code_type( SPACE , space) :-
    code_type_space( SPACE ).
p_code_type( END_OF_FILE , end_of_file) :-
    code_type_end_of_file( END_OF_FILE ).
p_code_type( END_OF_LINE , end_of_line) :-
    code_type_end_of_line( END_OF_LINE ).
p_code_type( NEWLINE , newline) :-
    code_type_newline( NEWLINE ).
p_code_type( PERIOD , period) :-
    code_type_period( PERIOD ).
p_code_type( QUOTE , quote) :-
    code_type_quote( QUOTE ).
p_code_type( Parent_Open, paren( PAREN_CLOSE) ) :-
    paren_paren(Parent_Open, PAREN_CLOSE).
p_code_type( PROLOG_VAR_START , prolog_var_start) :-
    code_type_prolog_var_start( PROLOG_VAR_START ).
p_code_type( PROLOG_ATOM_START , prolog_atom_start) :-
    code_type_prolog_atom_start( PROLOG_ATOM_START ).
p_code_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :-
    code_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ).
p_code_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :-
    code_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ).

View File

@ -471,7 +471,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
* error, a negative error code is returned (@ref utf8proc_errmsg).
*
* If the number of written codepoints would be bigger than `bufsize`, the
* required buffer size is returned, while the buffer will be overwritten with
* undefined data.
*/
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(