ok, even if the locale is C, unicode is most often supported.
This commit is contained in:
parent
5f9752baff
commit
d3796aba5a
564
os/chartypes.c
564
os/chartypes.c
@ -21,10 +21,10 @@ static char SccsId[] = "%W% %G%";
|
|||||||
* @file chartypes.c
|
* @file chartypes.c
|
||||||
* @author VITOR SANTOS COSTA <vsc@VITORs-MBP.lan>
|
* @author VITOR SANTOS COSTA <vsc@VITORs-MBP.lan>
|
||||||
* @date Thu Nov 19 12:05:14 2015
|
* @date Thu Nov 19 12:05:14 2015
|
||||||
*
|
*
|
||||||
* @brief Character Properties
|
* @brief Character Properties
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
///{@
|
///{@
|
||||||
@ -79,7 +79,7 @@ static Int p_change_type_of_char(USES_REGS1);
|
|||||||
|
|
||||||
Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
|
Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
int sno;
|
int sno;
|
||||||
Term t;
|
Term t;
|
||||||
|
|
||||||
sno = Yap_open_buf_read_stream(s, strlen(s), encp, MEM_BUF_USER);
|
sno = Yap_open_buf_read_stream(s, strlen(s), encp, MEM_BUF_USER);
|
||||||
@ -101,76 +101,78 @@ Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
|
|||||||
|
|
||||||
const char *encvs[] = {"LANG", "LC_ALL", "LC_CTYPE", NULL};
|
const char *encvs[] = {"LANG", "LC_ALL", "LC_CTYPE", NULL};
|
||||||
|
|
||||||
// wher we can fins an encoding
|
// where we can find an encoding
|
||||||
typedef struct enc_map {
|
typedef struct enc_map {
|
||||||
const char *s;
|
const char *s;
|
||||||
encoding_t e;
|
encoding_t e;
|
||||||
} enc_map_t;
|
} enc_map_t;
|
||||||
|
|
||||||
static enc_map_t ematches[] = {
|
static enc_map_t ematches[] = {
|
||||||
{"UTF-8", ENC_ISO_UTF8},
|
{"UTF-8", ENC_ISO_UTF8},
|
||||||
{"utf8", ENC_ISO_UTF8},
|
{"utf8", ENC_ISO_UTF8},
|
||||||
{"UTF-16", ENC_UTF16_LE}, // ok, this is a very bad name
|
{"UTF-16", ENC_UTF16_LE}, // ok, this is a very bad name
|
||||||
{"UCS-2", ENC_UTF16_LE}, // ok, this is probably gone by now
|
{"UCS-2", ENC_UTF16_LE}, // ok, this is probably gone by now
|
||||||
{"ISO-LATIN1", ENC_ISO_LATIN1},
|
{"ISO-LATIN1", ENC_ISO_LATIN1},
|
||||||
{"ISO-8859-1", ENC_ISO_LATIN1},
|
{"ISO-8859-1", ENC_ISO_LATIN1},
|
||||||
{"Windows-1252", ENC_ISO_LATIN1}, // almost, but not quite
|
{"Windows-1252", ENC_ISO_LATIN1}, // almost, but not quite
|
||||||
{"CP-1252", ENC_ISO_LATIN1},
|
{"CP-1252", ENC_ISO_LATIN1},
|
||||||
{"C", ENC_ISO_ASCII},
|
{"C", ENC_ISO_ASCII},
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
{NULL, ENC_UTF16_LE}
|
{NULL, ENC_UTF16_LE}
|
||||||
#else
|
#else
|
||||||
{NULL, ENC_ISO_UTF8}
|
{NULL, ENC_ISO_UTF8}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static encoding_t DefaultEncoding(void) {
|
static encoding_t enc_os_default( encoding_t rc)\
|
||||||
encoding_t rc;
|
{
|
||||||
int i = 0, j;
|
// by default, return UTF-8
|
||||||
char *enc;
|
// except in _WIN32
|
||||||
while (encvs[i]) {
|
// note that we match the C locale to UTF8/16, as all Unix machines will work on UNICODE.
|
||||||
char *v = getenv(encvs[i]);
|
|
||||||
if (v) {
|
if (rc == ENC_ISO_ASCII) {
|
||||||
enc = strrchr(v, '.');
|
|
||||||
/* that's how it is supposed to be, except in OSX */
|
|
||||||
if (!enc)
|
|
||||||
enc = v;
|
|
||||||
else
|
|
||||||
enc++;
|
|
||||||
// now that we have one name, try to match it
|
|
||||||
j = 0;
|
|
||||||
while (ematches[j].s) {
|
|
||||||
if (!strcmp(ematches[j].s, enc))
|
|
||||||
return ematches[j].e;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
// by default, return UTF-8
|
|
||||||
// except in _WIN32
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
rc = ENC_UTF16_BE;
|
return = ENC_UTF16_BE;
|
||||||
#else
|
#else
|
||||||
rc = ENC_ISO_UTF8;
|
return ENC_ISO_UTF8;
|
||||||
#endif
|
#endif
|
||||||
{
|
}
|
||||||
int j = 0;
|
|
||||||
while (rc != ematches[j].e)
|
|
||||||
j++;
|
|
||||||
// Yap_Warning("YAP will use default encoding %s", ematches[j].s);
|
|
||||||
}
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static encoding_t DefaultEncoding(void) {
|
||||||
|
int i = 0;
|
||||||
|
while (encvs[i]) {
|
||||||
|
char *v = getenv(encvs[i]);
|
||||||
|
if (v) {
|
||||||
|
int j = 0;
|
||||||
|
size_t sz = strlen(v);
|
||||||
|
const char *coding;
|
||||||
|
while ((coding = ematches[j].s) != NULL) {
|
||||||
|
size_t sz2 = strlen(coding);
|
||||||
|
if (sz2 > sz) {
|
||||||
|
j++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!strcmp(coding+(sz-sz2), v) ) {
|
||||||
|
return enc_os_default(ematches[j].e);
|
||||||
|
}
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
return enc_os_default(ENC_ISO_ASCII);
|
||||||
|
}
|
||||||
|
|
||||||
encoding_t Yap_DefaultEncoding(void) {
|
encoding_t Yap_DefaultEncoding(void) {
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
return LOCAL_encoding;
|
return LOCAL_encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Yap_SetDefaultEncoding(encoding_t new_encoding) {
|
void Yap_SetDefaultEncoding(encoding_t new_encoding) {
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
LOCAL_encoding = new_encoding;
|
LOCAL_encoding = new_encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Int get_default_encoding(USES_REGS1) {
|
static Int get_default_encoding(USES_REGS1) {
|
||||||
@ -185,9 +187,9 @@ static Int p_encoding(USES_REGS1) { /* '$encoding'(Stream,N) */
|
|||||||
if (sno < 0)
|
if (sno < 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
if (IsVarTerm(t)) {
|
if (IsVarTerm(t)) {
|
||||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||||
return Yap_unify(ARG2, MkIntegerTerm(GLOBAL_Stream[sno].encoding));
|
return Yap_unify(ARG2, MkIntegerTerm(GLOBAL_Stream[sno].encoding));
|
||||||
}
|
}
|
||||||
GLOBAL_Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
|
GLOBAL_Stream[sno].encoding = IntegerOfTerm(Deref(ARG2));
|
||||||
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
UNLOCK(GLOBAL_Stream[sno].streamlock);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
@ -195,81 +197,81 @@ static Int p_encoding(USES_REGS1) { /* '$encoding'(Stream,N) */
|
|||||||
|
|
||||||
static int get_char(Term t) {
|
static int get_char(Term t) {
|
||||||
if (IsVarTerm(t = Deref(t))) {
|
if (IsVarTerm(t = Deref(t))) {
|
||||||
Yap_Error(INSTANTIATION_ERROR, t, NULL);
|
Yap_Error(INSTANTIATION_ERROR, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (!IsAtomTerm(t)) {
|
if (!IsAtomTerm(t)) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Atom at = AtomOfTerm(t);
|
Atom at = AtomOfTerm(t);
|
||||||
if (IsWideAtom(at)) {
|
if (IsWideAtom(at)) {
|
||||||
wchar_t *s0 = RepAtom(AtomOfTerm(t))->WStrOfAE;
|
wchar_t *s0 = RepAtom(AtomOfTerm(t))->WStrOfAE;
|
||||||
if (s0[1] != '\0') {
|
if (s0[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
return s0[0];
|
||||||
|
} else {
|
||||||
|
char *s0 = RepAtom(AtomOfTerm(t))->StrOfAE;
|
||||||
|
if (s0[1] != '\0') {
|
||||||
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return s0[0];
|
||||||
}
|
}
|
||||||
return s0[0];
|
|
||||||
} else {
|
|
||||||
char *s0 = RepAtom(AtomOfTerm(t))->StrOfAE;
|
|
||||||
if (s0[1] != '\0') {
|
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return s0[0];
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_code(Term t) {
|
static int get_code(Term t) {
|
||||||
if (IsVarTerm(t = Deref(t))) {
|
if (IsVarTerm(t = Deref(t))) {
|
||||||
Yap_Error(INSTANTIATION_ERROR, t, NULL);
|
Yap_Error(INSTANTIATION_ERROR, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (!IsIntegerTerm(t)) {
|
if (!IsIntegerTerm(t)) {
|
||||||
Yap_Error(TYPE_ERROR_CHARACTER_CODE, t, NULL);
|
Yap_Error(TYPE_ERROR_CHARACTER_CODE, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Int ch = IntegerOfTerm(t);
|
Int ch = IntegerOfTerm(t);
|
||||||
if (ch < -1) {
|
if (ch < -1) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_char_or_code(Term t, bool *is_char) {
|
static int get_char_or_code(Term t, bool *is_char) {
|
||||||
if (!IsAtomTerm(t)) {
|
if (!IsAtomTerm(t)) {
|
||||||
if (!IsIntegerTerm(t)) {
|
if (!IsIntegerTerm(t)) {
|
||||||
Yap_Error(TYPE_ERROR_CHARACTER, t, NULL);
|
Yap_Error(TYPE_ERROR_CHARACTER, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
Int ch = IntegerOfTerm(t);
|
||||||
|
if (ch < -1) {
|
||||||
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*is_char = false;
|
||||||
|
return ch;
|
||||||
}
|
}
|
||||||
Int ch = IntegerOfTerm(t);
|
|
||||||
if (ch < -1) {
|
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER_CODE, t, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*is_char = false;
|
|
||||||
return ch;
|
|
||||||
}
|
|
||||||
Atom at = AtomOfTerm(t);
|
Atom at = AtomOfTerm(t);
|
||||||
if (IsWideAtom(at)) {
|
if (IsWideAtom(at)) {
|
||||||
wchar_t *s0 = RepAtom(AtomOfTerm(t))->WStrOfAE;
|
wchar_t *s0 = RepAtom(AtomOfTerm(t))->WStrOfAE;
|
||||||
if (s0[1] != '\0') {
|
if (s0[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
*is_char = true;
|
||||||
|
return s0[0];
|
||||||
|
} else {
|
||||||
|
char *s0 = RepAtom(AtomOfTerm(t))->StrOfAE;
|
||||||
|
if (s0[1] != '\0') {
|
||||||
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*is_char = true;
|
||||||
|
return s0[0];
|
||||||
}
|
}
|
||||||
*is_char = true;
|
|
||||||
return s0[0];
|
|
||||||
} else {
|
|
||||||
char *s0 = RepAtom(AtomOfTerm(t))->StrOfAE;
|
|
||||||
if (s0[1] != '\0') {
|
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, NULL);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
*is_char = true;
|
|
||||||
return s0[0];
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -277,35 +279,35 @@ static Int toupper2(USES_REGS1) {
|
|||||||
bool is_char = false;
|
bool is_char = false;
|
||||||
Term t;
|
Term t;
|
||||||
if (!IsVarTerm(t = Deref(ARG1))) {
|
if (!IsVarTerm(t = Deref(ARG1))) {
|
||||||
Int out = get_char_or_code(t, &is_char), uout;
|
Int out = get_char_or_code(t, &is_char), uout;
|
||||||
if (out < 128)
|
if (out < 128)
|
||||||
uout = toupper(out);
|
uout = toupper(out);
|
||||||
else
|
|
||||||
uout = towupper(out);
|
|
||||||
if (is_char)
|
|
||||||
return Yap_unify(ARG2, MkCharTerm(uout));
|
|
||||||
else
|
|
||||||
return Yap_unify(ARG2, MkIntegerTerm(uout));
|
|
||||||
} else if (!IsVarTerm(t = Deref(ARG2))) {
|
|
||||||
Int uout = get_char_or_code(t, &is_char), out;
|
|
||||||
char_kind_t charp = Yap_wide_chtype(uout);
|
|
||||||
if (charp == UC) {
|
|
||||||
if (uout < 128)
|
|
||||||
out = tolower(uout);
|
|
||||||
else
|
else
|
||||||
out = towlower(uout);
|
uout = towupper(out);
|
||||||
} else if (charp == LC) {
|
if (is_char)
|
||||||
return false;
|
return Yap_unify(ARG2, MkCharTerm(uout));
|
||||||
|
else
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(uout));
|
||||||
|
} else if (!IsVarTerm(t = Deref(ARG2))) {
|
||||||
|
Int uout = get_char_or_code(t, &is_char), out;
|
||||||
|
char_kind_t charp = Yap_wide_chtype(uout);
|
||||||
|
if (charp == UC) {
|
||||||
|
if (uout < 128)
|
||||||
|
out = tolower(uout);
|
||||||
|
else
|
||||||
|
out = towlower(uout);
|
||||||
|
} else if (charp == LC) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
out = uout;
|
||||||
|
}
|
||||||
|
if (is_char)
|
||||||
|
return Yap_unify(ARG2, MkCharTerm(out));
|
||||||
|
else
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(out));
|
||||||
} else {
|
} else {
|
||||||
out = uout;
|
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
|
||||||
}
|
}
|
||||||
if (is_char)
|
|
||||||
return Yap_unify(ARG2, MkCharTerm(out));
|
|
||||||
else
|
|
||||||
return Yap_unify(ARG2, MkIntegerTerm(out));
|
|
||||||
} else {
|
|
||||||
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -313,41 +315,41 @@ static Int tolower2(USES_REGS1) {
|
|||||||
bool is_char = false;
|
bool is_char = false;
|
||||||
Term t;
|
Term t;
|
||||||
if (!IsVarTerm(t = Deref(ARG1))) {
|
if (!IsVarTerm(t = Deref(ARG1))) {
|
||||||
bool is_char = false;
|
bool is_char = false;
|
||||||
Int out = get_char_or_code(ARG1, &is_char), uout;
|
Int out = get_char_or_code(ARG1, &is_char), uout;
|
||||||
if (out < 128)
|
if (out < 128)
|
||||||
uout = tolower(out);
|
uout = tolower(out);
|
||||||
else
|
|
||||||
uout = towlower(out);
|
|
||||||
if (is_char)
|
|
||||||
return Yap_unify(ARG2, MkCharTerm(uout));
|
|
||||||
else
|
|
||||||
return Yap_unify(ARG2, MkIntegerTerm(uout));
|
|
||||||
} else if (IsVarTerm(t = Deref(ARG2))) {
|
|
||||||
Int uout = get_char_or_code(t, &is_char), out;
|
|
||||||
char_kind_t charp = Yap_wide_chtype(uout);
|
|
||||||
if (charp == LC) {
|
|
||||||
if (uout < 128)
|
|
||||||
out = toupper(uout);
|
|
||||||
else
|
else
|
||||||
out = towupper(uout);
|
uout = towlower(out);
|
||||||
} else if (charp == UC) {
|
if (is_char)
|
||||||
return false;
|
return Yap_unify(ARG2, MkCharTerm(uout));
|
||||||
|
else
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(uout));
|
||||||
|
} else if (IsVarTerm(t = Deref(ARG2))) {
|
||||||
|
Int uout = get_char_or_code(t, &is_char), out;
|
||||||
|
char_kind_t charp = Yap_wide_chtype(uout);
|
||||||
|
if (charp == LC) {
|
||||||
|
if (uout < 128)
|
||||||
|
out = toupper(uout);
|
||||||
|
else
|
||||||
|
out = towupper(uout);
|
||||||
|
} else if (charp == UC) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
out = uout;
|
||||||
|
}
|
||||||
|
if (is_char)
|
||||||
|
return Yap_unify(ARG2, MkCharTerm(out));
|
||||||
|
else
|
||||||
|
return Yap_unify(ARG2, MkIntegerTerm(out));
|
||||||
} else {
|
} else {
|
||||||
out = uout;
|
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
|
||||||
}
|
}
|
||||||
if (is_char)
|
|
||||||
return Yap_unify(ARG2, MkCharTerm(out));
|
|
||||||
else
|
|
||||||
return Yap_unify(ARG2, MkIntegerTerm(out));
|
|
||||||
} else {
|
|
||||||
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Int
|
static Int
|
||||||
p_change_type_of_char(USES_REGS1) { /* change_type_of_char(+char,+type) */
|
p_change_type_of_char(USES_REGS1) { /* change_type_of_char(+char,+type) */
|
||||||
Term t1 = Deref(ARG1);
|
Term t1 = Deref(ARG1);
|
||||||
Term t2 = Deref(ARG2);
|
Term t2 = Deref(ARG2);
|
||||||
if (!IsVarTerm(t1) && !IsIntegerTerm(t1))
|
if (!IsVarTerm(t1) && !IsIntegerTerm(t1))
|
||||||
@ -390,9 +392,9 @@ static Int char_type_ascii(USES_REGS1) {
|
|||||||
static Int char_type_white(USES_REGS1) {
|
static Int char_type_white(USES_REGS1) {
|
||||||
int ch = get_char(ARG1);
|
int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k == BS;
|
return k == BS;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return ct == UTF8PROC_CATEGORY_ZS;
|
return ct == UTF8PROC_CATEGORY_ZS;
|
||||||
}
|
}
|
||||||
@ -440,18 +442,18 @@ static Int char_type_upper(USES_REGS1) {
|
|||||||
static Int char_type_punct(USES_REGS1) {
|
static Int char_type_punct(USES_REGS1) {
|
||||||
int ch = get_char(ARG1);
|
int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k >= QT && k <= BK;
|
return k >= QT && k <= BK;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Int char_type_space(USES_REGS1) {
|
static Int char_type_space(USES_REGS1) {
|
||||||
int ch = get_char(ARG1);
|
int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k == BS;
|
return k == BS;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
|
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
|
||||||
}
|
}
|
||||||
@ -464,8 +466,8 @@ static Int char_type_end_of_file(USES_REGS1) {
|
|||||||
static Int char_type_end_of_line(USES_REGS1) {
|
static Int char_type_end_of_line(USES_REGS1) {
|
||||||
Int ch = get_char(ARG1);
|
Int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
return ch >= 10 && ch <= 13;
|
return ch >= 10 && ch <= 13;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
|
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
|
||||||
}
|
}
|
||||||
@ -473,8 +475,8 @@ static Int char_type_end_of_line(USES_REGS1) {
|
|||||||
static Int char_type_newline(USES_REGS1) {
|
static Int char_type_newline(USES_REGS1) {
|
||||||
Int ch = get_char(ARG1);
|
Int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
return ch == 10;
|
return ch == 10;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -551,9 +553,9 @@ static Int code_type_ascii(USES_REGS1) {
|
|||||||
static Int code_type_white(USES_REGS1) {
|
static Int code_type_white(USES_REGS1) {
|
||||||
int ch = get_code(ARG1);
|
int ch = get_code(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k == BS;
|
return k == BS;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return ct == UTF8PROC_CATEGORY_ZS;
|
return ct == UTF8PROC_CATEGORY_ZS;
|
||||||
}
|
}
|
||||||
@ -601,18 +603,18 @@ static Int code_type_upper(USES_REGS1) {
|
|||||||
static Int code_type_punct(USES_REGS1) {
|
static Int code_type_punct(USES_REGS1) {
|
||||||
int ch = get_char(ARG1);
|
int ch = get_char(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k >= QT && k <= BK;
|
return k >= QT && k <= BK;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static Int code_type_space(USES_REGS1) {
|
static Int code_type_space(USES_REGS1) {
|
||||||
int ch = get_code(ARG1);
|
int ch = get_code(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
char_kind_t k = Yap_chtype[ch];
|
char_kind_t k = Yap_chtype[ch];
|
||||||
return k == BS;
|
return k == BS;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
|
return (ct >= UTF8PROC_CATEGORY_ZS && ct <= UTF8PROC_CATEGORY_PO);
|
||||||
}
|
}
|
||||||
@ -625,8 +627,8 @@ static Int code_type_end_of_file(USES_REGS1) {
|
|||||||
static Int code_type_end_of_line(USES_REGS1) {
|
static Int code_type_end_of_line(USES_REGS1) {
|
||||||
Int ch = get_code(ARG1);
|
Int ch = get_code(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
return ch >= 10 && ch <= 13;
|
return ch >= 10 && ch <= 13;
|
||||||
}
|
}
|
||||||
utf8proc_category_t ct = utf8proc_category(ch);
|
utf8proc_category_t ct = utf8proc_category(ch);
|
||||||
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
|
return (ct >= UTF8PROC_CATEGORY_ZL && ct <= UTF8PROC_CATEGORY_ZP);
|
||||||
}
|
}
|
||||||
@ -634,8 +636,8 @@ static Int code_type_end_of_line(USES_REGS1) {
|
|||||||
static Int code_type_newline(USES_REGS1) {
|
static Int code_type_newline(USES_REGS1) {
|
||||||
Int ch = get_code(ARG1);
|
Int ch = get_code(ARG1);
|
||||||
if (ch < 256) {
|
if (ch < 256) {
|
||||||
return ch == 10;
|
return ch == 10;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -684,11 +686,11 @@ int ISOWGetc(int sno) {
|
|||||||
int ch = GLOBAL_Stream[sno].stream_wgetc(sno);
|
int ch = GLOBAL_Stream[sno].stream_wgetc(sno);
|
||||||
if (ch != EOF && GLOBAL_CharConversionTable != NULL) {
|
if (ch != EOF && GLOBAL_CharConversionTable != NULL) {
|
||||||
|
|
||||||
if (ch < NUMBER_OF_CHARS) {
|
if (ch < NUMBER_OF_CHARS) {
|
||||||
/* only do this in ASCII */
|
/* only do this in ASCII */
|
||||||
return GLOBAL_CharConversionTable[ch];
|
return GLOBAL_CharConversionTable[ch];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -699,9 +701,9 @@ static Int p_force_char_conversion(USES_REGS1) {
|
|||||||
if (GLOBAL_CharConversionTable2 == NULL)
|
if (GLOBAL_CharConversionTable2 == NULL)
|
||||||
return (TRUE);
|
return (TRUE);
|
||||||
for (i = 0; i < MaxStreams; i++) {
|
for (i = 0; i < MaxStreams; i++) {
|
||||||
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
|
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
|
||||||
GLOBAL_Stream[i].stream_wgetc_for_read = ISOWGetc;
|
GLOBAL_Stream[i].stream_wgetc_for_read = ISOWGetc;
|
||||||
}
|
}
|
||||||
GLOBAL_CharConversionTable = GLOBAL_CharConversionTable2;
|
GLOBAL_CharConversionTable = GLOBAL_CharConversionTable2;
|
||||||
return (TRUE);
|
return (TRUE);
|
||||||
}
|
}
|
||||||
@ -710,9 +712,9 @@ static Int p_disable_char_conversion(USES_REGS1) {
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < MaxStreams; i++) {
|
for (i = 0; i < MaxStreams; i++) {
|
||||||
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
|
if (!(GLOBAL_Stream[i].status & Free_Stream_f))
|
||||||
GLOBAL_Stream[i].stream_wgetc_for_read = GLOBAL_Stream[i].stream_wgetc;
|
GLOBAL_Stream[i].stream_wgetc_for_read = GLOBAL_Stream[i].stream_wgetc;
|
||||||
}
|
}
|
||||||
GLOBAL_CharConversionTable = NULL;
|
GLOBAL_CharConversionTable = NULL;
|
||||||
return (TRUE);
|
return (TRUE);
|
||||||
}
|
}
|
||||||
@ -722,54 +724,54 @@ static Int char_conversion(USES_REGS1) {
|
|||||||
unsigned char *s0, *s1;
|
unsigned char *s0, *s1;
|
||||||
|
|
||||||
if (IsVarTerm(t)) {
|
if (IsVarTerm(t)) {
|
||||||
Yap_Error(INSTANTIATION_ERROR, t, "char_conversion/2");
|
Yap_Error(INSTANTIATION_ERROR, t, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
if (!IsAtomTerm(t)) {
|
if (!IsAtomTerm(t)) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
|
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
|
||||||
if (s0[1] != '\0') {
|
if (s0[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
if (IsVarTerm(t1)) {
|
if (IsVarTerm(t1)) {
|
||||||
Yap_Error(INSTANTIATION_ERROR, t1, "char_conversion/2");
|
Yap_Error(INSTANTIATION_ERROR, t1, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
if (!IsAtomTerm(t1)) {
|
if (!IsAtomTerm(t1)) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
|
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
|
||||||
if (s1[1] != '\0') {
|
if (s1[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
/* check if we do have a table for converting characters */
|
/* check if we do have a table for converting characters */
|
||||||
if (GLOBAL_CharConversionTable2 == NULL) {
|
if (GLOBAL_CharConversionTable2 == NULL) {
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* don't create a table if we don't need to */
|
/* don't create a table if we don't need to */
|
||||||
if (s0[0] == s1[0])
|
if (s0[0] == s1[0])
|
||||||
return (TRUE);
|
return (TRUE);
|
||||||
GLOBAL_CharConversionTable2 =
|
GLOBAL_CharConversionTable2 =
|
||||||
Yap_AllocCodeSpace(NUMBER_OF_CHARS * sizeof(char));
|
Yap_AllocCodeSpace(NUMBER_OF_CHARS * sizeof(char));
|
||||||
while (GLOBAL_CharConversionTable2 == NULL) {
|
while (GLOBAL_CharConversionTable2 == NULL) {
|
||||||
if (!Yap_growheap(FALSE, NUMBER_OF_CHARS * sizeof(char), NULL)) {
|
if (!Yap_growheap(FALSE, NUMBER_OF_CHARS * sizeof(char), NULL)) {
|
||||||
Yap_Error(RESOURCE_ERROR_HEAP, TermNil, LOCAL_ErrorMessage);
|
Yap_Error(RESOURCE_ERROR_HEAP, TermNil, LOCAL_ErrorMessage);
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (trueGlobalPrologFlag(CHAR_CONVERSION_FLAG)) {
|
||||||
|
CACHE_REGS
|
||||||
|
if (p_force_char_conversion(PASS_REGS1) == FALSE)
|
||||||
|
return (FALSE);
|
||||||
|
}
|
||||||
|
for (i = 0; i < NUMBER_OF_CHARS; i++)
|
||||||
|
GLOBAL_CharConversionTable2[i] = i;
|
||||||
}
|
}
|
||||||
if (trueGlobalPrologFlag(CHAR_CONVERSION_FLAG)) {
|
|
||||||
CACHE_REGS
|
|
||||||
if (p_force_char_conversion(PASS_REGS1) == FALSE)
|
|
||||||
return (FALSE);
|
|
||||||
}
|
|
||||||
for (i = 0; i < NUMBER_OF_CHARS; i++)
|
|
||||||
GLOBAL_CharConversionTable2[i] = i;
|
|
||||||
}
|
|
||||||
/* just add the new entry */
|
/* just add the new entry */
|
||||||
GLOBAL_CharConversionTable2[(int)s0[0]] = s1[0];
|
GLOBAL_CharConversionTable2[(int)s0[0]] = s1[0];
|
||||||
/* done */
|
/* done */
|
||||||
@ -781,43 +783,43 @@ static Int p_current_char_conversion(USES_REGS1) {
|
|||||||
unsigned char *s0, *s1;
|
unsigned char *s0, *s1;
|
||||||
|
|
||||||
if (GLOBAL_CharConversionTable == NULL) {
|
if (GLOBAL_CharConversionTable == NULL) {
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
t = Deref(ARG1);
|
t = Deref(ARG1);
|
||||||
if (IsVarTerm(t)) {
|
if (IsVarTerm(t)) {
|
||||||
Yap_Error(INSTANTIATION_ERROR, t, "current_char_conversion/2");
|
Yap_Error(INSTANTIATION_ERROR, t, "current_char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
if (!IsAtomTerm(t)) {
|
if (!IsAtomTerm(t)) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
|
s0 = RepAtom(AtomOfTerm(t))->UStrOfAE;
|
||||||
if (s0[1] != '\0') {
|
if (s0[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t, "current_char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
t1 = Deref(ARG2);
|
t1 = Deref(ARG2);
|
||||||
if (IsVarTerm(t1)) {
|
if (IsVarTerm(t1)) {
|
||||||
char out[2];
|
char out[2];
|
||||||
if (GLOBAL_CharConversionTable[(int)s0[0]] == '\0')
|
if (GLOBAL_CharConversionTable[(int)s0[0]] == '\0')
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
out[0] = GLOBAL_CharConversionTable[(int)s0[0]];
|
out[0] = GLOBAL_CharConversionTable[(int)s0[0]];
|
||||||
out[1] = '\0';
|
out[1] = '\0';
|
||||||
return (Yap_unify(ARG2, MkAtomTerm(Yap_LookupAtom(out))));
|
return (Yap_unify(ARG2, MkAtomTerm(Yap_LookupAtom(out))));
|
||||||
}
|
}
|
||||||
if (!IsAtomTerm(t1)) {
|
if (!IsAtomTerm(t1)) {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
|
s1 = RepAtom(AtomOfTerm(t1))->UStrOfAE;
|
||||||
if (s1[1] != '\0') {
|
if (s1[1] != '\0') {
|
||||||
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
|
Yap_Error(REPRESENTATION_ERROR_CHARACTER, t1, "current_char_conversion/2");
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
} else {
|
} else {
|
||||||
return (GLOBAL_CharConversionTable[(int)s0[0]] == '\0' &&
|
return (GLOBAL_CharConversionTable[(int)s0[0]] == '\0' &&
|
||||||
GLOBAL_CharConversionTable[(int)s0[0]] == s1[0]);
|
GLOBAL_CharConversionTable[(int)s0[0]] == s1[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Int p_all_char_conversions(USES_REGS1) {
|
static Int p_all_char_conversions(USES_REGS1) {
|
||||||
@ -825,28 +827,28 @@ static Int p_all_char_conversions(USES_REGS1) {
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (GLOBAL_CharConversionTable == NULL) {
|
if (GLOBAL_CharConversionTable == NULL) {
|
||||||
return (FALSE);
|
return (FALSE);
|
||||||
}
|
}
|
||||||
for (i = NUMBER_OF_CHARS; i > 0;) {
|
for (i = NUMBER_OF_CHARS; i > 0;) {
|
||||||
i--;
|
i--;
|
||||||
if (GLOBAL_CharConversionTable[i] != '\0') {
|
if (GLOBAL_CharConversionTable[i] != '\0') {
|
||||||
Term t1, t2;
|
Term t1, t2;
|
||||||
char s[2];
|
char s[2];
|
||||||
s[1] = '\0';
|
s[1] = '\0';
|
||||||
s[0] = GLOBAL_CharConversionTable[i];
|
s[0] = GLOBAL_CharConversionTable[i];
|
||||||
t1 = MkAtomTerm(Yap_LookupAtom(s));
|
t1 = MkAtomTerm(Yap_LookupAtom(s));
|
||||||
out = MkPairTerm(t1, out);
|
out = MkPairTerm(t1, out);
|
||||||
s[0] = i;
|
s[0] = i;
|
||||||
t2 = MkAtomTerm(Yap_LookupAtom(s));
|
t2 = MkAtomTerm(Yap_LookupAtom(s));
|
||||||
out = MkPairTerm(t2, out);
|
out = MkPairTerm(t2, out);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return (Yap_unify(ARG1, out));
|
return (Yap_unify(ARG1, out));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Yap_InitChtypes(void) {
|
void Yap_InitChtypes(void) {
|
||||||
CACHE_REGS
|
CACHE_REGS
|
||||||
LOCAL_encoding = DefaultEncoding();
|
LOCAL_encoding = DefaultEncoding();
|
||||||
Yap_InitCPred("$change_type_of_char", 2, p_change_type_of_char,
|
Yap_InitCPred("$change_type_of_char", 2, p_change_type_of_char,
|
||||||
SafePredFlag | SyncPredFlag | HiddenPredFlag);
|
SafePredFlag | SyncPredFlag | HiddenPredFlag);
|
||||||
Yap_InitCPred("toupper", 2, toupper2, SafePredFlag);
|
Yap_InitCPred("toupper", 2, toupper2, SafePredFlag);
|
||||||
|
Reference in New Issue
Block a user