diff --git a/C/atomic.c b/C/atomic.c index 2a854160f..7b30503ea 100644 --- a/C/atomic.c +++ b/C/atomic.c @@ -1252,15 +1252,14 @@ error: static Int atom_length(USES_REGS1) { Term t1 = Deref(ARG1); - ; Term t2 = Deref(ARG2); size_t len; if (!Yap_IsGroundTerm(t1)) { - Yap_Error(INSTANTIATION_ERROR, t1, "atom_length/2"); + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); return (FALSE); } else if (!IsAtomTerm(t1)) { - Yap_Error(TYPE_ERROR_ATOM, t1, "atom_length/2"); + Yap_Error(TYPE_ERROR_ATOM, t1, "at first argument"); return (FALSE); } @@ -1291,21 +1290,18 @@ static Int atomic_length(USES_REGS1) { size_t len; if (!Yap_IsGroundTerm(t1)) { - Yap_Error(INSTANTIATION_ERROR, t1, "atomic_length/2"); - return (FALSE); - } else if (!IsAtomicTerm(t1)) { - Yap_Error(TYPE_ERROR_ATOM, t1, "atomic_length/2"); + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); return (FALSE); } - if (Yap_IsGroundTerm(t2)) { + if (IsNonVarTerm(t2)) { + if (!IsIntegerTerm(t2)) { - Yap_Error(TYPE_ERROR_INTEGER, t2, "atomic_length/2"); + Yap_Error(TYPE_ERROR_INTEGER, t2, "atom_length/2"); return (FALSE); - } - if ((Int)(len = IntegerOfTerm(t2)) < 0) { - Yap_Error(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, t2, "atomic_length/2"); + } else if ((Int)(len = IntegerOfTerm(t2)) < 0) { + Yap_Error(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, t2, "atom_length/2"); return (FALSE); } } @@ -1348,6 +1344,271 @@ restart_aux: return FALSE; } +/** @pred downcase_text_to_atom(+Text, -Atom) + * + * Convert all upper case code-points in text _Text_ to downcase. Unify the result as atom _Atom_ with the second argument. + * + */ +static Int downcase_text_to_atom(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!IsAtomTerm(t2)) { + Yap_Error(TYPE_ERROR_ATOM, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Atom at = Yap_AtomicToLowAtom(t1); + if (at == NULL) { + if (LOCAL_Error_TYPE && Yap_HandleError("downcase_text_to_atom/2")) + continue; + return false; + } + return Yap_unify(MkAtomTerm(at), t2); + } + return false; +} + +/** @pred upcase_text_to_atom(+Text, -Atom) + * + * Convert all lower case code-points in text _Text_ to up case. Unify the result as atom _Atom_ with the second argument. + * + */ +static Int upcase_text_to_atom(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!IsAtomTerm(t2)) { + Yap_Error(TYPE_ERROR_ATOM, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Atom at = Yap_AtomicToUpAtom(t1); + if (at == NULL) { + if (LOCAL_Error_TYPE && Yap_HandleError("upcase_text_to_atom/2")) + continue; + return false; + } + return Yap_unify(MkAtomTerm(at), t2); + } + return false; +} + +/** @pred downcase_text_to_string(+Text, -String) + * + * Convert all upper case code-points in text _Text_ to downcase. Unify the result as string _String_ with the second argument. + * + */ +static Int downcase_text_to_string(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!IsStringTerm(t2)) { + Yap_Error(TYPE_ERROR_STRING, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Term t = Yap_AtomicToLowString(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("downcase_text_to_string/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + +/** @pred upcase_text_to_string(+Text, -String) + * + * Convert all lower case code-points in text _Text_ to up case. Unify the result as string _String_ with the second argument. + * + */ +static Int upcase_text_to_string(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!IsStringTerm(t2)) { + Yap_Error(TYPE_ERROR_STRING, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Term t = Yap_AtomicToUpString(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("upcase_text_to_string/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + +/** @pred downcase_text_to_codes(+Text, -Codes) + * + * Convert all upper case code-points in text _Text_ to downcase. Unify the result as a sequence of codes _Codes_ with the second argument. + * + */ +static Int downcase_text_to_codes(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!Yap_IsListTerm(t2)) { + Yap_Error(TYPE_ERROR_LIST, t2, "at second argument"); + return false; + } + } + while (true) { + Term t = Yap_AtomicToLowListOfCodes(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("downcase_text_to_codes/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + +/** @pred upcase_text_to_codes(+Text, -Codes) + * + * Convert all lower case code-points in text _Text_ to up case. Unify the result as a sequence of codes _Codes_ with the second argument. + * + */ +static Int upcase_text_to_codes(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!Yap_IsListTerm(t2)) { + Yap_Error(TYPE_ERROR_LIST, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Term t = Yap_AtomicToUpListOfCodes(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("upcase_text_to_codes/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + + +/** @pred downcase_text_to_chars(+Text, -Chars) + * + * Convert all upper case code-points in text _Text_ to downcase. Unify the result as a sequence of chars _Chars_ with the second argument. + * + */ +static Int downcase_text_to_chars(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!Yap_IsListTerm(t2)) { + Yap_Error(TYPE_ERROR_LIST, t2, "at second argument"); + return false; + } + } + while (true) { + Term t = Yap_AtomicToLowListOfAtoms(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("downcase_text_to_to_chars/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + +/** @pred upcase_text_to_chars(+Text, -Chars) + * + * Convert all lower case code-points in text _Text_ to up case. Unify the result as a sequence of chars _Chars_ with the second argument. + * + */ +static Int upcase_text_to_chars(USES_REGS1) { + Term t1 = Deref(ARG1); + Term t2 = Deref(ARG2); + size_t len; + + if (!Yap_IsGroundTerm(t1)) { + Yap_Error(INSTANTIATION_ERROR, t1, "at first argument"); + return false; + } + + if (IsNonVarTerm(t2)) { + if (!Yap_IsListTerm(t2)) { + Yap_Error(TYPE_ERROR_LIST, t2, "at second argument"); + return (FALSE); + } + } + while (true) { + Term t = Yap_AtomicToUpListOfAtoms(t1); + if (t == TermZERO) { + if (LOCAL_Error_TYPE && Yap_HandleError("upcase_text_to_chars/2")) + continue; + return false; + } + return Yap_unify(t, t2); + } + return false; +} + static int is_wide(wchar_t *s) { wchar_t ch; @@ -2369,6 +2630,18 @@ void Yap_InitAtomPreds(void) { Yap_InitCPred("atomics_to_string", 2, atomics_to_string2, 0); Yap_InitCPred("atomics_to_string", 3, atomics_to_string3, 0); Yap_InitCPred("get_string_code", 3, get_string_code3, 0); + + Yap_InitCPred("downcase_text_to_atom", 2, downcase_text_to_atom, 0); + Yap_InitCPred("downcase_atom", 2, downcase_text_to_atom, 0); + Yap_InitCPred("upcase_text_to_atom", 2, upcase_text_to_atom, 0); + Yap_InitCPred("upcase_atom", 2, upcase_text_to_atom, 0); + Yap_InitCPred("downcase_text_to_string", 2, downcase_text_to_string, 0); + Yap_InitCPred("upcase_text_to_string", 2, upcase_text_to_string, 0); + Yap_InitCPred("downcase_text_to_codes", 2, downcase_text_to_codes, 0); + Yap_InitCPred("upcase_text_to_codes", 2, upcase_text_to_codes, 0); + Yap_InitCPred("downcase_text_to_chars", 2, downcase_text_to_chars, 0); + Yap_InitCPred("upcase_text_to_chars", 2, upcase_text_to_chars, 0); + /* hiding and unhiding some predicates */ Yap_InitCPred("hide_atom", 1, hide_atom, SafePredFlag | SyncPredFlag); Yap_InitCPred("hide", 1, hide_atom, SafePredFlag | SyncPredFlag); diff --git a/C/text.c b/C/text.c index 28a67c700..1207bc2d0 100644 --- a/C/text.c +++ b/C/text.c @@ -392,6 +392,7 @@ void *Yap_readText(void *buf, seq_tv_t *inp, encoding_t *enc, int *minimal, return NULL; } + static Term write_strings(void *s0, seq_tv_t *out, encoding_t enc, int minimal, size_t leng USES_REGS) { size_t min = 0, max = leng; @@ -1006,7 +1007,7 @@ static Term string_to_term(void *s0, seq_tv_t *out, encoding_t enc, int minimal, return o; } -int write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, +bool write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, size_t leng USES_REGS) { /* we know what the term is */ switch (out->type & YAP_TYPE_MASK) { @@ -1054,18 +1055,129 @@ int write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, 0L) return out->val.t != 0; } - return FALSE; + return false; +} + + +static size_t upcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) { + size_t max = -1; + + + switch (enc) { + case ENC_ISO_UTF8: { + unsigned char *s = s0; + while (*s) { + // assumes the two code have always the same size; + utf8proc_int32_t chr; + get_utf8(s, -1, &chr); + chr = utf8proc_toupper(chr); + s += put_utf8(s, chr); + } + return true; + } + + case ENC_ISO_LATIN1: { + unsigned char *s = s0; + utf8proc_int32_t chr; + + while ((chr = *s)) { + // assumes the two code have always the same size; + chr = *s; + chr = utf8proc_toupper(chr); + *s++ = chr; + } + return true; + } + + case ENC_WCHAR: { + wchar_t *s = s0; + utf8proc_int32_t chr; + + while ((chr = *s)) { + // assumes the two code have always the same size; + chr = *s; + chr = utf8proc_toupper(chr); + *s++ = chr; + } + return true; + } + default: + Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", + enc_name(enc), __FUNCTION__); + + } + return false; +} + +static size_t downcase(void *s0, seq_tv_t *out, encoding_t enc USES_REGS) { + size_t max = -1; + + + switch (enc) { + case ENC_ISO_UTF8: { + unsigned char *s = s0; + while (*s) { + // assumes the two code have always the same size; + utf8proc_int32_t chr; + get_utf8(s, -1, &chr); + chr = utf8proc_tolower(chr); + s += put_utf8(s, chr); + } + return true; + } + + case ENC_ISO_LATIN1: { + unsigned char *s = s0; + utf8proc_int32_t chr; + + while ((chr = *s)) { + // assumes the two code have always the same size; + chr = *s; + chr = utf8proc_tolower(chr); + *s++ = chr; + } + return true; + } + case ENC_WCHAR: { + wchar_t *s = s0; + utf8proc_int32_t chr; + + while ((chr = *s)) { + // assumes the two code have always the same size; + chr = *s; + chr = utf8proc_tolower(chr); + *s++ = chr; + } + return true; + } + default: + Yap_Error(SYSTEM_ERROR_INTERNAL, TermNil, "Unsupported Encoding ~s in %s", + enc_name(enc), __FUNCTION__); + + } + return false; } int Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS) { encoding_t enc; int minimal = FALSE; - char *buf; + char *buf; size_t leng; buf = Yap_readText(NULL, inp, &enc, &minimal, &leng PASS_REGS); if (!buf) return 0L; + if (out->type & (YAP_STRING_UPCASE|YAP_STRING_DOWNCASE)) { + if (out->type & YAP_STRING_UPCASE) { + if (!upcase(buf, out, enc)) + return false; + } + if (out->type & YAP_STRING_DOWNCASE) { + if (!downcase(buf, out, enc)) + return false; + } + } + return write_Text(buf, out, enc, minimal, leng PASS_REGS); } diff --git a/H/YapText.h b/H/YapText.h index a4fb70db0..eb31a9b07 100644 --- a/H/YapText.h +++ b/H/YapText.h @@ -296,7 +296,9 @@ typedef enum { YAP_STRING_WQ = 0x10000, // output with write_quote YAP_STRING_WC = 0x20000, // output with write_canonical YAP_STRING_WITH_BUFFER = 0x40000, // output on existing buffer - YAP_STRING_MALLOC = 0x80000 // output on malloced buffer + YAP_STRING_MALLOC = 0x80000, // output on malloced buffer + YAP_STRING_UPCASE = 0x100000, // output on malloced buffer + YAP_STRING_DOWNCASE = 0x200000 // output on malloced buffer } enum_seq_type_t; typedef UInt seq_type_t; @@ -384,7 +386,7 @@ static inline seq_type_t mod_to_bqtype(Term mod USES_REGS) { extern void *Yap_readText(void *buf, seq_tv_t *inp, encoding_t *enc, int *minimal, size_t *lengp USES_REGS); -extern int write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, +extern bool write_Text(void *inp, seq_tv_t *out, encoding_t enc, int minimal, size_t leng USES_REGS); extern int Yap_CVT_Text(seq_tv_t *inp, seq_tv_t *out USES_REGS); extern void *Yap_Concat_Text(int n, seq_tv_t inp[], seq_tv_t *out USES_REGS); @@ -393,6 +395,104 @@ extern void *Yap_Splice_Text(int n, size_t cuts[], seq_tv_t *inp, // user friendly interface +static inline Atom Yap_AtomicToLowAtom(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_ATOM|YAP_STRING_DOWNCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.a; +} + +static inline Atom Yap_AtomicToUpAtom(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_ATOM|YAP_STRING_UPCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.a; +} + +static inline Term Yap_AtomicToLowString(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_STRING|YAP_STRING_DOWNCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term Yap_AtomicToUpString(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_STRING|YAP_STRING_UPCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term Yap_AtomicToLowListOfCodes(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_CODES|YAP_STRING_DOWNCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term Yap_AtomicToUpListOfCodes(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_CODES|YAP_STRING_UPCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term Yap_AtomicToLowListOfAtoms(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_ATOMS|YAP_STRING_DOWNCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term Yap_AtomicToUpListOfAtoms(Term t0 USES_REGS) { + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING | YAP_STRING_CODES | YAP_STRING_ATOMS | + YAP_STRING_ATOM | YAP_STRING_INT | YAP_STRING_FLOAT | + YAP_STRING_BIG | YAP_STRING_TERM; + out.type = YAP_STRING_ATOMS|YAP_STRING_UPCASE; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + + + static inline size_t Yap_AtomicToLength(Term t0 USES_REGS) { seq_tv_t inp, out; inp.val.t = t0;