diff --git a/C/atoms.c b/C/atomic.c similarity index 94% rename from C/atoms.c rename to C/atomic.c index 5f785074d..649d5bd2f 100644 --- a/C/atoms.c +++ b/C/atomic.c @@ -31,7 +31,8 @@ static char SccsId[] = "%W% %G%"; #include "eval.h" #include "yapio.h" #include "pl-shared.h" -#include "YapMirror.h" +#include "pl-utf8.h" +#include "YapText.h" #ifdef TABLING #include "tab.macros.h" #endif /* TABLING */ @@ -564,7 +565,7 @@ init_atomic_concat3( USES_REGS1 ) else cut_fail(); } /* Error handling */ - if (LOCAL_Error_TYPE && Yap_HandleError( "atoicm_concat/3" )) { + if (LOCAL_Error_TYPE && Yap_HandleError( "atomic_concat/3" )) { goto restart_aux; } return FALSE; @@ -627,12 +628,92 @@ init_string_concat3( USES_REGS1 ) else cut_fail(); } /* Error handling */ - if (LOCAL_Error_TYPE && Yap_HandleError( "atom_concat/3" )) { + if (LOCAL_Error_TYPE && Yap_HandleError( "string_concat/3" )) { goto restart_aux; } return FALSE; } +static Int +cont_string_code3( USES_REGS1 ) +{ + Term t2; + Int i, j; + int chr; + char *s; + const char *s0; + restart_aux: + t2 = Deref(ARG2); + s0 = StringOfTerm( t2 ); + i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6 + j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1 + s = utf8_get_char( s0+i, &chr ); + if (s[0]) { + EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0); + EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1); + return Yap_unify(MkIntegerTerm( chr ), ARG3) && Yap_unify(MkIntegerTerm( j ), ARG1); + } + if (Yap_unify(MkIntegerTerm( chr ), ARG3) && Yap_unify(MkIntegerTerm( j ), ARG1)) + cut_succeed(); + else + cut_fail(); + /* Error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "get_code/3" )) { + goto restart_aux; + } + return FALSE; +} + + +static Int +init_string_code3( USES_REGS1 ) +{ + Term t1; + Term t2; + const char *s; + restart_aux: + t1 = Deref(ARG1); + t2 = Deref(ARG2); + if (IsVarTerm(t2)) { + LOCAL_Error_TYPE = INSTANTIATION_ERROR; + LOCAL_Error_Term = t2; + } else if (!IsStringTerm(t2)) { + LOCAL_Error_TYPE = TYPE_ERROR_STRING; + LOCAL_Error_Term = t2; + } else { + s = StringOfTerm( t2 ); + t1 = Deref(ARG1); + if (IsVarTerm(t1)) { + EXTRA_CBACK_ARG(3,1) = MkIntTerm(0); + EXTRA_CBACK_ARG(3,2) = MkIntTerm(0); + return cont_string_code3( PASS_REGS1 ); + } else if (!IsIntegerTerm( t1 )) { + LOCAL_Error_TYPE = TYPE_ERROR_INTEGER; + LOCAL_Error_Term = t1; + } else { + const char *ns = s; + int chr; + Int indx = IntegerOfTerm( t1 ); + if (indx < 0) { + LOCAL_Error_TYPE = DOMAIN_ERROR_NOT_LESS_THAN_ZERO; + LOCAL_Error_Term = t1; + } + ns = utf8_skip(s,indx); + if (ns == NULL) { + cut_fail(); // silently fail? + } + utf8_get_char( ns, &chr); + if ( chr == '\0') cut_fail(); + if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed(); + cut_fail(); + } + } + /* Error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "get_code/3" )) { + goto restart_aux; + } + return FALSE; +} static Int p_atom_concat2( USES_REGS1 ) @@ -1083,7 +1164,7 @@ check_sub_string_at(int min, Term at, Term nat) const char *p1, *p2; int c1; - p1 = utf8_n(StringOfTerm(at), min); + p1 = utf8_skip(StringOfTerm(at), min); p2 = StringOfTerm(nat); while ( (c1 = *p1++) == *p2++ && c1); return c1 == 0; @@ -1137,7 +1218,7 @@ check_sub_string_bef(int max, Term at, Term nat) if ((Int)(min - len) < 0) return FALSE; - p1 = utf8_n(StringOfTerm(at),min); + p1 = utf8_skip(StringOfTerm(at),min); p2 = StringOfTerm(nat); while ( (c1 = *p1++) == *p2++ && c1); return c1 == 0; @@ -1241,7 +1322,7 @@ cont_sub_atomic( USES_REGS1 ) } } else { while (!found) { - p = (char *)utf8_n(p, min); + p = (char *)utf8_skip(p, min); if (utf8_strncmp(p, StringOfTerm(nat), len) == 0) { Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); @@ -1455,14 +1536,14 @@ init_sub_atomic( int sub_atom USES_REGS ) if (!sub_atom) { out = (utf8_strlen1(StringOfTerm(tout)) == len); if (!out) cut_fail(); - } else if (IsWideAtom(AtomOfTerm(nat))) { + } else if (IsWideAtom(AtomOfTerm(tout))) { if (!(mask & SUB_ATOM_HAS_VAL)) { cut_fail(); } /* just check length, they may still be several occurrences :( */ - out = (wcslen(RepAtom(AtomOfTerm(nat))->WStrOfAE) == len); + out = (wcslen(RepAtom(AtomOfTerm(tout))->WStrOfAE) == len); } else { - out = (strlen(RepAtom(AtomOfTerm(nat))->StrOfAE) == len); + out = (strlen(RepAtom(AtomOfTerm(tout))->StrOfAE) == len); if (!out) cut_fail(); } if (len == sz) { @@ -1681,6 +1762,7 @@ Yap_InitBackAtoms(void) Yap_InitCPredBack("string_concat", 3, 2, init_string_concat3, cont_string_concat3, 0); Yap_InitCPredBack("sub_atom", 5, 5, init_sub_atom, cont_sub_atomic, 0); Yap_InitCPredBack("sub_string", 5, 5, init_sub_string, cont_sub_atomic, 0); + Yap_InitCPredBack("string_code", 3, 1, init_string_code3, cont_string_code3, 0); } diff --git a/C/bignum.c b/C/bignum.c index 51792dc67..a7ec93ae9 100644 --- a/C/bignum.c +++ b/C/bignum.c @@ -348,6 +348,17 @@ p_is_bignum( USES_REGS1 ) #endif } +static Int +p_is_string( USES_REGS1 ) +{ + Term t = Deref(ARG1); + return( + IsNonVarTerm(t) && + IsApplTerm(t) && + FunctorOfTerm(t) == FunctorString + ); +} + static Int p_nb_set_bit( USES_REGS1 ) { @@ -476,6 +487,7 @@ Yap_InitBigNums(void) Yap_InitCPred("$bignum", 1, p_is_bignum, SafePredFlag); Yap_InitCPred("rational", 3, p_rational, 0); Yap_InitCPred("rational", 1, p_is_rational, SafePredFlag); + Yap_InitCPred("string", 1, p_is_string, SafePredFlag); Yap_InitCPred("opaque", 1, p_is_opaque, SafePredFlag); Yap_InitCPred("nb_set_bit", 2, p_nb_set_bit, SafePredFlag); } diff --git a/C/cmppreds.c b/C/cmppreds.c index dcb26e961..42e76f7e0 100644 --- a/C/cmppreds.c +++ b/C/cmppreds.c @@ -360,7 +360,7 @@ compare(Term t1, Term t2) /* compare terms t1 and t2 */ case db_ref_e: return 1; case string_e: - return 1; + return -1; } } return -1; @@ -437,10 +437,22 @@ compare(Term t1, Term t2) /* compare terms t1 and t2 */ case string_e: { if (IsApplTerm(t2)) { - Functor f2 = FunctorOfTerm(t2); - if (f2 == FunctorString) + Functor fun2 = FunctorOfTerm(t2); + switch ((CELL)fun2) { + case double_e: + return 1; + case long_int_e: + return 1; +#ifdef USE_GMP + case big_int_e: + return 1; +#endif + case db_ref_e: + return 1; + case string_e: return strcmp(StringOfTerm(t1), StringOfTerm(t2)); - return 1; + } + return -1; } return -1; } diff --git a/C/strings.c b/C/text.c similarity index 99% rename from C/strings.c rename to C/text.c index ef67e4c24..75928cc96 100644 --- a/C/strings.c +++ b/C/text.c @@ -21,7 +21,7 @@ #include "eval.h" #include "yapio.h" #include "pl-shared.h" -#include "YapMirror.h" +#include "YapText.h" #include @@ -1013,7 +1013,7 @@ advance_Text( void *s, int l, encoding_t enc ) case YAP_CHAR: return ((char *)s)+l; case YAP_UTF8: - return (char *)utf8_n((const char *)s,l); + return (char *)utf8_skip((const char *)s,l); case YAP_WCHAR: return ((wchar_t *)s)+l; } @@ -1198,7 +1198,7 @@ slice( int min, int max, void *buf, seq_tv_t *out, encoding_t enc USES_REGS ) int chr; while ( min++ < max ) { chr = *ptr++; nbuf = utf8_put_char(nbuf, chr); } } else { - const char *ptr = utf8_n ( (const char *)buf, min ); + const char *ptr = utf8_skip ( (const char *)buf, min ); int chr; while ( min++ < max ) { ptr = utf8_get_char(ptr, & chr); nbuf = utf8_put_char(nbuf, chr); } } @@ -1231,7 +1231,7 @@ slice( int min, int max, void *buf, seq_tv_t *out, encoding_t enc USES_REGS ) /* atom */ wchar_t *nbuf = (wchar_t *)H; Term t = ARG1; - const char *ptr = utf8_n ( (const char *)buf, min ); + const char *ptr = utf8_skip ( (const char *)buf, min ); int chr; LOCAL_ERROR( max-min ); diff --git a/H/YapMirror.h b/H/YapText.h similarity index 99% rename from H/YapMirror.h rename to H/YapText.h index f6459553a..b6bff0083 100644 --- a/H/YapMirror.h +++ b/H/YapText.h @@ -46,6 +46,7 @@ typedef enum { YAP_STRING_BIG = 0x100, YAP_STRING_LITERAL = 0x200, YAP_STRING_LENGTH = 0x400, + YAP_STRING_NTH = 0x800, YAP_STRING_TERM = 0x1000, // joint with other flags that define possible values YAP_STRING_DIFF = 0x2000, // difference list YAP_STRING_NCHARS= 0x4000, // size of input/result @@ -63,6 +64,7 @@ typedef union { const wchar_t *w; Atom a; size_t l; + int d; Term t;// depends on other flags } seq_val_t; @@ -968,3 +970,4 @@ Yap_SubtractTailAtomic(Term t1, Term th USES_REGS) else return 0L; } + diff --git a/pl/utils.yap b/pl/utils.yap index 6fd9aa38b..9dcd1ff4a 100644 --- a/pl/utils.yap +++ b/pl/utils.yap @@ -342,12 +342,12 @@ atom_concat(Xs,At) :- ( var(At) -> '$atom_concat'(Xs, At ) ; - '$atom_concat_constraints'(Xs, start, At, Unbound), + '$atom_concat_constraints'(Xs, 0, At, Unbound), '$process_atom_holes'(Unbound) ). % the constraints are of the form hole: HoleAtom, Begin, Atom, End -'$atom_concat_constraints'([At], 0, At, _, []) :- !. +'$atom_concat_constraints'([At], 0, At, []) :- !. '$atom_concat_constraints'([At0], mid(Next, At), At, [hole(At0, Next, At, end)]) :- !. % just slice first atom '$atom_concat_constraints'([At0|Xs], 0, At, Unbound) :- @@ -356,7 +356,7 @@ atom_concat(Xs,At) :- sub_atom(At, _, L, 0, Atr ), %remainder '$atom_concat_constraints'(Xs, 0, Atr, Unbound). % first hole: Follow says whether we have two holes in a row, At1 will be our atom -'$atom_concat_constraints'([At0|Xs], start, At, [hole(At0, 0, At, Next)|Unbound]) :- +'$atom_concat_constraints'([At0|Xs], 0, At, [hole(At0, 0, At, Next)|Unbound]) :- '$atom_concat_constraints'(Xs, mid(Next,At1), At, Unbound). % end of a run '$atom_concat_constraints'([At0|Xs], mid(end, At1), At, Unbound) :- @@ -364,7 +364,7 @@ atom_concat(Xs,At) :- sub_atom(At, Next, Sz, L, At0), sub_atom(At, 0, Next, Next, At1), sub_atom(At, _, L, 0, Atr), %remainder - '$atom_concat_constraints'(Xs, 0, Atr, _, Unbound). + '$atom_concat_constraints'(Xs, 0, Atr, Unbound). '$atom_concat_constraints'([At0|Xs], mid(Next,At1), At, Next, [hole(At0, Next, At, Follow)|Unbound]) :- '$atom_concat_constraints'(Xs, mid(NextFollow, At1), At, Unbound).