diff --git a/C/adtdefs.c b/C/adtdefs.c index 8449e10d2..a7956aafe 100755 --- a/C/adtdefs.c +++ b/C/adtdefs.c @@ -302,17 +302,17 @@ Yap_LookupMaybeWideAtom(wchar_t *atom) } Atom -Yap_LookupMaybeWideAtomWithLength(wchar_t *atom, size_t len) +Yap_LookupMaybeWideAtomWithLength(wchar_t *atom, size_t len0) { /* lookup atom in atom table */ wchar_t *p = atom, c; - size_t len0 = 0; + size_t len = 0; Atom at; int wide = FALSE; while ((c = *p++)) { if (c > 255) wide = TRUE; - len0++; - if (len0 == len) break; + len++; + if (len == len0) break; } if (p[0] == '\0' && wide) return LookupWideAtom(atom); else if (wide) { diff --git a/C/amasm.c b/C/amasm.c index c7d529346..6639bc32c 100755 --- a/C/amasm.c +++ b/C/amasm.c @@ -3280,7 +3280,7 @@ do_pass(int pass_no, yamop **entry_codep, int assembling, int *clause_has_blobsp code_p = a_rb(_get_bigint, clause_has_blobsp, code_p, pass_no, cip); break; case get_string_op: - code_p = a_rb(_get_string, clause_has_blobsp, code_p, pass_no, cip); + code_p = a_rstring(_get_string, clause_has_blobsp, code_p, pass_no, cip); break; case get_dbterm_op: code_p = a_dbt(_get_dbterm, clause_has_dbtermp, code_p, pass_no, cip); @@ -3364,7 +3364,7 @@ do_pass(int pass_no, yamop **entry_codep, int assembling, int *clause_has_blobsp code_p = a_ublob(cip->cpc->rnd1, _unify_bigint, _unify_atom_write, clause_has_blobsp, code_p, pass_no, cip); break; case unify_string_op: - code_p = a_ublob(cip->cpc->rnd1, _unify_string, _unify_atom_write, clause_has_blobsp, code_p, pass_no, cip); + code_p = a_ustring(cip->cpc->rnd1, _unify_string, _unify_atom_write, clause_has_blobsp, code_p, pass_no, cip); break; case unify_dbterm_op: code_p = a_udbt(cip->cpc->rnd1, _unify_dbterm, _unify_atom_write, clause_has_dbtermp, code_p, pass_no, cip); @@ -3385,7 +3385,7 @@ do_pass(int pass_no, yamop **entry_codep, int assembling, int *clause_has_blobsp code_p = a_ublob(cip->cpc->rnd1, _unify_l_bigint, _unify_l_atom_write, clause_has_blobsp, code_p, pass_no, cip); break; case unify_last_string_op: - code_p = a_ublob(cip->cpc->rnd1, _unify_l_bigint, _unify_l_atom_write, clause_has_blobsp, code_p, pass_no, cip); + code_p = a_ustring(cip->cpc->rnd1, _unify_l_bigint, _unify_l_atom_write, clause_has_blobsp, code_p, pass_no, cip); break; case unify_last_dbterm_op: code_p = a_udbt(cip->cpc->rnd1, _unify_l_dbterm, _unify_l_atom_write, clause_has_dbtermp, code_p, pass_no, cip); diff --git a/C/atoms.c b/C/atoms.c index 058cc412c..8980b5e02 100644 --- a/C/atoms.c +++ b/C/atoms.c @@ -148,6 +148,32 @@ p_name( USES_REGS1 ) return FALSE; } +static Int +p_string_to_atomic( USES_REGS1 ) +{ /* string_to_atom(?String,?Atom) */ + Term t2 = Deref(ARG2), t1 = Deref(ARG1); + LOCAL_MAX_SIZE = 1024; + + restart_aux: + if (!IsVarTerm(t1)) { + Term t; + // verify if an atom, int, float or bignnum + t = Yap_StringToAtomic( t1 PASS_REGS ); + if (t != 0L) + return Yap_unify(t, t2); + // else + } else { + Term t0 = Yap_AtomicToString( t2 PASS_REGS ); + if (t0) return Yap_unify(t0, t1); + } + if (LOCAL_Error_TYPE && Yap_HandleError( "string_to_atom/2" )) { + t1 = Deref(ARG1); + t2 = Deref(ARG2); + goto restart_aux; + } + return FALSE; +} + static Int p_string_to_atom( USES_REGS1 ) { /* string_to_atom(?String,?Atom) */ @@ -158,12 +184,12 @@ p_string_to_atom( USES_REGS1 ) if (!IsVarTerm(t1)) { Atom at; // verify if an atom, int, float or bignnum - at = Yap_ListToAtom( t1 PASS_REGS ); + at = Yap_StringToAtom( t1 PASS_REGS ); if (at) return Yap_unify(MkAtomTerm(at), t2); // else } else { - Term t0 = Yap_AtomicToString( t2 PASS_REGS ); + Term t0 = Yap_AtomToString( t2 PASS_REGS ); if (t0) return Yap_unify(t0, t1); } if (LOCAL_Error_TYPE && Yap_HandleError( "string_to_atom/2" )) { @@ -186,7 +212,7 @@ p_string_to_list( USES_REGS1 ) if (t1) return Yap_unify( ARG1, t1 ); } else { - Term tf = Yap_AtomicToListOfCodes(string PASS_REGS); + Term tf = Yap_StringToListOfCodes(string PASS_REGS); return Yap_unify( ARG2, tf ); } if (LOCAL_Error_TYPE && Yap_HandleError( "string_to_list/2" )) { @@ -197,6 +223,16 @@ p_string_to_list( USES_REGS1 ) return FALSE; } +static Int +p_atom_string( USES_REGS1 ) +{ + // swap arguments + Term t1 = ARG2; + ARG2 = ARG1; + ARG1 = t1; + return p_string_to_atom( PASS_REGS1 ); +} + static Int p_atom_chars( USES_REGS1 ) { @@ -206,13 +242,13 @@ p_atom_chars( USES_REGS1 ) restart_aux: t1 = Deref(ARG1); if (!IsVarTerm(t1)) { - Term tf = Yap_AtomicToListOfAtoms(t1 PASS_REGS); + Term tf = Yap_AtomToListOfAtoms(t1 PASS_REGS); if (tf) return Yap_unify( ARG2, tf ); } else { /* ARG1 unbound */ Term t = Deref(ARG2); - Atom af = Yap_ListToAtom(t PASS_REGS); + Atom af = Yap_ListOfAtomsToAtom(t PASS_REGS); if (af) return Yap_unify( ARG1, MkAtomTerm(af) ); /* error handling */ @@ -230,7 +266,7 @@ p_atom_codes( USES_REGS1 ) restart_aux: t1 = Deref(ARG1); if (!IsVarTerm(t1)) { - Term tf = Yap_AtomicToListOfCodes(t1 PASS_REGS); + Term tf = Yap_AtomToListOfCodes(t1 PASS_REGS); if (tf) return Yap_unify( ARG2, tf ); } else { @@ -325,380 +361,189 @@ p_number_codes( USES_REGS1 ) } static Int -p_atom_concat( USES_REGS1 ) +cont_atom_concat3( USES_REGS1 ) +{ + Term t3; + Atom ats[2]; + Int i; + restart_aux: + t3 = Deref(ARG3); + i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); + EXTRA_CBACK_ARG(3,1) = MkIntTerm(++i); + if ( ! Yap_SpliceAtom( t3, ats, i PASS_REGS ) ) { + cut_fail(); + } else return Yap_unify( ARG1, MkAtomTerm(ats[0])) && + Yap_unify( ARG2, MkAtomTerm(ats[1])) ; + /* Error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atom_concat/3" )) { + goto restart_aux; + } + return FALSE; +} + + +static Int +init_atom_concat3( USES_REGS1 ) { Term t1; - int wide_mode = FALSE; - - restart: + Term t2, t3, ot; + Atom at; + restart_aux: t1 = Deref(ARG1); - /* we need to have a list */ - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return FALSE; - } - if (wide_mode) { - wchar_t *cptr = (wchar_t *)(((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE), *cpt0; - wchar_t *top = (wchar_t *)AuxSp; - unsigned char *atom_str = NULL; - Atom ahead; - UInt sz; - - cpt0 = cptr; - while (IsPairTerm(t1)) { - Term thead = HeadOfTerm(t1); - if (IsVarTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return(FALSE); - } - if (!IsAtomTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_ATOM, ARG1, "atom_concat/2"); - return(FALSE); - } - ahead = AtomOfTerm(thead); - if (IsWideAtom(ahead)) { - /* check for overflows */ - sz = wcslen(RepAtom(ahead)->WStrOfAE); - } else { - atom_str = (unsigned char *)RepAtom(ahead)->StrOfAE; - sz = strlen((char *)atom_str); - } - if (cptr+sz > top+1024) { - cptr = (wchar_t *)Yap_ExpandPreAllocCodeSpace(sz+1024,NULL, TRUE); - if (cptr+sz > (wchar_t *)AuxSp+1024) { - /* crash in flames */ - Yap_Error(OUT_OF_AUXSPACE_ERROR, ARG1, "allocating temp space in atom_concat/2"); - return FALSE; - } - top = (wchar_t *)AuxSp; - goto restart; - } - if (IsWideAtom(ahead)) { - memcpy((void *)cptr, RepAtom(ahead)->WStrOfAE, sz*sizeof(wchar_t)); - cptr += sz; - } else { - UInt i; - - for (i=0; i < sz; i++) { - *cptr++ = *atom_str++; - } - } - t1 = TailOfTerm(t1); - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return FALSE; - } - } - if (t1 == TermNil) { - Atom at; - - cptr[0] = '\0'; - while ((at = Yap_LookupWideAtom(cpt0)) == NIL) { - if (!Yap_growheap(FALSE, 0, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - return Yap_unify(ARG2, MkAtomTerm(at)); - } + t2 = Deref(ARG2); + t3 = Deref(ARG3); + if (!IsVarTerm(t1) && !IsVarTerm(t2)) { + at = Yap_ConcatAtoms( t1, t2 PASS_REGS ); + ot = ARG3; + } else if (!IsVarTerm(t1) && !IsVarTerm(t3) ) { + at = Yap_SubtractHeadAtom( Deref(ARG3), t1 PASS_REGS ); + ot = ARG2; + } else if (!IsVarTerm(t2) && !IsVarTerm(t3)) { + at = Yap_SubtractTailAtom( Deref(ARG3), t2 PASS_REGS ); + ot = ARG1; } else { - char *cptr = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE, *cpt0; - char *top = (char *)AuxSp; - unsigned char *atom_str; - UInt sz; - - cpt0 = cptr; - while (IsPairTerm(t1)) { - Term thead = HeadOfTerm(t1); - if (IsVarTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return(FALSE); - } - if (!IsAtomTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_ATOM, ARG1, "atom_concat/2"); - return(FALSE); - } - if (IsWideAtom(AtomOfTerm(thead)) && !wide_mode) { - wide_mode = TRUE; - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - goto restart; - } - atom_str = (unsigned char *)RepAtom(AtomOfTerm(thead))->StrOfAE; - /* check for overflows */ - sz = strlen((char *)atom_str); - if (cptr+sz >= top-1024) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - goto restart; - } - memcpy((void *)cptr, (void *)atom_str, sz); - cptr += sz; - t1 = TailOfTerm(t1); - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return FALSE; - } - } - if (t1 == TermNil) { - Atom at; - - cptr[0] = '\0'; - while ((at = Yap_LookupAtom(cpt0)) == NIL) { - if (!Yap_growheap(FALSE, 0, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - return Yap_unify(ARG2, MkAtomTerm(at)); - } + EXTRA_CBACK_ARG(3,1) = MkIntTerm(0); + return cont_atom_concat3( PASS_REGS1 ); + } + if (at) { + if (Yap_unify(ot, MkAtomTerm(at))) cut_succeed(); + else cut_fail(); + } + /* Error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atom_concat/3" )) { + goto restart_aux; } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_LIST, ARG1, "atom_concat/2"); return FALSE; } static Int -p_atomic_concat( USES_REGS1 ) +p_atom_concat2( USES_REGS1 ) { Term t1; - int wide_mode = FALSE; - char *base; - - restart: - base = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; - while (base+1024 > (char *)AuxSp) { - base = Yap_ExpandPreAllocCodeSpace(0,NULL, TRUE); - if (base + 1024 > (char *)AuxSp) { - /* crash in flames */ - Yap_Error(OUT_OF_AUXSPACE_ERROR, ARG1, "allocating temp space in atomic_concat/2"); - return FALSE; - } - } + Term *tailp; + Int n; + restart_aux: t1 = Deref(ARG1); - /* we need to have a list */ - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return FALSE; - } - if (wide_mode) { - wchar_t *wcptr = (wchar_t *)base, *wcpt0; - wchar_t *wtop = (wchar_t *)AuxSp; - - wcpt0 = wcptr; - while (IsPairTerm(t1)) { - Term thead = HeadOfTerm(t1); - if (IsVarTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return FALSE; - } - if (!IsAtomicTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_ATOMIC, ARG1, "atom_concat/2"); - return FALSE; - } - if (IsAtomTerm(thead)) { - Atom at = AtomOfTerm(thead); - - if (IsWideAtom(at)) { - wchar_t *watom_str = (wchar_t *)RepAtom(AtomOfTerm(thead))->StrOfAE; - UInt sz = wcslen(watom_str); - - if (wcptr+sz >= wtop-1024) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - goto restart; - } - memcpy((void *)wcptr, (void *)watom_str, sz*sizeof(wchar_t)); - wcptr += sz; - } else { - unsigned char *atom_str = (unsigned char *)RepAtom(AtomOfTerm(thead))->StrOfAE; - /* check for overflows */ - UInt sz = strlen((char *)atom_str); - if (wcptr+sz >= wtop-1024) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - goto restart; - } - while ((*wcptr++ = *atom_str++)); - wcptr--; - } - } else if (IsIntegerTerm(thead)) { - UInt sz, i; - char *cptr = (char *)wcptr; - -#if HAVE_SNPRINTF - sz = snprintf(cptr, (wtop-wcptr)-1024,Int_FORMAT, IntegerOfTerm(thead)); -#else - sz = sprintf(cptr,Int_FORMAT, IntegerOfTerm(thead)); -#endif - for (i=sz; i>0; i--) { - wcptr[i-1] = cptr[i-1]; - } - wcptr += sz; - } else if (IsFloatTerm(thead)) { - char *cptr = (char *)wcptr; - UInt i, sz; - -#if HAVE_SNPRINTF - sz = snprintf(cptr,(wtop-wcptr)-1024,"%g", FloatOfTerm(thead)); -#else - sz = sprintf(cptr,"%g", FloatOfTerm(thead)); -#endif - for (i=sz; i>0; i--) { - wcptr[i-1] = cptr[i-1]; - } - wcptr += sz; -#if USE_GMP - } else if (IsBigIntTerm(thead)) { - size_t sz, i; - char *tmp = (char *)wcptr; - - sz = Yap_gmp_to_size(thead, 10); - if (!Yap_gmp_to_string(thead, tmp, (wtop-wcptr)-1024, 10 )) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_AUXSPACE_ERROR, TermNil, LOCAL_ErrorMessage); - return(FALSE); - } - goto restart; - } - for (i=sz; i>0; i--) { - wcptr[i-1] = tmp[i-1]; - } - wcptr += sz; -#endif - } - t1 = TailOfTerm(t1); - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return(FALSE); - } - } - if (t1 == TermNil) { - Atom at; - - wcptr[0] = '\0'; - while ((at = Yap_LookupWideAtom(wcpt0)) == NIL) { - if (!Yap_growheap(FALSE, 0, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - return Yap_unify(ARG2, MkAtomTerm(at)); - } + n = Yap_SkipList(&t1, &tailp); + if (*tailp != TermNil) { + LOCAL_Error_TYPE = TYPE_ERROR_LIST; } else { - char *top = (char *)AuxSp; - char *cpt0 = base; - char *cptr = base; + seq_tv_t *inpv = (seq_tv_t *)malloc(n*sizeof(seq_tv_t)), out; + int i = 0; + Atom at; + + if (!inpv) { + LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; + free(inpv); + goto error; + } - while (IsPairTerm(t1)) { - Term thead = HeadOfTerm(t1); - if (IsVarTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return(FALSE); - } - if (!IsAtomicTerm(thead)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_ATOMIC, ARG1, "atom_concat/2"); - return(FALSE); - } - if (IsAtomTerm(thead)) { - unsigned char *atom_str; - UInt sz; - - if (IsWideAtom(AtomOfTerm(thead))) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - wide_mode = TRUE; - goto restart; - } - atom_str = (unsigned char *)RepAtom(AtomOfTerm(thead))->StrOfAE; - /* check for overflows */ - sz = strlen((char *)atom_str); - if (cptr+sz >= top-1024) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return(FALSE); - } - goto restart; - } - memcpy((void *)cptr, (void *)atom_str, sz); - cptr += sz; - } else if (IsIntegerTerm(thead)) { -#if HAVE_SNPRINTF - snprintf(cptr, (top-cptr)-1024,Int_FORMAT, IntegerOfTerm(thead)); -#else - sprintf(cptr, Int_FORMAT, IntegerOfTerm(thead)); -#endif - while (*cptr && cptr < top-1024) cptr++; - } else if (IsFloatTerm(thead)) { -#if HAVE_SNPRINTF - snprintf(cptr,(top-cptr)-1024,"%g", FloatOfTerm(thead)); -#else - sprintf(cptr,"%g", FloatOfTerm(thead)); -#endif - while (*cptr && cptr < top-1024) cptr++; -#if USE_GMP - } else if (IsBigIntTerm(thead)) { - if (!Yap_gmp_to_string(thead, cptr, (top-cptr)-1024, 10 )) { - size_t sz = Yap_gmp_to_size(thead, 10); - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - if (!Yap_growheap(FALSE, sz+1024, NULL)) { - Yap_Error(OUT_OF_AUXSPACE_ERROR, TermNil, LOCAL_ErrorMessage); - return(FALSE); - } - goto restart; - } - while (*cptr) cptr++; -#endif - } + while (t1 != TermNil) { + inpv[i].type = YAP_STRING_ATOM; + inpv[i].val.t = HeadOfTerm(t1); + i++; t1 = TailOfTerm(t1); - if (IsVarTerm(t1)) { - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(INSTANTIATION_ERROR, ARG1, "atom_concat/2"); - return(FALSE); - } } - if (t1 == TermNil) { - Atom at; - - cptr[0] = '\0'; - while ((at = Yap_LookupAtom(cpt0)) == NIL) { - if (!Yap_growheap(FALSE, 0, NULL)) { - Yap_Error(OUT_OF_HEAP_ERROR, TermNil, LOCAL_ErrorMessage); - return FALSE; - } - } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - return Yap_unify(ARG2, MkAtomTerm(at)); + out.type = YAP_STRING_ATOM; + if (!Yap_Concat_Text(n, inpv, &out PASS_REGS)) { + free(inpv); + goto error; } + free(inpv); + at = out.val.a; + if (at) return Yap_unify(ARG2, MkAtomTerm(at)); } - Yap_ReleasePreAllocCodeSpace((ADDR)cpt0); - Yap_Error(TYPE_ERROR_LIST, ARG1, "atom_concat/2"); - return(FALSE); + error: + /* Error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atom_concat/3" )) { + goto restart_aux; + } + return FALSE; +} + +static Int +p_atomic_concat3( USES_REGS1 ) +{ + Term t1; + Term t2; + restart_aux: + t1 = Deref(ARG1); + t2 = Deref(ARG2); + if (!IsVarTerm(t1) && !IsVarTerm(t2)) { + Atom at = Yap_ConcatAtomics( t1, t2 PASS_REGS ); + if (at) return Yap_unify(ARG2, MkAtomTerm(at)); + } + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atomic_concat/2" )) { + goto restart_aux; + } + return FALSE; +} + + +static Int +p_string_concat3( USES_REGS1 ) +{ + Term t1; + Term t2; + restart_aux: + t1 = Deref(ARG1); + t2 = Deref(ARG2); + if (!IsVarTerm(t1) && !IsVarTerm(t2)) { + Term t = Yap_ConcatStrings( t1, t2 PASS_REGS ); + if (t) return Yap_unify(ARG2, t); + } + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atomic_concat/2" )) { + goto restart_aux; + } + return FALSE; +} + +static Int +p_string_concat2( USES_REGS1 ) +{ + Term t1; + Term *tailp; + Int n; + + restart_aux: + t1 = Deref(ARG1); + n = Yap_SkipList(&t1, &tailp); + if (*tailp == TermNil) { + LOCAL_Error_TYPE = TYPE_ERROR_LIST; + } else { + seq_tv_t *inpv = (seq_tv_t *)malloc(n*sizeof(seq_tv_t)), out; + int i = 0; + Term tf; + + if (!inpv) { + LOCAL_Error_TYPE = OUT_OF_HEAP_ERROR; + free(inpv); + goto error; + } + + while (t1 != TermNil) { + inpv[i].type = YAP_STRING_STRING; + inpv[i].val.t = HeadOfTerm(t1); + i++; + t1 = TailOfTerm(t1); + } + out.type = YAP_STRING_STRING; + if (!Yap_Concat_Text(n, inpv, &out PASS_REGS)) { + free(inpv); + goto error; + } + free(inpv); + tf = out.val.t; + if (tf) return Yap_unify(ARG2, tf); + } + error: + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "string_concat/2" )) { + goto restart_aux; + } + return FALSE; } static Int @@ -706,7 +551,10 @@ p_atom_length( USES_REGS1 ) { Term t1; Term t2 = Deref(ARG2); + ssize_t len; + if (!IsVarTerm(t2)) { + if (!IsIntegerTerm(t2)) { Yap_Error(TYPE_ERROR_INTEGER, t2, "atom_length/2"); return(FALSE); @@ -718,9 +566,9 @@ p_atom_length( USES_REGS1 ) } restart_aux: t1 = Deref(ARG1); - tf = Yap_TextToUTF8(t1 PASS_REGS); - if (tf) - return Yap_unify( ARG2, utf8_strlen(tf) ); + len = Yap_AtomToLength(t1 PASS_REGS); + if (len != (size_t)-1) + return Yap_unify( ARG2, MkIntegerTerm(len) ); /* error handling */ if (LOCAL_Error_TYPE && Yap_HandleError( "atom_length/2" )) { goto restart_aux; @@ -728,6 +576,66 @@ restart_aux: return FALSE; } +static Int +p_atomic_length( USES_REGS1 ) +{ + Term t1; + Term t2 = Deref(ARG2); + ssize_t len; + + if (!IsVarTerm(t2)) { + + if (!IsIntegerTerm(t2)) { + Yap_Error(TYPE_ERROR_INTEGER, t2, "atomic_length/2"); + return(FALSE); + } + if ((len = IntegerOfTerm(t2)) < 0) { + Yap_Error(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, t2, "atomic_length/2"); + return(FALSE); + } + } +restart_aux: + t1 = Deref(ARG1); + len = Yap_AtomicToLength(t1 PASS_REGS); + if (len != (size_t)-1) + return Yap_unify( ARG2, MkIntegerTerm(len) ); + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "atomic_length/2" )) { + goto restart_aux; + } + return FALSE; +} + +static Int +p_string_length( USES_REGS1 ) +{ + Term t1; + Term t2 = Deref(ARG2); + ssize_t len; + + if (!IsVarTerm(t2)) { + + if (!IsIntegerTerm(t2)) { + Yap_Error(TYPE_ERROR_INTEGER, t2, "string_length/2"); + return(FALSE); + } + if ((len = IntegerOfTerm(t2)) < 0) { + Yap_Error(DOMAIN_ERROR_NOT_LESS_THAN_ZERO, t2, "string_length/2"); + return(FALSE); + } + } +restart_aux: + t1 = Deref(ARG1); + len = Yap_StringToLength(t1 PASS_REGS); + if (len != (size_t)-1) + return Yap_unify( ARG2, MkIntegerTerm(len) ); + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "string_length/2" )) { + goto restart_aux; + } + return FALSE; +} + static int is_wide(wchar_t *s) @@ -851,7 +759,33 @@ p_atom_number( USES_REGS1 ) return Yap_unify( ARG1, MkAtomTerm(af) ); } /* error handling */ - if (LOCAL_Error_TYPE && Yap_HandleError( "atom_codes/2" )) { + if (LOCAL_Error_TYPE && Yap_HandleError( "atom_number/2" )) { + t1 = Deref(ARG1); + goto restart_aux; + } + return FALSE; +} + + +static Int +p_string_number( USES_REGS1 ) +{ + Term t1; + restart_aux: + t1 = Deref(ARG1); + if (!IsVarTerm(t1)) { + Term tf = Yap_StringToNumber(t1 PASS_REGS); + if (tf) + return Yap_unify( ARG2, tf ); + } else { + /* ARG1 unbound */ + Term t = Deref(ARG2); + Term tf = Yap_NumberToString(t PASS_REGS); + if (tf) + return Yap_unify( ARG1, tf ); + } + /* error handling */ + if (LOCAL_Error_TYPE && Yap_HandleError( "string_number/2" )) { t1 = Deref(ARG1); goto restart_aux; } @@ -864,6 +798,7 @@ p_atom_number( USES_REGS1 ) #define SUB_ATOM_HAS_AFTER 4 #define SUB_ATOM_HAS_VAL 8 #define SUB_ATOM_HAS_WIDE 16 +#define SUB_ATOM_HAS_UTF8 32 static void * alloc_tmp_stack(size_t sz USES_REGS) { @@ -877,8 +812,8 @@ alloc_tmp_stack(size_t sz USES_REGS) { return pt; } -static Atom -build_new_atom(int mask, wchar_t *wp, char *p, size_t min, size_t len USES_REGS) +static Term +build_new_atomic(int mask, wchar_t *wp, char *p, size_t min, size_t len USES_REGS) { Atom nat; if (mask & SUB_ATOM_HAS_WIDE) { @@ -889,7 +824,9 @@ build_new_atom(int mask, wchar_t *wp, char *p, size_t min, size_t len USES_REGS) wcsncpy(d, src, len); d[len] = '\0'; nat = Yap_LookupMaybeWideAtom(d); - } else { + if (nat) + return MkAtomTerm(nat); + } else if (mask & SUB_ATOM_HAS_UTF8) { char *src = p+min; char *d = alloc_tmp_stack((len+1)*sizeof(char) PASS_REGS); if (!d) return NIL; @@ -897,8 +834,28 @@ build_new_atom(int mask, wchar_t *wp, char *p, size_t min, size_t len USES_REGS) strncpy(d, src, len); d[len] = '\0'; nat = Yap_LookupAtom(d); + if (nat) + return MkAtomTerm(nat); + } else { + char *src = p; + int i, chr; + Term t = init_tstring( PASS_REGS1 ); + for (i = 0; i < min; i++) src = utf8_get_char(src, &chr); + char *cp = src, *buf, *lim = cp+strlen(cp); + + LOCAL_TERM_ERROR( 4*(len+1) ); + buf = buf_from_tstring(H); + while (cp < lim) { + int chr; + cp = utf8_get_char(cp, &chr); + buf = utf8_put_char(buf, chr); + } + *buf++ = '\0'; + + close_tstring( buf PASS_REGS ); + return t; } - return nat; + return 0L; } static Int wcsstrcmp(wchar_t *p, char *p2, size_t len) @@ -941,6 +898,18 @@ check_sub_atom_at(int min, Atom at, Atom nat) } } +static int +check_sub_string_at(int min, Term at, Term nat) +{ + const char *p1, *p2; + int c1; + + p1 = utf8_n(StringOfTerm(at), min); + p2 = StringOfTerm(nat); + while ( (c1 = *p1++) == *p2++ && c1); + return c1 == 0; +} + static int check_sub_atom_bef(int max, Atom at, Atom nat) { @@ -979,16 +948,33 @@ check_sub_atom_bef(int max, Atom at, Atom nat) } } +static int +check_sub_string_bef(int max, Term at, Term nat) +{ + size_t len = utf8_strlen1(StringOfTerm(nat)); + int min = max- len; + const char *p1, *p2; + int c1; + + if ((Int)(min - len) < 0) return FALSE; + + p1 = utf8_n(StringOfTerm(at),min); + p2 = StringOfTerm(nat); + while ( (c1 = *p1++) == *p2++ && c1); + return c1 == 0; +} + static Int -cont_sub_atom( USES_REGS1 ) +cont_sub_atomic( USES_REGS1 ) { Term tat1= Deref(ARG1); - Atom at = AtomOfTerm(tat1); + Atom at = NULL; int mask; size_t min, len, after, sz; wchar_t *wp = NULL; char *p = NULL; - Atom nat; + Term nat; + int sub_atom = TRUE; mask = IntegerOfTerm(EXTRA_CBACK_ARG(5,1)); min = IntegerOfTerm(EXTRA_CBACK_ARG(5,2)); @@ -996,20 +982,25 @@ cont_sub_atom( USES_REGS1 ) after = IntegerOfTerm(EXTRA_CBACK_ARG(5,4)); sz = IntegerOfTerm(EXTRA_CBACK_ARG(5,5)); - if (mask & SUB_ATOM_HAS_WIDE) { + if (mask & SUB_ATOM_HAS_UTF8) { + sub_atom = FALSE; + p = (char *)StringOfTerm(tat1); + } else if (mask & SUB_ATOM_HAS_WIDE) { + at = AtomOfTerm(tat1); wp = RepAtom(at)->WStrOfAE; } else { + at = AtomOfTerm(tat1); p = RepAtom(at)->StrOfAE; } /* we can have one of two cases: A5 bound or unbound */ if (mask & SUB_ATOM_HAS_VAL) { int found = FALSE; - nat = AtomOfTerm(Deref(ARG5)); + nat = Deref(ARG5); if (mask & SUB_ATOM_HAS_WIDE) { wp = RepAtom(at)->WStrOfAE; - if (IsWideAtom(nat)) { + if (IsWideAtom(AtomOfTerm(nat))) { while (!found) { - if (wcsncmp(wp+min, nat->WStrOfAE, len) == 0) { + if (wcsncmp(wp+min, AtomOfTerm(nat)->WStrOfAE, len) == 0) { Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); Yap_unify(ARG4, MkIntegerTerm(after)); @@ -1018,7 +1009,7 @@ cont_sub_atom( USES_REGS1 ) while (min <= sz-len) { after--; min++; - if (wcsncmp(wp+min, nat->WStrOfAE, len) == 0) + if (wcsncmp(wp+min, AtomOfTerm(nat)->WStrOfAE, len) == 0) break; } } else { @@ -1029,7 +1020,7 @@ cont_sub_atom( USES_REGS1 ) } } else { while (!found) { - if (wcsstrcmp(wp+min, nat->StrOfAE, len) == 0) { + if (wcsstrcmp(wp+min, AtomOfTerm(nat)->StrOfAE, len) == 0) { Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); Yap_unify(ARG4, MkIntegerTerm(after)); @@ -1038,7 +1029,7 @@ cont_sub_atom( USES_REGS1 ) while (min <= sz-len) { after--; min++; - if (wcsstrcmp(wp+min, nat->StrOfAE, len) == 0) + if (wcsstrcmp(wp+min, AtomOfTerm(nat)->StrOfAE, len) == 0) break; } } else { @@ -1048,10 +1039,10 @@ cont_sub_atom( USES_REGS1 ) } } } - } else { + } else if (sub_atom) { p = RepAtom(at)->StrOfAE; while (!found) { - if (strncmp(p+min, nat->StrOfAE, len) == 0) { + if (strncmp(p+min, AtomOfTerm(nat)->StrOfAE, len) == 0) { Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); Yap_unify(ARG4, MkIntegerTerm(after)); @@ -1060,7 +1051,7 @@ cont_sub_atom( USES_REGS1 ) while (min <= sz-len) { after--; min++; - if (strncmp(p+min, nat->StrOfAE, len) == 0) + if (strncmp(p+min, AtomOfTerm(nat)->StrOfAE, len) == 0) break; } } else { @@ -1069,6 +1060,29 @@ cont_sub_atom( USES_REGS1 ) min++; } } + } else { + while (!found) { + p = (char *)utf8_n(p, min); + if (utf8_strncmp(p, StringOfTerm(nat), len) == 0) { + Yap_unify(ARG2, MkIntegerTerm(min)); + Yap_unify(ARG3, MkIntegerTerm(len)); + Yap_unify(ARG4, MkIntegerTerm(after)); + found = TRUE; + /* found one, check if there is any left */ + while (min <= sz-len) { + int chr; + p = utf8_get_char(p, &chr); + after--; + min++; + if (utf8_strncmp(p, StringOfTerm(nat), len) == 0) + break; + } + } else { + if (min == sz-len) break; + after--; + min++; + } + } } if (found) { if (min > sz-len) cut_succeed(); @@ -1076,34 +1090,34 @@ cont_sub_atom( USES_REGS1 ) cut_fail(); } } else if (mask & SUB_ATOM_HAS_SIZE) { - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG4, MkIntegerTerm(after)); - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); min++; if (after-- == 0) cut_succeed(); } else if (mask & SUB_ATOM_HAS_MIN) { after = sz-(min+len); - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); Yap_unify(ARG3, MkIntegerTerm(len)); Yap_unify(ARG4, MkIntegerTerm(after)); - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); len++; if (after-- == 0) cut_succeed(); } else if (mask & SUB_ATOM_HAS_AFTER) { len = sz-(min+after); - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); min++; if (len-- == 0) cut_succeed(); } else { - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); Yap_unify(ARG2, MkIntegerTerm(min)); Yap_unify(ARG3, MkIntegerTerm(len)); Yap_unify(ARG4, MkIntegerTerm(after)); - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); len++; if (after-- == 0) { if (min == sz) cut_succeed(); @@ -1121,7 +1135,7 @@ cont_sub_atom( USES_REGS1 ) } static Int -init_sub_atom( USES_REGS1 ) +init_sub_atomic( int sub_atom USES_REGS ) { Term tat1, tbef, tsize, tafter, tout; int mask = 0; @@ -1129,7 +1143,8 @@ init_sub_atom( USES_REGS1 ) wchar_t *wp = NULL; char *p = NULL; int bnds = 0; - Atom nat = NIL, at; + Term nat = 0L; + Atom at = NULL; tat1 = Deref(ARG1); tbef = Deref(ARG5); @@ -1137,9 +1152,12 @@ init_sub_atom( USES_REGS1 ) if (IsVarTerm(tat1)) { Yap_Error(INSTANTIATION_ERROR, tat1, "sub_atom/5: first argument"); return FALSE; - } else if (!IsAtomTerm(tat1)) { + } else if (sub_atom && !IsAtomTerm(tat1)) { Yap_Error(TYPE_ERROR_ATOM, tat1, "sub_atom/5"); return FALSE; + } else if (!sub_atom && !IsStringTerm(tat1)) { + Yap_Error(TYPE_ERROR_STRING, tat1, "sub_string/5"); + return FALSE; } if (IsVarTerm(tbef = Deref(ARG2))) { min = 0; @@ -1172,29 +1190,46 @@ init_sub_atom( USES_REGS1 ) bnds++; } if (!IsVarTerm(tout = Deref(ARG5))) { - if (!IsAtomTerm(tout)) { - Yap_Error(TYPE_ERROR_ATOM, tout, "sub_atom/5"); - return FALSE; + if (sub_atom) { + if (!IsAtomTerm(tout)) { + Yap_Error(TYPE_ERROR_ATOM, tout, "sub_atom/5"); + return FALSE; + } else { + Atom oat; + mask |= SUB_ATOM_HAS_VAL|SUB_ATOM_HAS_SIZE; + oat = AtomOfTerm(tout); + if (IsWideAtom(oat)) + len = wcslen(RepAtom(oat)->WStrOfAE); + else + len = strlen(RepAtom(oat)->StrOfAE); + } } else { - mask |= SUB_ATOM_HAS_VAL|SUB_ATOM_HAS_SIZE; - nat = AtomOfTerm(tout); - if (IsWideAtom(nat)) - len = wcslen(RepAtom(nat)->WStrOfAE); - else - len = strlen(RepAtom(nat)->StrOfAE); - if (!Yap_unify(ARG3, MkIntegerTerm(len))) - cut_fail(); - bnds+=2; + if (!IsStringTerm(tout)) { + Yap_Error(TYPE_ERROR_STRING, tout, "sub_string/5"); + return FALSE; + } else { + mask |= SUB_ATOM_HAS_VAL|SUB_ATOM_HAS_SIZE; + len = utf8_strlen1( StringOfTerm(tout) ); + } } + if (!Yap_unify(ARG3, MkIntegerTerm(len))) + cut_fail(); + bnds+=2; } - at = AtomOfTerm(tat1); - if (IsWideAtom(at)) { - mask |= SUB_ATOM_HAS_WIDE; - wp = RepAtom(at)->WStrOfAE; - sz = wcslen(wp); + if (sub_atom) { + at = AtomOfTerm(tat1); + if (IsWideAtom(at)) { + mask |= SUB_ATOM_HAS_WIDE; + wp = RepAtom(at)->WStrOfAE; + sz = wcslen(wp); + } else { + p = RepAtom(at)->StrOfAE; + sz = strlen(p); + } } else { - p = RepAtom(at)->StrOfAE; - sz = strlen(p); + mask |= SUB_ATOM_HAS_UTF8; + p = (char *)StringOfTerm(tat1); + sz = utf8_strlen1(p); } /* the problem is deterministic if we have two cases */ if (bnds > 1) { @@ -1204,42 +1239,51 @@ init_sub_atom( USES_REGS1 ) (SUB_ATOM_HAS_MIN|SUB_ATOM_HAS_SIZE)) { if (min+len > sz) cut_fail(); if ((Int)(after = (sz-(min+len))) < 0) cut_fail(); - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); if (!nat) cut_fail(); out = Yap_unify(ARG4,MkIntegerTerm(after)) && - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); } else if ((mask & (SUB_ATOM_HAS_MIN|SUB_ATOM_HAS_AFTER)) == (SUB_ATOM_HAS_MIN|SUB_ATOM_HAS_AFTER)) { if (sz < min+after) cut_fail(); len = sz-(min+after); - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); if (!nat) cut_fail(); out = Yap_unify(ARG3,MkIntegerTerm(len)) && - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); } else if ((mask & (SUB_ATOM_HAS_SIZE|SUB_ATOM_HAS_AFTER)) == (SUB_ATOM_HAS_SIZE|SUB_ATOM_HAS_AFTER) ) { if (len+after > sz) cut_fail(); min = sz-(len+after); - nat = build_new_atom(mask, wp, p, min, len PASS_REGS); + nat = build_new_atomic(mask, wp, p, min, len PASS_REGS); if (!nat) cut_fail(); out = Yap_unify(ARG2,MkIntegerTerm(min)) && - Yap_unify(ARG5, MkAtomTerm(nat)); + Yap_unify(ARG5, nat); } else if ((mask & (SUB_ATOM_HAS_MIN|SUB_ATOM_HAS_VAL)) == (SUB_ATOM_HAS_MIN|SUB_ATOM_HAS_VAL)) { - out = check_sub_atom_at(min, at, nat); + if (sub_atom) + out = check_sub_atom_at(min, at, AtomOfTerm(nat)); + else + out = check_sub_string_at(min, tat1, tout); } else if ((mask & (SUB_ATOM_HAS_AFTER|SUB_ATOM_HAS_VAL)) == (SUB_ATOM_HAS_AFTER|SUB_ATOM_HAS_VAL)) { - out = check_sub_atom_bef(sz - after, at, nat); + if (sub_atom) + out = check_sub_atom_bef(sz - after, at, AtomOfTerm(nat)); + else + out = check_sub_string_bef(sz - after, tat1, tout); } else if ((mask & (SUB_ATOM_HAS_SIZE|SUB_ATOM_HAS_VAL)) == (SUB_ATOM_HAS_SIZE|SUB_ATOM_HAS_VAL)) { - if (IsWideAtom(nat)) { + if (!sub_atom) { + out = (utf8_strlen1(StringOfTerm(tout)) == len); + if (!out) cut_fail(); + } else if (IsWideAtom(AtomOfTerm(nat))) { if (!(mask & SUB_ATOM_HAS_VAL)) { cut_fail(); } /* just check length, they may still be several occurrences :( */ - out = (wcslen(RepAtom(nat)->WStrOfAE) == len); + out = (wcslen(RepAtom(AtomOfTerm(nat))->WStrOfAE) == len); } else { - out = (strlen(RepAtom(nat)->StrOfAE) == len); + out = (strlen(RepAtom(AtomOfTerm(nat))->StrOfAE) == len); if (!out) cut_fail(); } if (len == sz) { @@ -1269,9 +1313,20 @@ init_sub_atom( USES_REGS1 ) EXTRA_CBACK_ARG(5,3) = MkIntegerTerm(len); EXTRA_CBACK_ARG(5,4) = MkIntegerTerm(after); EXTRA_CBACK_ARG(5,5) = MkIntegerTerm(sz); - return cont_sub_atom( PASS_REGS1 ); + return cont_sub_atomic( PASS_REGS1 ); } +static Int +init_sub_atom( USES_REGS1 ) +{ + return init_sub_atomic( TRUE PASS_REGS ); +} + +static Int +init_sub_string( USES_REGS1 ) +{ + return init_sub_atomic( FALSE PASS_REGS ); +} static Int cont_current_atom( USES_REGS1 ) @@ -1442,7 +1497,9 @@ Yap_InitBackAtoms(void) Yap_InitCPredBack("$current_wide_atom", 1, 2, init_current_wide_atom, cont_current_wide_atom, SafePredFlag|SyncPredFlag); - Yap_InitCPredBack("sub_atom", 5, 5, init_sub_atom, cont_sub_atom, 0); + Yap_InitCPredBack("atom_concat", 3, 1, init_atom_concat3, cont_atom_concat3, 0); + Yap_InitCPredBack("sub_atom", 5, 5, init_sub_atom, cont_sub_atomic, 0); + Yap_InitCPredBack("sub_string", 5, 5, init_sub_string, cont_sub_atomic, 0); } @@ -1451,17 +1508,23 @@ Yap_InitAtomPreds(void) { Yap_InitCPred("name", 2, p_name, 0); Yap_InitCPred("string_to_atom", 2, p_string_to_atom, 0); + Yap_InitCPred("atom_string", 2, p_atom_string, 0); + Yap_InitCPred("string_to_atomic", 2, p_string_to_atomic, 0); Yap_InitCPred("string_to_list", 2, p_string_to_list, 0); Yap_InitCPred("char_code", 2, p_char_code, SafePredFlag); Yap_InitCPred("atom_chars", 2, p_atom_chars, 0); Yap_InitCPred("atom_codes", 2, p_atom_codes, 0); Yap_InitCPred("atom_length", 2, p_atom_length, SafePredFlag); - Yap_InitCPred("string_length", 2, p_atom_length, SafePredFlag); + Yap_InitCPred("atomic_length", 2, p_atomic_length, SafePredFlag); + Yap_InitCPred("string_length", 2, p_string_length, SafePredFlag); Yap_InitCPred("$atom_split", 4, p_atom_split, SafePredFlag); Yap_InitCPred("number_chars", 2, p_number_chars, 0); Yap_InitCPred("number_atom", 2, p_number_atom, 0); Yap_InitCPred("number_codes", 2, p_number_codes, 0); Yap_InitCPred("atom_number", 2, p_atom_number, 0); - Yap_InitCPred("atom_concat", 2, p_atom_concat, 0); - Yap_InitCPred("atomic_concat", 2, p_atomic_concat, 0); + Yap_InitCPred("string_number", 2, p_string_number, 0); + Yap_InitCPred("$atom_concat", 2, p_atom_concat2, 0); + Yap_InitCPred("atomic_concat", 3, p_atomic_concat3, 0); + Yap_InitCPred("$string_concat", 3, p_string_concat3, 0); + Yap_InitCPred("$string_concat", 2, p_string_concat2, 0); } diff --git a/C/compiler.c b/C/compiler.c index c73613de0..8e32eed54 100644 --- a/C/compiler.c +++ b/C/compiler.c @@ -714,7 +714,7 @@ c_arg(Int argno, Term t, unsigned int arity, unsigned int level, compiler_struct /* The argument to pass to the structure is now the label for where we are storing the blob */ if (level == 0) - Yap_emit((cglobs->onhead ? get_string_op : put_string_op), t, argno, &cglobs->cint); + Yap_emit((cglobs->onhead ? get_string_op : put_string_op), l1, argno, &cglobs->cint); else Yap_emit((cglobs->onhead ? (argno == (Int)arity ? unify_last_string_op : unify_string_op) : diff --git a/C/strings.c b/C/strings.c index fc0c45535..1386413be 100644 --- a/C/strings.c +++ b/C/strings.c @@ -129,29 +129,6 @@ get_wide_from_list( Term t, seq_tv_t *inp, wchar_t *s, int atoms USES_REGS) } -static inline Term -init_tstring( USES_REGS1 ) { - Term t = AbsAppl(H); - - H[0] = (CELL)FunctorString; - return t; -} - -static inline char * -buf_from_tstring( CELL *p ) { - char *out = (char *)(p + 2); - return out; -} - -static inline void -close_tstring( char *p USES_REGS ) { - CELL *szp = H+1; - H = (CELL *)ALIGN_YAPTYPE( p ,CELL); - *szp = (H - szp)-1; - *H++ = EndSpecials; -} - - static Int SkipListCodes(Term *l, Term **tailp, Int *atoms, int *wide) { @@ -208,13 +185,13 @@ SkipListCodes(Term *l, Term **tailp, Int *atoms, int *wide) static void * -Yap_ListOfAtomsToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) +Yap_ListOfAtomsToBuffer(void *buf, Term t, seq_tv_t *inp, int *widep USES_REGS) { Int atoms = 0; CELL *r = NULL; Int n; - widep = FALSE; + *widep = FALSE; n = SkipListCodes(&t, &r, &atoms, widep); if (n < 0) { LOCAL_Error_TYPE = -n; @@ -232,12 +209,16 @@ Yap_ListOfAtomsToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) if (n && !atoms) return NULL; if (*widep) { - wchar_t *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; + wchar_t *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; AUX_ERROR( t, 2*(n+1), s, wchar_t); s = get_wide_from_list( t, inp, s, atoms PASS_REGS); return s; } else { - char *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; + char *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; AUX_ERROR( t, 2*(n+1), s, char); s = get_string_from_list( t, inp, s, atoms PASS_REGS); return s; @@ -245,13 +226,13 @@ Yap_ListOfAtomsToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) } static void * -Yap_ListOfCodesToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) +Yap_ListOfCodesToBuffer(void *buf, Term t, seq_tv_t *inp, int *widep USES_REGS) { Int atoms = 0; CELL *r = NULL; Int n; - widep = FALSE; + *widep = FALSE; n = SkipListCodes(&t, &r, &atoms, widep); if (n < 0) { LOCAL_Error_TYPE = -n; @@ -269,12 +250,16 @@ Yap_ListOfCodesToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) if (n && !atoms) return NULL; if (*widep) { - wchar_t *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; + wchar_t *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; AUX_ERROR( t, 2*(n+1), s, wchar_t); s = get_wide_from_list( t, inp, s, atoms PASS_REGS); return s; } else { - char *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; + char *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; AUX_ERROR( t, 2*(n+1), s, char); s = get_string_from_list( t, inp, s, atoms PASS_REGS); return s; @@ -282,13 +267,13 @@ Yap_ListOfCodesToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) } static void * -Yap_ListToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) +Yap_ListToBuffer(void *buf, Term t, seq_tv_t *inp, int *widep USES_REGS) { Int atoms = 0; CELL *r = NULL; Int n; - widep = FALSE; + *widep = FALSE; n = SkipListCodes(&t, &r, &atoms, widep); if (n < 0) { LOCAL_Error_TYPE = -n; @@ -304,12 +289,16 @@ Yap_ListToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) return NULL; } if (*widep) { - wchar_t *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; + wchar_t *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->WStrOfAE; AUX_ERROR( t, 2*(n+1), s, wchar_t); s = get_wide_from_list( t, inp, s, atoms PASS_REGS); return s; } else { - char *s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; + char *s; + if (buf) s = buf; + else s = ((AtomEntry *)Yap_PreAllocCodeSpace())->StrOfAE; AUX_ERROR( t, 2*(n+1), s, char); s = get_string_from_list( t, inp, s, atoms PASS_REGS); return s; @@ -317,7 +306,7 @@ Yap_ListToBuffer(Term t, seq_tv_t *inp, int *widep USES_REGS) } static void * -read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) +read_Text( void *buf, seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) { char *s; wchar_t *ws; @@ -325,7 +314,13 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) /* we know what the term is */ switch (inp->type & YAP_TYPE_MASK) { case YAP_STRING_STRING: - { const char *s = StringOfTerm( inp->val.t ); + { const char *s; + if (!IsStringTerm(inp->val.t)) { + LOCAL_Error_TYPE = TYPE_ERROR_STRING; + LOCAL_Error_Term = inp->val.t; + return 0L; + } + s = StringOfTerm( inp->val.t ); if ( s == NULL ) return 0L; // this is a term, extract the UTF8 representation @@ -338,7 +333,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) *minimal = TRUE; { int wide = FALSE; - s = Yap_ListOfCodesToBuffer(inp->val.t, inp, &wide PASS_REGS); + s = Yap_ListOfCodesToBuffer( buf, inp->val.t, inp, &wide PASS_REGS); if (!s) return NULL; *enc = ( wide ? YAP_WCHAR : YAP_CHAR ); } @@ -348,7 +343,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) *minimal = TRUE; { int wide = FALSE; - s = Yap_ListOfAtomsToBuffer(inp->val.t, inp, &wide PASS_REGS); + s = Yap_ListOfAtomsToBuffer( buf, inp->val.t, inp, &wide PASS_REGS); if (!s) return NULL; if (wide) { *enc = YAP_WCHAR; } else { *enc = YAP_CHAR; } @@ -359,7 +354,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) *minimal = TRUE; { int wide = FALSE; - s = Yap_ListToBuffer(inp->val.t, inp, &wide PASS_REGS); + s = Yap_ListToBuffer( buf, inp->val.t, inp, &wide PASS_REGS); if (!s) return NULL; *enc = ( wide ? YAP_WCHAR : YAP_CHAR ); } @@ -367,36 +362,44 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) case YAP_STRING_ATOM: // this is a term, extract to a buffer, and representation is wide *minimal = TRUE; - { - if (IsWideAtom(inp->val.a)) { - ws = inp->val.a->WStrOfAE; + if (!IsAtomTerm(inp->val.t)) { + LOCAL_Error_TYPE = TYPE_ERROR_ATOM; + LOCAL_Error_Term = inp->val.t; + return NULL; + } else { + Atom at = AtomOfTerm(inp->val.t); + if (IsWideAtom(at)) { + ws = at->WStrOfAE; *enc = YAP_WCHAR; return ws; } else { - s = inp->val.a->StrOfAE; + s = at->StrOfAE; *enc = YAP_CHAR; return s; } } break; - case YAP_STRING_INT: - s = Yap_PreAllocCodeSpace(); + case YAP_STRING_INT: + if (buf) s = buf; + else s = Yap_PreAllocCodeSpace(); AUX_ERROR( MkIntTerm(inp->val.i), LOCAL_MAX_SIZE, s, char); if (snprintf(s, LOCAL_MAX_SIZE-1, Int_FORMAT, inp->val.i) < 0) { AUX_ERROR( MkIntTerm(inp->val.i), 2*LOCAL_MAX_SIZE, s, char); } *enc = YAP_CHAR; return s; - case YAP_STRING_FLOAT: - s = Yap_PreAllocCodeSpace(); + case YAP_STRING_FLOAT: + if (buf) s = buf; + else s = Yap_PreAllocCodeSpace(); AUX_ERROR( MkFloatTerm(inp->val.f), LOCAL_MAX_SIZE, s, char); if ( !Yap_FormatFloat( inp->val.f, s, LOCAL_MAX_SIZE-1 ) ) { AUX_ERROR( MkFloatTerm(inp->val.f), 2*LOCAL_MAX_SIZE, s, char); } *enc = YAP_CHAR; return s; - case YAP_STRING_BIG: - s = Yap_PreAllocCodeSpace(); + case YAP_STRING_BIG: + if (buf) s = buf; + else s = Yap_PreAllocCodeSpace(); if ( !Yap_mpz_to_string( inp->val.b, s, LOCAL_MAX_SIZE-1 , 10 ) ) { AUX_ERROR( MkIntTerm(0), LOCAL_MAX_SIZE, s, char); } @@ -411,7 +414,8 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) case YAP_STRING_LITERAL: { Int CurSlot = Yap_StartSlots( PASS_REGS1 ); - s = Yap_PreAllocCodeSpace(); + if (buf) s = buf; + else s = Yap_PreAllocCodeSpace(); size_t sz = LOCAL_MAX_SIZE-1; IOSTREAM *fd; AUX_ERROR( inp->val.t, LOCAL_MAX_SIZE, s, char); @@ -436,7 +440,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) if (IsStringTerm(t)) { if (inp->type & (YAP_STRING_STRING)) { inp->type &= (YAP_STRING_STRING); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_STRING; LOCAL_Error_Term = t; @@ -444,7 +448,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) } else if (IsPairTerm(t)) { if (inp->type & (YAP_STRING_CODES|YAP_STRING_ATOMS)) { inp->type &= (YAP_STRING_CODES|YAP_STRING_ATOMS); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_LIST; LOCAL_Error_Term = t; @@ -453,7 +457,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) if (inp->type & (YAP_STRING_ATOM)) { inp->type &= (YAP_STRING_ATOM); inp->val.a = AtomOfTerm(t); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_ATOM; LOCAL_Error_Term = t; @@ -462,7 +466,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) if (inp->type & (YAP_STRING_INT)) { inp->type &= (YAP_STRING_INT); inp->val.i = IntegerOfTerm(t); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_INTEGER; LOCAL_Error_Term = t; @@ -471,7 +475,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) if (inp->type & (YAP_STRING_FLOAT)) { inp->type &= (YAP_STRING_FLOAT); inp->val.f = FloatOfTerm(t); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_FLOAT; LOCAL_Error_Term = t; @@ -480,7 +484,7 @@ read_Text( seq_tv_t *inp, encoding_t *enc, int *minimal USES_REGS) if (inp->type & (YAP_STRING_BIG)) { inp->type &= (YAP_STRING_BIG); inp->val.b = Yap_BigIntOfTerm(t); - return read_Text( inp, enc, minimal PASS_REGS); + return read_Text( buf, inp, enc, minimal PASS_REGS); } else { LOCAL_Error_TYPE = TYPE_ERROR_BIGNUM; LOCAL_Error_Term = t; @@ -505,7 +509,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) Term t = init_tstring( PASS_REGS1 ); char *cp = s, *buf; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); buf = buf_from_tstring(H); while (cp < lim) { int chr; @@ -527,7 +531,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) Term t = init_tstring( PASS_REGS1 ); char *cp = s, *buf; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); buf = buf_from_tstring(H); while (cp < lim) { int chr; @@ -536,9 +540,9 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) } if (max >= min) *buf++ = '\0'; else while (max < min) { - max++; - buf = utf8_put_char(buf, '\0'); - } + max++; + buf = utf8_put_char(buf, '\0'); + } close_tstring( buf PASS_REGS ); out->val.t = t; } @@ -549,7 +553,7 @@ write_strings( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) wchar_t *wp = s; char *buf; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); buf = buf_from_tstring(H); while (wp < lim) { int chr; @@ -584,7 +588,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) char *cp = s; wchar_t w[2]; w[1] = '\0'; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = utf8_get_char(cp, &chr); @@ -603,7 +607,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) char w[2]; w[1] = '\0'; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = get_char(cp, &chr); @@ -622,7 +626,7 @@ write_atoms( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) wchar_t w[2]; w[1] = '\0'; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = get_wchar(cp, &chr); @@ -666,7 +670,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) case YAP_UTF8: { char *s = s0, *lim = s + strnlen(s, max); char *cp = s; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = utf8_get_char(cp, &chr); @@ -682,7 +686,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) { char *s = s0, *lim = s + strnlen(s, max); char *cp = s; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = get_char(cp, &chr); @@ -698,7 +702,7 @@ write_codes( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) { wchar_t *s = s0, *lim = s + wcsnlen(s, max); wchar_t *cp = s; - LOCAL_ERROR( lim-s ); + LOCAL_TERM_ERROR( 2*(lim-s) ); while (cp < lim) { int chr; cp = get_wchar(cp, &chr); @@ -779,6 +783,38 @@ write_atom( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) return at; } } + return NULL; +} + + +static ssize_t +write_length( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) +{ + size_t max = -1; + + if (out->type & (YAP_STRING_NCHARS|YAP_STRING_TRUNC)) { + if (out->type & YAP_STRING_NCHARS) return out->sz; + if (out->type & YAP_STRING_TRUNC) max = out->max; + } + + switch (enc) { + case YAP_UTF8: + { + const char *s = s0; + return utf8_strlen(s, strlen(s)); + } + case YAP_CHAR: + { + const char *s = s0; + return strnlen(s, max); + } + case YAP_WCHAR: + { + const wchar_t *s = s0; + return wcsnlen(s, max); + } + } + return (size_t)-1; } static Term @@ -788,7 +824,6 @@ write_number( void *s0, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) IOSTREAM *st; char *s = s0; Term t = 0L; - fprintf(stderr,"s=%s\n",s); if ( (st=Sopenmem( &s, NULL, "r")) != NULL ) { if (enc == YAP_UTF8) @@ -870,6 +905,10 @@ write_Text( void *inp, seq_tv_t *out, encoding_t enc, int minimal USES_REGS) out->val.t = write_codes( inp, out, enc, minimal PASS_REGS); return out->val.t != 0; + case YAP_STRING_LENGTH: + out->val.l = + write_length( inp, out, enc, minimal PASS_REGS); + return out->val.l != (ssize_t)(-1); case YAP_STRING_ATOM: out->val.a = write_atom( inp, out, enc, minimal PASS_REGS); @@ -917,9 +956,349 @@ Yap_CVT_Text( seq_tv_t *inp, seq_tv_t *out USES_REGS) int minimal = FALSE; char *buf; - buf = read_Text( inp, &enc, &minimal PASS_REGS ); + buf = read_Text( NULL, inp, &enc, &minimal PASS_REGS ); if (!buf) return 0L; return write_Text( buf, out, enc, minimal PASS_REGS ); } +static void * +compute_end( void *s0, encoding_t enc ) +{ + switch (enc) { + case YAP_CHAR: + case YAP_UTF8: + { + char *s = (char *)s0; + return s+(1+strlen(s)); + } + case YAP_WCHAR: + { + wchar_t *s = (wchar_t *)s0; + return s + (1+wcslen(s)); + } + } + return NULL; +} + +static void * +advance_Text( void *s, int l, encoding_t enc ) +{ + switch (enc) { + case YAP_CHAR: + return ((char *)s)+l; + case YAP_UTF8: + return (char *)utf8_n((const char *)s,l); + case YAP_WCHAR: + return ((wchar_t *)s)+l; + } + return s; +} + +static int +cmp_Text( void *s1, void *s2, encoding_t enc1, encoding_t enc2, int l ) +{ + int i; + switch (enc1) { + case YAP_CHAR: + { + char *w1 = (char *)s1; + switch (enc2) { + case YAP_CHAR: + return strncmp(s1, s2, l); + case YAP_UTF8: + { + int chr1, chr2; + char *w2 = s2; + for (i = 0; i < l; i++) { chr1 = *w1++; w2 = utf8_get_char(w2, &chr2); if (chr1-chr2) return chr1-chr2; } + } + return 0; + case YAP_WCHAR: + { + int chr1, chr2; + wchar_t *w2 = s2; + for (i = 0; i < l; i++) { chr1 = *w1++; chr2 = *w2++; if (chr1-chr2) return chr1-chr2; } + } + return 0; + } + } + case YAP_UTF8: + { + char *w1 = (char *)s1; + switch (enc2) { + case YAP_CHAR: + { + int chr1, chr2; + char *w2 = s2; + for (i = 0; i < l; i++) { chr2 = *w2++; w1 = utf8_get_char(w1, &chr1); if (chr1-chr2) return chr1-chr2; } + } + return 0; + case YAP_UTF8: + { + int chr1, chr2; + char *w2 = s2; + for (i = 0; i < l; i++) { w2 = utf8_get_char(w2, &chr2); w1 = utf8_get_char(w1, &chr1); if (chr1-chr2) return chr1-chr2; } + } + return 0; + case YAP_WCHAR: + { + int chr1, chr2; + wchar_t *w2 = s2; + for (i = 0; i < l; i++) { chr2 = *w2++; w1 = utf8_get_char(w1, &chr1); if (chr1-chr2) return chr1-chr2; } + } + return 0; + } + } + case YAP_WCHAR: + { + wchar_t *w1 = (wchar_t *)s1; + switch (enc2) { + case YAP_CHAR: + { + int chr1, chr2; + char *w2 = s2; + for (i = 0; i < l; i++) { chr1 = *w1++; chr2 = *w2++; if (chr1-chr2) return chr1-chr2; } + } + return 0; + case YAP_UTF8: + { + int chr1, chr2; + char *w2 = s2; + for (i = 0; i < l; i++) { chr1 = *w1++; w2 = utf8_get_char(w2, &chr2); if (chr1-chr2) return chr1-chr2; } + } + return 0; + case YAP_WCHAR: + return wcsncmp(s1, s2, l); + } + } + } + return 0; +} + +static void * +concat( int n, seq_tv_t *out, void *sv[], encoding_t encv[] USES_REGS ) +{ + if (out->type == YAP_STRING_STRING) { + /* we assume we concatenate strings only, or ASCII stuff like numbers */ + Term t = init_tstring( PASS_REGS1 ); + char *buf = buf_from_tstring(H); + int i; + for (i = 0; i < n; i++) { + if (encv[i] == YAP_WCHAR) { + wchar_t *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) ) buf = utf8_put_char(buf, chr); + } else if (encv[i] == YAP_CHAR) { + char *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) ) buf = utf8_put_char(buf, chr); + } else { + char *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) ) *buf++ = chr; + } + } + *buf ++ = '\0'; + close_tstring( buf PASS_REGS ); + out->val.t = t; + return H; + } else { + encoding_t enc = YAP_CHAR; + size_t sz = 0; + + int i; + for (i = 0; i < n; i++) { + if (encv[i] != YAP_CHAR) { + enc = YAP_WCHAR; + } + sz += write_length(sv[i], out, encv[i], FALSE PASS_REGS); + } + if (enc == YAP_WCHAR) { + /* wide atom */ + wchar_t *buf = (wchar_t *)H; + Atom at; + Term t = ARG1; + LOCAL_ERROR( sz+3 ); + for (i = 0; i < n ; i ++) { + if (encv[i] == YAP_WCHAR) { + wchar_t *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) != '\0' ) *buf++ = chr; + } else if (encv[i] == YAP_CHAR) { + char *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) != '\0' ) *buf++ = chr; + } else { + char *ptr = sv[i]; + int chr; + while ( (ptr = utf8_get_char( ptr, &chr )) != NULL ) { if (chr == '\0') break; else *buf++ = chr; } + } + } + *buf++ = '\0'; + at = out->val.a = Yap_LookupWideAtom((wchar_t *)H); + return at; + } else { + /* atom */ + char *buf = (char *)H; + Atom at; + Term t = ARG1; + + LOCAL_TERM_ERROR( sz/sizeof(CELL)+3 ); + for (i = 0; i < n ; i ++) { + char *ptr = sv[i]; + int chr; + while ( (chr = *ptr++) != '\0' ) *buf++ = chr; + } + *buf++ = '\0'; + at = out->val.a = Yap_LookupAtom((char *)H); + return at; + } + } + return NULL; +} + +static void * +slice( int min, int max, void *buf, seq_tv_t *out, encoding_t enc USES_REGS ) +{ + if (out->type == YAP_STRING_STRING) { + /* we assume we concatenate strings only, or ASCII stuff like numbers */ + Term t = init_tstring( PASS_REGS1 ); + char *nbuf = buf_from_tstring(H); + if (enc == YAP_WCHAR) { + wchar_t *ptr = (wchar_t *)buf + min; + int chr; + while ( min++ < max ) { chr = *ptr++; nbuf = utf8_put_char(nbuf, chr); } + } else if (enc == YAP_CHAR) { + char *ptr = (char *)buf + min; + int chr; + while ( min++ < max ) { chr = *ptr++; nbuf = utf8_put_char(nbuf, chr); } + } else { + const char *ptr = utf8_n ( (const char *)buf, min ); + int chr; + while ( min++ < max ) { ptr = utf8_get_char(ptr, & chr); nbuf = utf8_put_char(nbuf, chr); } + } + *nbuf ++ = '\0'; + close_tstring( buf PASS_REGS ); + out->val.t = t; + return H; + } else { + Atom at; + /* atom */ + if (enc == YAP_WCHAR) { + /* wide atom */ + wchar_t *nbuf = (wchar_t *)H; + Term t = ARG1; + wchar_t *ptr = (wchar_t *)buf + min; + LOCAL_ERROR( (max-min)*sizeof(wchar_t) ); + memcpy( nbuf, ptr, (max - min)*sizeof(wchar_t)); + nbuf[max-min] = '\0'; + at = Yap_LookupMaybeWideAtom( nbuf ); + } else if (enc == YAP_CHAR) { + /* atom */ + char *nbuf = (char *)H; + Term t = ARG1; + char *ptr = (char *)buf + min; + LOCAL_ERROR( max-min ); + memcpy( nbuf, ptr, (max - min)); + nbuf[max-min] = '\0'; + at = Yap_LookupAtom( nbuf ); + } else { + /* atom */ + wchar_t *nbuf = (wchar_t *)H; + Term t = ARG1; + const char *ptr = utf8_n ( (const char *)buf, min ); + int chr; + + LOCAL_ERROR( max-min ); + while ( min++ < max ) { ptr = utf8_get_char(ptr, & chr); *nbuf++ = chr; } + nbuf[0] = '\0'; + at = Yap_LookupMaybeWideAtom( (wchar_t*)H ); + } + out->val.a = at; + return at; + } + return NULL; +} + + +// +// Out must be an atom or a string +void * +Yap_Concat_Text( int n, seq_tv_t inp[], seq_tv_t *out USES_REGS) +{ + encoding_t * encv; + void **bufv; + int minimal = FALSE; + void *buf; + int i; + Term t = ARG1; + bufv = (void **)malloc(n*sizeof(void *)); + HEAP_TERM_ERROR(bufv, void *); + encv = (encoding_t *)malloc(n*sizeof(encoding_t)); + HEAP_ERROR(encv, encoding_t); + buf = NULL; + for (i = 0 ; i < n ; i++) { + void *nbuf = read_Text( buf, inp+i, encv+i, &minimal PASS_REGS ); + if (!nbuf) + return 0L; + bufv[i] = nbuf; + if ((char *)nbuf >= AuxBase && (char *)nbuf < AuxTop) { + buf = compute_end( nbuf, encv[i] ); + } + } + buf = concat(n, out, bufv, encv PASS_REGS); + return buf; +} + +// +// out must be an atom or a string +void * +Yap_Splice_Text( int n, size_t cuts[], seq_tv_t *inp, seq_tv_t outv[] USES_REGS) +{ + encoding_t enc; + int minimal = FALSE; + void *buf; + size_t l; + int i, min; + + buf = read_Text( NULL, inp, &enc, &minimal PASS_REGS ); + l = write_length( buf, inp, enc, minimal PASS_REGS); + if (!buf) + return NULL; + + if (!cuts) { + if (n == 2) { + size_t l0, l1; + encoding_t enc0, enc1; + int minimal0, minimal1; + void *buf0, *buf1; + if (outv[0].val.t) { + buf0 = read_Text( buf, outv, &enc0, &minimal0 PASS_REGS ); + l0 = write_length( buf0, outv, enc, minimal0 PASS_REGS); + if (cmp_Text( buf, buf0, l0, enc, enc0) == 0) + return NULL; + + l1 = l-l0; + slice(l0, l, buf, outv+1, enc PASS_REGS); + return buf1; + } else /* if (outv[1].val.t) */ { + buf1 = read_Text( buf, outv, &enc1, &minimal1 PASS_REGS ); + l1 = write_length( buf1, outv, enc1, minimal1 PASS_REGS); + l0 = l-l1; + if (cmp_Text( advance_Text(buf, l0, enc), buf1, l1, enc, enc1) == 0) + return NULL; + slice(0, l0, buf, outv, enc PASS_REGS); + return buf0; + } + } + } + for (i = 0; i < n-1; i++) { + if (i == 0) min = 0; + else min = cuts[i-1]; + slice(min, cuts[i], buf, outv+i, enc PASS_REGS); + if (!(outv[i].val.a)) + return NULL; + } + return (void *)outv;; +} + diff --git a/C/unify.c b/C/unify.c index f95c9c9ac..1619cb7a5 100644 --- a/C/unify.c +++ b/C/unify.c @@ -377,7 +377,7 @@ oc_unify_nvar_nvar: case (CELL)FunctorDouble: return(FloatOfTerm(AbsAppl(pt0)) == FloatOfTerm(AbsAppl(pt1))); case (CELL)FunctorString: - return(strcmp( (const char *)(pt0+2), (const char )(pt1+2)) == 0); + return(strcmp( (const char *)(pt0+2), (const char *)(pt1+2)) == 0); #ifdef USE_GMP case (CELL)FunctorBigInt: return(Yap_gmp_tcmp_big_big(AbsAppl(pt0),AbsAppl(pt0)) == 0); @@ -508,7 +508,7 @@ unify_nvar_nvar: case (CELL)FunctorLongInt: return(pt0[1] == pt1[1]); case (CELL)FunctorString: - return(strcmp( (const char *)(pt0+2), (const char )(pt1+2)) == 0); + return(strcmp( (const char *)(pt0+2), (const char *)(pt1+2)) == 0); case (CELL)FunctorDouble: return(FloatOfTerm(AbsAppl(pt0)) == FloatOfTerm(AbsAppl(pt1))); #ifdef USE_GMP @@ -875,7 +875,7 @@ unifiable_nvar_nvar: case (CELL)FunctorLongInt: return(pt0[1] == pt1[1]); case (CELL)FunctorString: - return(strcmp( (const char *)(pt0+2), (const char )(pt1+2)) == 0); + return(strcmp( (const char *)(pt0+2), (const char *)(pt1+2)) == 0); case (CELL)FunctorDouble: return(FloatOfTerm(AbsAppl(pt0)) == FloatOfTerm(AbsAppl(pt1))); #ifdef USE_GMP diff --git a/H/ScannerTypes.h b/H/ScannerTypes.h index c315d768d..79c95e047 100644 --- a/H/ScannerTypes.h +++ b/H/ScannerTypes.h @@ -7,6 +7,7 @@ typedef enum TokenKinds { Ponctuation_tok, Error_tok, QuasiQuotes_tok, + WQuasiQuotes_tok, eot_tok } tkinds; diff --git a/H/YapMirror.h b/H/YapMirror.h index 07b0c577c..9bb1756dc 100644 --- a/H/YapMirror.h +++ b/H/YapMirror.h @@ -19,8 +19,8 @@ static char SccsId[] = "%W% %G%"; #endif /* - * This file defines main data-structure for term conversion - * + * This file defines main data-structure for text conversion and + * mirroring */ #include "pl-utf8.h" @@ -39,6 +39,7 @@ typedef enum { YAP_STRING_FLOAT = 0x80, YAP_STRING_BIG = 0x100, YAP_STRING_LITERAL = 0x200, + YAP_STRING_LENGTH = 0x400, YAP_STRING_TERM = 0x1000, // joint with other flags that define possible values YAP_STRING_DIFF = 0x2000, // difference list YAP_STRING_NCHARS= 0x4000, // size of input/result @@ -55,6 +56,7 @@ typedef union { const char *c; const wchar_t *w; Atom a; + size_t l; Term t;// depends on other flags } seq_val_t; @@ -74,7 +76,32 @@ typedef enum internal_encoding { YAP_WCHAR } encoding_t; +// string construction +#ifdef H +static inline Term +init_tstring( USES_REGS1 ) { + Term t = AbsAppl(H); + H[0] = (CELL)FunctorString; + return t; +} + +static inline char * +buf_from_tstring( CELL *p ) { + char *out = (char *)(p + 2); + return out; +} + +static inline void +close_tstring( char *p USES_REGS ) { + CELL *szp = H+1; + H = (CELL *)ALIGN_YAPTYPE( p ,CELL); + *szp = (H - szp)-1; + *H++ = EndSpecials; +} +#endif + +// string type depends on current module static inline seq_type_t mod_to_type( Term mod USES_REGS ) { @@ -89,21 +116,26 @@ mod_to_type( Term mod USES_REGS ) return YAP_STRING_ATOM; } -int Yap_CVT_Text( seq_tv_t *inp, seq_tv_t *out USES_REGS); +// the routines -static inline Term -Yap_AtomToNumber(Term t0 USES_REGS) +extern int Yap_CVT_Text( seq_tv_t *inp, seq_tv_t *out USES_REGS); +extern void *Yap_Concat_Text( int n, seq_tv_t inp[], seq_tv_t *out USES_REGS); +extern void *Yap_Splice_Text( int n, size_t cuts[], seq_tv_t *inp, seq_tv_t outv[] USES_REGS); + +// user friendly interface + +static inline size_t +Yap_AtomicToLength(Term t0 USES_REGS) { seq_tv_t inp, out; inp.val.t = t0; - inp.type = YAP_STRING_ATOM; - out.type = YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + inp.type = YAP_STRING_STRING|YAP_STRING_CODES|YAP_STRING_ATOMS|YAP_STRING_ATOM|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + out.type = YAP_STRING_LENGTH; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; return out.val.t; } - static inline Term Yap_AtomicToListOfAtoms(Term t0 USES_REGS) { @@ -129,6 +161,68 @@ Yap_AtomicToListOfCodes(Term t0 USES_REGS) return out.val.t; } +static inline size_t +Yap_AtomToLength(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_ATOM; + out.type = YAP_STRING_LENGTH; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term +Yap_AtomToListOfAtoms(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_ATOM; + out.type = YAP_STRING_ATOMS; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term +Yap_AtomToListOfCodes(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_ATOM; + out.type = YAP_STRING_CODES; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term +Yap_AtomToNumber(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_ATOM; + out.type = YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term +Yap_AtomToString(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + + inp.val.t = t0; + inp.type = YAP_STRING_ATOM; + out.type = YAP_STRING_STRING; + + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + static inline Term Yap_AtomicToString(Term t0 USES_REGS) { @@ -223,6 +317,18 @@ Yap_CharsToTDQ( const char *s, Term mod USES_REGS ) return out.val.t; } +static inline Atom +Yap_ListOfAtomsToAtom(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_ATOMS; + out.type = YAP_STRING_ATOM; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.a; +} + static inline Atom Yap_ListToAtom(Term t0 USES_REGS) { @@ -398,6 +504,18 @@ Yap_NumberToListOfCodes(Term t0 USES_REGS) return out.val.t; } +static inline Term +Yap_NumberToString(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + out.type = YAP_STRING_STRING; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + static inline Atom Yap_NWCharsToAtom( const wchar_t *s, size_t len USES_REGS ) { @@ -454,14 +572,73 @@ Yap_NWCharsToString( const wchar_t *s, size_t len USES_REGS ) return out.val.t; } - -Yap_TextToUTF8(Term t0 USES_REGS) +static inline Atom +Yap_StringToAtom(Term t0 USES_REGS) { seq_tv_t inp, out; inp.val.t = t0; - inp.type = YAP_STRING_STRING|YAP_STRING_CODES|YAP_STRING_ATOMS|YAP_STRING_ATOM|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + inp.type = YAP_STRING_STRING; + out.type = YAP_STRING_ATOM; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.a; +} + +static inline size_t +Yap_StringToAtomic(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING; + out.type = YAP_STRING_ATOM|YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline size_t +Yap_StringToLength(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING; + out.type = YAP_STRING_LENGTH; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline size_t +Yap_StringToListOfAtom(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING; + out.type = YAP_STRING_ATOMS; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline size_t +Yap_StringToListOfCodes(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING; out.type = YAP_STRING_CODES; - out.encoding = YAP_UTF8; + if (!Yap_CVT_Text(&inp, &out PASS_REGS)) + return 0L; + return out.val.t; +} + +static inline Term +Yap_StringToNumber(Term t0 USES_REGS) +{ + seq_tv_t inp, out; + inp.val.t = t0; + inp.type = YAP_STRING_STRING; + out.type = YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; if (!Yap_CVT_Text(&inp, &out PASS_REGS)) return 0L; return out.val.t; @@ -505,3 +682,90 @@ Yap_WCharsToString(const wchar_t *s USES_REGS) return out.val.t; } +static inline Atom +Yap_ConcatAtoms(Term t1, Term t2 USES_REGS) +{ + seq_tv_t inpv[2], out; + inpv[0].val.t = t1; + inpv[0].type = YAP_STRING_ATOM; + inpv[1].val.t = t2; + inpv[1].type = YAP_STRING_ATOM; + out.type = YAP_STRING_ATOM; + if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) + return NULL; + return out.val.a; +} + +static inline Atom +Yap_ConcatAtomics(Term t1, Term t2 USES_REGS) +{ + seq_tv_t inpv[2], out; + inpv[0].val.t = t1; + inpv[0].type = YAP_STRING_STRING|YAP_STRING_ATOM|YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + inpv[1].val.t = t2; + inpv[1].type = YAP_STRING_STRING|YAP_STRING_ATOM|YAP_STRING_INT|YAP_STRING_FLOAT|YAP_STRING_BIG|YAP_STRING_TERM; + out.type = YAP_STRING_ATOM; + if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) + return NULL; + return out.val.a; +} + +static inline Term +Yap_ConcatStrings(Term t1, Term t2 USES_REGS) +{ + seq_tv_t inpv[2], out; + inpv[0].val.t = t1; + inpv[0].type = YAP_STRING_STRING; + inpv[1].val.t = t2; + inpv[1].type = YAP_STRING_STRING; + out.type = YAP_STRING_STRING; + if (!Yap_Concat_Text(2, inpv, &out PASS_REGS)) + return 0L; + return out.val.t; +} + + +static inline Atom +Yap_SpliceAtom(Term t1, Atom ats[], size_t cut USES_REGS) +{ + seq_tv_t outv[2], inp; + inp.type = YAP_STRING_ATOM; + inp.val.t = t1; + outv[0].type = YAP_STRING_ATOM; + outv[1].type = YAP_STRING_ATOM; + if (!Yap_Splice_Text(2, &cut, &inp, outv PASS_REGS)) + return NULL; + ats[0] = outv[0].val.a; + ats[1] = outv[1].val.a; + return ats[0]; +} + +static inline Atom +Yap_SubtractHeadAtom(Term t1, Term th USES_REGS) +{ + seq_tv_t outv[2], inp; + inp.type = YAP_STRING_ATOM; + inp.val.t = t1; + outv[0].type = YAP_STRING_ATOM; + outv[0].val.t = th; + outv[1].type = YAP_STRING_ATOM; + outv[1].val.t = 0; + if (!Yap_Splice_Text(2, NULL, &inp, outv PASS_REGS)) + return NULL; + return outv[1].val.a; +} + +static inline Atom +Yap_SubtractTailAtom(Term t1, Term th USES_REGS) +{ + seq_tv_t outv[2], inp; + inp.type = YAP_STRING_ATOM; + inp.val.t = t1; + outv[0].type = YAP_STRING_ATOM; + outv[0].val.t = 0; + outv[1].type = YAP_STRING_ATOM; + outv[1].val.t = th; + if (!Yap_Splice_Text(2, NULL, &inp, outv PASS_REGS)) + return NULL; + return outv[0].val.a; +} diff --git a/Makefile.in b/Makefile.in index 80e9a6f6c..ad08c22b4 100755 --- a/Makefile.in +++ b/Makefile.in @@ -189,7 +189,7 @@ HEADERS = \ $(srcdir)/H/tracer.h \ $(srcdir)/H/trim_trail.h \ $(srcdir)/H/yapio.h \ - $(srcdir)/H/YapMirrorn.h \ + $(srcdir)/H/YapMirror.h \ $(srcdir)/BEAM/eam.h $(srcdir)/BEAM/eamamasm.h \ $(srcdir)/OPTYap/opt.config.h \ $(srcdir)/OPTYap/opt.proto.h $(srcdir)/OPTYap/opt.structs.h \ diff --git a/config.h.in b/config.h.in index 909d61ce1..486f52575 100755 --- a/config.h.in +++ b/config.h.in @@ -283,6 +283,7 @@ #undef HAVE_USLEEP #undef HAVE_VSNPRINTF #undef HAVE_WAITPID +#undef HAVE_WCSDUP #undef HAVE_MPZ_XOR #if HAVE_GETHOSTNAME==1 diff --git a/configure b/configure index df81cdfb9..b7b48b10a 100755 --- a/configure +++ b/configure @@ -8806,7 +8806,7 @@ _ACEOF fi done -for ac_func in time times tmpnam usleep utime vsnprintf +for ac_func in time times tmpnam usleep utime vsnprintf wcsdup do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.in b/configure.in index a486a7060..d31500e90 100755 --- a/configure.in +++ b/configure.in @@ -1573,7 +1573,7 @@ AC_CHECK_FUNCS(setitimer setlocale setsid setlinebuf sigaction) AC_CHECK_FUNCS(siggetmask siginterrupt) AC_CHECK_FUNCS(signal sigprocmask socket srand srandom stat) AC_CHECK_FUNCS(strchr strerror stricmp strlwr strncat strncpy strtod) -AC_CHECK_FUNCS(time times tmpnam usleep utime vsnprintf) +AC_CHECK_FUNCS(time times tmpnam usleep utime vsnprintf wcsdup) AC_CHECK_FUNC(regexec, [NO_BUILTIN_REGEXP="#"], [NO_BUILTIN_REGEXP=""]) diff --git a/include/YapError.h b/include/YapError.h index 79add0620..b5b0ab70c 100644 --- a/include/YapError.h +++ b/include/YapError.h @@ -121,14 +121,6 @@ typedef enum UNKNOWN_ERROR } yap_error_number; -#define LOCAL_ERROR(v) \ - if (H + 2*(v) > ASP-1024) { \ - LOCAL_Error_TYPE = OUT_OF_STACK_ERROR;\ - LOCAL_Error_Term = t;\ - LOCAL_Error_Size = 2*(v)*sizeof(CELL);\ - return 0L; \ - } - #define JMP_LOCAL_ERROR(v, LAB) \ if (H + 2*(v) > ASP-1024) { \ LOCAL_Error_TYPE = OUT_OF_STACK_ERROR;\ @@ -137,6 +129,22 @@ typedef enum goto LAB; \ } +#define LOCAL_ERROR(v) \ + if (H + (v) > ASP-1024) { \ + LOCAL_Error_TYPE = OUT_OF_STACK_ERROR;\ + LOCAL_Error_Term = t;\ + LOCAL_Error_Size = 2*(v)*sizeof(CELL);\ + return NULL; \ + } + +#define LOCAL_TERM_ERROR(v) \ + if (H + (v) > ASP-1024) { \ + LOCAL_Error_TYPE = OUT_OF_STACK_ERROR;\ + LOCAL_Error_Term = t;\ + LOCAL_Error_Size = 2*(v)*sizeof(CELL);\ + return 0L; \ + } + #define AUX_ERROR(t, n, s, TYPE) \ if (s + (n+1) > (TYPE *)AuxSp) { \ LOCAL_Error_TYPE = OUT_OF_AUXSPACE_ERROR;\ diff --git a/os/pl-utf8.c b/os/pl-utf8.c index 0e05020a5..e5b405002 100644 --- a/os/pl-utf8.c +++ b/os/pl-utf8.c @@ -163,3 +163,64 @@ utf8_strlen(const char *s, size_t len) return l; } + +size_t +utf8_strlen1(const char *s) +{ + unsigned int l = 0; + + while(1) + { int chr; + + s = utf8_get_char(s, &chr); + if (!chr) break; + l++; + } + + return l; +} + +const char * +utf8_n(const char *s, int n) +{ + while(n--) + { int chr; + + s = utf8_get_char(s, &chr); + if (!chr) return NULL; + } + + return s; +} + +int +utf8_strncmp(const char *s1, const char *s2, size_t n) +{ + + while(n-- >0) + { int chr1, chr2; + + s1 = utf8_get_char(s1, &chr1); + s2 = utf8_get_char(s2, &chr2); + if (chr1-chr2) return chr1-chr2; + if (!chr1) return 0; + } + + return 0; +} + +int +utf8_strprefix(const char *s1, const char *s2) +{ + + while(1) + { int chr1, chr2; + + s1 = utf8_get_char(s1, &chr1); + s2 = utf8_get_char(s2, &chr2); + if (!chr2) return 1; + if (chr1-chr2) return 0; + } + + return 0; +} diff --git a/os/pl-utf8.h b/os/pl-utf8.h index 6dbdcc936..be4f7da11 100644 --- a/os/pl-utf8.h +++ b/os/pl-utf8.h @@ -58,6 +58,10 @@ extern char *_PL__utf8_get_char(const char *in, int *chr); extern char *_PL__utf8_put_char(char *out, int chr); extern size_t utf8_strlen(const char *s, size_t len); +extern size_t utf8_strlen1(const char *s); +extern const char * utf8_n(const char *s, int n); +extern int utf8_strncmp(const char *s1, const char *s2, size_t n); +extern int utf8_strprefix(const char *s1, const char *s2); typedef enum { S_ASCII, diff --git a/packages/real b/packages/real index 5a72fe49e..f60caaf8b 160000 --- a/packages/real +++ b/packages/real @@ -1 +1 @@ -Subproject commit 5a72fe49e5a5c651a890a388eb967b83da8e2c52 +Subproject commit f60caaf8b2134b6a64e4923b2a471cdcd8026c2f diff --git a/pl/utils.yap b/pl/utils.yap index 66381a7e2..dec4f0f28 100644 --- a/pl/utils.yap +++ b/pl/utils.yap @@ -338,39 +338,54 @@ current_atom(A) :- % generate current_atom(A) :- % generate '$current_wide_atom'(A). -atom_concat(X,Y,At) :- - ( - nonvar(X), nonvar(Y) - -> - atom_concat([X,Y],At) - ; - atom(At) -> - '$atom_contact_split'(At,X,Y) - ; - var(At) -> - '$do_error'(instantiation_error,atom_concat(X,Y,At)) - ; - '$do_error'(type_error(atom,At),atomic_concat(X,Y,At)) - ). +atom_concat(Xs,At) :- + ( var(At) -> + '$atom_concat'(Xs, At ) + ; + '$atom_concat_constraints'(Xs, start, At, Unbound), + '$process_atom_holes'(Unbound) + ). -'$atom_contact_split'(At,X,Y) :- - nonvar(X), !, - atom_codes(At, Codes), - atom_codes(X, Xs), - lists:append(Xs,Ys,Codes), - atom_codes(Y, Ys). -'$atom_contact_split'(At,X,Y) :- - nonvar(Y), !, - atom_codes(At, Codes), - atom_codes(Y, Ys), - once(lists:append(Xs,Ys,Codes)), - atom_codes(X, Xs). -'$atom_contact_split'(At,X,Y) :- - atom_codes(At, Codes), - lists:append(Xs, Ys, Codes), - atom_codes(X, Xs), - atom_codes(Y, Ys). +% the constraints are of the form hole: HoleAtom, Begin, Atom, End +'$atom_concat_constraints'([At], start, At, _, []) :- !. +'$atom_concat_constraints'([At0], mid(Next, At), At, [hole(At0, Next, At, end)]) :- !. +% just slice first atom +'$atom_concat_constraints'([At0|Xs], start, At, Unbound) :- + atom(At0), !, + sub_atom(At, 0, Sz, L, At0), + sub_atom(At, _, L, 0, Atr), %remainder + '$atom_concat_constraints'(Xs, start, Atr, Unbound). +% first hole: Follow says whether we have two holes in a row, At1 will be our atom +'$atom_concat_constraints'([At0|Xs], start, At, [hole(At0, 0, At1, Next)|Unbound]) :- + '$atom_concat_constraints'(Xs, mid(Next,At1), Atr, Unbound). +% end of a run +'$atom_concat_constraints'([At0|Xs], mid(end, At1), At, Unbound) :- + atom(At0), !, + sub_atom(At, Next, Sz, L, At0), + sub_atom(At, 0, Next, Next, At1), + sub_atom(At, _, L, 0, Atr), %remainder + '$atom_concat_constraints'(Xs, start, Atr, _, Unbound). +'$atom_concat_constraints'([At0|Xs], mid(Next,At1), At, Next, [hole(At0, Next, At1, Follow)|Unbound]) :- + '$atom_concat_constraints'(Xs, mid(NextFollow, At1), At, Unbound). +'$process_atom_holes'([]). +'$process_atom_holes'([hole(At0, Next, At1, end)|Unbound]) :- + sub_atom(At1, Next, _, 0, At0), + '$process_atom_holes'(Unbound). +'$process_atom_holes'([hole(At0, Next, At1, Follow)|Unbound]) :- + sub_atom(At1, Next, Sz, _Left, At0), + Follow is Next+Sz, + '$process_atom_holes'(Unbound). + + +string_concat(X,Y,St) :- + ( var(St) -> + '$string_concat'(X, Y, At ) + ; + sub_string(At, 0, _, Left, X), + sub_string(At, Left, _, 0, Y) + ). + callable(A) :- ( var(A) -> fail ; number(A) -> fail ; true ). @@ -412,53 +427,9 @@ atomic_list_concat(L, El, At) :- atom_codes(A, S), '$atomify_list'(SubS, L). -atomic_concat(X,Y,At) :- - ( - nonvar(X), nonvar(Y) - -> - atomic_concat([X,Y],At) - ; - atom(At) -> - atom_length(At,Len), - '$atom_contact_split'(At,X,Y) - ; - number(At) -> - '$number_contact_split'(At,X,Y) - ; - var(At) -> - '$do_error'(instantiation_error,atomic_concat(X,Y,At)) - ; - '$do_error'(type_error(atomic,At),atomic_concat(X,Y,At)) - ). - -'$number_contact_split'(At,X,Y) :- - nonvar(X), !, - number_codes(At, Codes), - name(X, Xs), - lists:append(Xs,Ys,Codes), - name(Y, Ys). -'$number_contact_split'(At,X,Y) :- - nonvar(Y), !, - number_codes(At, Codes), - name(Y, Ys), - once(lists:append(Xs,Ys,Codes)), - name(X, Xs). -'$number_contact_split'(At,X,Y) :- - number_codes(At, Codes), - lists:append(Xs, Ys, Codes), - name(X, Xs), - name(Y, Ys). - % % small compatibility hack -% -sub_string(String, Bef, Size, After, SubStr) :- - catch(string_to_atom(String, A), _, true), - catch(string_to_atom(SubStr, SubA), _, true), - sub_atom(A, Bef, Size, After, SubA), - catch(string_to_atom(String, A), _, true), - catch(string_to_atom(SubStr, SubA), _, true). '$singletons_in_term'(T,VL) :- '$variables_in_term'(T,[],V10),