new string_code/3 plus some fixes

This commit is contained in:
Vitor Santos Costa 2013-12-06 23:24:01 +00:00
parent 342477f6e0
commit 863b796370
6 changed files with 130 additions and 21 deletions

View File

@ -31,7 +31,8 @@ static char SccsId[] = "%W% %G%";
#include "eval.h"
#include "yapio.h"
#include "pl-shared.h"
#include "YapMirror.h"
#include "pl-utf8.h"
#include "YapText.h"
#ifdef TABLING
#include "tab.macros.h"
#endif /* TABLING */
@ -564,7 +565,7 @@ init_atomic_concat3( USES_REGS1 )
else cut_fail();
}
/* Error handling */
if (LOCAL_Error_TYPE && Yap_HandleError( "atoicm_concat/3" )) {
if (LOCAL_Error_TYPE && Yap_HandleError( "atomic_concat/3" )) {
goto restart_aux;
}
return FALSE;
@ -627,12 +628,92 @@ init_string_concat3( USES_REGS1 )
else cut_fail();
}
/* Error handling */
if (LOCAL_Error_TYPE && Yap_HandleError( "atom_concat/3" )) {
if (LOCAL_Error_TYPE && Yap_HandleError( "string_concat/3" )) {
goto restart_aux;
}
return FALSE;
}
static Int
cont_string_code3( USES_REGS1 )
{
Term t2;
Int i, j;
int chr;
char *s;
const char *s0;
restart_aux:
t2 = Deref(ARG2);
s0 = StringOfTerm( t2 );
i = IntOfTerm(EXTRA_CBACK_ARG(3,1)); // offset in coded string, increases by 1..6
j = IntOfTerm(EXTRA_CBACK_ARG(3,2)); // offset in UNICODE string, always increases by 1
s = utf8_get_char( s0+i, &chr );
if (s[0]) {
EXTRA_CBACK_ARG(3,1) = MkIntTerm(s-s0);
EXTRA_CBACK_ARG(3,2) = MkIntTerm(j+1);
return Yap_unify(MkIntegerTerm( chr ), ARG3) && Yap_unify(MkIntegerTerm( j ), ARG1);
}
if (Yap_unify(MkIntegerTerm( chr ), ARG3) && Yap_unify(MkIntegerTerm( j ), ARG1))
cut_succeed();
else
cut_fail();
/* Error handling */
if (LOCAL_Error_TYPE && Yap_HandleError( "get_code/3" )) {
goto restart_aux;
}
return FALSE;
}
static Int
init_string_code3( USES_REGS1 )
{
Term t1;
Term t2;
const char *s;
restart_aux:
t1 = Deref(ARG1);
t2 = Deref(ARG2);
if (IsVarTerm(t2)) {
LOCAL_Error_TYPE = INSTANTIATION_ERROR;
LOCAL_Error_Term = t2;
} else if (!IsStringTerm(t2)) {
LOCAL_Error_TYPE = TYPE_ERROR_STRING;
LOCAL_Error_Term = t2;
} else {
s = StringOfTerm( t2 );
t1 = Deref(ARG1);
if (IsVarTerm(t1)) {
EXTRA_CBACK_ARG(3,1) = MkIntTerm(0);
EXTRA_CBACK_ARG(3,2) = MkIntTerm(0);
return cont_string_code3( PASS_REGS1 );
} else if (!IsIntegerTerm( t1 )) {
LOCAL_Error_TYPE = TYPE_ERROR_INTEGER;
LOCAL_Error_Term = t1;
} else {
const char *ns = s;
int chr;
Int indx = IntegerOfTerm( t1 );
if (indx < 0) {
LOCAL_Error_TYPE = DOMAIN_ERROR_NOT_LESS_THAN_ZERO;
LOCAL_Error_Term = t1;
}
ns = utf8_skip(s,indx);
if (ns == NULL) {
cut_fail(); // silently fail?
}
utf8_get_char( ns, &chr);
if ( chr == '\0') cut_fail();
if (Yap_unify(ARG3, MkIntegerTerm(chr))) cut_succeed();
cut_fail();
}
}
/* Error handling */
if (LOCAL_Error_TYPE && Yap_HandleError( "get_code/3" )) {
goto restart_aux;
}
return FALSE;
}
static Int
p_atom_concat2( USES_REGS1 )
@ -1083,7 +1164,7 @@ check_sub_string_at(int min, Term at, Term nat)
const char *p1, *p2;
int c1;
p1 = utf8_n(StringOfTerm(at), min);
p1 = utf8_skip(StringOfTerm(at), min);
p2 = StringOfTerm(nat);
while ( (c1 = *p1++) == *p2++ && c1);
return c1 == 0;
@ -1137,7 +1218,7 @@ check_sub_string_bef(int max, Term at, Term nat)
if ((Int)(min - len) < 0) return FALSE;
p1 = utf8_n(StringOfTerm(at),min);
p1 = utf8_skip(StringOfTerm(at),min);
p2 = StringOfTerm(nat);
while ( (c1 = *p1++) == *p2++ && c1);
return c1 == 0;
@ -1241,7 +1322,7 @@ cont_sub_atomic( USES_REGS1 )
}
} else {
while (!found) {
p = (char *)utf8_n(p, min);
p = (char *)utf8_skip(p, min);
if (utf8_strncmp(p, StringOfTerm(nat), len) == 0) {
Yap_unify(ARG2, MkIntegerTerm(min));
Yap_unify(ARG3, MkIntegerTerm(len));
@ -1455,14 +1536,14 @@ init_sub_atomic( int sub_atom USES_REGS )
if (!sub_atom) {
out = (utf8_strlen1(StringOfTerm(tout)) == len);
if (!out) cut_fail();
} else if (IsWideAtom(AtomOfTerm(nat))) {
} else if (IsWideAtom(AtomOfTerm(tout))) {
if (!(mask & SUB_ATOM_HAS_VAL)) {
cut_fail();
}
/* just check length, they may still be several occurrences :( */
out = (wcslen(RepAtom(AtomOfTerm(nat))->WStrOfAE) == len);
out = (wcslen(RepAtom(AtomOfTerm(tout))->WStrOfAE) == len);
} else {
out = (strlen(RepAtom(AtomOfTerm(nat))->StrOfAE) == len);
out = (strlen(RepAtom(AtomOfTerm(tout))->StrOfAE) == len);
if (!out) cut_fail();
}
if (len == sz) {
@ -1681,6 +1762,7 @@ Yap_InitBackAtoms(void)
Yap_InitCPredBack("string_concat", 3, 2, init_string_concat3, cont_string_concat3, 0);
Yap_InitCPredBack("sub_atom", 5, 5, init_sub_atom, cont_sub_atomic, 0);
Yap_InitCPredBack("sub_string", 5, 5, init_sub_string, cont_sub_atomic, 0);
Yap_InitCPredBack("string_code", 3, 1, init_string_code3, cont_string_code3, 0);
}

View File

@ -348,6 +348,17 @@ p_is_bignum( USES_REGS1 )
#endif
}
static Int
p_is_string( USES_REGS1 )
{
Term t = Deref(ARG1);
return(
IsNonVarTerm(t) &&
IsApplTerm(t) &&
FunctorOfTerm(t) == FunctorString
);
}
static Int
p_nb_set_bit( USES_REGS1 )
{
@ -476,6 +487,7 @@ Yap_InitBigNums(void)
Yap_InitCPred("$bignum", 1, p_is_bignum, SafePredFlag);
Yap_InitCPred("rational", 3, p_rational, 0);
Yap_InitCPred("rational", 1, p_is_rational, SafePredFlag);
Yap_InitCPred("string", 1, p_is_string, SafePredFlag);
Yap_InitCPred("opaque", 1, p_is_opaque, SafePredFlag);
Yap_InitCPred("nb_set_bit", 2, p_nb_set_bit, SafePredFlag);
}

View File

@ -360,7 +360,7 @@ compare(Term t1, Term t2) /* compare terms t1 and t2 */
case db_ref_e:
return 1;
case string_e:
return 1;
return -1;
}
}
return -1;
@ -437,10 +437,22 @@ compare(Term t1, Term t2) /* compare terms t1 and t2 */
case string_e:
{
if (IsApplTerm(t2)) {
Functor f2 = FunctorOfTerm(t2);
if (f2 == FunctorString)
Functor fun2 = FunctorOfTerm(t2);
switch ((CELL)fun2) {
case double_e:
return 1;
case long_int_e:
return 1;
#ifdef USE_GMP
case big_int_e:
return 1;
#endif
case db_ref_e:
return 1;
case string_e:
return strcmp(StringOfTerm(t1), StringOfTerm(t2));
return 1;
}
return -1;
}
return -1;
}

View File

@ -21,7 +21,7 @@
#include "eval.h"
#include "yapio.h"
#include "pl-shared.h"
#include "YapMirror.h"
#include "YapText.h"
#include <string.h>
@ -1013,7 +1013,7 @@ advance_Text( void *s, int l, encoding_t enc )
case YAP_CHAR:
return ((char *)s)+l;
case YAP_UTF8:
return (char *)utf8_n((const char *)s,l);
return (char *)utf8_skip((const char *)s,l);
case YAP_WCHAR:
return ((wchar_t *)s)+l;
}
@ -1198,7 +1198,7 @@ slice( int min, int max, void *buf, seq_tv_t *out, encoding_t enc USES_REGS )
int chr;
while ( min++ < max ) { chr = *ptr++; nbuf = utf8_put_char(nbuf, chr); }
} else {
const char *ptr = utf8_n ( (const char *)buf, min );
const char *ptr = utf8_skip ( (const char *)buf, min );
int chr;
while ( min++ < max ) { ptr = utf8_get_char(ptr, & chr); nbuf = utf8_put_char(nbuf, chr); }
}
@ -1231,7 +1231,7 @@ slice( int min, int max, void *buf, seq_tv_t *out, encoding_t enc USES_REGS )
/* atom */
wchar_t *nbuf = (wchar_t *)H;
Term t = ARG1;
const char *ptr = utf8_n ( (const char *)buf, min );
const char *ptr = utf8_skip ( (const char *)buf, min );
int chr;
LOCAL_ERROR( max-min );

View File

@ -46,6 +46,7 @@ typedef enum {
YAP_STRING_BIG = 0x100,
YAP_STRING_LITERAL = 0x200,
YAP_STRING_LENGTH = 0x400,
YAP_STRING_NTH = 0x800,
YAP_STRING_TERM = 0x1000, // joint with other flags that define possible values
YAP_STRING_DIFF = 0x2000, // difference list
YAP_STRING_NCHARS= 0x4000, // size of input/result
@ -63,6 +64,7 @@ typedef union {
const wchar_t *w;
Atom a;
size_t l;
int d;
Term t;// depends on other flags
}
seq_val_t;
@ -968,3 +970,4 @@ Yap_SubtractTailAtomic(Term t1, Term th USES_REGS)
else
return 0L;
}

View File

@ -342,12 +342,12 @@ atom_concat(Xs,At) :-
( var(At) ->
'$atom_concat'(Xs, At )
;
'$atom_concat_constraints'(Xs, start, At, Unbound),
'$atom_concat_constraints'(Xs, 0, At, Unbound),
'$process_atom_holes'(Unbound)
).
% the constraints are of the form hole: HoleAtom, Begin, Atom, End
'$atom_concat_constraints'([At], 0, At, _, []) :- !.
'$atom_concat_constraints'([At], 0, At, []) :- !.
'$atom_concat_constraints'([At0], mid(Next, At), At, [hole(At0, Next, At, end)]) :- !.
% just slice first atom
'$atom_concat_constraints'([At0|Xs], 0, At, Unbound) :-
@ -356,7 +356,7 @@ atom_concat(Xs,At) :-
sub_atom(At, _, L, 0, Atr ), %remainder
'$atom_concat_constraints'(Xs, 0, Atr, Unbound).
% first hole: Follow says whether we have two holes in a row, At1 will be our atom
'$atom_concat_constraints'([At0|Xs], start, At, [hole(At0, 0, At, Next)|Unbound]) :-
'$atom_concat_constraints'([At0|Xs], 0, At, [hole(At0, 0, At, Next)|Unbound]) :-
'$atom_concat_constraints'(Xs, mid(Next,At1), At, Unbound).
% end of a run
'$atom_concat_constraints'([At0|Xs], mid(end, At1), At, Unbound) :-
@ -364,7 +364,7 @@ atom_concat(Xs,At) :-
sub_atom(At, Next, Sz, L, At0),
sub_atom(At, 0, Next, Next, At1),
sub_atom(At, _, L, 0, Atr), %remainder
'$atom_concat_constraints'(Xs, 0, Atr, _, Unbound).
'$atom_concat_constraints'(Xs, 0, Atr, Unbound).
'$atom_concat_constraints'([At0|Xs], mid(Next,At1), At, Next, [hole(At0, Next, At, Follow)|Unbound]) :-
'$atom_concat_constraints'(Xs, mid(NextFollow, At1), At, Unbound).