Use UTF-8 internally. Known problem: still seems to core dump.

This commit is contained in:
Vítor Santos Costa 2016-03-03 23:18:04 +00:00
parent 0db27196aa
commit 5f9752baff

View File

@ -19,28 +19,26 @@ static char SccsId[] = "%W% %G%";
#endif #endif
#include "Yap.h" #include "Yap.h"
#include "Yatom.h"
#include "YapHeap.h" #include "YapHeap.h"
#include "yapio.h"
#include "iopreds.h"
#include "YapText.h" #include "YapText.h"
#include "Yatom.h"
#include "encoding.h" #include "encoding.h"
#include "iopreds.h"
#include "yapio.h"
/// @addtogroup readutil /// @addtogroup readutil
static Int rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS) {
static Int
rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
{
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2"); int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2");
StreamDesc *st = GLOBAL_Stream + sno; StreamDesc *st = GLOBAL_Stream + sno;
Int status; Int status;
UInt max_inp, buf_sz, sz; UInt max_inp, buf_sz, sz;
int *buf; unsigned char *buf;
bool binary_stream; bool binary_stream;
int ch;
if (sno < 0) if (sno < 0)
return FALSE; return false;
status = GLOBAL_Stream[sno].status; status = GLOBAL_Stream[sno].status;
binary_stream = GLOBAL_Stream[sno].status & Binary_Stream_f; binary_stream = GLOBAL_Stream[sno].status & Binary_Stream_f;
if (status & Eof_Stream_f) { if (status & Eof_Stream_f) {
@ -48,9 +46,9 @@ rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof)); return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof));
} }
max_inp = (ASP - HR) / 2 - 1024; max_inp = (ASP - HR) / 2 - 1024;
buf = (int *)TR; buf = (unsigned char *)TR;
buf_sz = (int *)LOCAL_TrailTop-buf; buf_sz = (unsigned char *)LOCAL_TrailTop - buf;
while (TRUE) { while (true) {
if (buf_sz > max_inp) { if (buf_sz > max_inp) {
buf_sz = max_inp; buf_sz = max_inp;
} }
@ -61,11 +59,14 @@ rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
char *b = (char *)TR; char *b = (char *)TR;
sz = fread(b, 1, buf_sz, GLOBAL_Stream[sno].file); sz = fread(b, 1, buf_sz, GLOBAL_Stream[sno].file);
} else { } else {
int ch; unsigned char *pt = buf;
int *pt = buf;
do { do {
*pt++ = ch = st->stream_wgetc_for_read(sno); ch = st->stream_wgetc_for_read(sno);
if (pt+1 == buf+buf_sz) if (ch < 127)
*pt++ = ch;
else
pt += get_utf8(pt, 4, &ch);
if (pt + 4 == buf + buf_sz)
break; break;
} while (ch != '\n'); } while (ch != '\n');
sz = pt - buf; sz = pt - buf;
@ -78,7 +79,6 @@ rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof)); return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof));
} }
UNLOCK(GLOBAL_Stream[sno].streamlock); UNLOCK(GLOBAL_Stream[sno].streamlock);
return FALSE;
} }
if (GLOBAL_Stream[sno].status & Eof_Stream_f || buf[sz - 1] == 10) { if (GLOBAL_Stream[sno].status & Eof_Stream_f || buf[sz - 1] == 10) {
/* we're done */ /* we're done */
@ -97,11 +97,8 @@ rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
end = TermNil; end = TermNil;
else else
end = Deref(XREGS[arity]); end = Deref(XREGS[arity]);
if (GLOBAL_Stream[sno].encoding == ENC_ISO_UTF8) return Yap_unify(
return Yap_unify(ARG2, Yap_UTF8ToDiffListOfCodes((const char *)TR, end PASS_REGS)) ; ARG2, Yap_UTF8ToDiffListOfCodes((const char *)TR, end PASS_REGS));
else if (GLOBAL_Stream[sno].encoding == ENC_WCHAR)
return Yap_unify(ARG2, Yap_WCharsToDiffListOfCodes((const wchar_t *)TR, end PASS_REGS)) ;
return Yap_unify(ARG2, Yap_CharsToDiffListOfCodes((const char *)TR, end, ENC_ISO_LATIN1 PASS_REGS)) ;
} }
buf += (buf_sz - 1); buf += (buf_sz - 1);
max_inp -= (buf_sz - 1); max_inp -= (buf_sz - 1);
@ -113,37 +110,32 @@ rl_to_codes(Term TEnd, int do_as_binary, int arity USES_REGS)
} }
} }
static Int static Int read_line_to_codes(USES_REGS1) {
read_line_to_codes(USES_REGS1)
{
return rl_to_codes(TermNil, FALSE, 2 PASS_REGS); return rl_to_codes(TermNil, FALSE, 2 PASS_REGS);
} }
static Int static Int read_line_to_codes2(USES_REGS1) {
read_line_to_codes2(USES_REGS1)
{
return rl_to_codes(TermNil, TRUE, 3 PASS_REGS); return rl_to_codes(TermNil, TRUE, 3 PASS_REGS);
} }
static Int static Int read_line_to_string(USES_REGS1) {
read_line_to_string( USES_REGS1 )
{
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2"); int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2");
Int status; Int status;
UInt max_inp, buf_sz; UInt max_inp, buf_sz;
int *buf; unsigned char *buf;
size_t sz;
StreamDesc *st = GLOBAL_Stream + sno; StreamDesc *st = GLOBAL_Stream + sno;
if (sno < 0) if (sno < 0)
return FALSE; return false;
status = GLOBAL_Stream[sno].status; status = GLOBAL_Stream[sno].status;
if (status & Eof_Stream_f) { if (status & Eof_Stream_f) {
UNLOCK(GLOBAL_Stream[sno].streamlock); UNLOCK(GLOBAL_Stream[sno].streamlock);
return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof)); return Yap_unify_constant(ARG2, MkAtomTerm(AtomEof));
} }
max_inp = (ASP - HR) / 2 - 1024; max_inp = (ASP - HR) / 2 - 1024;
buf = (int *)TR; buf = (unsigned char *)TR;
buf_sz = (int *)LOCAL_TrailTop-buf; buf_sz = (unsigned char *)LOCAL_TrailTop - buf;
while (true) { while (true) {
size_t sz; size_t sz;
@ -155,14 +147,19 @@ read_line_to_string( USES_REGS1 )
sz = fread(b, 1, buf_sz, GLOBAL_Stream[sno].file); sz = fread(b, 1, buf_sz, GLOBAL_Stream[sno].file);
} else { } else {
int ch; int ch;
int *pt = buf; unsigned char *pt = buf;
do { do {
*pt++ = ch = st->stream_wgetc_for_read(sno); ch = st->stream_wgetc_for_read(sno);
if (pt+1 == buf+buf_sz) if (ch < 127)
*pt++ = ch;
else
pt += put_utf8(pt, ch);
if (pt + 4 == buf + buf_sz)
break; break;
} while (ch != '\n'); } while (ch != '\n');
sz = pt - buf; sz = pt - buf;
} }
}
if (sz == -1 || sz == 0) { if (sz == -1 || sz == 0) {
if (GLOBAL_Stream[sno].status & Eof_Stream_f) { if (GLOBAL_Stream[sno].status & Eof_Stream_f) {
UNLOCK(GLOBAL_Stream[sno].streamlock); UNLOCK(GLOBAL_Stream[sno].streamlock);
@ -191,7 +188,8 @@ read_line_to_string( USES_REGS1 )
} else if (GLOBAL_Stream[sno].encoding == ENC_WCHAR) { } else if (GLOBAL_Stream[sno].encoding == ENC_WCHAR) {
return Yap_unify(ARG2, Yap_WCharsToString((const wchar_t *)TR PASS_REGS)); return Yap_unify(ARG2, Yap_WCharsToString((const wchar_t *)TR PASS_REGS));
} else { } else {
return Yap_unify(ARG2, Yap_CharsToString((const char *)TR, ENC_ISO_LATIN1 PASS_REGS) ); return Yap_unify(
ARG2, Yap_CharsToString((const char *)TR, ENC_ISO_LATIN1 PASS_REGS));
} }
buf += (buf_sz - 1); buf += (buf_sz - 1);
max_inp -= (buf_sz - 1); max_inp -= (buf_sz - 1);
@ -201,12 +199,11 @@ read_line_to_string( USES_REGS1 )
return FALSE; return FALSE;
} }
} }
}
static Int
read_stream_to_codes(USES_REGS1) static Int read_stream_to_codes(USES_REGS1) {
{ int sno = Yap_CheckStream(ARG1, Input_Stream_f,
int sno = Yap_CheckStream (ARG1, Input_Stream_f, "reaMkAtomTerm (AtomEofd_line_to_codes/2"); "reaMkAtomTerm (AtomEofd_line_to_codes/2");
CELL *HBASE = HR; CELL *HBASE = HR;
CELL *h0 = &ARG4; CELL *h0 = &ARG4;
@ -244,12 +241,9 @@ read_stream_to_codes(USES_REGS1)
RESET_VARIABLE(HR - 1); RESET_VARIABLE(HR - 1);
Yap_unify(HR[-1], ARG3); Yap_unify(HR[-1], ARG3);
return Yap_unify(AbsPair(HBASE), ARG2); return Yap_unify(AbsPair(HBASE), ARG2);
} }
static Int static Int read_stream_to_terms(USES_REGS1) {
read_stream_to_terms(USES_REGS1)
{
int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2"); int sno = Yap_CheckStream(ARG1, Input_Stream_f, "read_line_to_codes/2");
Term t, hd; Term t, hd;
yhandle_t tails, news; yhandle_t tails, news;
@ -287,9 +281,7 @@ read_stream_to_terms(USES_REGS1)
return Yap_unify(t, ARG2); return Yap_unify(t, ARG2);
} }
void void Yap_InitReadUtil(void) {
Yap_InitReadUtil(void)
{
CACHE_REGS CACHE_REGS
Term cm = CurrentModule; Term cm = CurrentModule;
@ -301,4 +293,3 @@ Yap_InitReadUtil(void)
Yap_InitCPred("read_stream_to_terms", 3, read_stream_to_terms, SyncPredFlag); Yap_InitCPred("read_stream_to_terms", 3, read_stream_to_terms, SyncPredFlag);
CurrentModule = cm; CurrentModule = cm;
} }