609 lines
12 KiB
C
609 lines
12 KiB
C
|
/* $Id$
|
||
|
|
||
|
Part of SWI-Prolog
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: J.Wielemaker@cs.vu.nl
|
||
|
WWW: http://www.swi-prolog.org
|
||
|
Copyright (C): 1985-2009, VU University Amsterdam
|
||
|
|
||
|
This library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Lesser General Public
|
||
|
License as published by the Free Software Foundation; either
|
||
|
version 2.1 of the License, or (at your option) any later version.
|
||
|
|
||
|
This library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
*/
|
||
|
|
||
|
#include <SWI-Stream.h>
|
||
|
#include <SWI-Prolog.h>
|
||
|
#include <string.h>
|
||
|
#ifdef __WINDOWS__
|
||
|
#define inline __inline
|
||
|
#endif
|
||
|
|
||
|
#include "turtle_chars.c"
|
||
|
|
||
|
/*******************************
|
||
|
* ERRORS *
|
||
|
*******************************/
|
||
|
|
||
|
static atom_t ATOM_;
|
||
|
static functor_t FUNCTOR_error2;
|
||
|
static functor_t FUNCTOR_type_error2;
|
||
|
static functor_t FUNCTOR_syntax_error1;
|
||
|
static functor_t FUNCTOR_representation_error1;
|
||
|
|
||
|
static int
|
||
|
type_error(term_t actual, const char *expected)
|
||
|
{ term_t ex;
|
||
|
|
||
|
if ( (ex = PL_new_term_ref()) &&
|
||
|
PL_unify_term(ex,
|
||
|
PL_FUNCTOR, FUNCTOR_error2,
|
||
|
PL_FUNCTOR, FUNCTOR_type_error2,
|
||
|
PL_CHARS, expected,
|
||
|
PL_TERM, actual,
|
||
|
PL_VARIABLE) )
|
||
|
return PL_raise_exception(ex);
|
||
|
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
syntax_error(const char *culprit)
|
||
|
{ term_t ex;
|
||
|
|
||
|
if ( (ex = PL_new_term_ref()) &&
|
||
|
PL_unify_term(ex,
|
||
|
PL_FUNCTOR, FUNCTOR_error2,
|
||
|
PL_FUNCTOR, FUNCTOR_syntax_error1,
|
||
|
PL_CHARS, culprit,
|
||
|
PL_VARIABLE) )
|
||
|
return PL_raise_exception(ex);
|
||
|
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
representation_error(const char *culprit)
|
||
|
{ term_t ex;
|
||
|
|
||
|
if ( (ex = PL_new_term_ref()) &&
|
||
|
PL_unify_term(ex,
|
||
|
PL_FUNCTOR, FUNCTOR_error2,
|
||
|
PL_FUNCTOR, FUNCTOR_representation_error1,
|
||
|
PL_CHARS, culprit,
|
||
|
PL_VARIABLE) )
|
||
|
return PL_raise_exception(ex);
|
||
|
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* PROLOG *
|
||
|
*******************************/
|
||
|
|
||
|
/** turtle_name(+Atom) is semidet.
|
||
|
|
||
|
True if Atom is a valid Turtle identifier
|
||
|
*/
|
||
|
|
||
|
static inline int
|
||
|
wcis_name_char(int c)
|
||
|
{ return wcis_name_start_char(c) ||
|
||
|
wcis_name_extender_char(c);
|
||
|
}
|
||
|
|
||
|
/** turtle_name_start_char(+Int) is semidet.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_name_start_char(term_t Code)
|
||
|
{ int c;
|
||
|
|
||
|
if ( !PL_get_integer(Code, &c) )
|
||
|
return type_error(Code, "code");
|
||
|
if ( !wcis_name_start_char(c) )
|
||
|
return FALSE;
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/** turtle_name(+Atom) is semidet.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_name(term_t name)
|
||
|
{ char *s;
|
||
|
pl_wchar_t *w;
|
||
|
size_t len;
|
||
|
|
||
|
if ( PL_get_nchars(name, &len, &s, CVT_ATOM) )
|
||
|
{ const char *e = &s[len];
|
||
|
|
||
|
if ( !wcis_name_start_char(s[0]&0xff) )
|
||
|
return FALSE;
|
||
|
for(s++; s<e; s++)
|
||
|
{ if ( !wcis_name_char(s[0]&0xff) )
|
||
|
return FALSE;
|
||
|
}
|
||
|
return TRUE;
|
||
|
} else if ( PL_get_wchars(name, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||
|
{ const pl_wchar_t *e = &w[len];
|
||
|
|
||
|
if ( !wcis_name_start_char(w[0]) )
|
||
|
return FALSE;
|
||
|
for(w++; w<e; w++)
|
||
|
{ if ( !wcis_name_char(w[0]) )
|
||
|
return FALSE;
|
||
|
}
|
||
|
return TRUE;
|
||
|
} else
|
||
|
return FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
typedef struct charbuf
|
||
|
{ pl_wchar_t *base;
|
||
|
pl_wchar_t *here;
|
||
|
pl_wchar_t *end;
|
||
|
pl_wchar_t tmp[256];
|
||
|
} charbuf;
|
||
|
|
||
|
|
||
|
static void
|
||
|
init_charbuf(charbuf *cb)
|
||
|
{ cb->base = cb->here = cb->tmp;
|
||
|
cb->end = &cb->tmp[sizeof(cb->tmp)/sizeof(pl_wchar_t)];
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
add_charbuf(charbuf *cb, int c)
|
||
|
{ if ( cb->here < cb->end )
|
||
|
{ *cb->here++ = c;
|
||
|
} else
|
||
|
{ size_t len = (cb->end-cb->base);
|
||
|
|
||
|
if ( cb->base == cb->tmp )
|
||
|
{ pl_wchar_t *n = PL_malloc(len*2*sizeof(pl_wchar_t));
|
||
|
memcpy(n, cb->base, sizeof(cb->tmp));
|
||
|
cb->base = n;
|
||
|
} else
|
||
|
{ cb->base = PL_realloc(cb->base, len*2*sizeof(pl_wchar_t));
|
||
|
}
|
||
|
cb->here = &cb->base[len];
|
||
|
cb->end = &cb->base[len*2];
|
||
|
*cb->here++ = c;
|
||
|
}
|
||
|
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static void
|
||
|
free_charbuf(charbuf *cb)
|
||
|
{ if ( cb->base != cb->tmp )
|
||
|
PL_free(cb->base);
|
||
|
}
|
||
|
|
||
|
|
||
|
/** turtle_read_name(+C0, +Stream, -C, -Name) is semidet.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
|
||
|
{ int c;
|
||
|
charbuf b;
|
||
|
IOSTREAM *in;
|
||
|
|
||
|
if ( !PL_get_integer(C0, &c) )
|
||
|
return type_error(C0, "code");
|
||
|
if ( !wcis_name_start_char(c) )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( !PL_get_stream_handle(Stream, &in) )
|
||
|
return FALSE;
|
||
|
|
||
|
init_charbuf(&b);
|
||
|
add_charbuf(&b, c);
|
||
|
|
||
|
for(;;)
|
||
|
{ int c = Sgetcode(in);
|
||
|
|
||
|
if ( wcis_name_char(c) )
|
||
|
{ add_charbuf(&b, c);
|
||
|
} else
|
||
|
{ int rc = ( PL_unify_integer(C, c) &&
|
||
|
PL_unify_wchars(Name, PL_ATOM, b.here-b.base, b.base) );
|
||
|
|
||
|
free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
|
||
|
return rc;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
read_hN(IOSTREAM *in, int digits, int *value)
|
||
|
{ int d = digits;
|
||
|
int v = 0;
|
||
|
|
||
|
while ( d-- > 0 )
|
||
|
{ int c = Sgetcode(in);
|
||
|
|
||
|
if ( c >= '0' && c <= '9' )
|
||
|
v = (v<<4) + c - '0';
|
||
|
else if ( c >= 'A' && c <= 'F' )
|
||
|
v = (v<<4) + c + 10 - 'A';
|
||
|
else if ( c >= 'a' && c <= 'f' )
|
||
|
v = (v<<4) + c + 10 - 'a';
|
||
|
else
|
||
|
{ if ( digits == 4 )
|
||
|
return syntax_error("Illegal \\uNNNN in string");
|
||
|
else
|
||
|
return syntax_error("Illegal \\UNNNNNNNN in string");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*value = v;
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
string_escape(IOSTREAM *in, int c, int *value)
|
||
|
{ int esc;
|
||
|
|
||
|
switch(c)
|
||
|
{ case 'n': esc = '\n'; break;
|
||
|
case '"': esc = '"'; break;
|
||
|
case '\\':esc = '\\'; break;
|
||
|
case 't': esc = '\t'; break;
|
||
|
case 'r': esc = '\r'; break;
|
||
|
case 'u':
|
||
|
if ( !read_hN(in, 4, &esc) )
|
||
|
return FALSE;
|
||
|
break;
|
||
|
case 'U':
|
||
|
if ( !read_hN(in, 8, &esc) )
|
||
|
return FALSE;
|
||
|
break;
|
||
|
default:
|
||
|
return syntax_error("illegal escape in string");
|
||
|
}
|
||
|
|
||
|
*value = esc;
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/** turtle_read_string(+C0, +Stream, -C, -Value:atom) is semidet.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
|
||
|
{ int c;
|
||
|
charbuf b;
|
||
|
IOSTREAM *in;
|
||
|
int endlen = 1;
|
||
|
|
||
|
if ( !PL_get_integer(C0, &c) )
|
||
|
return type_error(C0, "code");
|
||
|
if ( c != '"' )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( !PL_get_stream_handle(Stream, &in) )
|
||
|
return FALSE;
|
||
|
|
||
|
init_charbuf(&b);
|
||
|
|
||
|
c = Sgetcode(in);
|
||
|
if ( c == '"' )
|
||
|
{ c = Sgetcode(in);
|
||
|
if ( c == '"' ) /* """...""" */
|
||
|
{ endlen = 3;
|
||
|
c = Sgetcode(in);
|
||
|
} else
|
||
|
{ PL_release_stream(in);
|
||
|
return (PL_unify_integer(C, c) &&
|
||
|
PL_unify_atom(Value, ATOM_));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for(;;c = Sgetcode(in))
|
||
|
{ if ( c == -1 )
|
||
|
{ free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
return syntax_error("eof_in_string");
|
||
|
} else if ( c == '"' )
|
||
|
{ int count = 1;
|
||
|
|
||
|
for(count=1; count<endlen; )
|
||
|
{ if ( (c=Sgetcode(in)) == '"' )
|
||
|
count++;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if ( count == endlen )
|
||
|
{ int rc;
|
||
|
|
||
|
c = Sgetcode(in);
|
||
|
rc = (PL_unify_integer(C, c) &&
|
||
|
PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
|
||
|
free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
return rc;
|
||
|
}
|
||
|
|
||
|
while(count-- > 0)
|
||
|
add_charbuf(&b, '"');
|
||
|
add_charbuf(&b, c);
|
||
|
} else if ( c == '\\' )
|
||
|
{ int esc;
|
||
|
|
||
|
c = Sgetcode(in);
|
||
|
if ( !string_escape(in, c, &esc) )
|
||
|
{ free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
return FALSE;
|
||
|
}
|
||
|
add_charbuf(&b, esc);
|
||
|
} else
|
||
|
{ add_charbuf(&b, c);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/** turtle_read_relative_uri(+C0, +Stream, -C, -Value:atom) is semidet.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
|
||
|
{ int c;
|
||
|
charbuf b;
|
||
|
IOSTREAM *in;
|
||
|
|
||
|
if ( !PL_get_integer(C0, &c) )
|
||
|
return type_error(C0, "code");
|
||
|
if ( c != '<' )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( !PL_get_stream_handle(Stream, &in) )
|
||
|
return FALSE;
|
||
|
|
||
|
init_charbuf(&b);
|
||
|
c = Sgetcode(in);
|
||
|
for(; ; c = Sgetcode(in))
|
||
|
{ if ( c == '>' )
|
||
|
{ int rc;
|
||
|
|
||
|
c = Sgetcode(in);
|
||
|
rc = (PL_unify_integer(C, c) &&
|
||
|
PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
|
||
|
PL_release_stream(in);
|
||
|
free_charbuf(&b);
|
||
|
return rc;
|
||
|
} else if ( c == '\\' )
|
||
|
{ int esc;
|
||
|
|
||
|
c = Sgetcode(in);
|
||
|
if ( c == '>' )
|
||
|
{ add_charbuf(&b, c);
|
||
|
} else if ( string_escape(in, c, &esc) )
|
||
|
{ add_charbuf(&b, esc);
|
||
|
} else
|
||
|
{ free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
return FALSE;
|
||
|
}
|
||
|
} else if ( c == -1 )
|
||
|
{ free_charbuf(&b);
|
||
|
PL_release_stream(in);
|
||
|
return syntax_error("eof_in_uri");
|
||
|
} else
|
||
|
{ add_charbuf(&b, c);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*******************************
|
||
|
* WRITING *
|
||
|
*******************************/
|
||
|
|
||
|
static int
|
||
|
ttl_put_uesc(IOSTREAM *s, int c)
|
||
|
{ if ( c <= 0xffff )
|
||
|
return Sfprintf(s, "\\u%04x", (unsigned)c);
|
||
|
else
|
||
|
return Sfprintf(s, "\\U%08x", (unsigned)c);
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
ttl_put_character(IOSTREAM *s, int c)
|
||
|
{ if ( c >= 32 && c <= 126 )
|
||
|
return Sputcode(c, s);
|
||
|
if ( c <= 31 )
|
||
|
return ttl_put_uesc(s, c);
|
||
|
if ( c >= 127 && c < 0x10ffff )
|
||
|
{ if ( s->encoding == ENC_ASCII )
|
||
|
return ttl_put_uesc(s, c);
|
||
|
if ( s->encoding == ENC_ISO_LATIN_1 && c > 255 )
|
||
|
return ttl_put_uesc(s, c);
|
||
|
return Sputcode(c, s);
|
||
|
}
|
||
|
|
||
|
representation_error("turtle_character");
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
ttl_put_echaracter(IOSTREAM *s, int c)
|
||
|
{ int c2;
|
||
|
|
||
|
switch(c)
|
||
|
{ case '\t': c2 = 't'; break;
|
||
|
case '\n': c2 = 'n'; break;
|
||
|
case '\r': c2 = 'r'; break;
|
||
|
default:
|
||
|
return ttl_put_character(s, c);
|
||
|
}
|
||
|
|
||
|
Sputcode('\\', s);
|
||
|
|
||
|
return Sputcode(c2, s);
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
ttl_put_scharacter(IOSTREAM *s, int c)
|
||
|
{ switch(c)
|
||
|
{ case '"':
|
||
|
Sputcode('\\', s);
|
||
|
return Sputcode('"', s);
|
||
|
case '\\':
|
||
|
Sputcode('\\', s);
|
||
|
return Sputcode('\\', s);
|
||
|
default:
|
||
|
return ttl_put_echaracter(s, c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_write_quoted_string(term_t Stream, term_t Value)
|
||
|
{ size_t len;
|
||
|
char *s;
|
||
|
pl_wchar_t *w;
|
||
|
IOSTREAM *out;
|
||
|
|
||
|
if ( !PL_get_stream_handle(Stream, &out) )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
|
||
|
{ const char *e = &s[len];
|
||
|
|
||
|
Sputcode('"', out);
|
||
|
for(; s<e; s++)
|
||
|
{ if ( ttl_put_scharacter(out, s[0]&0xff) < 0 )
|
||
|
break;
|
||
|
}
|
||
|
Sputcode('"', out);
|
||
|
return PL_release_stream(out);
|
||
|
} else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||
|
{ const pl_wchar_t *e = &w[len];
|
||
|
|
||
|
Sputcode('"', out);
|
||
|
for(; w<e; w++)
|
||
|
{ if ( ttl_put_scharacter(out, w[0]) < 0 )
|
||
|
break;
|
||
|
}
|
||
|
Sputcode('"', out);
|
||
|
return PL_release_stream(out);
|
||
|
} else
|
||
|
{ PL_release_stream(out);
|
||
|
return FALSE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
static int
|
||
|
ttl_put_ucharacter(IOSTREAM *s, int c)
|
||
|
{ switch(c)
|
||
|
{ case '>':
|
||
|
Sputcode('\\', s);
|
||
|
return Sputcode('>', s);
|
||
|
default:
|
||
|
return ttl_put_character(s, c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/** turtle_write_uri(+Stream, +URI) is det.
|
||
|
*/
|
||
|
|
||
|
static foreign_t
|
||
|
turtle_write_uri(term_t Stream, term_t Value)
|
||
|
{ size_t len;
|
||
|
char *s;
|
||
|
pl_wchar_t *w;
|
||
|
IOSTREAM *out;
|
||
|
|
||
|
if ( !PL_get_stream_handle(Stream, &out) )
|
||
|
return FALSE;
|
||
|
|
||
|
if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
|
||
|
{ const char *e = &s[len];
|
||
|
|
||
|
Sputcode('<', out);
|
||
|
for(; s<e; s++)
|
||
|
{ if ( ttl_put_ucharacter(out, s[0]&0xff) < 0 )
|
||
|
break;
|
||
|
}
|
||
|
Sputcode('>', out);
|
||
|
return PL_release_stream(out);
|
||
|
} else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||
|
{ const pl_wchar_t *e = &w[len];
|
||
|
|
||
|
Sputcode('<', out);
|
||
|
for(; w<e; w++)
|
||
|
{ if ( ttl_put_ucharacter(out, w[0]) < 0 )
|
||
|
break;
|
||
|
}
|
||
|
Sputcode('>', out);
|
||
|
return PL_release_stream(out);
|
||
|
} else
|
||
|
{ PL_release_stream(out);
|
||
|
return FALSE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* REGISTRATION *
|
||
|
*******************************/
|
||
|
|
||
|
#define MKFUNCTOR(n,a) \
|
||
|
FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
|
||
|
#define MKATOM(n) \
|
||
|
ATOM_ ## n = PL_new_atom(#n)
|
||
|
|
||
|
install_t
|
||
|
install_turtle()
|
||
|
{ MKFUNCTOR(error, 2);
|
||
|
MKFUNCTOR(type_error, 2);
|
||
|
MKFUNCTOR(syntax_error, 1);
|
||
|
MKFUNCTOR(representation_error, 1);
|
||
|
ATOM_ = PL_new_atom("");
|
||
|
|
||
|
PL_register_foreign("turtle_name_start_char",
|
||
|
1, turtle_name_start_char, 0);
|
||
|
PL_register_foreign("turtle_name", 1, turtle_name, 0);
|
||
|
PL_register_foreign("turtle_read_name", 4, turtle_read_name, 0);
|
||
|
PL_register_foreign("turtle_read_string", 4, turtle_read_string, 0);
|
||
|
PL_register_foreign("turtle_read_relative_uri",
|
||
|
4, turtle_read_relative_uri, 0);
|
||
|
PL_register_foreign("turtle_write_quoted_string",
|
||
|
2, turtle_write_quoted_string, 0);
|
||
|
PL_register_foreign("turtle_write_uri", 2, turtle_write_uri, 0);
|
||
|
}
|