/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@cs.vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2009, VU University Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/* NOTE(review): the three #include directives below carry no header name in
   this copy of the file.  The names look like they were lost together with
   other text that sat between angle brackets.  Restore them from the
   upstream source before compiling. */
#include
#include
#include

#ifdef __WINDOWS__
#define inline __inline
#endif

#include "turtle_chars.c"

                 /*******************************
                 *             ERRORS           *
                 *******************************/

/* Atom and functor handles used by this file.  All of them are filled in by
   install_turtle(); ATOM_ is created there from the empty text. */
static atom_t ATOM_;
static functor_t FUNCTOR_error2;
static functor_t FUNCTOR_type_error2;
static functor_t FUNCTOR_syntax_error1;
static functor_t FUNCTOR_representation_error1;

/* Raise error(type_error(Expected, Actual), _).

   actual   is the offending term.
   expected is the text placed in the first argument of type_error/2.

   Returns the result of PL_raise_exception(), or FALSE when the exception
   term could not be created or unified. */
static int
type_error(term_t actual, const char *expected)
{ term_t ex;

  if ( (ex = PL_new_term_ref()) &&
       PL_unify_term(ex,
                     PL_FUNCTOR, FUNCTOR_error2,
                       PL_FUNCTOR, FUNCTOR_type_error2,
                         PL_CHARS, expected,
                         PL_TERM, actual,
                       PL_VARIABLE) )
    return PL_raise_exception(ex);

  return FALSE;
}

/* Raise error(syntax_error(Culprit), _) where Culprit is built from the
   given text.  Return value as for type_error(). */
static int
syntax_error(const char *culprit)
{ term_t ex;

  if ( (ex = PL_new_term_ref()) &&
       PL_unify_term(ex,
                     PL_FUNCTOR, FUNCTOR_error2,
                       PL_FUNCTOR, FUNCTOR_syntax_error1,
                         PL_CHARS, culprit,
                       PL_VARIABLE) )
    return PL_raise_exception(ex);

  return FALSE;
}

/* Raise error(representation_error(Culprit), _) where Culprit is built from
   the given text.  Return value as for type_error(). */
static int
representation_error(const char *culprit)
{ term_t ex;

  if ( (ex = PL_new_term_ref()) &&
       PL_unify_term(ex,
                     PL_FUNCTOR, FUNCTOR_error2,
                       PL_FUNCTOR, FUNCTOR_representation_error1,
                         PL_CHARS, culprit,
                       PL_VARIABLE) )
    return PL_raise_exception(ex);

  return FALSE;
}

                 /*******************************
                 *            PROLOG            *
                 *******************************/

/* True when c may appear inside a name: it is either a name start character
   or a name extender character.  Both predicates are not defined in the
   visible part of this file; presumably they come from turtle_chars.c
   (TODO confirm). */
static inline int
wcis_name_char(int c)
{ return wcis_name_start_char(c) ||
         wcis_name_extender_char(c);
}

/** turtle_name_start_char(+Int) is semidet.

Succeeds when Int satisfies wcis_name_start_char().  Raises a type error
(expected: code) when Int is not an integer.
*/
static foreign_t
turtle_name_start_char(term_t Code)
{ int c;

  if ( !PL_get_integer(Code, &c) )
    return type_error(Code, "code");
  if ( !wcis_name_start_char(c) )
    return FALSE;

  return TRUE;
}

/** turtle_name(+Atom) is semidet.

True if Atom is a valid Turtle identifier.

NOTE(review): this definition is incomplete in this copy of the file.  The
text breaks off inside the for-loop header below and resumes in what looks
like the body of init_charbuf() (it is called under that name by the readers
in this file).  The rest of turtle_name(), the declaration of the charbuf
type and the head of init_charbuf() are missing and must be restored from
the upstream source.
*/
static foreign_t
turtle_name(term_t name)
{ char *s;
  pl_wchar_t *w;
  size_t len;

  if ( PL_get_nchars(name, &len, &s, CVT_ATOM) )
  { const char *e = &s[len];

    /* the first character must be a name start character; the byte is
       masked to 0..255 before the test */
    if ( !wcis_name_start_char(s[0]&0xff) )
      return FALSE;
    /* text is lost from here on, see the note above */
    for(s++; sbase = cb->here = cb->tmp;
  /* end points one past the last element of the embedded tmp array */
  cb->end = &cb->tmp[sizeof(cb->tmp)/sizeof(pl_wchar_t)];
}

/* Append character c to the buffer.

   While cb->here is below cb->end the character is simply stored.
   Otherwise the capacity is doubled first: the first time the content of
   the embedded tmp array is copied into memory obtained from PL_malloc(),
   later growth uses PL_realloc().  The results of PL_malloc() and
   PL_realloc() are not checked here.

   Always returns TRUE. */
static int
add_charbuf(charbuf *cb, int c)
{ if ( cb->here < cb->end )
  { *cb->here++ = c;
  } else
  { size_t len = (cb->end-cb->base);    /* current capacity in characters */

    if ( cb->base == cb->tmp )          /* still using the embedded array */
    { pl_wchar_t *n = PL_malloc(len*2*sizeof(pl_wchar_t));
      memcpy(n, cb->base, sizeof(cb->tmp));
      cb->base = n;
    } else
    { cb->base = PL_realloc(cb->base, len*2*sizeof(pl_wchar_t));
    }
    cb->here = &cb->base[len];          /* buffer was full: resume at len */
    cb->end = &cb->base[len*2];
    *cb->here++ = c;
  }

  return TRUE;
}

/* Release the storage of the buffer when it no longer points at the
   embedded tmp array. */
static void
free_charbuf(charbuf *cb)
{ if ( cb->base != cb->tmp )
    PL_free(cb->base);
}

/** turtle_read_name(+C0, +Stream, -C, -Name) is semidet.
*/

/* Implementation of turtle_read_name/4.
 *
 * C0 is treated as the first character of the name; it must be an integer
 * (otherwise a type error is raised) and a name start character (otherwise
 * the predicate fails without touching the stream).  Further characters are
 * read from Stream for as long as they are name characters.  The first
 * character that is not a name character is unified with C, and the
 * collected text is unified with Name as an atom.
 */
static foreign_t
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
{
  int c;
  charbuf b;
  IOSTREAM *in;

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( !wcis_name_start_char(c) )
    return FALSE;

  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);
  add_charbuf(&b, c);

  for (;;) {
    int rc;

    c = Sgetcode(in);
    if ( wcis_name_char(c) ) {
      add_charbuf(&b, c);
      continue;
    }

    /* c ends the name: hand it back together with the collected text */
    rc = ( PL_unify_integer(C, c) &&
           PL_unify_wchars(Name, PL_ATOM, b.here-b.base, b.base) );
    free_charbuf(&b);
    PL_release_stream(in);
    return rc;
  }
}

/* Read exactly `digits` hexadecimal digits from `in` and store the resulting
 * number in *value.
 *
 * The value is accumulated in an unsigned variable: with eight digits a
 * signed accumulator would be shifted into its sign bit, which is undefined
 * behaviour in C.  Values above 0x10ffff are rejected because they cannot be
 * a Unicode code point.
 *
 * Returns TRUE on success.  On a non-hex character (including end of file)
 * or an out-of-range value a syntax error is raised and its result is
 * returned; *value is left untouched in that case.
 */
static int
read_hN(IOSTREAM *in, int digits, int *value)
{
  unsigned int v = 0;
  int ok = TRUE;
  int d;

  for (d = 0; d < digits; d++) {
    int c = Sgetcode(in);
    unsigned int nibble;

    if ( c >= '0' && c <= '9' ) {
      nibble = (unsigned int)(c - '0');
    } else if ( c >= 'A' && c <= 'F' ) {
      nibble = (unsigned int)(c - 'A' + 10);
    } else if ( c >= 'a' && c <= 'f' ) {
      nibble = (unsigned int)(c - 'a' + 10);
    } else {
      ok = FALSE;
      break;
    }
    v = (v << 4) | nibble;
  }

  if ( !ok || v > 0x10ffff ) {
    if ( digits == 4 )
      return syntax_error("Illegal \\uNNNN in string");
    else
      return syntax_error("Illegal \\UNNNNNNNN in string");
  }

  *value = (int)v;
  return TRUE;
}

/* Translate the character `c` that followed a backslash into the character
 * it denotes and store that in *value.
 *
 * Handles \n, \", \\, \t and \r directly; \u and \U read 4 respectively 8
 * hex digits from `in` through read_hN().  Any other character raises a
 * syntax error.  Returns TRUE on success and the (false) result of the
 * failing call otherwise.
 */
static int
string_escape(IOSTREAM *in, int c, int *value)
{
  int esc;

  switch (c) {
    case 'n':
      esc = '\n';
      break;
    case '"':
      esc = '"';
      break;
    case '\\':
      esc = '\\';
      break;
    case 't':
      esc = '\t';
      break;
    case 'r':
      esc = '\r';
      break;
    case 'u':
      if ( !read_hN(in, 4, &esc) )
        return FALSE;
      break;
    case 'U':
      if ( !read_hN(in, 8, &esc) )
        return FALSE;
      break;
    default:
      return syntax_error("illegal escape in string");
  }

  *value = esc;
  return TRUE;
}

/** turtle_read_string(+C0, +Stream, -C, -Value:atom) is semidet.
*/

/* Implementation of turtle_read_string/4.
 *
 * C0 must be an integer (otherwise a type error is raised) and must be the
 * double quote character (otherwise the predicate fails without touching
 * the stream).  The characters that follow are read from Stream:
 *
 *   - Two double quotes followed by something else form the empty string:
 *     C is unified with that following character and Value with ATOM_.
 *   - Three double quotes open a long string; endlen is set to 3.
 *   - End of file inside the string raises syntax_error(eof_in_string).
 *   - A backslash is decoded through string_escape(); when that fails the
 *     predicate returns FALSE after cleaning up.
 *
 * NOTE(review): the branch that handles a double quote inside the string is
 * incomplete in this copy of the file (see the marker in the body), so the
 * way the closing delimiter is recognised and the result is unified cannot
 * be read from here.
 */
static foreign_t
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
{ int c;
  charbuf b;
  IOSTREAM *in;
  int endlen = 1;                       /* 1 by default, 3 after an opening
                                           triple quote */

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( c != '"' )                       /* not a string: plain failure */
    return FALSE;

  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);

  c = Sgetcode(in);
  if ( c == '"' )
  { c = Sgetcode(in);
    if ( c == '"' )                     /* """...""" */
    { endlen = 3;
      c = Sgetcode(in);
    } else
    { /* empty string; nothing was added to b, so only the stream needs
         to be released */
      PL_release_stream(in);
      return (PL_unify_integer(C, c) &&
              PL_unify_atom(Value, ATOM_));
    }
  }

  for(;;c = Sgetcode(in))
  { if ( c == -1 )                      /* end of file */
    { free_charbuf(&b);
      PL_release_stream(in);
      return syntax_error("eof_in_string");
    } else if ( c == '"' )
    { int count = 1;

      /* NOTE(review): text is lost between the loop header and the
         statements below; restore this branch from the upstream source */
      for(count=1; count 0)
        add_charbuf(&b, '"');
      add_charbuf(&b, c);
    } else if ( c == '\\' )
    { int esc;

      c = Sgetcode(in);
      if ( !string_escape(in, c, &esc) )
      { free_charbuf(&b);
        PL_release_stream(in);
        return FALSE;
      }
      add_charbuf(&b, esc);
    } else
    { add_charbuf(&b, c);
    }
  }
}

/** turtle_read_relative_uri(+C0, +Stream, -C, -Value:atom) is semidet.
*/ static foreign_t turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value) { int c; charbuf b; IOSTREAM *in; if ( !PL_get_integer(C0, &c) ) return type_error(C0, "code"); if ( c != '<' ) return FALSE; if ( !PL_get_stream_handle(Stream, &in) ) return FALSE; init_charbuf(&b); c = Sgetcode(in); for(; ; c = Sgetcode(in)) { if ( c == '>' ) { int rc; c = Sgetcode(in); rc = (PL_unify_integer(C, c) && PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base)); PL_release_stream(in); free_charbuf(&b); return rc; } else if ( c == '\\' ) { int esc; c = Sgetcode(in); if ( c == '>' ) { add_charbuf(&b, c); } else if ( string_escape(in, c, &esc) ) { add_charbuf(&b, esc); } else { free_charbuf(&b); PL_release_stream(in); return FALSE; } } else if ( c == -1 ) { free_charbuf(&b); PL_release_stream(in); return syntax_error("eof_in_uri"); } else { add_charbuf(&b, c); } } } /******************************* * WRITING * *******************************/ static int ttl_put_uesc(IOSTREAM *s, int c) { if ( c <= 0xffff ) return Sfprintf(s, "\\u%04x", (unsigned)c); else return Sfprintf(s, "\\U%08x", (unsigned)c); } static int ttl_put_character(IOSTREAM *s, int c) { if ( c >= 32 && c <= 126 ) return Sputcode(c, s); if ( c <= 31 ) return ttl_put_uesc(s, c); if ( c >= 127 && c < 0x10ffff ) { if ( s->encoding == ENC_ASCII ) return ttl_put_uesc(s, c); if ( s->encoding == ENC_ISO_LATIN_1 && c > 255 ) return ttl_put_uesc(s, c); return Sputcode(c, s); } representation_error("turtle_character"); return -1; } static int ttl_put_echaracter(IOSTREAM *s, int c) { int c2; switch(c) { case '\t': c2 = 't'; break; case '\n': c2 = 'n'; break; case '\r': c2 = 'r'; break; default: return ttl_put_character(s, c); } Sputcode('\\', s); return Sputcode(c2, s); } static int ttl_put_scharacter(IOSTREAM *s, int c) { switch(c) { case '"': Sputcode('\\', s); return Sputcode('"', s); case '\\': Sputcode('\\', s); return Sputcode('\\', s); default: return ttl_put_echaracter(s, c); } } static foreign_t 
/* turtle_write_quoted_string(+Stream, +Value)

   Registered by install_turtle() with arity 2.  The function obtains the
   output stream from Stream (plain failure when that is not possible),
   fetches the text of Value and starts by writing the opening double quote.

   NOTE(review): from the for-loop header below onwards this copy of the
   file is damaged.  Text is missing in several places, apparently wherever
   a less-than sign was followed by other text up to a greater-than sign.
   Affected are the rest of this function, the start of a helper that
   escapes a greater-than sign and otherwise calls ttl_put_character(), and
   both output loops of turtle_write_uri().  The damaged remainder is kept
   exactly as found on a single line; restore it from the upstream source.

   The same line ends with the registration code: the MKFUNCTOR and MKATOM
   macros and install_turtle(), which creates the functors and the empty
   atom used by the error helpers and registers the seven foreign
   predicates of this file. */
turtle_write_quoted_string(term_t Stream, term_t Value)
{ size_t len;
  char *s;
  pl_wchar_t *w;
  IOSTREAM *out;

  if ( !PL_get_stream_handle(Stream, &out) )
    return FALSE;

  if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
  { const char *e = &s[len];            /* one past the last byte of the text */

    Sputcode('"', out);                 /* opening quote */
    /* damaged text starts here, see the note at the top of this function */
    for(; s': Sputcode('\\', s); return Sputcode('>', s); default: return ttl_put_character(s, c); } } /** turtle_write_uri(+Stream, +URI) is det. */ static foreign_t turtle_write_uri(term_t Stream, term_t Value) { size_t len; char *s; pl_wchar_t *w; IOSTREAM *out; if ( !PL_get_stream_handle(Stream, &out) ) return FALSE; if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) ) { const char *e = &s[len]; Sputcode('<', out); for(; s', out); return PL_release_stream(out); } else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) ) { const pl_wchar_t *e = &w[len]; Sputcode('<', out); for(; w', out); return PL_release_stream(out); } else { PL_release_stream(out); return FALSE; } } /******************************* * REGISTRATION * *******************************/ #define MKFUNCTOR(n,a) \ FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a) #define MKATOM(n) \ ATOM_ ## n = PL_new_atom(#n) install_t install_turtle() { MKFUNCTOR(error, 2); MKFUNCTOR(type_error, 2); MKFUNCTOR(syntax_error, 1); MKFUNCTOR(representation_error, 1); ATOM_ = PL_new_atom(""); PL_register_foreign("turtle_name_start_char", 1, turtle_name_start_char, 0); PL_register_foreign("turtle_name", 1, turtle_name, 0); PL_register_foreign("turtle_read_name", 4, turtle_read_name, 0); PL_register_foreign("turtle_read_string", 4, turtle_read_string, 0); PL_register_foreign("turtle_read_relative_uri", 4, turtle_read_relative_uri, 0); PL_register_foreign("turtle_write_quoted_string", 2, turtle_write_quoted_string, 0); PL_register_foreign("turtle_write_uri", 2, turtle_write_uri, 0); }