This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/packages/semweb/turtle.c

609 lines
12 KiB
C

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker
E-mail: J.Wielemaker@cs.vu.nl
WWW: http://www.swi-prolog.org
Copyright (C): 1985-2009, VU University Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <SWI-Stream.h>
#include <SWI-Prolog.h>
#include <string.h>
#ifdef __WINDOWS__
#define inline __inline
#endif
#include "turtle_chars.c"
/*******************************
* ERRORS *
*******************************/
/* Handles for the Prolog atom and functors used by this file.  They are
   filled in by install_turtle() before any predicate can run. */
static atom_t ATOM_; /* the empty atom '' */
static functor_t FUNCTOR_error2; /* error/2 */
static functor_t FUNCTOR_type_error2; /* type_error/2 */
static functor_t FUNCTOR_syntax_error1; /* syntax_error/1 */
static functor_t FUNCTOR_representation_error1; /* representation_error/1 */
/* Raise error(type_error(Expected, Actual), _).

   `expected` is passed as the first argument of type_error/2 and
   `actual` as the second.  Returns whatever PL_raise_exception()
   returns, or FALSE if the error term could not be constructed.
*/
static int
type_error(term_t actual, const char *expected)
{ term_t ex = PL_new_term_ref();

  if ( !ex )
    return FALSE;

  if ( !PL_unify_term(ex,
		      PL_FUNCTOR, FUNCTOR_error2,
		        PL_FUNCTOR, FUNCTOR_type_error2,
		          PL_CHARS, expected,
		          PL_TERM, actual,
		        PL_VARIABLE) )
    return FALSE;

  return PL_raise_exception(ex);
}
/* Raise error(syntax_error(Culprit), _).

   Returns whatever PL_raise_exception() returns, or FALSE if the
   error term could not be constructed.
*/
static int
syntax_error(const char *culprit)
{ term_t ex = PL_new_term_ref();

  if ( !ex )
    return FALSE;

  if ( !PL_unify_term(ex,
		      PL_FUNCTOR, FUNCTOR_error2,
		        PL_FUNCTOR, FUNCTOR_syntax_error1,
		          PL_CHARS, culprit,
		        PL_VARIABLE) )
    return FALSE;

  return PL_raise_exception(ex);
}
/* Raise error(representation_error(Culprit), _).

   Returns whatever PL_raise_exception() returns, or FALSE if the
   error term could not be constructed.
*/
static int
representation_error(const char *culprit)
{ term_t ex = PL_new_term_ref();

  if ( !ex )
    return FALSE;

  if ( !PL_unify_term(ex,
		      PL_FUNCTOR, FUNCTOR_error2,
		        PL_FUNCTOR, FUNCTOR_representation_error1,
		          PL_CHARS, culprit,
		        PL_VARIABLE) )
    return FALSE;

  return PL_raise_exception(ex);
}
/*******************************
* PROLOG *
*******************************/
/* wcis_name_char(c) is non-zero (1) if the character code `c` may
   appear inside a Turtle name: it is either a name start character or
   a name extender character.  Returns 0 otherwise.
*/
static inline int
wcis_name_char(int c)
{ if ( wcis_name_start_char(c) )
    return 1;

  return wcis_name_extender_char(c) ? 1 : 0;
}
/** turtle_name_start_char(+Int) is semidet.

True if Int is a character code accepted by wcis_name_start_char().
Raises a type_error(code, Int) if Int is not an integer.
*/
static foreign_t
turtle_name_start_char(term_t Code)
{ int code;

  if ( !PL_get_integer(Code, &code) )
    return type_error(Code, "code");

  return wcis_name_start_char(code) ? TRUE : FALSE;
}
/** turtle_name(+Atom) is semidet.

True if the text of Atom forms a Turtle name: the first character
satisfies wcis_name_start_char() and every following character
satisfies wcis_name_char().

The narrow (char) representation is tried first, treating each byte
as a code in 0..255; if that is not available the wide representation
is used, and that conversion is asked to raise an exception on failure.
*/
static foreign_t
turtle_name(term_t name)
{ size_t len;
  size_t i;
  char *s;
  pl_wchar_t *w;

  if ( PL_get_nchars(name, &len, &s, CVT_ATOM) )
  { if ( !wcis_name_start_char(s[0]&0xff) )
      return FALSE;

    for(i=1; i<len; i++)
    { if ( !wcis_name_char(s[i]&0xff) )
	return FALSE;
    }

    return TRUE;
  }

  if ( !PL_get_wchars(name, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
    return FALSE;

  if ( !wcis_name_start_char(w[0]) )
    return FALSE;

  for(i=1; i<len; i++)
  { if ( !wcis_name_char(w[i]) )
      return FALSE;
  }

  return TRUE;
}
/* Growable buffer of wide characters.  It starts out using the inline
   `tmp` array and is moved to heap storage by add_charbuf() once that
   array is full. */
typedef struct charbuf
{ pl_wchar_t *base; /* start of the storage in use (tmp or heap) */
pl_wchar_t *here; /* position where the next character is written */
pl_wchar_t *end; /* one past the last usable slot */
pl_wchar_t tmp[256]; /* inline storage used until the first growth */
} charbuf;
/* Prepare `cb` as an empty buffer that uses its inline `tmp` array
   as storage. */
static void
init_charbuf(charbuf *cb)
{ cb->base = cb->tmp;
  cb->here = cb->tmp;
  cb->end  = cb->tmp + sizeof(cb->tmp)/sizeof(cb->tmp[0]);
}
/* Append character `c` to `cb`, doubling the capacity when the buffer
   is full.  The first growth copies the inline array to freshly
   allocated memory; later growths reallocate.  Always returns TRUE.
*/
static int
add_charbuf(charbuf *cb, int c)
{ if ( cb->here >= cb->end )			/* full: double the storage */
  { size_t used = (cb->end - cb->base);
    pl_wchar_t *grown;

    if ( cb->base == cb->tmp )
    { grown = PL_malloc(used*2*sizeof(pl_wchar_t));
      memcpy(grown, cb->base, sizeof(cb->tmp));
    } else
    { grown = PL_realloc(cb->base, used*2*sizeof(pl_wchar_t));
    }

    cb->base = grown;
    cb->here = &grown[used];
    cb->end  = &grown[used*2];
  }

  *cb->here++ = c;

  return TRUE;
}
/* Release the heap storage of `cb`, if any.  A buffer that still
   lives in its inline array owns nothing that needs freeing. */
static void
free_charbuf(charbuf *cb)
{ if ( cb->base == cb->tmp )
    return;

  PL_free(cb->base);
}
/** turtle_read_name(+C0, +Stream, -C, -Name) is semidet.

Read a Turtle name whose first character C0 has already been read.
Characters are taken from Stream for as long as they satisfy
wcis_name_char().  C is unified with the first character that does
not belong to the name and Name with an atom holding the name.

Fails if C0 is not a name start character or Stream is not a valid
stream.  Raises a type_error(code, C0) if C0 is not an integer.
*/
static foreign_t
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
{ int first;
  int next;
  int rc;
  charbuf b;
  IOSTREAM *in;

  if ( !PL_get_integer(C0, &first) )
    return type_error(C0, "code");
  if ( !wcis_name_start_char(first) )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);
  add_charbuf(&b, first);

  while ( wcis_name_char((next = Sgetcode(in))) )
    add_charbuf(&b, next);

  rc = ( PL_unify_integer(C, next) &&
	 PL_unify_wchars(Name, PL_ATOM, b.here-b.base, b.base) );

  free_charbuf(&b);
  PL_release_stream(in);

  return rc;
}
/* Read exactly `digits` hexadecimal digits from `in` and store the
   resulting code point in *value.

   Returns TRUE on success.  If a non-hex character (including end of
   file) is encountered, or the value exceeds the last Unicode code
   point U+10FFFF, a syntax error is raised and its result is returned;
   *value is left untouched in that case.

   The value is accumulated as unsigned: with 8 digits, shifting a
   signed int can overflow, which is undefined behaviour in C.
*/
static int
read_hN(IOSTREAM *in, int digits, int *value)
{ unsigned int v = 0;
  int d;

  for(d = 0; d < digits; d++)
  { int c = Sgetcode(in);
    unsigned int nibble;

    if ( c >= '0' && c <= '9' )
      nibble = (unsigned int)(c - '0');
    else if ( c >= 'A' && c <= 'F' )
      nibble = (unsigned int)(c - 'A' + 10);
    else if ( c >= 'a' && c <= 'f' )
      nibble = (unsigned int)(c - 'a' + 10);
    else
      goto illegal;

    v = (v<<4) | nibble;
  }

  if ( v > 0x10ffff )			/* not a Unicode code point */
    goto illegal;

  *value = (int)v;
  return TRUE;

illegal:
  if ( digits == 4 )
    return syntax_error("Illegal \\uNNNN in string");
  else
    return syntax_error("Illegal \\UNNNNNNNN in string");
}
/* Decode the escape sequence whose character after the backslash is
   `c`.  For \u and \U the hexadecimal digits are read from `in`.

   On success the decoded character is stored in *value and TRUE is
   returned.  For an unknown escape a syntax error is raised and its
   result returned; if the hex digits cannot be read FALSE is returned.
   *value is not modified on failure.
*/
static int
string_escape(IOSTREAM *in, int c, int *value)
{ int decoded;

  if ( c == 'u' || c == 'U' )
  { int ndigits = (c == 'u' ? 4 : 8);

    if ( !read_hN(in, ndigits, &decoded) )
      return FALSE;
  } else if ( c == 'n' )
  { decoded = '\n';
  } else if ( c == '"' )
  { decoded = '"';
  } else if ( c == '\\' )
  { decoded = '\\';
  } else if ( c == 't' )
  { decoded = '\t';
  } else if ( c == 'r' )
  { decoded = '\r';
  } else
  { return syntax_error("illegal escape in string");
  }

  *value = decoded;

  return TRUE;
}
/** turtle_read_string(+C0, +Stream, -C, -Value:atom) is semidet.

Read a Turtle string whose opening double quote C0 has already been
read.  Both the short form "..." and the long form """...""" are
handled.  Value is unified with an atom holding the decoded text and
C with the character that follows the closing quote(s).

Fails if C0 is not a double quote or Stream is not a valid stream.
Raises a type_error(code, C0) if C0 is not an integer and a syntax
error on end of file inside the string or on an illegal escape.
*/
static foreign_t
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
{ int c;
  charbuf b;
  IOSTREAM *in;
  int endlen = 1;			/* number of quotes that close the string */

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( c != '"' )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);

  c = Sgetcode(in);
  if ( c == '"' )
  { c = Sgetcode(in);
    if ( c == '"' )			/* """...""" */
    { endlen = 3;
      c = Sgetcode(in);
    } else				/* "" followed by c: empty string */
    { PL_release_stream(in);
      return (PL_unify_integer(C, c) &&
	      PL_unify_atom(Value, ATOM_));
    }
  }

  for(;;)
  { if ( c == -1 )
    { free_charbuf(&b);
      PL_release_stream(in);
      return syntax_error("eof_in_string");
    } else if ( c == '"' )
    { int count = 1;

      while ( count < endlen && (c=Sgetcode(in)) == '"' )
	count++;

      if ( count == endlen )		/* closing quote(s) complete */
      { int rc;

	c = Sgetcode(in);
	rc = (PL_unify_integer(C, c) &&
	      PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
	free_charbuf(&b);
	PL_release_stream(in);
	return rc;
      }

      /* Fewer quotes than needed to close a long string: they are
	 content.  `c` now holds the first non-quote character, which
	 must go through the normal dispatch so that a backslash escape
	 is decoded and end of file is reported, rather than being
	 copied verbatim. */
      while(count-- > 0)
	add_charbuf(&b, '"');
      continue;
    } else if ( c == '\\' )
    { int esc;

      c = Sgetcode(in);
      if ( !string_escape(in, c, &esc) )
      { free_charbuf(&b);
	PL_release_stream(in);
	return FALSE;
      }
      add_charbuf(&b, esc);
    } else
    { add_charbuf(&b, c);
    }

    c = Sgetcode(in);
  }
}
/** turtle_read_relative_uri(+C0, +Stream, -C, -Value:atom) is semidet.

Read a URI reference whose opening '<' (C0) has already been read.
Characters up to the closing '>' are collected; `\>` yields a literal
'>' and any other backslash sequence is decoded by string_escape().
Value is unified with an atom holding the text and C with the
character following the closing '>'.

Fails if C0 is not '<' or Stream is not a valid stream.  Raises a
type_error(code, C0) if C0 is not an integer and a syntax error on end
of file inside the URI or on an illegal escape.
*/
static foreign_t
turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
{ int c;
  charbuf b;
  IOSTREAM *in;

  if ( !PL_get_integer(C0, &c) )
    return type_error(C0, "code");
  if ( c != '<' )
    return FALSE;
  if ( !PL_get_stream_handle(Stream, &in) )
    return FALSE;

  init_charbuf(&b);

  for(c = Sgetcode(in); ; c = Sgetcode(in))
  { switch(c)
    { case '>':				/* end of the URI */
      { int next = Sgetcode(in);
	int rc = (PL_unify_integer(C, next) &&
		  PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));

	PL_release_stream(in);
	free_charbuf(&b);
	return rc;
      }
      case '\\':
      { int esc;

	c = Sgetcode(in);
	if ( c == '>' )
	{ add_charbuf(&b, c);
	} else if ( string_escape(in, c, &esc) )
	{ add_charbuf(&b, esc);
	} else
	{ free_charbuf(&b);
	  PL_release_stream(in);
	  return FALSE;
	}
	break;
      }
      case -1:				/* end of file */
	free_charbuf(&b);
	PL_release_stream(in);
	return syntax_error("eof_in_uri");
      default:
	add_charbuf(&b, c);
	break;
    }
  }
}
/*******************************
* WRITING *
*******************************/
/* Write `c` to `s` as a numeric escape: \uXXXX when it fits in four
   hex digits, \UXXXXXXXX otherwise.  Returns the result of Sfprintf().
*/
static int
ttl_put_uesc(IOSTREAM *s, int c)
{ const char *fmt = (c <= 0xffff ? "\\u%04x" : "\\U%08x");

  return Sfprintf(s, fmt, (unsigned)c);
}
/* Write character code `c` to `s`, using a numeric escape where the
   character cannot be written directly:

     - control characters (0..31) are always escaped;
     - 32..126 are written as-is;
     - 127 and above are escaped if the stream encoding is ENC_ASCII,
       or if it is ENC_ISO_LATIN_1 and the code is above 255; otherwise
       they are written as-is.

   Codes outside 0..0x10FFFF are not Unicode code points: a
   representation error is raised and -1 is returned.  The upper bound
   is inclusive (U+10FFFF is the last code point) and negative codes
   are rejected rather than being escaped as a malformed \u sequence.

   Returns a negative value on failure.
*/
static int
ttl_put_character(IOSTREAM *s, int c)
{ if ( c < 0 || c > 0x10ffff )
  { representation_error("turtle_character");
    return -1;
  }

  if ( c <= 31 )
    return ttl_put_uesc(s, c);
  if ( c <= 126 )
    return Sputcode(c, s);

  if ( s->encoding == ENC_ASCII )
    return ttl_put_uesc(s, c);
  if ( s->encoding == ENC_ISO_LATIN_1 && c > 255 )
    return ttl_put_uesc(s, c);

  return Sputcode(c, s);
}
/* Write `c` to `s`, emitting tab, newline and carriage return as the
   two-character escapes \t, \n and \r.  Every other character is
   handed to ttl_put_character().  Returns the result of the last
   write.
*/
static int
ttl_put_echaracter(IOSTREAM *s, int c)
{ int letter;

  if ( c == '\t' )
    letter = 't';
  else if ( c == '\n' )
    letter = 'n';
  else if ( c == '\r' )
    letter = 'r';
  else
    return ttl_put_character(s, c);

  Sputcode('\\', s);
  return Sputcode(letter, s);
}
/* Write `c` to `s` as a character inside a quoted string: a double
   quote or backslash is preceded by a backslash, everything else is
   handed to ttl_put_echaracter().  Returns the result of the last
   write.
*/
static int
ttl_put_scharacter(IOSTREAM *s, int c)
{ if ( c == '"' || c == '\\' )
  { Sputcode('\\', s);
    return Sputcode(c, s);
  }

  return ttl_put_echaracter(s, c);
}
/** turtle_write_quoted_string(+Stream, +Value) is det.

Write Value (an atom or string) to Stream as a double-quoted Turtle
string, escaping characters through ttl_put_scharacter().

The narrow representation of Value is tried first; if that is not
available the wide representation is used.  Both conversions accept
atoms and strings, so text is handled the same way whether or not it
contains wide characters.

If writing a character fails, the stream is released and the
predicate fails without writing the closing quote.  The predicate
therefore never reports success while an error raised by the
character writer is pending.
*/
static foreign_t
turtle_write_quoted_string(term_t Stream, term_t Value)
{ size_t len;
  char *s;
  pl_wchar_t *w;
  IOSTREAM *out;

  if ( !PL_get_stream_handle(Stream, &out) )
    return FALSE;

  if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
  { const char *e = &s[len];

    Sputcode('"', out);
    for(; s<e; s++)
    { if ( ttl_put_scharacter(out, s[0]&0xff) < 0 )
	goto error;
    }
    Sputcode('"', out);

    return PL_release_stream(out);
  } else if ( PL_get_wchars(Value, &len, &w,
			    CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
  { const pl_wchar_t *e = &w[len];

    Sputcode('"', out);
    for(; w<e; w++)
    { if ( ttl_put_scharacter(out, w[0]) < 0 )
	goto error;
    }
    Sputcode('"', out);

    return PL_release_stream(out);
  }

error:					/* conversion or write failed */
  PL_release_stream(out);
  return FALSE;
}
/* Write `c` to `s` as a character inside a <...> URI reference: '>'
   is written as \>, everything else is handed to ttl_put_character().
   Returns the result of the last write.
*/
static int
ttl_put_ucharacter(IOSTREAM *s, int c)
{ if ( c != '>' )
    return ttl_put_character(s, c);

  Sputcode('\\', s);
  return Sputcode('>', s);
}
/** turtle_write_uri(+Stream, +URI) is det.

Write URI (an atom or string) to Stream enclosed in '<' and '>',
escaping characters through ttl_put_ucharacter().

The narrow representation of URI is tried first; if that is not
available the wide representation is used.  Both conversions accept
atoms and strings, so text is handled the same way whether or not it
contains wide characters.

If writing a character fails, the stream is released and the
predicate fails without writing the closing '>'.  The predicate
therefore never reports success while an error raised by the
character writer is pending.
*/
static foreign_t
turtle_write_uri(term_t Stream, term_t Value)
{ size_t len;
  char *s;
  pl_wchar_t *w;
  IOSTREAM *out;

  if ( !PL_get_stream_handle(Stream, &out) )
    return FALSE;

  if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
  { const char *e = &s[len];

    Sputcode('<', out);
    for(; s<e; s++)
    { if ( ttl_put_ucharacter(out, s[0]&0xff) < 0 )
	goto error;
    }
    Sputcode('>', out);

    return PL_release_stream(out);
  } else if ( PL_get_wchars(Value, &len, &w,
			    CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
  { const pl_wchar_t *e = &w[len];

    Sputcode('<', out);
    for(; w<e; w++)
    { if ( ttl_put_ucharacter(out, w[0]) < 0 )
	goto error;
    }
    Sputcode('>', out);

    return PL_release_stream(out);
  }

error:					/* conversion or write failed */
  PL_release_stream(out);
  return FALSE;
}
/*******************************
* REGISTRATION *
*******************************/
/* MKFUNCTOR(n, a) assigns the functor n/a to the variable FUNCTOR_<n><a>,
   e.g. MKFUNCTOR(error, 2) sets FUNCTOR_error2. */
#define MKFUNCTOR(n,a) \
FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
/* MKATOM(n) assigns the atom n to the variable ATOM_<n>. */
#define MKATOM(n) \
ATOM_ ## n = PL_new_atom(#n)
install_t
install_turtle()
{ MKFUNCTOR(error, 2);
MKFUNCTOR(type_error, 2);
MKFUNCTOR(syntax_error, 1);
MKFUNCTOR(representation_error, 1);
ATOM_ = PL_new_atom("");
PL_register_foreign("turtle_name_start_char",
1, turtle_name_start_char, 0);
PL_register_foreign("turtle_name", 1, turtle_name, 0);
PL_register_foreign("turtle_read_name", 4, turtle_read_name, 0);
PL_register_foreign("turtle_read_string", 4, turtle_read_string, 0);
PL_register_foreign("turtle_read_relative_uri",
4, turtle_read_relative_uri, 0);
PL_register_foreign("turtle_write_quoted_string",
2, turtle_write_quoted_string, 0);
PL_register_foreign("turtle_write_uri", 2, turtle_write_uri, 0);
}