semweb and http compile now (but they don't work properly yet).
This commit is contained in:
608
packages/semweb/turtle.c
Normal file
608
packages/semweb/turtle.c
Normal file
@@ -0,0 +1,608 @@
|
||||
/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@cs.vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2009, VU University Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
|
||||
|
||||
#include <SWI-Stream.h>
|
||||
#include <SWI-Prolog.h>
|
||||
#include <string.h>
|
||||
#ifdef __WINDOWS__
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
#include "turtle_chars.c"
|
||||
|
||||
/*******************************
|
||||
* ERRORS *
|
||||
*******************************/
|
||||
|
||||
static atom_t ATOM_;
|
||||
static functor_t FUNCTOR_error2;
|
||||
static functor_t FUNCTOR_type_error2;
|
||||
static functor_t FUNCTOR_syntax_error1;
|
||||
static functor_t FUNCTOR_representation_error1;
|
||||
|
||||
static int
|
||||
type_error(term_t actual, const char *expected)
|
||||
{ term_t ex;
|
||||
|
||||
if ( (ex = PL_new_term_ref()) &&
|
||||
PL_unify_term(ex,
|
||||
PL_FUNCTOR, FUNCTOR_error2,
|
||||
PL_FUNCTOR, FUNCTOR_type_error2,
|
||||
PL_CHARS, expected,
|
||||
PL_TERM, actual,
|
||||
PL_VARIABLE) )
|
||||
return PL_raise_exception(ex);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
syntax_error(const char *culprit)
|
||||
{ term_t ex;
|
||||
|
||||
if ( (ex = PL_new_term_ref()) &&
|
||||
PL_unify_term(ex,
|
||||
PL_FUNCTOR, FUNCTOR_error2,
|
||||
PL_FUNCTOR, FUNCTOR_syntax_error1,
|
||||
PL_CHARS, culprit,
|
||||
PL_VARIABLE) )
|
||||
return PL_raise_exception(ex);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
representation_error(const char *culprit)
|
||||
{ term_t ex;
|
||||
|
||||
if ( (ex = PL_new_term_ref()) &&
|
||||
PL_unify_term(ex,
|
||||
PL_FUNCTOR, FUNCTOR_error2,
|
||||
PL_FUNCTOR, FUNCTOR_representation_error1,
|
||||
PL_CHARS, culprit,
|
||||
PL_VARIABLE) )
|
||||
return PL_raise_exception(ex);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*******************************
|
||||
* PROLOG *
|
||||
*******************************/
|
||||
|
||||
/** turtle_name(+Atom) is semidet.
|
||||
|
||||
True if Atom is a valid Turtle identifier
|
||||
*/
|
||||
|
||||
/* Non-zero (1) if c is either a name-start character or a
   name-extender character, 0 otherwise.
*/
static inline int
wcis_name_char(int c)
{ if ( wcis_name_start_char(c) )
    return 1;

  return wcis_name_extender_char(c) ? 1 : 0;
}
|
||||
|
||||
/** turtle_name_start_char(+Int) is semidet.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_name_start_char(term_t Code)
|
||||
{ int c;
|
||||
|
||||
if ( !PL_get_integer(Code, &c) )
|
||||
return type_error(Code, "code");
|
||||
if ( !wcis_name_start_char(c) )
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/** turtle_name(+Atom) is semidet.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_name(term_t name)
|
||||
{ char *s;
|
||||
pl_wchar_t *w;
|
||||
size_t len;
|
||||
|
||||
if ( PL_get_nchars(name, &len, &s, CVT_ATOM) )
|
||||
{ const char *e = &s[len];
|
||||
|
||||
if ( !wcis_name_start_char(s[0]&0xff) )
|
||||
return FALSE;
|
||||
for(s++; s<e; s++)
|
||||
{ if ( !wcis_name_char(s[0]&0xff) )
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
} else if ( PL_get_wchars(name, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||||
{ const pl_wchar_t *e = &w[len];
|
||||
|
||||
if ( !wcis_name_start_char(w[0]) )
|
||||
return FALSE;
|
||||
for(w++; w<e; w++)
|
||||
{ if ( !wcis_name_char(w[0]) )
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
} else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
typedef struct charbuf
|
||||
{ pl_wchar_t *base;
|
||||
pl_wchar_t *here;
|
||||
pl_wchar_t *end;
|
||||
pl_wchar_t tmp[256];
|
||||
} charbuf;
|
||||
|
||||
|
||||
static void
|
||||
init_charbuf(charbuf *cb)
|
||||
{ cb->base = cb->here = cb->tmp;
|
||||
cb->end = &cb->tmp[sizeof(cb->tmp)/sizeof(pl_wchar_t)];
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
add_charbuf(charbuf *cb, int c)
|
||||
{ if ( cb->here < cb->end )
|
||||
{ *cb->here++ = c;
|
||||
} else
|
||||
{ size_t len = (cb->end-cb->base);
|
||||
|
||||
if ( cb->base == cb->tmp )
|
||||
{ pl_wchar_t *n = PL_malloc(len*2*sizeof(pl_wchar_t));
|
||||
memcpy(n, cb->base, sizeof(cb->tmp));
|
||||
cb->base = n;
|
||||
} else
|
||||
{ cb->base = PL_realloc(cb->base, len*2*sizeof(pl_wchar_t));
|
||||
}
|
||||
cb->here = &cb->base[len];
|
||||
cb->end = &cb->base[len*2];
|
||||
*cb->here++ = c;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
free_charbuf(charbuf *cb)
|
||||
{ if ( cb->base != cb->tmp )
|
||||
PL_free(cb->base);
|
||||
}
|
||||
|
||||
|
||||
/** turtle_read_name(+C0, +Stream, -C, -Name) is semidet.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
|
||||
{ int c;
|
||||
charbuf b;
|
||||
IOSTREAM *in;
|
||||
|
||||
if ( !PL_get_integer(C0, &c) )
|
||||
return type_error(C0, "code");
|
||||
if ( !wcis_name_start_char(c) )
|
||||
return FALSE;
|
||||
|
||||
if ( !PL_get_stream_handle(Stream, &in) )
|
||||
return FALSE;
|
||||
|
||||
init_charbuf(&b);
|
||||
add_charbuf(&b, c);
|
||||
|
||||
for(;;)
|
||||
{ int c = Sgetcode(in);
|
||||
|
||||
if ( wcis_name_char(c) )
|
||||
{ add_charbuf(&b, c);
|
||||
} else
|
||||
{ int rc = ( PL_unify_integer(C, c) &&
|
||||
PL_unify_wchars(Name, PL_ATOM, b.here-b.base, b.base) );
|
||||
|
||||
free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
read_hN(IOSTREAM *in, int digits, int *value)
|
||||
{ int d = digits;
|
||||
int v = 0;
|
||||
|
||||
while ( d-- > 0 )
|
||||
{ int c = Sgetcode(in);
|
||||
|
||||
if ( c >= '0' && c <= '9' )
|
||||
v = (v<<4) + c - '0';
|
||||
else if ( c >= 'A' && c <= 'F' )
|
||||
v = (v<<4) + c + 10 - 'A';
|
||||
else if ( c >= 'a' && c <= 'f' )
|
||||
v = (v<<4) + c + 10 - 'a';
|
||||
else
|
||||
{ if ( digits == 4 )
|
||||
return syntax_error("Illegal \\uNNNN in string");
|
||||
else
|
||||
return syntax_error("Illegal \\UNNNNNNNN in string");
|
||||
}
|
||||
}
|
||||
|
||||
*value = v;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
string_escape(IOSTREAM *in, int c, int *value)
|
||||
{ int esc;
|
||||
|
||||
switch(c)
|
||||
{ case 'n': esc = '\n'; break;
|
||||
case '"': esc = '"'; break;
|
||||
case '\\':esc = '\\'; break;
|
||||
case 't': esc = '\t'; break;
|
||||
case 'r': esc = '\r'; break;
|
||||
case 'u':
|
||||
if ( !read_hN(in, 4, &esc) )
|
||||
return FALSE;
|
||||
break;
|
||||
case 'U':
|
||||
if ( !read_hN(in, 8, &esc) )
|
||||
return FALSE;
|
||||
break;
|
||||
default:
|
||||
return syntax_error("illegal escape in string");
|
||||
}
|
||||
|
||||
*value = esc;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/** turtle_read_string(+C0, +Stream, -C, -Value:atom) is semidet.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
|
||||
{ int c;
|
||||
charbuf b;
|
||||
IOSTREAM *in;
|
||||
int endlen = 1;
|
||||
|
||||
if ( !PL_get_integer(C0, &c) )
|
||||
return type_error(C0, "code");
|
||||
if ( c != '"' )
|
||||
return FALSE;
|
||||
|
||||
if ( !PL_get_stream_handle(Stream, &in) )
|
||||
return FALSE;
|
||||
|
||||
init_charbuf(&b);
|
||||
|
||||
c = Sgetcode(in);
|
||||
if ( c == '"' )
|
||||
{ c = Sgetcode(in);
|
||||
if ( c == '"' ) /* """...""" */
|
||||
{ endlen = 3;
|
||||
c = Sgetcode(in);
|
||||
} else
|
||||
{ PL_release_stream(in);
|
||||
return (PL_unify_integer(C, c) &&
|
||||
PL_unify_atom(Value, ATOM_));
|
||||
}
|
||||
}
|
||||
|
||||
for(;;c = Sgetcode(in))
|
||||
{ if ( c == -1 )
|
||||
{ free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
return syntax_error("eof_in_string");
|
||||
} else if ( c == '"' )
|
||||
{ int count = 1;
|
||||
|
||||
for(count=1; count<endlen; )
|
||||
{ if ( (c=Sgetcode(in)) == '"' )
|
||||
count++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
if ( count == endlen )
|
||||
{ int rc;
|
||||
|
||||
c = Sgetcode(in);
|
||||
rc = (PL_unify_integer(C, c) &&
|
||||
PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
|
||||
free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
return rc;
|
||||
}
|
||||
|
||||
while(count-- > 0)
|
||||
add_charbuf(&b, '"');
|
||||
add_charbuf(&b, c);
|
||||
} else if ( c == '\\' )
|
||||
{ int esc;
|
||||
|
||||
c = Sgetcode(in);
|
||||
if ( !string_escape(in, c, &esc) )
|
||||
{ free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
return FALSE;
|
||||
}
|
||||
add_charbuf(&b, esc);
|
||||
} else
|
||||
{ add_charbuf(&b, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** turtle_read_relative_uri(+C0, +Stream, -C, -Value:atom) is semidet.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
|
||||
{ int c;
|
||||
charbuf b;
|
||||
IOSTREAM *in;
|
||||
|
||||
if ( !PL_get_integer(C0, &c) )
|
||||
return type_error(C0, "code");
|
||||
if ( c != '<' )
|
||||
return FALSE;
|
||||
|
||||
if ( !PL_get_stream_handle(Stream, &in) )
|
||||
return FALSE;
|
||||
|
||||
init_charbuf(&b);
|
||||
c = Sgetcode(in);
|
||||
for(; ; c = Sgetcode(in))
|
||||
{ if ( c == '>' )
|
||||
{ int rc;
|
||||
|
||||
c = Sgetcode(in);
|
||||
rc = (PL_unify_integer(C, c) &&
|
||||
PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
|
||||
PL_release_stream(in);
|
||||
free_charbuf(&b);
|
||||
return rc;
|
||||
} else if ( c == '\\' )
|
||||
{ int esc;
|
||||
|
||||
c = Sgetcode(in);
|
||||
if ( c == '>' )
|
||||
{ add_charbuf(&b, c);
|
||||
} else if ( string_escape(in, c, &esc) )
|
||||
{ add_charbuf(&b, esc);
|
||||
} else
|
||||
{ free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
return FALSE;
|
||||
}
|
||||
} else if ( c == -1 )
|
||||
{ free_charbuf(&b);
|
||||
PL_release_stream(in);
|
||||
return syntax_error("eof_in_uri");
|
||||
} else
|
||||
{ add_charbuf(&b, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************
|
||||
* WRITING *
|
||||
*******************************/
|
||||
|
||||
static int
|
||||
ttl_put_uesc(IOSTREAM *s, int c)
|
||||
{ if ( c <= 0xffff )
|
||||
return Sfprintf(s, "\\u%04x", (unsigned)c);
|
||||
else
|
||||
return Sfprintf(s, "\\U%08x", (unsigned)c);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
ttl_put_character(IOSTREAM *s, int c)
|
||||
{ if ( c >= 32 && c <= 126 )
|
||||
return Sputcode(c, s);
|
||||
if ( c <= 31 )
|
||||
return ttl_put_uesc(s, c);
|
||||
if ( c >= 127 && c < 0x10ffff )
|
||||
{ if ( s->encoding == ENC_ASCII )
|
||||
return ttl_put_uesc(s, c);
|
||||
if ( s->encoding == ENC_ISO_LATIN_1 && c > 255 )
|
||||
return ttl_put_uesc(s, c);
|
||||
return Sputcode(c, s);
|
||||
}
|
||||
|
||||
representation_error("turtle_character");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
ttl_put_echaracter(IOSTREAM *s, int c)
|
||||
{ int c2;
|
||||
|
||||
switch(c)
|
||||
{ case '\t': c2 = 't'; break;
|
||||
case '\n': c2 = 'n'; break;
|
||||
case '\r': c2 = 'r'; break;
|
||||
default:
|
||||
return ttl_put_character(s, c);
|
||||
}
|
||||
|
||||
Sputcode('\\', s);
|
||||
|
||||
return Sputcode(c2, s);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
ttl_put_scharacter(IOSTREAM *s, int c)
|
||||
{ switch(c)
|
||||
{ case '"':
|
||||
Sputcode('\\', s);
|
||||
return Sputcode('"', s);
|
||||
case '\\':
|
||||
Sputcode('\\', s);
|
||||
return Sputcode('\\', s);
|
||||
default:
|
||||
return ttl_put_echaracter(s, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static foreign_t
|
||||
turtle_write_quoted_string(term_t Stream, term_t Value)
|
||||
{ size_t len;
|
||||
char *s;
|
||||
pl_wchar_t *w;
|
||||
IOSTREAM *out;
|
||||
|
||||
if ( !PL_get_stream_handle(Stream, &out) )
|
||||
return FALSE;
|
||||
|
||||
if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
|
||||
{ const char *e = &s[len];
|
||||
|
||||
Sputcode('"', out);
|
||||
for(; s<e; s++)
|
||||
{ if ( ttl_put_scharacter(out, s[0]&0xff) < 0 )
|
||||
break;
|
||||
}
|
||||
Sputcode('"', out);
|
||||
return PL_release_stream(out);
|
||||
} else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||||
{ const pl_wchar_t *e = &w[len];
|
||||
|
||||
Sputcode('"', out);
|
||||
for(; w<e; w++)
|
||||
{ if ( ttl_put_scharacter(out, w[0]) < 0 )
|
||||
break;
|
||||
}
|
||||
Sputcode('"', out);
|
||||
return PL_release_stream(out);
|
||||
} else
|
||||
{ PL_release_stream(out);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
ttl_put_ucharacter(IOSTREAM *s, int c)
|
||||
{ switch(c)
|
||||
{ case '>':
|
||||
Sputcode('\\', s);
|
||||
return Sputcode('>', s);
|
||||
default:
|
||||
return ttl_put_character(s, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** turtle_write_uri(+Stream, +URI) is det.
|
||||
*/
|
||||
|
||||
static foreign_t
|
||||
turtle_write_uri(term_t Stream, term_t Value)
|
||||
{ size_t len;
|
||||
char *s;
|
||||
pl_wchar_t *w;
|
||||
IOSTREAM *out;
|
||||
|
||||
if ( !PL_get_stream_handle(Stream, &out) )
|
||||
return FALSE;
|
||||
|
||||
if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
|
||||
{ const char *e = &s[len];
|
||||
|
||||
Sputcode('<', out);
|
||||
for(; s<e; s++)
|
||||
{ if ( ttl_put_ucharacter(out, s[0]&0xff) < 0 )
|
||||
break;
|
||||
}
|
||||
Sputcode('>', out);
|
||||
return PL_release_stream(out);
|
||||
} else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
|
||||
{ const pl_wchar_t *e = &w[len];
|
||||
|
||||
Sputcode('<', out);
|
||||
for(; w<e; w++)
|
||||
{ if ( ttl_put_ucharacter(out, w[0]) < 0 )
|
||||
break;
|
||||
}
|
||||
Sputcode('>', out);
|
||||
return PL_release_stream(out);
|
||||
} else
|
||||
{ PL_release_stream(out);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*******************************
|
||||
* REGISTRATION *
|
||||
*******************************/
|
||||
|
||||
/* MKFUNCTOR(name, arity) assigns the functor name/arity to the variable
   FUNCTOR_<name><arity>; MKATOM(name) assigns the atom `name` to the
   variable ATOM_<name>.
*/
#define MKFUNCTOR(n,a) FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
#define MKATOM(n)      ATOM_ ## n = PL_new_atom(#n)
|
||||
|
||||
install_t
|
||||
install_turtle()
|
||||
{ MKFUNCTOR(error, 2);
|
||||
MKFUNCTOR(type_error, 2);
|
||||
MKFUNCTOR(syntax_error, 1);
|
||||
MKFUNCTOR(representation_error, 1);
|
||||
ATOM_ = PL_new_atom("");
|
||||
|
||||
PL_register_foreign("turtle_name_start_char",
|
||||
1, turtle_name_start_char, 0);
|
||||
PL_register_foreign("turtle_name", 1, turtle_name, 0);
|
||||
PL_register_foreign("turtle_read_name", 4, turtle_read_name, 0);
|
||||
PL_register_foreign("turtle_read_string", 4, turtle_read_string, 0);
|
||||
PL_register_foreign("turtle_read_relative_uri",
|
||||
4, turtle_read_relative_uri, 0);
|
||||
PL_register_foreign("turtle_write_quoted_string",
|
||||
2, turtle_write_quoted_string, 0);
|
||||
PL_register_foreign("turtle_write_uri", 2, turtle_write_uri, 0);
|
||||
}
|
Reference in New Issue
Block a user