609 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			609 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*  $Id$
 | |
| 
 | |
|     Part of SWI-Prolog
 | |
| 
 | |
|     Author:        Jan Wielemaker
 | |
|     E-mail:        J.Wielemaker@cs.vu.nl
 | |
|     WWW:           http://www.swi-prolog.org
 | |
|     Copyright (C): 1985-2009, VU University Amsterdam
 | |
| 
 | |
|     This library is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU Lesser General Public
 | |
|     License as published by the Free Software Foundation; either
 | |
|     version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|     This library is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|     Lesser General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | |
| */
 | |
| 
 | |
| #include <SWI-Stream.h>
 | |
| #include <SWI-Prolog.h>
 | |
| #include <string.h>
 | |
| #ifdef __WINDOWS__
 | |
| #define inline __inline
 | |
| #endif
 | |
| 
 | |
| #include "turtle_chars.c"
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       ERRORS		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static atom_t	 ATOM_;
 | |
| static functor_t FUNCTOR_error2;
 | |
| static functor_t FUNCTOR_type_error2;
 | |
| static functor_t FUNCTOR_syntax_error1;
 | |
| static functor_t FUNCTOR_representation_error1;
 | |
| 
 | |
| static int
 | |
| type_error(term_t actual, const char *expected)
 | |
| { term_t ex;
 | |
| 
 | |
|   if ( (ex = PL_new_term_ref()) &&
 | |
|        PL_unify_term(ex,
 | |
| 		     PL_FUNCTOR, FUNCTOR_error2,
 | |
| 		       PL_FUNCTOR, FUNCTOR_type_error2,
 | |
| 		         PL_CHARS, expected,
 | |
| 		         PL_TERM, actual,
 | |
| 		       PL_VARIABLE) )
 | |
|     return PL_raise_exception(ex);
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| syntax_error(const char *culprit)
 | |
| { term_t ex;
 | |
| 
 | |
|   if ( (ex = PL_new_term_ref()) &&
 | |
|        PL_unify_term(ex,
 | |
| 		     PL_FUNCTOR, FUNCTOR_error2,
 | |
| 		       PL_FUNCTOR, FUNCTOR_syntax_error1,
 | |
| 		         PL_CHARS, culprit,
 | |
| 		       PL_VARIABLE) )
 | |
|     return PL_raise_exception(ex);
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| representation_error(const char *culprit)
 | |
| { term_t ex;
 | |
| 
 | |
|   if ( (ex = PL_new_term_ref()) &&
 | |
|        PL_unify_term(ex,
 | |
| 		     PL_FUNCTOR, FUNCTOR_error2,
 | |
| 		       PL_FUNCTOR, FUNCTOR_representation_error1,
 | |
| 		         PL_CHARS, culprit,
 | |
| 		       PL_VARIABLE) )
 | |
|     return PL_raise_exception(ex);
 | |
| 
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	       PROLOG		*
 | |
| 		 *******************************/
 | |
| 
 | |
| /** turtle_name(+Atom) is semidet.
 | |
| 
 | |
| True if Atom is a valid Turtle identifier
 | |
| */
 | |
| 
 | |
/* True (1) if code point `c` may appear inside a Turtle name: it is
   either a name start character or a name extender character.
*/
static inline int
wcis_name_char(int c)
{ if ( wcis_name_start_char(c) )
    return 1;

  return wcis_name_extender_char(c) != 0;
}
 | |
| 
 | |
| /** turtle_name_start_char(+Int) is semidet.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_name_start_char(term_t Code)
 | |
| { int c;
 | |
| 
 | |
|   if ( !PL_get_integer(Code, &c) )
 | |
|     return type_error(Code, "code");
 | |
|   if ( !wcis_name_start_char(c) )
 | |
|     return FALSE;
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /** turtle_name(+Atom) is semidet.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_name(term_t name)
 | |
| { char *s;
 | |
|   pl_wchar_t *w;
 | |
|   size_t len;
 | |
| 
 | |
|   if ( PL_get_nchars(name, &len, &s, CVT_ATOM) )
 | |
|   { const char *e = &s[len];
 | |
| 
 | |
|     if ( !wcis_name_start_char(s[0]&0xff) )
 | |
|       return FALSE;
 | |
|     for(s++; s<e; s++)
 | |
|     { if ( !wcis_name_char(s[0]&0xff) )
 | |
| 	return FALSE;
 | |
|     }
 | |
|     return TRUE;
 | |
|   } else if ( PL_get_wchars(name, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
 | |
|   { const pl_wchar_t *e = &w[len];
 | |
| 
 | |
|     if ( !wcis_name_start_char(w[0]) )
 | |
|       return FALSE;
 | |
|     for(w++; w<e; w++)
 | |
|     { if ( !wcis_name_char(w[0]) )
 | |
| 	return FALSE;
 | |
|     }
 | |
|     return TRUE;
 | |
|   } else
 | |
|     return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| typedef struct charbuf
 | |
| { pl_wchar_t *base;
 | |
|   pl_wchar_t *here;
 | |
|   pl_wchar_t *end;
 | |
|   pl_wchar_t tmp[256];
 | |
| } charbuf;
 | |
| 
 | |
| 
 | |
| static void
 | |
| init_charbuf(charbuf *cb)
 | |
| { cb->base = cb->here = cb->tmp;
 | |
|   cb->end = &cb->tmp[sizeof(cb->tmp)/sizeof(pl_wchar_t)];
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| add_charbuf(charbuf *cb, int c)
 | |
| { if ( cb->here < cb->end )
 | |
|   { *cb->here++ = c;
 | |
|   } else
 | |
|   { size_t len = (cb->end-cb->base);
 | |
| 
 | |
|     if ( cb->base == cb->tmp )
 | |
|     { pl_wchar_t *n = PL_malloc(len*2*sizeof(pl_wchar_t));
 | |
|       memcpy(n, cb->base, sizeof(cb->tmp));
 | |
|       cb->base = n;
 | |
|     } else
 | |
|     { cb->base = PL_realloc(cb->base, len*2*sizeof(pl_wchar_t));
 | |
|     }
 | |
|     cb->here = &cb->base[len];
 | |
|     cb->end = &cb->base[len*2];
 | |
|     *cb->here++ = c;
 | |
|   }
 | |
| 
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void
 | |
| free_charbuf(charbuf *cb)
 | |
| { if ( cb->base != cb->tmp )
 | |
|     PL_free(cb->base);
 | |
| }
 | |
| 
 | |
| 
 | |
| /** turtle_read_name(+C0, +Stream, -C, -Name) is semidet.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_read_name(term_t C0, term_t Stream, term_t C, term_t Name)
 | |
| { int c;
 | |
|   charbuf b;
 | |
|   IOSTREAM *in;
 | |
| 
 | |
|   if ( !PL_get_integer(C0, &c) )
 | |
|     return type_error(C0, "code");
 | |
|   if ( !wcis_name_start_char(c) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !PL_get_stream_handle(Stream, &in) )
 | |
|     return FALSE;
 | |
| 
 | |
|   init_charbuf(&b);
 | |
|   add_charbuf(&b, c);
 | |
| 
 | |
|   for(;;)
 | |
|   { int c = Sgetcode(in);
 | |
| 
 | |
|     if ( wcis_name_char(c) )
 | |
|     { add_charbuf(&b, c);
 | |
|     } else
 | |
|     { int rc = ( PL_unify_integer(C, c) &&
 | |
| 		 PL_unify_wchars(Name, PL_ATOM, b.here-b.base, b.base) );
 | |
| 
 | |
|       free_charbuf(&b);
 | |
|       PL_release_stream(in);
 | |
| 
 | |
|       return rc;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| read_hN(IOSTREAM *in, int digits, int *value)
 | |
| { int d = digits;
 | |
|   int v = 0;
 | |
| 
 | |
|   while ( d-- > 0 )
 | |
|   { int c = Sgetcode(in);
 | |
| 
 | |
|     if ( c >= '0' && c <= '9' )
 | |
|       v = (v<<4) + c - '0';
 | |
|     else if ( c >= 'A' && c <= 'F' )
 | |
|       v = (v<<4) + c + 10 - 'A';
 | |
|     else if ( c >= 'a' && c <= 'f' )
 | |
|       v = (v<<4) + c + 10 - 'a';
 | |
|     else
 | |
|     { if ( digits == 4 )
 | |
| 	return syntax_error("Illegal \\uNNNN in string");
 | |
|       else
 | |
| 	return syntax_error("Illegal \\UNNNNNNNN in string");
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   *value = v;
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| string_escape(IOSTREAM *in, int c, int *value)
 | |
| { int esc;
 | |
| 
 | |
|   switch(c)
 | |
|   { case 'n': esc = '\n'; break;
 | |
|     case '"': esc = '"';  break;
 | |
|     case '\\':esc = '\\'; break;
 | |
|     case 't': esc = '\t'; break;
 | |
|     case 'r': esc = '\r'; break;
 | |
|     case 'u':
 | |
|       if ( !read_hN(in, 4, &esc) )
 | |
| 	return FALSE;
 | |
|       break;
 | |
|     case 'U':
 | |
|       if ( !read_hN(in, 8, &esc) )
 | |
| 	return FALSE;
 | |
|       break;
 | |
|     default:
 | |
|       return syntax_error("illegal escape in string");
 | |
|   }
 | |
| 
 | |
|   *value = esc;
 | |
|   return TRUE;
 | |
| }
 | |
| 
 | |
| 
 | |
| /** turtle_read_string(+C0, +Stream, -C, -Value:atom) is semidet.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_read_string(term_t C0, term_t Stream, term_t C, term_t Value)
 | |
| { int c;
 | |
|   charbuf b;
 | |
|   IOSTREAM *in;
 | |
|   int endlen = 1;
 | |
| 
 | |
|   if ( !PL_get_integer(C0, &c) )
 | |
|     return type_error(C0, "code");
 | |
|   if ( c != '"' )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !PL_get_stream_handle(Stream, &in) )
 | |
|     return FALSE;
 | |
| 
 | |
|   init_charbuf(&b);
 | |
| 
 | |
|   c = Sgetcode(in);
 | |
|   if ( c == '"' )
 | |
|   { c = Sgetcode(in);
 | |
|     if ( c == '"' )			/* """...""" */
 | |
|     { endlen = 3;
 | |
|       c = Sgetcode(in);
 | |
|     } else
 | |
|     { PL_release_stream(in);
 | |
|       return (PL_unify_integer(C, c) &&
 | |
| 	      PL_unify_atom(Value, ATOM_));
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   for(;;c = Sgetcode(in))
 | |
|   { if ( c == -1 )
 | |
|     { free_charbuf(&b);
 | |
|       PL_release_stream(in);
 | |
|       return syntax_error("eof_in_string");
 | |
|     } else if ( c == '"' )
 | |
|     { int count = 1;
 | |
| 
 | |
|       for(count=1; count<endlen; )
 | |
|       { if ( (c=Sgetcode(in)) == '"' )
 | |
| 	  count++;
 | |
| 	else
 | |
| 	  break;
 | |
|       }
 | |
| 
 | |
|       if ( count == endlen )
 | |
|       { int rc;
 | |
| 
 | |
| 	c = Sgetcode(in);
 | |
| 	rc = (PL_unify_integer(C, c) &&
 | |
| 	      PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
 | |
| 	free_charbuf(&b);
 | |
| 	PL_release_stream(in);
 | |
| 	return rc;
 | |
|       }
 | |
| 
 | |
|       while(count-- > 0)
 | |
| 	add_charbuf(&b, '"');
 | |
|       add_charbuf(&b, c);
 | |
|     } else if ( c == '\\' )
 | |
|     { int esc;
 | |
| 
 | |
|       c = Sgetcode(in);
 | |
|       if ( !string_escape(in, c, &esc) )
 | |
|       { free_charbuf(&b);
 | |
| 	PL_release_stream(in);
 | |
| 	return FALSE;
 | |
|       }
 | |
|       add_charbuf(&b, esc);
 | |
|     } else
 | |
|     { add_charbuf(&b, c);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /** turtle_read_relative_uri(+C0, +Stream, -C, -Value:atom) is semidet.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_read_relative_uri(term_t C0, term_t Stream, term_t C, term_t Value)
 | |
| { int c;
 | |
|   charbuf b;
 | |
|   IOSTREAM *in;
 | |
| 
 | |
|   if ( !PL_get_integer(C0, &c) )
 | |
|     return type_error(C0, "code");
 | |
|   if ( c != '<' )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( !PL_get_stream_handle(Stream, &in) )
 | |
|     return FALSE;
 | |
| 
 | |
|   init_charbuf(&b);
 | |
|   c = Sgetcode(in);
 | |
|   for(; ; c = Sgetcode(in))
 | |
|   { if ( c == '>' )
 | |
|     { int rc;
 | |
| 
 | |
|       c = Sgetcode(in);
 | |
|       rc = (PL_unify_integer(C, c) &&
 | |
| 	    PL_unify_wchars(Value, PL_ATOM, b.here-b.base, b.base));
 | |
|       PL_release_stream(in);
 | |
|       free_charbuf(&b);
 | |
|       return rc;
 | |
|     } else if ( c == '\\' )
 | |
|     { int esc;
 | |
| 
 | |
|       c = Sgetcode(in);
 | |
|       if ( c == '>' )
 | |
|       { add_charbuf(&b, c);
 | |
|       } else if ( string_escape(in, c, &esc) )
 | |
|       { add_charbuf(&b, esc);
 | |
|       } else
 | |
|       { free_charbuf(&b);
 | |
| 	PL_release_stream(in);
 | |
| 	return FALSE;
 | |
|       }
 | |
|     } else if ( c == -1 )
 | |
|     { free_charbuf(&b);
 | |
|       PL_release_stream(in);
 | |
|       return syntax_error("eof_in_uri");
 | |
|     } else
 | |
|     { add_charbuf(&b, c);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	     WRITING		*
 | |
| 		 *******************************/
 | |
| 
 | |
| static int
 | |
| ttl_put_uesc(IOSTREAM *s, int c)
 | |
| { if ( c <= 0xffff )
 | |
|     return Sfprintf(s, "\\u%04x", (unsigned)c);
 | |
|   else
 | |
|     return Sfprintf(s, "\\U%08x", (unsigned)c);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| ttl_put_character(IOSTREAM *s, int c)
 | |
| { if ( c >= 32 && c <= 126 )
 | |
|     return Sputcode(c, s);
 | |
|   if ( c <= 31 )
 | |
|     return ttl_put_uesc(s, c);
 | |
|   if ( c >= 127 && c < 0x10ffff )
 | |
|   { if ( s->encoding == ENC_ASCII )
 | |
|       return ttl_put_uesc(s, c);
 | |
|     if ( s->encoding == ENC_ISO_LATIN_1 && c > 255 )
 | |
|       return ttl_put_uesc(s, c);
 | |
|     return Sputcode(c, s);
 | |
|   }
 | |
| 
 | |
|   representation_error("turtle_character");
 | |
|   return -1;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| ttl_put_echaracter(IOSTREAM *s, int c)
 | |
| { int c2;
 | |
| 
 | |
|   switch(c)
 | |
|   { case '\t': c2 = 't'; break;
 | |
|     case '\n': c2 = 'n'; break;
 | |
|     case '\r': c2 = 'r'; break;
 | |
|     default:
 | |
|       return ttl_put_character(s, c);
 | |
|   }
 | |
| 
 | |
|   Sputcode('\\', s);
 | |
| 
 | |
|   return Sputcode(c2, s);
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| ttl_put_scharacter(IOSTREAM *s, int c)
 | |
| { switch(c)
 | |
|   { case '"':
 | |
|       Sputcode('\\', s);
 | |
|       return Sputcode('"', s);
 | |
|     case '\\':
 | |
|       Sputcode('\\', s);
 | |
|       return Sputcode('\\', s);
 | |
|     default:
 | |
|       return ttl_put_echaracter(s, c);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static foreign_t
 | |
| turtle_write_quoted_string(term_t Stream, term_t Value)
 | |
| { size_t len;
 | |
|   char *s;
 | |
|   pl_wchar_t *w;
 | |
|   IOSTREAM *out;
 | |
| 
 | |
|   if ( !PL_get_stream_handle(Stream, &out) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
 | |
|   { const char *e = &s[len];
 | |
| 
 | |
|     Sputcode('"', out);
 | |
|     for(; s<e; s++)
 | |
|     { if ( ttl_put_scharacter(out, s[0]&0xff) < 0 )
 | |
| 	break;
 | |
|     }
 | |
|     Sputcode('"', out);
 | |
|     return PL_release_stream(out);
 | |
|   } else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
 | |
|   { const pl_wchar_t *e = &w[len];
 | |
| 
 | |
|     Sputcode('"', out);
 | |
|     for(; w<e; w++)
 | |
|     { if ( ttl_put_scharacter(out, w[0]) < 0 )
 | |
| 	break;
 | |
|     }
 | |
|     Sputcode('"', out);
 | |
|     return PL_release_stream(out);
 | |
|   } else
 | |
|   { PL_release_stream(out);
 | |
|     return FALSE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| ttl_put_ucharacter(IOSTREAM *s, int c)
 | |
| { switch(c)
 | |
|   { case '>':
 | |
|       Sputcode('\\', s);
 | |
|       return Sputcode('>', s);
 | |
|     default:
 | |
|       return ttl_put_character(s, c);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| /** turtle_write_uri(+Stream, +URI) is det.
 | |
| */
 | |
| 
 | |
| static foreign_t
 | |
| turtle_write_uri(term_t Stream, term_t Value)
 | |
| { size_t len;
 | |
|   char *s;
 | |
|   pl_wchar_t *w;
 | |
|   IOSTREAM *out;
 | |
| 
 | |
|   if ( !PL_get_stream_handle(Stream, &out) )
 | |
|     return FALSE;
 | |
| 
 | |
|   if ( PL_get_nchars(Value, &len, &s, CVT_ATOM|CVT_STRING) )
 | |
|   { const char *e = &s[len];
 | |
| 
 | |
|     Sputcode('<', out);
 | |
|     for(; s<e; s++)
 | |
|     { if ( ttl_put_ucharacter(out, s[0]&0xff) < 0 )
 | |
| 	break;
 | |
|     }
 | |
|     Sputcode('>', out);
 | |
|     return PL_release_stream(out);
 | |
|   } else if ( PL_get_wchars(Value, &len, &w, CVT_ATOM|CVT_EXCEPTION) )
 | |
|   { const pl_wchar_t *e = &w[len];
 | |
| 
 | |
|     Sputcode('<', out);
 | |
|     for(; w<e; w++)
 | |
|     { if ( ttl_put_ucharacter(out, w[0]) < 0 )
 | |
| 	break;
 | |
|     }
 | |
|     Sputcode('>', out);
 | |
|     return PL_release_stream(out);
 | |
|   } else
 | |
|   { PL_release_stream(out);
 | |
|     return FALSE;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 		 /*******************************
 | |
| 		 *	    REGISTRATION	*
 | |
| 		 *******************************/
 | |
| 
 | |
| #define MKFUNCTOR(n,a) \
 | |
| 	FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
 | |
| #define MKATOM(n) \
 | |
| 	ATOM_ ## n = PL_new_atom(#n)
 | |
| 
 | |
| install_t
 | |
| install_turtle()
 | |
| { MKFUNCTOR(error, 2);
 | |
|   MKFUNCTOR(type_error, 2);
 | |
|   MKFUNCTOR(syntax_error, 1);
 | |
|   MKFUNCTOR(representation_error, 1);
 | |
|   ATOM_ = PL_new_atom("");
 | |
| 
 | |
|   PL_register_foreign("turtle_name_start_char",
 | |
| 		      			    1, turtle_name_start_char, 0);
 | |
|   PL_register_foreign("turtle_name",        1, turtle_name,        0);
 | |
|   PL_register_foreign("turtle_read_name",   4, turtle_read_name,   0);
 | |
|   PL_register_foreign("turtle_read_string", 4, turtle_read_string, 0);
 | |
|   PL_register_foreign("turtle_read_relative_uri",
 | |
| 					    4, turtle_read_relative_uri, 0);
 | |
|   PL_register_foreign("turtle_write_quoted_string",
 | |
| 					    2, turtle_write_quoted_string, 0);
 | |
|   PL_register_foreign("turtle_write_uri",   2, turtle_write_uri,   0);
 | |
| }
 |