Experiment with porting SGML to YAP, and trying to preserve SWI code as much

as possible.
2009-03-13 19:39:06 +00:00
parent 754f00d311
commit d6a06fe092
260 changed files with 31894 additions and 0 deletions
--- a/packages/sgml/quote.c
+++ b/packages/sgml/quote.c
@@ -0,0 +1,401 @@
+/*  $Id$
+
+    Part of SWI-Prolog
+
+    Author:        Jan Wielemaker
+    E-mail:        wielemak@science.uva.nl
+    WWW:           http://www.swi-prolog.org
+    Copyright (C): 1985-2007, University of Amsterdam
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <SWI-Stream.h>			/* encoding */
+#include <SWI-Prolog.h>
+#include <stdlib.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#include "error.h"
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <wctype.h>
+#include "dtd.h"
+
+static atom_t ATOM_iso_latin_1;
+static atom_t ATOM_utf8;
+static atom_t ATOM_unicode;
+static atom_t ATOM_ascii;
+
+#define CHARSET 256
+
+typedef struct charbuf
+{ char     buffer[1024];
+  char    *bufp;
+  char    *end;
+  size_t   size;
+} charbuf;
+
+
+static void
+init_buf(charbuf *b)
+{ b->bufp = b->end = b->buffer;
+  b->size = sizeof(b->buffer);
+}
+
+
+static void
+free_buf(charbuf *b)
+{ if ( b->bufp != b->buffer )
+    free(b->bufp);
+}
+
+
+static int
+room_buf(charbuf *b, size_t room)
+{ size_t used = b->end - b->bufp;
+
+  if ( room + used > b->size )
+  { if ( b->bufp == b->buffer )
+    { b->size = sizeof(b->buffer)*2;
+      if ( !(b->bufp = malloc(b->size)) )
+	return sgml2pl_error(ERR_ERRNO);
+
+      memcpy(b->bufp, b->buffer, used);
+    } else
+    { char *ptr;
+
+      b->size *= 2;
+      if ( !(ptr = realloc(b->bufp, b->size)) )
+	return sgml2pl_error(ERR_ERRNO);
+      b->bufp = ptr;
+    }
+    b->end = b->bufp + used;
+  }
+
+  return TRUE;    
+}
+
+
+static size_t
+used_buf(const charbuf *b)
+{ return b->end - b->bufp;
+}
+
+
+static int
+add_char_buf(charbuf *b, int chr)
+{ if ( room_buf(b, 1) )
+  { *b->end++ = chr;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+static int
+add_char_bufW(charbuf *b, int chr)
+{ if ( room_buf(b, sizeof(wchar_t)) )
+  { wchar_t *p = (wchar_t*)b->end;
+
+    *p++ = chr;
+    b->end = (char *)p;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+static int
+add_str_buf(charbuf *b, const char *s)
+{ size_t len = strlen(s);
+
+  if ( room_buf(b, len+1) )
+  { memcpy(b->end, s, len+1);
+    b->end += len;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+static int
+add_str_bufW(charbuf *b, const char *s)
+{ size_t len = strlen(s);
+
+  if ( room_buf(b, len*sizeof(wchar_t)) )
+  { wchar_t *p = (wchar_t*)b->end;
+
+    while(*s)
+      *p++ = *s++;
+    b->end = (char *)p;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+
+static foreign_t
+do_quote(term_t in, term_t quoted, char **map, int maxchr)
+{ char *inA = NULL;
+  wchar_t *inW = NULL;
+  size_t len;
+  const unsigned  char *s;
+  charbuf buffer;
+  int changes = 0;
+  int rc;
+
+  if ( !PL_get_nchars(in, &len, &inA, CVT_ATOMIC) &&
+       !PL_get_wchars(in, &len, &inW, CVT_ATOMIC) )
+    return sgml2pl_error(ERR_TYPE, "atom", in);
+  if ( len == 0 )
+    return PL_unify(in, quoted);
+
+  init_buf(&buffer);
+
+  if ( inA )
+  { for(s = (unsigned char*)inA ; len-- > 0; s++ )
+    { int c = *s;
+      
+      if ( map[c] )
+      { if ( !add_str_buf(&buffer, map[c]) )
+	  return FALSE;
+	
+	changes++;
+      } else if ( c > maxchr )
+      { char buf[10];
+
+	sprintf(buf, "&#%d;", c);
+	if ( !add_str_buf(&buffer, buf) )
+	  return FALSE;
+	
+	changes++;
+      } else
+      { add_char_buf(&buffer, c);
+      }
+    }
+
+    if ( changes > 0 )
+      rc = PL_unify_atom_nchars(quoted, used_buf(&buffer), buffer.bufp);
+    else
+      rc = PL_unify(in, quoted);
+  } else
+  { for( ; len-- > 0; inW++ )
+    { int c = *inW;
+      
+      if ( c <= 0xff && map[c] )
+      { if ( !add_str_bufW(&buffer, map[c]) )
+	  return FALSE;
+	
+	changes++;
+      } else if ( c > maxchr )
+      { char buf[10];
+
+	sprintf(buf, "&#%d;", c);
+	if ( !add_str_bufW(&buffer, buf) )
+	  return FALSE;
+	
+	changes++;
+      }else
+      { add_char_bufW(&buffer, c);
+      }
+    }
+	 
+    if ( changes > 0 )
+      rc = PL_unify_wchars(quoted, PL_ATOM,
+			   used_buf(&buffer)/sizeof(wchar_t),
+			   (wchar_t*)buffer.bufp);
+    else
+      rc = PL_unify(in, quoted);
+  }
+  
+  free_buf(&buffer);
+
+  return rc;
+}
+
+
+static int
+get_max_chr(term_t t, int *maxchr)
+{ atom_t a;
+
+  if ( PL_get_atom(t, &a) )
+  { if ( a == ATOM_iso_latin_1 )
+      *maxchr = 0xff;
+    else if ( a == ATOM_utf8 )
+      *maxchr = 0x7ffffff;
+    else if ( a == ATOM_unicode )
+      *maxchr = 0xffff;
+    else if ( a == ATOM_ascii )
+      *maxchr = 0x7f;
+    else
+      return sgml2pl_error(ERR_DOMAIN, "encoding", t);
+
+    return TRUE;
+  }
+
+  return sgml2pl_error(ERR_TYPE, "atom", t);
+}
+
+
+/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+(*) xml_quote_attribute/3 assumes the attribute is   quoted using "" and
+does *not* escape '. Although escaping ' with &apos; is valid XML, it is
+*not* valid html, and this  routine  is   also  used  by  the html_write
+library.
+- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
+
+static foreign_t
+xml_quote_attribute(term_t in, term_t out, term_t encoding)
+{ static char **map;
+  int maxchr;
+
+  if ( !map )
+  { int i;
+
+    if ( !(map = malloc(CHARSET*sizeof(char*))) )
+      return sgml2pl_error(ERR_ERRNO, errno);
+
+    for(i=0; i<CHARSET; i++)
+      map[i] = NULL;
+
+    map['<']  = "&lt;";
+    map['>']  = "&gt;";
+    map['&']  = "&amp;";
+/*  map['\''] = "&apos;"; See (*) */
+    map['"']  = "&quot;";
+  }
+
+  if ( !get_max_chr(encoding, &maxchr) )
+    return FALSE;
+
+  return do_quote(in, out, map, maxchr);
+}
+
+
+static foreign_t
+xml_quote_cdata(term_t in, term_t out, term_t encoding)
+{ static char **map;
+  int maxchr;
+
+  if ( !map )
+  { int i;
+
+    if ( !(map = malloc(CHARSET*sizeof(char*))) )
+      return sgml2pl_error(ERR_ERRNO, errno);
+
+    for(i=0; i<CHARSET; i++)
+      map[i] = NULL;
+
+    map['<']  = "&lt;";
+    map['>']  = "&gt;";
+    map['&']  = "&amp;";
+  }
+
+  if ( !get_max_chr(encoding, &maxchr) )
+    return FALSE;
+
+  return do_quote(in, out, map, maxchr);
+}
+
+
+static foreign_t
+xml_name(term_t in, term_t encoding)
+{ char *ins;
+  wchar_t *inW;
+  size_t len;
+  static dtd_charclass *map;
+  unsigned int i;
+  int maxchr;
+
+  if ( !get_max_chr(encoding, &maxchr) )
+    return FALSE;
+
+  if ( !map )
+    map = new_charclass();
+
+  if ( PL_get_nchars(in, &len, &ins, CVT_ATOMIC) )
+  { int c;
+
+    if ( len == 0 )
+      return FALSE;
+
+    c = ins[0] & 0xff;
+    if ( c > maxchr )
+      return FALSE;
+    
+    if ( !(map->class[c] & CH_NMSTART) )
+      return FALSE;
+    for(i=1; i<len; i++)
+    { c = ins[i] & 0xff;
+
+      if ( c > maxchr || !(map->class[c] & CH_NAME) )
+	return FALSE;
+    }
+
+    return TRUE;
+  }
+  if ( PL_get_wchars(in, &len, &inW, CVT_ATOMIC) )
+  { if ( len == 0 )
+      return FALSE;
+  
+    if ( inW[0] > maxchr )
+      return FALSE;
+
+    if ( inW[0] <= 0xff &&
+	 !(map->class[inW[0]] & CH_NMSTART) )
+      return FALSE;
+    if ( inW[0] > 0xff && !iswalpha(inW[0]) )
+      return FALSE;
+	 
+    for(i=1; i<len; i++)
+    { int c = inW[i];
+
+      if ( c <= 0xff && !(map->class[c] & CH_NAME) )
+	return FALSE;
+      if ( c > 0xff && !iswalnum((wint_t)c) )
+	return FALSE;
+    }
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+
+
+install_t
+install_xml_quote()
+{ ATOM_iso_latin_1 = PL_new_atom("iso_latin_1");
+  ATOM_utf8        = PL_new_atom("utf8");
+  ATOM_unicode     = PL_new_atom("unicode");
+  ATOM_ascii       = PL_new_atom("ascii");
+
+  PL_register_foreign("xml_quote_attribute", 3, xml_quote_attribute, 0);
+  PL_register_foreign("xml_quote_cdata",     3, xml_quote_cdata,     0);
+  PL_register_foreign("xml_name",            2, xml_name,            0);
+}