1599 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			1599 lines
		
	
	
		
			36 KiB
		
	
	
	
		
			C
		
	
	
		
			Executable File
		
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        J.Wielemaker@cs.vu.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 2009, VU University Amsterdam
 | 
						|
 | 
						|
    This library is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU Lesser General Public
 | 
						|
    License as published by the Free Software Foundation; either
 | 
						|
    version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This library is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
    Lesser General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
*/
 | 
						|
 | 
						|
#ifdef HAVE_CONFIG_H
 | 
						|
#include <config.h>
 | 
						|
#endif
 | 
						|
#ifdef __WINDOWS__
 | 
						|
#define inline __inline
 | 
						|
#endif
 | 
						|
 | 
						|
#include <SWI-Prolog.h>
 | 
						|
#include <string.h>
 | 
						|
#include <stdio.h>
 | 
						|
#include <wchar.h>
 | 
						|
#include <wctype.h>
 | 
						|
#include <assert.h>
 | 
						|
 | 
						|
static size_t removed_dot_segments(size_t len, const pl_wchar_t *in,
 | 
						|
				   pl_wchar_t *out);
 | 
						|
static pl_wchar_t *remove_last_segment(const pl_wchar_t *base,
 | 
						|
				       const pl_wchar_t *o);
 | 
						|
static char *_utf8_put_char(char *out, int chr);
 | 
						|
 | 
						|
#define ISUTF8_MB(c) ((unsigned)(c) >= 0xc0 && (unsigned)(c) <= 0xfd)
 | 
						|
#define utf8_put_char(out, chr) \
 | 
						|
	((chr) < 0x80 ? out[0]=(char)(chr), out+1 \
 | 
						|
		      : _utf8_put_char(out, (chr)))
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      ERRORS		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static atom_t ATOM_query_value;
 | 
						|
static atom_t ATOM_fragment;
 | 
						|
static atom_t ATOM_path;
 | 
						|
 | 
						|
static functor_t FUNCTOR_equal2;	/* =/2 */
 | 
						|
static functor_t FUNCTOR_pair2;		/* -/2 */
 | 
						|
static functor_t FUNCTOR_uri_components5;
 | 
						|
static functor_t FUNCTOR_uri_authority4;
 | 
						|
static functor_t FUNCTOR_error2;
 | 
						|
static functor_t FUNCTOR_syntax_error1;
 | 
						|
static functor_t FUNCTOR_type_error2;
 | 
						|
static functor_t FUNCTOR_domain_error2;
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
syntax_error(const char *culprit)
 | 
						|
{ term_t ex;
 | 
						|
 | 
						|
  if ( (ex=PL_new_term_ref()) &&
 | 
						|
       PL_unify_term(ex,
 | 
						|
		     PL_FUNCTOR, FUNCTOR_error2,
 | 
						|
		       PL_FUNCTOR, FUNCTOR_syntax_error1,
 | 
						|
		         PL_CHARS, culprit,
 | 
						|
		       PL_VARIABLE) )
 | 
						|
    return PL_raise_exception(ex);
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
type_error(const char *expected, term_t found)
 | 
						|
{ term_t ex;
 | 
						|
 | 
						|
  if ( (ex=PL_new_term_ref()) &&
 | 
						|
       PL_unify_term(ex,
 | 
						|
		     PL_FUNCTOR, FUNCTOR_error2,
 | 
						|
		       PL_FUNCTOR, FUNCTOR_type_error2,
 | 
						|
		         PL_CHARS, expected,
 | 
						|
			 PL_TERM, found,
 | 
						|
		       PL_VARIABLE) )
 | 
						|
    return PL_raise_exception(ex);
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
domain_error(const char *expected, term_t found)
 | 
						|
{ term_t ex;
 | 
						|
 | 
						|
  if ( (ex=PL_new_term_ref()) &&
 | 
						|
       PL_unify_term(ex,
 | 
						|
		     PL_FUNCTOR, FUNCTOR_error2,
 | 
						|
		       PL_FUNCTOR, FUNCTOR_domain_error2,
 | 
						|
		         PL_CHARS, expected,
 | 
						|
			 PL_TERM, found,
 | 
						|
		       PL_VARIABLE) )
 | 
						|
    return PL_raise_exception(ex);
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      ESCAPING		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
#define	ESC_PATH       (CH_PCHAR|CH_EX_PATH)
 | 
						|
#define	ESC_QUERY      (CH_PCHAR|CH_EX_QF)
 | 
						|
#define	ESC_QVALUE     (CH_UNRESERVED|CH_QSUBDELIM|CH_EX_PCHAR|CH_EX_QF)
 | 
						|
#define	ESC_QNAME      (CH_PCHAR)
 | 
						|
#define	ESC_FRAGMENT   (CH_PCHAR|CH_EX_QF)
 | 
						|
#define	ESC_AUTH       (CH_PCHAR)
 | 
						|
#define	ESC_PASSWD     (CH_PCHAR)
 | 
						|
#define ESC_USER       (CH_PCHAR)
 | 
						|
#define	ESC_SCHEME     (CH_SCHEME)
 | 
						|
#define ESC_PORT       (CH_DIGIT)
 | 
						|
#define ESC_HOST       (CH_UNRESERVED|CH_SUBDELIM)
 | 
						|
 | 
						|
#define CH_ALPHA      0x0001
 | 
						|
#define CH_DIGIT      0x0002
 | 
						|
#define CH_EX_UNRES   0x0004
 | 
						|
#define CH_GENDELIM   0x0008
 | 
						|
#define CH_SUBDELIM   0x0010
 | 
						|
#define CH_URL	      0x0020
 | 
						|
#define CH_EX_PCHAR   0x0040
 | 
						|
#define CH_EX_QF      0x0080		/* Extra query and fragment chars */
 | 
						|
#define CH_EX_SCHEME  0x0100
 | 
						|
#define CH_QSUBDELIM  0x0200
 | 
						|
#define CH_EX_PATH    0x0400
 | 
						|
 | 
						|
#define CH_SCHEME	(CH_ALPHA|CH_DIGIT|CH_EX_SCHEME)
 | 
						|
#define CH_UNRESERVED	(CH_ALPHA|CH_DIGIT|CH_EX_UNRES)
 | 
						|
#define CH_PCHAR	(CH_UNRESERVED|CH_SUBDELIM|CH_EX_PCHAR)
 | 
						|
 | 
						|
static int  charflags[128] = {0};
 | 
						|
static int  flags_done = 0;
 | 
						|
 | 
						|
static void
 | 
						|
set_flags(const char *from, int flag)
 | 
						|
{ for(; *from; from++)
 | 
						|
    charflags[from[0]&0xff] |= flag;
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
fill_flags()
 | 
						|
{ if ( !flags_done )
 | 
						|
  { int c;
 | 
						|
 | 
						|
    for(c='a'; c<='z'; c++)
 | 
						|
      charflags[c] |= CH_ALPHA;
 | 
						|
    for(c='A'; c<='Z'; c++)
 | 
						|
      charflags[c] |= CH_ALPHA;
 | 
						|
    for(c='0'; c<='9'; c++)
 | 
						|
      charflags[c] |= CH_DIGIT;
 | 
						|
 | 
						|
    set_flags("-._~",        CH_EX_UNRES);
 | 
						|
    set_flags(":/?#[]@",     CH_GENDELIM);
 | 
						|
    set_flags("!$&'()+*,;=", CH_SUBDELIM);
 | 
						|
    set_flags("!$'()*,;",    CH_QSUBDELIM); /* = CH_SUBDELIM - "&=+" */
 | 
						|
    set_flags(":@",          CH_EX_PCHAR);
 | 
						|
    set_flags("/",           CH_EX_PATH);
 | 
						|
    set_flags("/?",          CH_EX_QF);
 | 
						|
    set_flags("+-.",	     CH_EX_SCHEME);
 | 
						|
 | 
						|
    set_flags("/:?#&=", CH_URL);
 | 
						|
 | 
						|
    flags_done = TRUE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
#define no_escape(c, f) ((c < 128) && (charflags[(int)c] & (f)))
 | 
						|
#define iri_no_escape(c, f) ((c > 128) || (charflags[(int)c] & (f)))
 | 
						|
 | 
						|
 | 
						|
/* hex(const pl_wchar_t *in, int digits, int *value)
 | 
						|
 | 
						|
   Get <digits> characters from in and interpret them as a hexadecimal
 | 
						|
   integer.  Returns pointer to the end on success or NULL if error.
 | 
						|
*/
 | 
						|
 | 
						|
static const pl_wchar_t *
 | 
						|
hex(const pl_wchar_t *in, int digits, int *value)
 | 
						|
{ int v = 0;
 | 
						|
 | 
						|
  while(digits-- > 0)
 | 
						|
  { int c = *in++;
 | 
						|
 | 
						|
    if ( c >= '0' && c <= '9' )
 | 
						|
      v = (v<<4) + c - '0';
 | 
						|
    else if ( c >= 'A' && c <= 'F' )
 | 
						|
      v = (v<<4) + c + 10 - 'A';
 | 
						|
    else if ( c >= 'a' && c <= 'f' )
 | 
						|
      v = (v<<4) + c + 10 - 'a';
 | 
						|
    else
 | 
						|
      return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  *value = v;
 | 
						|
  return in;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const pl_wchar_t *
 | 
						|
get_encoded_utf8_cont_1(const pl_wchar_t *in, int *val)
 | 
						|
{ int c;
 | 
						|
 | 
						|
  if ( in[0] == '%' && hex(in+1, 2, &c) )
 | 
						|
  { if ( (c&0xc0) == 0x80 )
 | 
						|
    { *val = (c&0x3f);
 | 
						|
      return in+3;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const pl_wchar_t *
 | 
						|
get_encoded_utf8_cont(const pl_wchar_t *in, int cnt, int *val)
 | 
						|
{ int shift = cnt*6;
 | 
						|
 | 
						|
  *val <<= shift;
 | 
						|
  shift -= 6;
 | 
						|
 | 
						|
  while(cnt-->0)
 | 
						|
  { int v0;
 | 
						|
 | 
						|
    if ( (in = get_encoded_utf8_cont_1(in, &v0)) )
 | 
						|
    { *val |= (v0<<shift);
 | 
						|
      shift -= 6;
 | 
						|
    } else
 | 
						|
      return NULL;
 | 
						|
  }
 | 
						|
 | 
						|
  return in;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static const pl_wchar_t *
 | 
						|
get_encoded_utf8(const pl_wchar_t *in, int *chr)
 | 
						|
{ int c1;
 | 
						|
 | 
						|
  if ( in[0] == '%' && hex(in+1, 2, &c1) )
 | 
						|
  { in += 3;
 | 
						|
 | 
						|
    if ( ISUTF8_MB(c1) )
 | 
						|
    { if ( (c1&0xe0) == 0xc0 )		/* 2-byte */
 | 
						|
      { *chr = (c1&0x1f);
 | 
						|
	return get_encoded_utf8_cont(in, 1, chr);
 | 
						|
      } else if ( (c1&0xf0) == 0xe0 )	/* 3-byte */
 | 
						|
      { *chr = (c1&0xf);
 | 
						|
	return get_encoded_utf8_cont(in, 2, chr);
 | 
						|
      } else if ( (c1&0xf8) == 0xf0 )	/* 4-byte */
 | 
						|
      { *chr = (c1&0x7);
 | 
						|
	return get_encoded_utf8_cont(in, 3, chr);
 | 
						|
      } else if ( (c1&0xfc) == 0xf8 )	/* 5-byte */
 | 
						|
      { *chr = (c1&0x3);
 | 
						|
	return get_encoded_utf8_cont(in, 4, chr);
 | 
						|
      } else if ( (c1&0xfe) == 0xfc )	/* 6-byte */
 | 
						|
      { *chr = (c1&0x1);
 | 
						|
	return get_encoded_utf8_cont(in, 5, chr);
 | 
						|
      } else
 | 
						|
	return NULL;
 | 
						|
    } else
 | 
						|
    { *chr = c1;
 | 
						|
      return in;			/* Encoded ASCII character */
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      RANGES		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
typedef struct range
 | 
						|
{ const pl_wchar_t *start;
 | 
						|
  const pl_wchar_t *end;
 | 
						|
} range;
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	 CHARACTER BUFFER	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
typedef struct charbuf
 | 
						|
{ pl_wchar_t *base;
 | 
						|
  pl_wchar_t *here;
 | 
						|
  pl_wchar_t *end;
 | 
						|
  pl_wchar_t tmp[256];
 | 
						|
} charbuf;
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
init_charbuf(charbuf *cb)
 | 
						|
{ cb->base = cb->here = cb->tmp;
 | 
						|
  cb->end = &cb->tmp[sizeof(cb->tmp)/sizeof(pl_wchar_t)];
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
init_charbuf_at_size(charbuf *cb, size_t size)
 | 
						|
{ size++;
 | 
						|
 | 
						|
  if ( size < sizeof(cb->tmp)/sizeof(pl_wchar_t) )
 | 
						|
    cb->base = cb->here = cb->tmp;
 | 
						|
  else
 | 
						|
    cb->base = cb->here = PL_malloc(size*sizeof(pl_wchar_t));
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_charbuf(charbuf *cb, int c)
 | 
						|
{ if ( cb->here < cb->end )
 | 
						|
  { *cb->here++ = c;
 | 
						|
  } else
 | 
						|
  { size_t len = (cb->end-cb->base);
 | 
						|
 | 
						|
    if ( cb->base == cb->tmp )
 | 
						|
    { pl_wchar_t *n = PL_malloc(len*2*sizeof(pl_wchar_t));
 | 
						|
      memcpy(n, cb->base, sizeof(cb->tmp));
 | 
						|
      cb->base = n;
 | 
						|
    } else
 | 
						|
    { cb->base = PL_realloc(cb->base, len*2*sizeof(pl_wchar_t));
 | 
						|
    }
 | 
						|
    cb->here = &cb->base[len];
 | 
						|
    cb->end = &cb->base[len*2];
 | 
						|
    *cb->here++ = c;
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static inline int
 | 
						|
hexdigit(int val)
 | 
						|
{ if ( val < 10 )
 | 
						|
    return '0'+val;
 | 
						|
  return 'A'-10+val;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_encoded_charbuf(charbuf *cb, int c, int flags)
 | 
						|
{ if ( no_escape(c, flags) )
 | 
						|
  { add_charbuf(cb, c);
 | 
						|
  } else
 | 
						|
  { char tmp[6];
 | 
						|
    const char *end = utf8_put_char(tmp, c);
 | 
						|
    const char *s;
 | 
						|
 | 
						|
    for(s=tmp; s<end; s++)
 | 
						|
    { int b = s[0]&0xff;
 | 
						|
 | 
						|
      add_charbuf(cb, '%');
 | 
						|
      add_charbuf(cb, hexdigit(b>>4));
 | 
						|
      add_charbuf(cb, hexdigit(b&0xf));
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
iri_add_encoded_charbuf(charbuf *cb, int c, int flags)
 | 
						|
{ if ( iri_no_escape(c, flags) )
 | 
						|
  { add_charbuf(cb, c);
 | 
						|
  } else
 | 
						|
  { assert(c < 128);
 | 
						|
    add_charbuf(cb, '%');
 | 
						|
    add_charbuf(cb, hexdigit(c>>4));
 | 
						|
    add_charbuf(cb, hexdigit(c&0xf));
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_nchars_charbuf(charbuf *cb, size_t len, const pl_wchar_t *s)
 | 
						|
{ if ( cb->here+len <= cb->end )
 | 
						|
  { wcsncpy(cb->here, s, len);
 | 
						|
    cb->here += len;
 | 
						|
  } else
 | 
						|
  { size_t n;
 | 
						|
 | 
						|
    for(n=0; n<len; n++)
 | 
						|
      add_charbuf(cb, s[n]);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
range_has_escape(const range *r, int flags)
 | 
						|
{ const pl_wchar_t *s = r->start;
 | 
						|
 | 
						|
  for(; s<r->end; s++)
 | 
						|
  { if ( s[0] == '%' || (s[0] == '+' && flags == ESC_QVALUE) )
 | 
						|
      return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
range_is_unreserved(const range *r, int iri, int flags)
 | 
						|
{ const pl_wchar_t *s = r->start;
 | 
						|
 | 
						|
  if ( iri )
 | 
						|
  { for(; s<r->end; s++)
 | 
						|
    { if ( !iri_no_escape(s[0], flags) )
 | 
						|
	return FALSE;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { for(; s<r->end; s++)
 | 
						|
    { if ( !no_escape(s[0], flags) )
 | 
						|
	return FALSE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_verb_range_charbuf(charbuf *cb, const range *r)
 | 
						|
{ return add_nchars_charbuf(cb, r->end-r->start, r->start);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_decoded_range_charbuf(charbuf *cb, const range *r, int flags)
 | 
						|
{ const pl_wchar_t *s = r->start;
 | 
						|
 | 
						|
  while(s<r->end)
 | 
						|
  { int c;
 | 
						|
 | 
						|
    if ( *s == '%' )
 | 
						|
    { const pl_wchar_t *e;
 | 
						|
 | 
						|
      if ( (e=get_encoded_utf8(s, &c)) )
 | 
						|
      { s = e;
 | 
						|
      } else if (hex(s+1, 2, &c) )
 | 
						|
      { s += 3;
 | 
						|
      } else
 | 
						|
      { c = *s++;
 | 
						|
      }
 | 
						|
    } else if ( *s == '+' && flags == ESC_QVALUE )
 | 
						|
    { s++;
 | 
						|
      c = ' ';
 | 
						|
    } else
 | 
						|
    { c = *s++;
 | 
						|
    }
 | 
						|
 | 
						|
    add_charbuf(cb, c);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_normalized_range_charbuf(charbuf *cb, const range *r, int iri, int flags)
 | 
						|
{ const pl_wchar_t *s = r->start;
 | 
						|
  while(s<r->end)
 | 
						|
  { int c;
 | 
						|
 | 
						|
    if ( *s == '%' )
 | 
						|
    { const pl_wchar_t *e;
 | 
						|
 | 
						|
      if ( (e=get_encoded_utf8(s, &c)) )
 | 
						|
      { s = e;
 | 
						|
      } else if (hex(s+1, 2, &c) )
 | 
						|
      { s += 3;
 | 
						|
      } else
 | 
						|
      { c = *s++;
 | 
						|
      }
 | 
						|
    } else if ( *s == '+' && flags == ESC_QVALUE )
 | 
						|
    { s++;
 | 
						|
      c = ' ';
 | 
						|
    } else
 | 
						|
    { c = *s++;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( iri )
 | 
						|
    { iri_add_encoded_charbuf(cb, c, flags);
 | 
						|
    } else
 | 
						|
    { add_encoded_charbuf(cb, c, flags);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* add_range_charbuf(charbuf *cb, const range *r, int iri, int flags)
 | 
						|
 | 
						|
   Add a range of characters while normalizing %-encoding.  This
 | 
						|
   implies not to use encoding if it is not needed and upcase
 | 
						|
   %xx to %XX otherwise.
 | 
						|
 | 
						|
   If iri == TRUE, values >= 128 are not escaped.  Otherwise they
 | 
						|
   use %-encoded UTF-8
 | 
						|
*/
 | 
						|
 | 
						|
static int
 | 
						|
add_range_charbuf(charbuf *cb, const range *r, int iri, int flags)
 | 
						|
{ if ( range_has_escape(r, flags) )
 | 
						|
  { return add_normalized_range_charbuf(cb, r, iri, flags);
 | 
						|
  } else if ( range_is_unreserved(r, iri, flags) )
 | 
						|
  { add_nchars_charbuf(cb, r->end-r->start, r->start);
 | 
						|
  } else
 | 
						|
  { const pl_wchar_t *s = r->start;
 | 
						|
 | 
						|
    if ( iri )
 | 
						|
    { while(s<r->end)
 | 
						|
	iri_add_encoded_charbuf(cb, *s++, flags);
 | 
						|
    } else
 | 
						|
    { while(s<r->end)
 | 
						|
	add_encoded_charbuf(cb, *s++, flags);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* add_lwr_range_charbuf(charbuf *cb, const range *r, int iri, int flags)
 | 
						|
 | 
						|
   Add a range of characters while normalizing %-encoding and
 | 
						|
   mapping all characters to lowercase.
 | 
						|
 | 
						|
   FIXME: encoding and decoding compatible to add_range_charbuf();
 | 
						|
*/
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_lwr_range_charbuf(charbuf *cb, const range *r, int iri, int flags)
 | 
						|
{ const pl_wchar_t *s = r->start;
 | 
						|
 | 
						|
  while(s<r->end)
 | 
						|
  { int c;
 | 
						|
 | 
						|
    if ( *s == '%' )
 | 
						|
    { const pl_wchar_t *e;
 | 
						|
 | 
						|
      if ( (e=get_encoded_utf8(s, &c)) )
 | 
						|
      { s = e;
 | 
						|
      } else if (hex(s+1, 2, &c) )
 | 
						|
      { s += 3;
 | 
						|
      } else
 | 
						|
      { c = *s++;
 | 
						|
      }
 | 
						|
    } else
 | 
						|
    { c = *s++;
 | 
						|
    }
 | 
						|
 | 
						|
    if ( iri )
 | 
						|
      iri_add_encoded_charbuf(cb, towlower((wint_t)c), flags);
 | 
						|
    else
 | 
						|
      add_encoded_charbuf(cb, towlower((wint_t)c), flags);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
free_charbuf(charbuf *cb)
 | 
						|
{ if ( cb->base != cb->tmp )
 | 
						|
    PL_free(cb->base);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#define TXT_EX_TEXT (CVT_ATOM|CVT_STRING|CVT_EXCEPTION)
 | 
						|
 | 
						|
static int
 | 
						|
get_text_arg(term_t term, int pos, size_t *len, pl_wchar_t **s, int flags)
 | 
						|
{ term_t tmp = PL_new_term_ref();
 | 
						|
 | 
						|
  _PL_get_arg(pos, term, tmp);
 | 
						|
  if ( PL_is_variable(tmp) )
 | 
						|
    return FALSE;
 | 
						|
  if ( !PL_get_wchars(tmp, len, s, flags) )
 | 
						|
    return -1;
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_components(+URI, -Components)
 | 
						|
 | 
						|
Based on RFC-3986 regular expression:
 | 
						|
 | 
						|
    ==
 | 
						|
    ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 | 
						|
     12            3  4          5       6  7        8 9
 | 
						|
    ==
 | 
						|
*/
 | 
						|
 | 
						|
typedef struct uri_component_ranges
 | 
						|
{ range scheme;
 | 
						|
  range authority;
 | 
						|
  range path;
 | 
						|
  range query;
 | 
						|
  range fragment;
 | 
						|
} uri_component_ranges;
 | 
						|
 | 
						|
 | 
						|
static const pl_wchar_t *
 | 
						|
skip_not(const pl_wchar_t *in, const pl_wchar_t *end, const pl_wchar_t *chars)
 | 
						|
{ if ( !chars[1] )
 | 
						|
  { for(; in < end; in++)
 | 
						|
    { if ( chars[0] == in[0] )
 | 
						|
	return in;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
  { for(; in < end; in++)
 | 
						|
    { if ( wcschr(chars, in[0]) )
 | 
						|
	return in;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return in;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
unify_range(term_t t, const range *r)
 | 
						|
{ if ( r->start )
 | 
						|
    return PL_unify_wchars(t, PL_ATOM, r->end - r->start, r->start);
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
parse_uri(uri_component_ranges *ranges, size_t len, const pl_wchar_t *s)
 | 
						|
{ const pl_wchar_t *end = &s[len];
 | 
						|
  const pl_wchar_t *here = s;
 | 
						|
  const pl_wchar_t *e;
 | 
						|
 | 
						|
  memset(ranges, 0, sizeof(*ranges));
 | 
						|
 | 
						|
  e = skip_not(here, end, L":/?#");
 | 
						|
  if ( e > s && e[0] == ':' )			/* 1&2 */
 | 
						|
  { ranges->scheme.start = s;
 | 
						|
    ranges->scheme.end = e;
 | 
						|
    here = e+1;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( here[0] == '/' && here[1] == '/' )	/* 3 */
 | 
						|
  { here += 2;				/* 4 */
 | 
						|
    e = skip_not(here, end, L"/?#");
 | 
						|
    ranges->authority.start = here;
 | 
						|
    ranges->authority.end   = e;
 | 
						|
    here = e;					/* 5 */
 | 
						|
  }
 | 
						|
 | 
						|
  e = skip_not(here, end, L"?#");
 | 
						|
  ranges->path.start = here;
 | 
						|
  ranges->path.end   = e;
 | 
						|
  here = e;					/* 6 */
 | 
						|
 | 
						|
  if ( here[0] == '?' )
 | 
						|
  { here++;					/* 7 */
 | 
						|
    e = skip_not(here, end, L"#");
 | 
						|
    ranges->query.start = here;
 | 
						|
    ranges->query.end = e;
 | 
						|
    here = e;					/* 8 */
 | 
						|
  }
 | 
						|
 | 
						|
  if ( here[0] == '#' )
 | 
						|
  { here++;					/* 9 */
 | 
						|
    ranges->fragment.start = here;
 | 
						|
    ranges->fragment.end   = end;
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_components(term_t URI, term_t components)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( PL_get_wchars(URI, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST) )
 | 
						|
  { uri_component_ranges ranges;
 | 
						|
    term_t rt = PL_new_term_refs(6);
 | 
						|
    term_t av = rt+1;
 | 
						|
 | 
						|
    parse_uri(&ranges, len, s);
 | 
						|
 | 
						|
    unify_range(av+0, &ranges.scheme);
 | 
						|
    unify_range(av+1, &ranges.authority);
 | 
						|
    unify_range(av+2, &ranges.path);
 | 
						|
    unify_range(av+3, &ranges.query);
 | 
						|
    unify_range(av+4, &ranges.fragment);
 | 
						|
 | 
						|
    return (PL_cons_functor_v(rt, FUNCTOR_uri_components5, av) &&
 | 
						|
	    PL_unify(components, rt));
 | 
						|
  } else if ( PL_is_functor(components, FUNCTOR_uri_components5) )
 | 
						|
  { charbuf b;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    init_charbuf(&b);
 | 
						|
					/* schema */
 | 
						|
    if ( (rc=get_text_arg(components, 1, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_nchars_charbuf(&b, len, s);
 | 
						|
      add_charbuf(&b, ':');
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
					/* authority */
 | 
						|
    if ( (rc=get_text_arg(components, 2, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_charbuf(&b, '/');
 | 
						|
      add_charbuf(&b, '/');
 | 
						|
      add_nchars_charbuf(&b, len, s);
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
					/* path */
 | 
						|
    if ( (rc=get_text_arg(components, 3, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_nchars_charbuf(&b, len, s);
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
					/* query */
 | 
						|
    if ( (rc=get_text_arg(components, 4, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { if ( len > 0 )
 | 
						|
      { add_charbuf(&b, '?');
 | 
						|
	add_nchars_charbuf(&b, len, s);
 | 
						|
      }
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
					/* fragment */
 | 
						|
    if ( (rc=get_text_arg(components, 5, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_charbuf(&b, '#');
 | 
						|
      add_nchars_charbuf(&b, len, s);
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    rc = PL_unify_wchars(URI, PL_ATOM, b.here-b.base, b.base);
 | 
						|
    free_charbuf(&b);
 | 
						|
 | 
						|
    return rc;
 | 
						|
  } else				/* generate an error */
 | 
						|
  { return PL_get_wchars(URI, &len, &s,
 | 
						|
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_is_global(+URI) is semidet.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_is_global(term_t URI)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( PL_get_wchars(URI, &len, &s,
 | 
						|
		     CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
 | 
						|
  { const pl_wchar_t *e;
 | 
						|
    const pl_wchar_t *end = &s[len];
 | 
						|
    range r;
 | 
						|
 | 
						|
    e = skip_not(s, end, L":/?#");
 | 
						|
    if ( e > s && e[0] == ':' )
 | 
						|
    { r.start = s;
 | 
						|
      r.end = e;
 | 
						|
      if ( range_is_unreserved(&r, TRUE, CH_SCHEME) )
 | 
						|
	return TRUE;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   QUERY-STRING		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static int
 | 
						|
unify_decoded_atom(term_t t, range *r, int flags)
 | 
						|
{ if ( range_has_escape(r, flags) )
 | 
						|
  { charbuf b;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    init_charbuf(&b);
 | 
						|
    add_decoded_range_charbuf(&b, r, flags);
 | 
						|
    rc = PL_unify_wchars(t, PL_ATOM, b.here - b.base, b.base);
 | 
						|
    free_charbuf(&b);
 | 
						|
    return rc;
 | 
						|
  } else
 | 
						|
  { return unify_range(t, r);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
unify_query_string_components(term_t list, size_t len, const pl_wchar_t *qs)
 | 
						|
{ if ( len == 0 )
 | 
						|
  { return PL_unify_nil(list);
 | 
						|
  } else
 | 
						|
  { term_t tail = PL_copy_term_ref(list);
 | 
						|
    term_t head = PL_new_term_ref();
 | 
						|
    term_t eq   = PL_new_term_refs(3);
 | 
						|
    term_t nv   = eq+1;
 | 
						|
    const pl_wchar_t *end = &qs[len];
 | 
						|
 | 
						|
    while(qs < end)
 | 
						|
    { range name, value;
 | 
						|
 | 
						|
      name.start = qs;
 | 
						|
      name.end   = skip_not(qs, end, L"=");
 | 
						|
      if ( name.end < end )
 | 
						|
      { value.start = name.end+1;
 | 
						|
	value.end   = skip_not(value.start, end, L"&");
 | 
						|
 | 
						|
	qs = value.end+1;
 | 
						|
      } else
 | 
						|
      { return syntax_error("illegal_uri_query");
 | 
						|
      }
 | 
						|
 | 
						|
      PL_put_variable(nv+0);
 | 
						|
      PL_put_variable(nv+1);
 | 
						|
      unify_decoded_atom(nv+0, &name, ESC_QNAME);
 | 
						|
      unify_decoded_atom(nv+1, &value, ESC_QVALUE);
 | 
						|
 | 
						|
      if ( !PL_cons_functor_v(eq, FUNCTOR_equal2, nv) ||
 | 
						|
	   !PL_unify_list(tail, head, tail) ||
 | 
						|
	   !PL_unify(head, eq) )
 | 
						|
	return FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    return PL_unify_nil(tail);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
add_encoded_term_charbuf(charbuf *cb, term_t value, int flags)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  range r;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( !PL_get_wchars(value, &len, &s, CVT_ATOMIC|CVT_EXCEPTION) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  r.start = s;
 | 
						|
  r.end = r.start+len;
 | 
						|
  if ( range_is_unreserved(&r, TRUE, flags) )
 | 
						|
  { add_nchars_charbuf(cb, r.end-r.start, r.start);
 | 
						|
  } else
 | 
						|
  { const pl_wchar_t *s = r.start;
 | 
						|
 | 
						|
    while(s<r.end)
 | 
						|
      add_encoded_charbuf(cb, *s++, flags);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_query_components(+QueryString, -ValueList) is det.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_query_components(term_t string, term_t list)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( PL_get_wchars(string, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST) )
 | 
						|
  { return  unify_query_string_components(list, len, s);
 | 
						|
  } else if ( PL_is_list(list) )
 | 
						|
  { term_t tail = PL_copy_term_ref(list);
 | 
						|
    term_t head = PL_new_term_ref();
 | 
						|
    term_t nv   = PL_new_term_refs(2);
 | 
						|
    charbuf out;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    fill_flags();
 | 
						|
    init_charbuf(&out);
 | 
						|
    while( PL_get_list(tail, head, tail) )
 | 
						|
    { atom_t fname;
 | 
						|
      int arity;
 | 
						|
 | 
						|
      if ( PL_is_functor(head, FUNCTOR_equal2) ||
 | 
						|
	   PL_is_functor(head, FUNCTOR_pair2) )
 | 
						|
      {	_PL_get_arg(1, head, nv+0);
 | 
						|
	_PL_get_arg(2, head, nv+1);
 | 
						|
      } else if ( PL_get_name_arity(head, &fname, &arity) && arity == 1 )
 | 
						|
      { PL_put_atom(nv+0, fname);
 | 
						|
	_PL_get_arg(1, head, nv+1);
 | 
						|
      } else
 | 
						|
      { free_charbuf(&out);
 | 
						|
	return type_error("name_value", head);
 | 
						|
      }
 | 
						|
 | 
						|
      if ( out.here != out.base )
 | 
						|
	add_charbuf(&out, '&');
 | 
						|
      if ( !add_encoded_term_charbuf(&out, nv+0, ESC_QNAME) )
 | 
						|
      { free_charbuf(&out);
 | 
						|
	return FALSE;
 | 
						|
      }
 | 
						|
      add_charbuf(&out, '=');
 | 
						|
      if ( !add_encoded_term_charbuf(&out, nv+1, ESC_QVALUE) )
 | 
						|
      { free_charbuf(&out);
 | 
						|
	return FALSE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    rc = PL_unify_wchars(string, PL_ATOM, out.here-out.base, out.base);
 | 
						|
    free_charbuf(&out);
 | 
						|
    return rc;
 | 
						|
  } else
 | 
						|
  { return PL_get_wchars(string, &len, &s,
 | 
						|
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_encoded(+What, +String, -Encoded)
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_encoded(term_t what, term_t qv, term_t enc)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
  atom_t w;
 | 
						|
  int flags;
 | 
						|
 | 
						|
  if ( !PL_get_atom(what, &w) )
 | 
						|
    return type_error("atom", what);
 | 
						|
  if ( w == ATOM_query_value )
 | 
						|
    flags = ESC_QVALUE;
 | 
						|
  else if ( w == ATOM_fragment )
 | 
						|
    flags = ESC_FRAGMENT;
 | 
						|
  else if ( w == ATOM_path )
 | 
						|
    flags = ESC_PATH;
 | 
						|
  else
 | 
						|
    return domain_error("uri_component", what);
 | 
						|
 | 
						|
  fill_flags();
 | 
						|
 | 
						|
  if ( !PL_is_variable(qv) )
 | 
						|
  { charbuf out;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    init_charbuf(&out);
 | 
						|
    if ( !add_encoded_term_charbuf(&out, qv, flags) )
 | 
						|
    { free_charbuf(&out);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    rc = PL_unify_wchars(enc, PL_ATOM, out.here-out.base, out.base);
 | 
						|
    free_charbuf(&out);
 | 
						|
    return rc;
 | 
						|
  } else if ( PL_get_wchars(enc, &len, &s, CVT_ATOM|CVT_STRING|CVT_EXCEPTION) )
 | 
						|
  { range r;
 | 
						|
 | 
						|
    r.start = s;
 | 
						|
    r.end = s+len;
 | 
						|
 | 
						|
    return unify_decoded_atom(qv, &r, flags);
 | 
						|
  } else
 | 
						|
  { return FALSE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      AUTHORITY		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static int
 | 
						|
unify_uri_authority_components(term_t components,
 | 
						|
			       size_t len, const pl_wchar_t *s)
 | 
						|
{ const pl_wchar_t *end = &s[len];
 | 
						|
  const pl_wchar_t *e;
 | 
						|
  range user   = {0};
 | 
						|
  range passwd = {0};
 | 
						|
  range host   = {0};
 | 
						|
  range port   = {0};
 | 
						|
  term_t t = PL_new_term_refs(5);
 | 
						|
  term_t av = t+1;
 | 
						|
 | 
						|
  if ( (e=skip_not(s, end, L"@")) && e<end )
 | 
						|
  { user.start = s;
 | 
						|
    user.end = e;
 | 
						|
    s = e+1;
 | 
						|
    if ( (e=skip_not(user.start, user.end, L":")) && e<user.end )
 | 
						|
    { passwd.start = e+1;
 | 
						|
      passwd.end   = user.end;
 | 
						|
      user.end     = e;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  host.start = s;
 | 
						|
  host.end = skip_not(s, end, L":");
 | 
						|
  if ( host.end < end )
 | 
						|
  { port.start = host.end+1;
 | 
						|
    port.end = end;
 | 
						|
  }
 | 
						|
 | 
						|
  if ( user.start )
 | 
						|
    unify_decoded_atom(av+0, &user, ESC_USER);
 | 
						|
  if ( passwd.start )
 | 
						|
    unify_decoded_atom(av+1, &passwd, ESC_PASSWD);
 | 
						|
  unify_decoded_atom(av+2, &host, ESC_HOST);
 | 
						|
  if ( port.start )
 | 
						|
  { wchar_t *ep;
 | 
						|
    long pn = wcstol(port.start, &ep, 10);
 | 
						|
 | 
						|
    if ( ep == port.end )
 | 
						|
    { if ( !PL_put_integer(av+3, pn) )
 | 
						|
	return FALSE;
 | 
						|
    } else
 | 
						|
    { unify_decoded_atom(av+3, &port, ESC_PORT);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return (PL_cons_functor_v(t, FUNCTOR_uri_authority4, av) &&
 | 
						|
	  PL_unify(components, t));
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_authority_components(+Authority, -Components) is det.
 | 
						|
    uri_authority_components(-Authority, +Components) is det.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_authority_components(term_t Authority, term_t components)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( PL_get_wchars(Authority, &len, &s, CVT_ATOM|CVT_STRING|CVT_LIST) )
 | 
						|
  { return  unify_uri_authority_components(components, len, s);
 | 
						|
  } else if ( PL_is_functor(components, FUNCTOR_uri_authority4) )
 | 
						|
  { charbuf b;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    init_charbuf(&b);
 | 
						|
    if ( (rc=get_text_arg(components, 1, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_nchars_charbuf(&b, len, s);
 | 
						|
      if ( (rc=get_text_arg(components, 2, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
      { add_charbuf(&b, ':');
 | 
						|
	add_nchars_charbuf(&b, len, s);
 | 
						|
      } else if ( rc == -1 )
 | 
						|
      { free_charbuf(&b);
 | 
						|
	return FALSE;
 | 
						|
      }
 | 
						|
      add_charbuf(&b, '@');
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    if ( (rc=get_text_arg(components, 3, &len, &s, TXT_EX_TEXT)) == TRUE )
 | 
						|
    { add_nchars_charbuf(&b, len, s);
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    if ( (rc=get_text_arg(components, 4, &len, &s,
 | 
						|
			  TXT_EX_TEXT|CVT_INTEGER)) == TRUE )
 | 
						|
    { add_charbuf(&b, ':');
 | 
						|
      add_nchars_charbuf(&b, len, s);
 | 
						|
    } else if ( rc == -1 )
 | 
						|
    { free_charbuf(&b);
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
 | 
						|
    rc = PL_unify_wchars(Authority, PL_ATOM, b.here-b.base, b.base);
 | 
						|
    free_charbuf(&b);
 | 
						|
 | 
						|
    return rc;
 | 
						|
  } else
 | 
						|
  { return PL_get_wchars(Authority, &len, &s,
 | 
						|
			 CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	  NORMALIZATION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static int
 | 
						|
normalize_in_charbuf(charbuf *cb, uri_component_ranges *ranges, int iri)
 | 
						|
{ fill_flags();
 | 
						|
 | 
						|
  if ( ranges->scheme.start )
 | 
						|
  { add_lwr_range_charbuf(cb, &ranges->scheme, iri, ESC_SCHEME);
 | 
						|
    add_charbuf(cb, ':');
 | 
						|
  }
 | 
						|
  if ( ranges->authority.start )
 | 
						|
  { add_charbuf(cb, '/');
 | 
						|
    add_charbuf(cb, '/');
 | 
						|
    add_lwr_range_charbuf(cb, &ranges->authority, iri, ESC_AUTH);
 | 
						|
  }
 | 
						|
  if ( ranges->path.end > ranges->path.start )
 | 
						|
  { charbuf pb;
 | 
						|
    charbuf path;
 | 
						|
    size_t len;
 | 
						|
 | 
						|
    init_charbuf(&pb);
 | 
						|
    add_range_charbuf(&pb, &ranges->path, iri, ESC_PATH);
 | 
						|
    init_charbuf_at_size(&path, pb.here-pb.base);
 | 
						|
    len = removed_dot_segments(pb.here-pb.base, pb.base, path.base);
 | 
						|
    add_nchars_charbuf(cb, len, path.base);
 | 
						|
    free_charbuf(&path);
 | 
						|
    free_charbuf(&pb);
 | 
						|
  }
 | 
						|
  if ( ranges->query.start )
 | 
						|
  { add_charbuf(cb, '?');
 | 
						|
    add_range_charbuf(cb, &ranges->query, iri, ESC_QUERY);
 | 
						|
  }
 | 
						|
  if ( ranges->fragment.start )
 | 
						|
  { add_charbuf(cb, '#');
 | 
						|
    add_range_charbuf(cb, &ranges->fragment, iri, ESC_FRAGMENT);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static foreign_t
 | 
						|
normalized(term_t URI, term_t CannonicalURI, int iri)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( PL_get_wchars(URI, &len, &s,
 | 
						|
		     CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
 | 
						|
  { uri_component_ranges ranges;
 | 
						|
    charbuf b;
 | 
						|
    int rc;
 | 
						|
 | 
						|
    parse_uri(&ranges, len, s);
 | 
						|
    init_charbuf(&b);
 | 
						|
    normalize_in_charbuf(&b, &ranges, iri);
 | 
						|
 | 
						|
    rc = PL_unify_wchars(CannonicalURI, PL_ATOM, b.here-b.base, b.base);
 | 
						|
    free_charbuf(&b);
 | 
						|
 | 
						|
    return rc;
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_normalized(+URI, -CannonicalURI)
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_normalized(term_t URI, term_t CannonicalURI)
 | 
						|
{ return normalized(URI, CannonicalURI, FALSE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_normalized_iri(+URI, -CannonicalIRI)
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_normalized_iri(term_t URI, term_t CannonicalURI)
 | 
						|
{ return normalized(URI, CannonicalURI, TRUE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
ranges_in_charbuf(charbuf *cb, uri_component_ranges *ranges)
 | 
						|
{ if ( ranges->scheme.start )
 | 
						|
  { add_verb_range_charbuf(cb, &ranges->scheme);
 | 
						|
    add_charbuf(cb, ':');
 | 
						|
  }
 | 
						|
  if ( ranges->authority.start )
 | 
						|
  { add_charbuf(cb, '/');
 | 
						|
    add_charbuf(cb, '/');
 | 
						|
    add_verb_range_charbuf(cb, &ranges->authority);
 | 
						|
  }
 | 
						|
  add_verb_range_charbuf(cb, &ranges->path);
 | 
						|
  if ( ranges->query.start )
 | 
						|
  { add_charbuf(cb, '?');
 | 
						|
    add_verb_range_charbuf(cb, &ranges->query);
 | 
						|
  }
 | 
						|
  if ( ranges->fragment.start )
 | 
						|
  { add_charbuf(cb, '#');
 | 
						|
    add_verb_range_charbuf(cb, &ranges->fragment);
 | 
						|
  }
 | 
						|
 | 
						|
  return TRUE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ atom_t     	       atom;
 | 
						|
  pl_wchar_t 	      *text;
 | 
						|
  uri_component_ranges ranges;
 | 
						|
} base_cache;
 | 
						|
 | 
						|
#ifdef _REENTRANT
 | 
						|
#include <pthread.h>
 | 
						|
static pthread_key_t base_key;
 | 
						|
 | 
						|
static void
 | 
						|
free_base_cache(void *cache)
 | 
						|
{ base_cache *base = cache;
 | 
						|
 | 
						|
  if ( PL_query(PL_QUERY_HALTING) )
 | 
						|
    return;
 | 
						|
 | 
						|
  if ( base->atom )
 | 
						|
  { PL_unregister_atom(base->atom);
 | 
						|
    PL_free(base->text);
 | 
						|
  }
 | 
						|
 | 
						|
  PL_free(base);
 | 
						|
}
 | 
						|
 | 
						|
static base_cache *
 | 
						|
myBase()
 | 
						|
{ base_cache *base;
 | 
						|
 | 
						|
  if ( (base=pthread_getspecific(base_key)) )
 | 
						|
    return base;
 | 
						|
  base = PL_malloc(sizeof(*base));
 | 
						|
  memset(base, 0, sizeof(*base));
 | 
						|
 | 
						|
  pthread_setspecific(base_key, base);
 | 
						|
  return base;
 | 
						|
}
 | 
						|
 | 
						|
#else
 | 
						|
static base_cache base_store;
 | 
						|
#define myBase() &base_store;
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
static const uri_component_ranges *
 | 
						|
base_ranges(term_t t)
 | 
						|
{ atom_t a;
 | 
						|
 | 
						|
  if ( PL_get_atom(t, &a) )
 | 
						|
  { base_cache *base = myBase();
 | 
						|
 | 
						|
    if ( base->atom != a )
 | 
						|
    { size_t len;
 | 
						|
      pl_wchar_t *s;
 | 
						|
 | 
						|
      if ( base->atom )
 | 
						|
      { PL_unregister_atom(base->atom);
 | 
						|
	PL_free(base->text);
 | 
						|
      }
 | 
						|
      if ( !PL_get_wchars(t, &len, &s, CVT_ATOM|BUF_MALLOC) )
 | 
						|
	return NULL;
 | 
						|
      base->atom = a;
 | 
						|
      PL_register_atom(a);
 | 
						|
      base->text = s;
 | 
						|
      parse_uri(&base->ranges, len, s);
 | 
						|
    }
 | 
						|
 | 
						|
    return &base->ranges;
 | 
						|
  } else
 | 
						|
  { type_error("atom", t);
 | 
						|
    return NULL;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static foreign_t
 | 
						|
resolve(term_t Rel, term_t Base, term_t URI, int normalize, int iri)
 | 
						|
{ pl_wchar_t *s;
 | 
						|
  size_t slen;
 | 
						|
  uri_component_ranges s_ranges, t_ranges;
 | 
						|
  int rc;
 | 
						|
  size_t len;
 | 
						|
  charbuf out, pb, path;
 | 
						|
 | 
						|
  init_charbuf(&pb);			/* path-buffer */
 | 
						|
 | 
						|
  if ( PL_get_wchars(Rel, &slen, &s,
 | 
						|
		     CVT_ATOM|CVT_STRING|CVT_LIST|CVT_EXCEPTION) )
 | 
						|
  { parse_uri(&s_ranges, slen, s);
 | 
						|
    if ( s_ranges.scheme.start )
 | 
						|
    { t_ranges = s_ranges;
 | 
						|
    } else
 | 
						|
    { const uri_component_ranges *b_ranges;
 | 
						|
 | 
						|
      if ( !(b_ranges = base_ranges(Base)) )
 | 
						|
	return FALSE;
 | 
						|
 | 
						|
      memset(&t_ranges, 0, sizeof(t_ranges));
 | 
						|
      if ( s_ranges.authority.start )
 | 
						|
      { t_ranges.authority = s_ranges.authority;
 | 
						|
	t_ranges.path      = s_ranges.path;
 | 
						|
	t_ranges.query     = s_ranges.query;
 | 
						|
      } else
 | 
						|
      { if ( s_ranges.path.start == s_ranges.path.end )
 | 
						|
	{ t_ranges.path = b_ranges->path;
 | 
						|
	  if ( s_ranges.query.start )
 | 
						|
	    t_ranges.query = s_ranges.query;
 | 
						|
	  else
 | 
						|
	    t_ranges.query = b_ranges->query;
 | 
						|
	} else
 | 
						|
	{ if ( s_ranges.path.start[0] == '/' )
 | 
						|
	  { t_ranges.path = s_ranges.path;
 | 
						|
	  } else
 | 
						|
	  { if ( b_ranges->authority.start &&
 | 
						|
		 b_ranges->path.start == b_ranges->path.end )
 | 
						|
	    { add_charbuf(&pb, '/');
 | 
						|
	      add_verb_range_charbuf(&pb, &s_ranges.path);
 | 
						|
	    } else
 | 
						|
	    { range path = b_ranges->path;
 | 
						|
 | 
						|
	      path.end = remove_last_segment(path.start, path.end);
 | 
						|
	      add_verb_range_charbuf(&pb, &path);
 | 
						|
	      add_verb_range_charbuf(&pb, &s_ranges.path);
 | 
						|
	      t_ranges.path.start = pb.base;
 | 
						|
	      t_ranges.path.end = pb.here;
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
	  t_ranges.query = s_ranges.query;
 | 
						|
	}
 | 
						|
	t_ranges.authority = b_ranges->authority;
 | 
						|
      }
 | 
						|
      t_ranges.scheme = b_ranges->scheme;
 | 
						|
      t_ranges.fragment = s_ranges.fragment;
 | 
						|
    }
 | 
						|
  } else
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  init_charbuf(&out);			/* output buffer */
 | 
						|
 | 
						|
  if ( normalize )
 | 
						|
  { normalize_in_charbuf(&out, &t_ranges, iri);
 | 
						|
  } else
 | 
						|
  { init_charbuf_at_size(&path, t_ranges.path.end - t_ranges.path.start);
 | 
						|
    len = removed_dot_segments(t_ranges.path.end - t_ranges.path.start,
 | 
						|
			       t_ranges.path.start,
 | 
						|
			       path.base);
 | 
						|
    t_ranges.path.start = path.base;
 | 
						|
    t_ranges.path.end   = path.base+len;
 | 
						|
    free_charbuf(&pb);
 | 
						|
 | 
						|
    ranges_in_charbuf(&out, &t_ranges);
 | 
						|
  }
 | 
						|
 | 
						|
  rc = PL_unify_wchars(URI, PL_ATOM, out.here-out.base, out.base);
 | 
						|
  free_charbuf(&out);
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/** uri_resolve(+Relative, +Base, -Absolute) is det.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_resolve(term_t Rel, term_t Base, term_t URI)
 | 
						|
{ return resolve(Rel, Base, URI, FALSE, FALSE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_normalized(+Relative, +Base, -Absolute) is det.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_normalized3(term_t Rel, term_t Base, term_t URI)
 | 
						|
{ return resolve(Rel, Base, URI, TRUE, FALSE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_normalized_iri(+Relative, +Base, -Absolute) is det.
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_normalized_iri3(term_t Rel, term_t Base, term_t IRI)
 | 
						|
{ return resolve(Rel, Base, IRI, TRUE, TRUE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    PATH LOGIC		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* http://labs.apache.org/webarch/uri/rfc/rfc3986.html#relative-dot-segments
 | 
						|
*/
 | 
						|
 | 
						|
static pl_wchar_t *
 | 
						|
remove_last_segment(const pl_wchar_t *base, const pl_wchar_t *o)
 | 
						|
{ while(o>base && o[-1] != '/' )
 | 
						|
    o--;
 | 
						|
 | 
						|
  return (pl_wchar_t*) o;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static inline int
 | 
						|
fetch(const pl_wchar_t *in, const pl_wchar_t *end, int at)
 | 
						|
{ if ( in+at>=end )
 | 
						|
    return 0;
 | 
						|
  return in[at];
 | 
						|
}
 | 
						|
 | 
						|
static size_t
 | 
						|
removed_dot_segments(size_t len, const pl_wchar_t *in, pl_wchar_t *out)
 | 
						|
{ const pl_wchar_t *end = &in[len];
 | 
						|
  pl_wchar_t *o = out;
 | 
						|
 | 
						|
  while(in<end)
 | 
						|
  { if ( in[0] == '.' )
 | 
						|
    { if ( fetch(in, end, 1) == '/' ||
 | 
						|
	   (fetch(in, end, 1) == '.' && fetch(in, end, 2) == '/') )
 | 
						|
      { in += 2;			/* 2A */
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if ( in[0] == '/' && fetch(in, end, 1) == '.' )
 | 
						|
    { if ( fetch(in, end, 2) == '/' )
 | 
						|
      { in += 2;			/* 2B "/./" --> "/" */
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
      if ( !fetch(in, end, 2) )
 | 
						|
      { *o++ = '/';			/* 2B "/." --> "/" (and close) */
 | 
						|
	in += 2;
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
      if ( fetch(in, end, 2) == '.' )
 | 
						|
      { if ( fetch(in, end, 3) == '/' )
 | 
						|
	{ in += 3;			/* 2C "/../" --> "/" */
 | 
						|
	  o = remove_last_segment(out, o);
 | 
						|
	  if ( o>out ) o--;		/* delete / */
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
	if ( !fetch(in, end, 3) )
 | 
						|
	{ o = remove_last_segment(out, o);
 | 
						|
	  if ( o>out ) o--;		/* delete / */
 | 
						|
	  *o++ = '/';
 | 
						|
	  in += 3;
 | 
						|
	  continue;
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if ( in[0] == '.' )
 | 
						|
    { if ( !fetch(in, end, 1) )
 | 
						|
      { in++;				/* 3D */
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
      if ( fetch(in, end, 1) == '.' && !fetch(in, end, 2) )
 | 
						|
      { in += 2;			/* 3D */
 | 
						|
	continue;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    if ( in[0] == '/' )
 | 
						|
      *o++ = *in++;
 | 
						|
    while( in < end && in[0] != '/' )
 | 
						|
      *o++ = *in++;
 | 
						|
  }
 | 
						|
 | 
						|
  return o-out;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    IRI HANDLING	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
#define utf8_put_char(out, chr) \
 | 
						|
	((chr) < 0x80 ? out[0]=(char)(chr), out+1 \
 | 
						|
		      : _utf8_put_char(out, (chr)))
 | 
						|
 | 
						|
 | 
						|
static char *
 | 
						|
_utf8_put_char(char *out, int chr)
 | 
						|
{ if ( chr < 0x80 )
 | 
						|
  { *out++ = chr;
 | 
						|
  } else if ( chr < 0x800 )
 | 
						|
  { *out++ = 0xc0|((chr>>6)&0x1f);
 | 
						|
    *out++ = 0x80|(chr&0x3f);
 | 
						|
  } else if ( chr < 0x10000 )
 | 
						|
  { *out++ = 0xe0|((chr>>12)&0x0f);
 | 
						|
    *out++ = 0x80|((chr>>6)&0x3f);
 | 
						|
    *out++ = 0x80|(chr&0x3f);
 | 
						|
  } else if ( chr < 0x200000 )
 | 
						|
  { *out++ = 0xf0|((chr>>18)&0x07);
 | 
						|
    *out++ = 0x80|((chr>>12)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>6)&0x3f);
 | 
						|
    *out++ = 0x80|(chr&0x3f);
 | 
						|
  } else if ( chr < 0x4000000 )
 | 
						|
  { *out++ = 0xf8|((chr>>24)&0x03);
 | 
						|
    *out++ = 0x80|((chr>>18)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>12)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>6)&0x3f);
 | 
						|
    *out++ = 0x80|(chr&0x3f);
 | 
						|
  } else if ( (unsigned)chr < 0x80000000 )
 | 
						|
  { *out++ = 0xfc|((chr>>30)&0x01);
 | 
						|
    *out++ = 0x80|((chr>>24)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>18)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>12)&0x3f);
 | 
						|
    *out++ = 0x80|((chr>>6)&0x3f);
 | 
						|
    *out++ = 0x80|(chr&0x3f);
 | 
						|
  }
 | 
						|
 | 
						|
  return out;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/** uri_iri(+URI, -IRI) is det.
 | 
						|
    uri_iri(-URI, +IRI) is det.
 | 
						|
 | 
						|
Perform %- and UTF-8 encoding/decoding to translate between a URI and
 | 
						|
IRI
 | 
						|
*/
 | 
						|
 | 
						|
static foreign_t
 | 
						|
uri_iri(term_t URI, term_t IRI)
 | 
						|
{ if ( !PL_is_variable(URI) )
 | 
						|
    return uri_normalized_iri(URI, IRI);
 | 
						|
  else
 | 
						|
    return uri_normalized(IRI, URI);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   REGISTRATION		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
#define MKATOM(n) \
 | 
						|
	ATOM_ ## n = PL_new_atom(#n)
 | 
						|
#define MKFUNCTOR(n,a) \
 | 
						|
	FUNCTOR_ ## n ## a = PL_new_functor(PL_new_atom(#n), a)
 | 
						|
 | 
						|
install_t
 | 
						|
install_uri()
 | 
						|
{ MKATOM(query_value);
 | 
						|
  MKATOM(fragment);
 | 
						|
  MKATOM(path);
 | 
						|
 | 
						|
  MKFUNCTOR(uri_components, 5);
 | 
						|
  MKFUNCTOR(uri_authority, 4);
 | 
						|
  MKFUNCTOR(error, 2);
 | 
						|
  MKFUNCTOR(syntax_error, 1);
 | 
						|
  MKFUNCTOR(type_error, 2);
 | 
						|
  MKFUNCTOR(domain_error, 2);
 | 
						|
  FUNCTOR_equal2 = PL_new_functor(PL_new_atom("="), 2);
 | 
						|
  FUNCTOR_pair2 = PL_new_functor(PL_new_atom("-"), 2);
 | 
						|
 | 
						|
#ifdef _REENTRANT
 | 
						|
  pthread_key_create(&base_key, free_base_cache);
 | 
						|
#endif
 | 
						|
 | 
						|
  PL_register_foreign("uri_components",	      2, uri_components,       0);
 | 
						|
  PL_register_foreign("uri_is_global",	      1, uri_is_global,	       0);
 | 
						|
  PL_register_foreign("uri_normalized",	      2, uri_normalized,       0);
 | 
						|
  PL_register_foreign("uri_normalized_iri",   2, uri_normalized_iri,   0);
 | 
						|
  PL_register_foreign("uri_resolve",	      3, uri_resolve,	       0);
 | 
						|
  PL_register_foreign("uri_normalized",	      3, uri_normalized3,      0);
 | 
						|
  PL_register_foreign("uri_normalized_iri",   3, uri_normalized_iri3,  0);
 | 
						|
  PL_register_foreign("uri_query_components", 2, uri_query_components, 0);
 | 
						|
  PL_register_foreign("uri_authority_components",
 | 
						|
					      2, uri_authority_components, 0);
 | 
						|
  PL_register_foreign("uri_encoded",	      3, uri_encoded,	       0);
 | 
						|
  PL_register_foreign("uri_iri",	      2, uri_iri,	       0);
 | 
						|
}
 |