895 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			895 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        wielemak@science.uva.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 1985-2007, University of Amsterdam
 | 
						|
 | 
						|
    This library is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU Lesser General Public
 | 
						|
    License as published by the Free Software Foundation; either
 | 
						|
    version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This library is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
    Lesser General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU Lesser General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
*/
 | 
						|
 | 
						|
#include "pl-incl.h"
 | 
						|
#include <ctype.h>
 | 
						|
#include "pl-ctype.h"
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
This module defines:
 | 
						|
 | 
						|
	char_type(?Char, ?Type)
 | 
						|
	code_type(?Char, ?Type)
 | 
						|
 | 
						|
See manual for details.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
#define CHAR_MODE 0
 | 
						|
#define CODE_MODE 1
 | 
						|
 | 
						|
#define CTX_CHAR 0			/* Class(Char) */
 | 
						|
#define CTX_CODE 1			/* Class(Int) */
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ atom_t	name;			/* name of the class */
 | 
						|
  int (*test)(wint_t chr);		/* boolean */
 | 
						|
  int (*reverse)(wint_t chr);		/* reverse mapping */
 | 
						|
  short		arity;			/* arity of class (i.e. lower('A')) */
 | 
						|
  short		ctx_type;		/* CTX_* */
 | 
						|
} char_type;
 | 
						|
 | 
						|
#define ENUM_NONE	0x00
 | 
						|
#define ENUM_CHAR	0x01
 | 
						|
#define ENUM_CLASS	0x02
 | 
						|
#define ENUM_BOTH	0x03
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ int		current;		/* current character */
 | 
						|
  const char_type   *class;		/* current class */
 | 
						|
  int   	do_enum;		/* what to enumerate */
 | 
						|
} generator;
 | 
						|
 | 
						|
 | 
						|
static  int unicode_separator(pl_wchar_t c);
 | 
						|
 | 
						|
static int
 | 
						|
iswhite(wint_t chr)
 | 
						|
{ return chr == ' ' || chr == '\t';
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#ifdef __YAP_PROLOG__
 | 
						|
inline int
 | 
						|
unicode_separator(pl_wchar_t c)
 | 
						|
{ //return PlBlankW(c); // vsc: we need to look into this
 | 
						|
  return iswhite(c);
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
static int
 | 
						|
fiscsym(wint_t chr)
 | 
						|
{ return iswalnum(chr) || chr == '_';
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
fiscsymf(wint_t chr)
 | 
						|
{ return iswalpha(chr) || chr == '_';
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
iseof(wint_t chr)
 | 
						|
{ return chr == (wint_t)-1;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
iseol(wint_t chr)
 | 
						|
{ return chr >= 10 && chr <= 13;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
isnl(wint_t chr)
 | 
						|
{ return chr == '\n';
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
isperiod(wint_t chr)
 | 
						|
{ return chr && strchr(".?!", chr) != NULL;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
isquote(wint_t chr)
 | 
						|
{ return chr && strchr("'`\"", chr) != NULL;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
fupper(wint_t chr)
 | 
						|
{ return iswlower(chr) ? (int)towupper(chr) : -1;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
flower(wint_t chr)
 | 
						|
{ return iswupper(chr) ? (int)towlower(chr) : -1;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
ftoupper(wint_t chr)
 | 
						|
{ return towupper(chr);
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
ftolower(wint_t chr)
 | 
						|
{ return towlower(chr);
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
fparen(wint_t chr)
 | 
						|
{ switch(chr)
 | 
						|
  { case '(':
 | 
						|
      return ')';
 | 
						|
    case '{':
 | 
						|
      return '}';
 | 
						|
    case '[':
 | 
						|
      return ']';
 | 
						|
    default:
 | 
						|
      return -1;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
rparen(wint_t chr)
 | 
						|
{ switch(chr)
 | 
						|
  { case ')':
 | 
						|
      return '(';
 | 
						|
    case '}':
 | 
						|
      return '{';
 | 
						|
    case ']':
 | 
						|
      return '[';
 | 
						|
    default:
 | 
						|
      return -1;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
fdigit(wint_t chr)
 | 
						|
{ if ( chr <= 0xff && isdigit(chr) )
 | 
						|
    return chr - '0';
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
rdigit(wint_t d)
 | 
						|
{ if ( (int)d >= 0 && d <= 9 )
 | 
						|
    return d+'0';
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
fxdigit(wint_t chr)
 | 
						|
{ if ( chr > 0xff )
 | 
						|
    return -1;
 | 
						|
  if ( isdigit(chr) )
 | 
						|
    return chr - '0';
 | 
						|
  if ( chr >= 'a' && chr <= 'f' )
 | 
						|
    return chr - 'a' + 10;
 | 
						|
  if ( chr >= 'A' && chr <= 'F' )
 | 
						|
    return chr - 'A' + 10;
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
rxdigit(wint_t d)
 | 
						|
{ if ( (int)d >= 0 && d <= 9 )
 | 
						|
    return d+'0';
 | 
						|
  if ( d >= 10 && d <= 15 )
 | 
						|
    return d-10+'a';
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#define mkfunction(name) \
 | 
						|
	static int f ## name(wint_t chr) { return name(chr); }
 | 
						|
 | 
						|
mkfunction(iswalnum)
 | 
						|
mkfunction(iswalpha)
 | 
						|
mkfunction(isascii)
 | 
						|
mkfunction(iswcntrl)
 | 
						|
mkfunction(iswdigit)
 | 
						|
mkfunction(iswgraph)
 | 
						|
mkfunction(iswlower)
 | 
						|
mkfunction(iswupper)
 | 
						|
mkfunction(iswpunct)
 | 
						|
mkfunction(iswspace)
 | 
						|
 | 
						|
INIT_DEF(char_type, char_types, 26)
 | 
						|
ADD_DEF2(ATOM_alnum, fiswalnum)
 | 
						|
ADD_DEF2(ATOM_csym,	fiscsym )
 | 
						|
ADD_DEF2(ATOM_csymf,	fiscsymf )
 | 
						|
ADD_DEF2(ATOM_ascii,	fisascii )
 | 
						|
ADD_DEF2(ATOM_white,	iswhite )
 | 
						|
ADD_DEF2(ATOM_cntrl,	fiswcntrl )
 | 
						|
ADD_DEF2(ATOM_digit,	fiswdigit )
 | 
						|
ADD_DEF2(ATOM_graph,	fiswgraph )
 | 
						|
ADD_DEF2(ATOM_lower,	fiswlower )
 | 
						|
ADD_DEF2(ATOM_upper,	fiswupper )
 | 
						|
ADD_DEF2(ATOM_punct,	fiswpunct )
 | 
						|
ADD_DEF2(ATOM_space,	fiswspace )
 | 
						|
ADD_DEF2(ATOM_end_of_file,iseof )
 | 
						|
ADD_DEF2(ATOM_end_of_line,iseol )
 | 
						|
ADD_DEF2(ATOM_newline,isnl )
 | 
						|
ADD_DEF2(ATOM_period,isperiod )
 | 
						|
ADD_DEF2(ATOM_quote,        isquote )
 | 
						|
ADD_DEF5(ATOM_lower,	fupper,		flower,   1, CTX_CHAR )
 | 
						|
ADD_DEF5(ATOM_upper,	flower,		fupper,   1, CTX_CHAR )
 | 
						|
ADD_DEF5(ATOM_to_lower,ftoupper,	ftolower, 1, CTX_CHAR )
 | 
						|
ADD_DEF5(ATOM_to_upper,ftolower,	ftoupper, 1, CTX_CHAR )
 | 
						|
ADD_DEF5(ATOM_paren,	fparen,		rparen,   1, CTX_CHAR )
 | 
						|
ADD_DEF5(ATOM_digit,	fdigit,		rdigit,   1, CTX_CODE  )
 | 
						|
ADD_DEF5(ATOM_xdigit,fxdigit,	rxdigit,  1, CTX_CODE  )
 | 
						|
END_DEFS(NULL_ATOM,	NULL)
 | 
						|
 | 
						|
 | 
						|
static const char_type *
 | 
						|
char_type_by_name(atom_t name, int arity)
 | 
						|
{ const char_type *cc;
 | 
						|
 | 
						|
  for(cc = char_types; cc->name; cc++)
 | 
						|
  { if ( cc->name == name && cc->arity == arity )
 | 
						|
      return cc;
 | 
						|
  }
 | 
						|
 | 
						|
  return NULL;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
advanceGen(generator *gen)
 | 
						|
{ if ( gen->do_enum & ENUM_CHAR )
 | 
						|
  { if ( ++gen->current == 256 )
 | 
						|
      fail;
 | 
						|
  } else
 | 
						|
  { gen->class++;
 | 
						|
    if ( !gen->class->name )
 | 
						|
      fail;
 | 
						|
  }
 | 
						|
 | 
						|
  succeed;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
unify_char_type(term_t type, const char_type *ct, int context, int how)
 | 
						|
{ if ( ct->arity == 0 )
 | 
						|
    return PL_unify_atom(type, ct->name);
 | 
						|
  else /*if ( ct->arity == 1 )*/
 | 
						|
  { if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) )
 | 
						|
    { term_t a = PL_new_term_ref();
 | 
						|
      
 | 
						|
      _PL_get_arg(1, type, a);
 | 
						|
 | 
						|
      if ( ct->ctx_type == CTX_CHAR )
 | 
						|
	return PL_unify_char(a, context, how);
 | 
						|
      else
 | 
						|
	return PL_unify_integer(a, context);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  fail;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static foreign_t
 | 
						|
do_char_type(term_t chr, term_t class, control_t h, int how)
 | 
						|
{ generator *gen;
 | 
						|
  fid_t fid;
 | 
						|
 | 
						|
  switch( ForeignControl(h) )
 | 
						|
  { case FRG_FIRST_CALL:
 | 
						|
    { const char_type *cc = NULL;
 | 
						|
      int c;
 | 
						|
      int do_enum = ENUM_NONE;
 | 
						|
      atom_t cn;
 | 
						|
      int arity;
 | 
						|
 | 
						|
      if ( PL_is_variable(chr) )
 | 
						|
	do_enum |= ENUM_CHAR;
 | 
						|
      if ( PL_is_variable(class) )
 | 
						|
	do_enum |= ENUM_CLASS;
 | 
						|
 | 
						|
      if ( do_enum == ENUM_BOTH )
 | 
						|
	return PL_error("char_type", 2, NULL, ERR_INSTANTIATION);
 | 
						|
 | 
						|
      if ( !(do_enum & ENUM_CHAR) )
 | 
						|
      { if ( !PL_get_char(chr, &c, TRUE) )
 | 
						|
	  fail;
 | 
						|
	if ( c == -1 )
 | 
						|
	  return PL_unify_atom(class, ATOM_end_of_file);
 | 
						|
      }
 | 
						|
 | 
						|
      if ( !(do_enum & ENUM_CLASS) )
 | 
						|
      { if ( !PL_get_name_arity(class, &cn, &arity) ||
 | 
						|
	     !(cc = char_type_by_name(cn, arity)) )
 | 
						|
	  return PL_error("char_type", 2, NULL,
 | 
						|
			  ERR_TYPE, ATOM_char_type, class);
 | 
						|
      }
 | 
						|
 | 
						|
      if ( do_enum == ENUM_NONE )
 | 
						|
      { if ( arity == 0 )
 | 
						|
	  return (*cc->test)((wint_t)c) ? TRUE : FALSE;
 | 
						|
	else
 | 
						|
	{ int rval = (*cc->test)((wint_t)c);
 | 
						|
 | 
						|
	  if ( rval >= 0 )
 | 
						|
	  { term_t a = PL_new_term_ref();
 | 
						|
	    int ok;
 | 
						|
 | 
						|
	    _PL_get_arg(1, class, a);
 | 
						|
 | 
						|
	    if ( cc->ctx_type == CTX_CHAR )
 | 
						|
	      ok = PL_unify_char(a, rval, how);
 | 
						|
	    else
 | 
						|
	      ok = PL_unify_integer(a, rval);
 | 
						|
 | 
						|
	    if ( ok )
 | 
						|
	      return TRUE;
 | 
						|
	    else
 | 
						|
	      do_enum = ENUM_CHAR;	/* try the other way around */
 | 
						|
	  } else
 | 
						|
	    fail;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      if ( do_enum == ENUM_CHAR && arity == 1 )
 | 
						|
      {	term_t a = PL_new_term_ref();	/* char_type(X, lower('A')) */
 | 
						|
	int ca;
 | 
						|
 | 
						|
	_PL_get_arg(1, class, a);
 | 
						|
	if ( !PL_is_variable(a) )
 | 
						|
	{ if ( PL_get_char(a, &ca, FALSE) )
 | 
						|
	  { int c = (*cc->reverse)((wint_t)ca);
 | 
						|
 | 
						|
	    if ( c < 0 )
 | 
						|
	      fail;
 | 
						|
 | 
						|
	    return PL_unify_char(chr, c, how);
 | 
						|
	  }
 | 
						|
	  fail;				/* error */
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      gen = allocHeap(sizeof(*gen));
 | 
						|
      gen->do_enum = do_enum;
 | 
						|
 | 
						|
      if ( do_enum & ENUM_CHAR )
 | 
						|
      { gen->class      = cc;
 | 
						|
	gen->current    = -1;
 | 
						|
      } else if ( do_enum & ENUM_CLASS )
 | 
						|
      { gen->class	= char_types;
 | 
						|
	gen->current    = c;
 | 
						|
      }
 | 
						|
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    case FRG_REDO:
 | 
						|
      gen = ForeignContextPtr(h);
 | 
						|
      break;
 | 
						|
    case FRG_CUTTED:
 | 
						|
      gen = ForeignContextPtr(h);
 | 
						|
      if ( gen )
 | 
						|
	freeHeap(gen, sizeof(*gen));
 | 
						|
    default:
 | 
						|
      succeed;
 | 
						|
  }
 | 
						|
 | 
						|
  fid = PL_open_foreign_frame();
 | 
						|
  for(;;)
 | 
						|
  { int rval;
 | 
						|
 | 
						|
    if ( (rval = (*gen->class->test)((wint_t)gen->current)) )
 | 
						|
    { if ( gen->do_enum & ENUM_CHAR )
 | 
						|
      { if ( !PL_unify_char(chr, gen->current, how) )
 | 
						|
	  goto next;
 | 
						|
      }
 | 
						|
      if ( gen->class->arity > 0 )
 | 
						|
      { if ( rval < 0 ||
 | 
						|
	     !unify_char_type(class, gen->class, rval, how) )
 | 
						|
	  goto next;
 | 
						|
	     
 | 
						|
      } else if ( gen->do_enum & ENUM_CLASS )
 | 
						|
      { if ( !unify_char_type(class, gen->class, rval, how) )
 | 
						|
	  goto next;
 | 
						|
      }
 | 
						|
 | 
						|
      if ( advanceGen(gen) )		/* ok, found one */
 | 
						|
	ForeignRedoPtr(gen);
 | 
						|
      else
 | 
						|
      { freeHeap(gen, sizeof(*gen));	/* the only one */
 | 
						|
	succeed;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  next:
 | 
						|
    PL_rewind_foreign_frame(fid);
 | 
						|
 | 
						|
    if ( !advanceGen(gen) )
 | 
						|
      break;
 | 
						|
  }
 | 
						|
 | 
						|
  freeHeap(gen, sizeof(*gen));
 | 
						|
  fail;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
 | 
						|
{ return do_char_type(A1, A2, PL__ctx, CHAR_MODE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
 | 
						|
{ return do_char_type(A1, A2, PL__ctx, CODE_MODE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#if 0
 | 
						|
static
 | 
						|
PRED_IMPL("iswctype", 2, iswctype, 0)
 | 
						|
{ char *s;
 | 
						|
  int chr;
 | 
						|
  wctype_t t;
 | 
						|
 | 
						|
  if ( !PL_get_char_ex(A1, &chr, FALSE) ||
 | 
						|
       !PL_get_chars_ex(A2, &s, CVT_ATOM) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  if ( !(t=wctype(s)) )
 | 
						|
    return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2);
 | 
						|
 | 
						|
  return iswctype(chr, t) ? TRUE : FALSE;
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
init_tout(PL_chars_t *t, size_t len)
 | 
						|
{ switch(t->encoding)
 | 
						|
  { case ENC_ISO_LATIN_1:
 | 
						|
      if ( len < sizeof(t->buf) )
 | 
						|
      { t->text.t = t->buf;
 | 
						|
	t->storage = PL_CHARS_LOCAL;
 | 
						|
      } else
 | 
						|
      { t->text.t = PL_malloc(len);
 | 
						|
	t->storage = PL_CHARS_MALLOC;
 | 
						|
      }
 | 
						|
      succeed;
 | 
						|
    case ENC_WCHAR:
 | 
						|
      if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) )
 | 
						|
      { t->text.w = (pl_wchar_t*)t->buf;
 | 
						|
	t->storage = PL_CHARS_LOCAL;
 | 
						|
      } else
 | 
						|
      { t->text.w = PL_malloc(len*sizeof(pl_wchar_t));
 | 
						|
	t->storage = PL_CHARS_MALLOC;
 | 
						|
      }
 | 
						|
      succeed;
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      fail;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static inline wint_t
 | 
						|
get_chr_from_text(const PL_chars_t *t, size_t index)
 | 
						|
{ switch(t->encoding)
 | 
						|
  { case ENC_ISO_LATIN_1:
 | 
						|
      return t->text.t[index]&0xff;
 | 
						|
    case ENC_WCHAR:
 | 
						|
      return t->text.w[index];
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      return 0;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static foreign_t
 | 
						|
modify_case_atom(term_t in, term_t out, int down)
 | 
						|
{ PL_chars_t tin, tout;
 | 
						|
 | 
						|
  if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  if ( PL_get_text(out, &tout, CVT_ATOMIC) )
 | 
						|
  { unsigned int i;
 | 
						|
 | 
						|
    if ( tin.length != tout.length )
 | 
						|
      fail;
 | 
						|
 | 
						|
    for(i=0; i<tin.length; i++)
 | 
						|
    { wint_t ci = get_chr_from_text(&tin, i);
 | 
						|
      wint_t co = get_chr_from_text(&tout, i);
 | 
						|
 | 
						|
      if ( down )
 | 
						|
      { if ( co != towlower(ci) )
 | 
						|
	  fail;
 | 
						|
      } else
 | 
						|
      { if ( co != towupper(ci) )
 | 
						|
	  fail;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    succeed;
 | 
						|
  } else if ( PL_is_variable(out) )
 | 
						|
  { unsigned int i;
 | 
						|
 | 
						|
    tout.encoding  = tin.encoding;
 | 
						|
    tout.length    = tin.length;
 | 
						|
    tout.canonical = FALSE;		/* or TRUE? Can WCHAR map to ISO? */
 | 
						|
 | 
						|
    init_tout(&tout, tin.length);
 | 
						|
 | 
						|
    if ( tin.encoding == ENC_ISO_LATIN_1 )
 | 
						|
    { const unsigned char *in = (const unsigned char*)tin.text.t;
 | 
						|
 | 
						|
      if ( down )
 | 
						|
      { for(i=0; i<tin.length; i++)
 | 
						|
	{ wint_t c = towlower(in[i]);
 | 
						|
 | 
						|
	  if ( c > 255 )
 | 
						|
	  { PL_promote_text(&tout);
 | 
						|
	    for( ; i<tin.length; i++)
 | 
						|
	    { tout.text.w[i] = towlower(in[i]);
 | 
						|
	    }
 | 
						|
	    break;
 | 
						|
	  } else
 | 
						|
	  { tout.text.t[i] = (char)c;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
      } else				/* upcase */
 | 
						|
      { for(i=0; i<tin.length; i++)
 | 
						|
	{ wint_t c = towupper(in[i]);
 | 
						|
 | 
						|
	  if ( c > 255 )
 | 
						|
	  { PL_promote_text(&tout);
 | 
						|
	    for( ; i<tin.length; i++)
 | 
						|
	    { tout.text.w[i] = towupper(in[i]);
 | 
						|
	    }
 | 
						|
	    break;
 | 
						|
	  } else
 | 
						|
	  { tout.text.t[i] = (char)c;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
      } 
 | 
						|
    } else
 | 
						|
    { if ( down )
 | 
						|
      { for(i=0; i<tin.length; i++)
 | 
						|
	{ tout.text.w[i] = towlower(tin.text.w[i]);
 | 
						|
	}
 | 
						|
      } else
 | 
						|
      { for(i=0; i<tin.length; i++)
 | 
						|
	{ tout.text.w[i] = towupper(tin.text.w[i]);
 | 
						|
	}
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    PL_unify_text(out, 0, &tout, PL_ATOM);
 | 
						|
    PL_free_text(&tout);
 | 
						|
 | 
						|
    succeed;
 | 
						|
  } else
 | 
						|
  { return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("downcase_atom", 2, downcase_atom, 0)
 | 
						|
{ return modify_case_atom(A1, A2, TRUE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("upcase_atom", 2, upcase_atom, 0)
 | 
						|
{ return modify_case_atom(A1, A2, FALSE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    WHITE SPACE		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static int
 | 
						|
write_normalize_space(IOSTREAM *out, term_t in)
 | 
						|
{ PL_chars_t tin;
 | 
						|
  size_t i, end;
 | 
						|
 | 
						|
  if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) )
 | 
						|
    return FALSE;
 | 
						|
 | 
						|
  end = tin.length;
 | 
						|
  i = 0;
 | 
						|
 | 
						|
  while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
 | 
						|
    i++;
 | 
						|
  while( i<end )
 | 
						|
  { wint_t c;
 | 
						|
 | 
						|
    while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i))))
 | 
						|
    { if ( Sputcode(c, out) < 0 )
 | 
						|
	fail;
 | 
						|
      i++;
 | 
						|
    }
 | 
						|
    while(i<end && unicode_separator(get_chr_from_text(&tin, i)))
 | 
						|
      i++;
 | 
						|
    if ( i < end )
 | 
						|
    { if (  Sputcode(' ', out) < 0 )
 | 
						|
	fail;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  succeed;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("normalize_space", 2, normalize_space, 0)
 | 
						|
{ redir_context ctx;
 | 
						|
  word rc;
 | 
						|
 | 
						|
  EXCEPTION_GUARDED(/*code*/
 | 
						|
		    if ( setupOutputRedirect(A1, &ctx, FALSE) )
 | 
						|
		    { if ( (rc = write_normalize_space(ctx.stream, A2)) )
 | 
						|
			rc = closeOutputRedirect(&ctx);
 | 
						|
		      else
 | 
						|
			discardOutputRedirect(&ctx);
 | 
						|
		    } else
 | 
						|
		      rc = FALSE;
 | 
						|
		    /*cleanup*/,
 | 
						|
		    DEBUG(1, Sdprintf("Cleanup after throw()\n"));
 | 
						|
		    discardOutputRedirect(&ctx);
 | 
						|
		    rc = PL_rethrow(););
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       LOCALE		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
#if defined(HAVE_LOCALE_H) && defined(HAVE_SETLOCALE)
 | 
						|
#include <locale.h>
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Note: on some installations, locale doesn't   work correctly. Printing a
 | 
						|
message isn't really cute. It would be better to use printMessage(), but
 | 
						|
the system isn't yet initialised far enough.   Maybe we should store the
 | 
						|
failure and print a message at the end of the initialisation?
 | 
						|
 | 
						|
We only return FALSE if LC_CTYPE  fails.   This  is a serious indication
 | 
						|
that locale support is broken. We don't   depend too much on the others,
 | 
						|
so we ignore possible problems.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
static int
 | 
						|
initLocale()
 | 
						|
{ int rc = TRUE;
 | 
						|
 | 
						|
  if ( !setlocale(LC_CTYPE, "") )
 | 
						|
  { rc = FALSE;
 | 
						|
    DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n"));
 | 
						|
  }
 | 
						|
  if ( !setlocale(LC_TIME, "") )
 | 
						|
  { DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n"));
 | 
						|
  }
 | 
						|
  if ( !setlocale(LC_COLLATE, "") )
 | 
						|
  { DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n"));
 | 
						|
  }
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ int category;
 | 
						|
  const char *name;
 | 
						|
} lccat;
 | 
						|
 | 
						|
static lccat lccats[] =
 | 
						|
{ { LC_ALL,      "all" },
 | 
						|
  { LC_COLLATE,  "collate" },
 | 
						|
  { LC_CTYPE,    "ctype" },
 | 
						|
#ifdef LC_MESSAGES
 | 
						|
  { LC_MESSAGES, "messages" },
 | 
						|
#endif
 | 
						|
  { LC_MONETARY, "monetary" },
 | 
						|
  { LC_NUMERIC,  "numeric" },
 | 
						|
  { LC_TIME,     "time" },
 | 
						|
  { 0,           NULL }
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("setlocale", 3, setlocale, 0)
 | 
						|
{ char *what;
 | 
						|
  char *locale;
 | 
						|
  const lccat *lcp;
 | 
						|
 | 
						|
 | 
						|
  if ( !PL_get_chars_ex(A1, &what, CVT_ATOM) )
 | 
						|
    fail;
 | 
						|
  if ( PL_is_variable(A3) )
 | 
						|
    locale = NULL;
 | 
						|
  else if ( !PL_get_chars_ex(A3, &locale, CVT_ATOM) )
 | 
						|
    fail;
 | 
						|
 | 
						|
  for ( lcp = lccats; lcp->name; lcp++ )
 | 
						|
  { if ( streq(lcp->name, what) )
 | 
						|
    { char *old = setlocale(lcp->category, NULL);
 | 
						|
 | 
						|
      if ( !PL_unify_chars(A2, PL_ATOM, -1, old) )
 | 
						|
	fail;
 | 
						|
 | 
						|
      if ( PL_compare(A2, A3) != 0 )
 | 
						|
      { if ( !setlocale(lcp->category, locale) )
 | 
						|
	  return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale");
 | 
						|
      }
 | 
						|
 | 
						|
      succeed;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return PL_error(NULL, 0, NULL, ERR_DOMAIN,
 | 
						|
		  PL_new_atom("category"), A1);
 | 
						|
}
 | 
						|
 | 
						|
#else
 | 
						|
 | 
						|
#define initLocale() 1
 | 
						|
 | 
						|
static
 | 
						|
PRED_IMPL("setlocale", 3, setlocale, 0)
 | 
						|
{ return notImplemented("setlocale", 3);
 | 
						|
}
 | 
						|
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *      PUBLISH PREDICATES	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
BeginPredDefs(ctype)
 | 
						|
  PRED_DEF("char_type", 2, char_type, PL_FA_NONDETERMINISTIC)
 | 
						|
  PRED_DEF("code_type", 2, code_type, PL_FA_NONDETERMINISTIC)
 | 
						|
  PRED_DEF("setlocale", 3, setlocale, 0)
 | 
						|
  PRED_DEF("downcase_atom", 2, downcase_atom, 0)
 | 
						|
  PRED_DEF("upcase_atom", 2, upcase_atom, 0)
 | 
						|
  PRED_DEF("normalize_space", 2, normalize_space, 0)
 | 
						|
EndPredDefs
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	PROLOG CHARACTERS	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
const char _PL_char_types[] = {
 | 
						|
/* ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N  ^O    0-15 */
 | 
						|
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, 
 | 
						|
/* ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]  ^^  ^_   16-31 */
 | 
						|
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, 
 | 
						|
/* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   32-47 */
 | 
						|
   SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY, 
 | 
						|
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   48-63 */
 | 
						|
   DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY, 
 | 
						|
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   64-79 */
 | 
						|
   SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, 
 | 
						|
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   80-95 */
 | 
						|
   UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC, 
 | 
						|
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   96-111 */
 | 
						|
   SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, 
 | 
						|
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  ^?   112-127 */
 | 
						|
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT, 
 | 
						|
			  /* 128-159 (C1 controls) */
 | 
						|
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, 
 | 
						|
   CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, 
 | 
						|
			  /* 160-255 (G1 graphics) */
 | 
						|
			  /* ISO Latin 1 is assumed */
 | 
						|
   SP, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,
 | 
						|
   SO, SO, SO, SO, SO, SO, SO, SO, SO, SO, LC, SO, SO, SO, SO, SO,   
 | 
						|
   UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
 | 
						|
   UC, UC, UC, UC, UC, UC, UC, SO, UC, UC, UC, UC, UC, UC, UC, LC,
 | 
						|
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
 | 
						|
   LC, LC, LC, LC, LC, LC, LC, SO, LC, LC, LC, LC, LC, LC, LC, LC
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ const char *name;
 | 
						|
  IOENC encoding;
 | 
						|
} enc_map;
 | 
						|
 | 
						|
static const enc_map map[] =
 | 
						|
{ { "UTF-8",	  ENC_UTF8 },
 | 
						|
  { "utf8",	  ENC_UTF8 },
 | 
						|
  { "ISO8859-1",  ENC_ISO_LATIN_1 },
 | 
						|
  { "ISO8859_1",  ENC_ISO_LATIN_1 },
 | 
						|
  { "iso88591",   ENC_ISO_LATIN_1 },
 | 
						|
  { "iso_8859_1", ENC_ISO_LATIN_1 },
 | 
						|
  { NULL, ENC_UNKNOWN }
 | 
						|
};
 | 
						|
 | 
						|
static IOENC
 | 
						|
initEncoding(void)
 | 
						|
{ if ( LD )
 | 
						|
  { if ( !LD->encoding )
 | 
						|
    { char *enc;
 | 
						|
 | 
						|
      if ( !initLocale() )
 | 
						|
      { LD->encoding = ENC_ISO_LATIN_1;
 | 
						|
      } else if ( (enc = setlocale(LC_CTYPE, NULL)) )
 | 
						|
      { LD->encoding = ENC_ANSI;		/* text encoding */
 | 
						|
 | 
						|
	if ( (enc = strchr(enc, '.')) )
 | 
						|
	{ const enc_map *m;
 | 
						|
	  enc++;				/* skip '.' */
 | 
						|
 | 
						|
	  for ( m=map; m->name; m++ )
 | 
						|
	  { if ( strcmp(enc, m->name) == 0 )
 | 
						|
	    { LD->encoding = m->encoding;
 | 
						|
	      break;
 | 
						|
	    }
 | 
						|
	  }
 | 
						|
	}
 | 
						|
      } else
 | 
						|
      { LD->encoding = ENC_ISO_LATIN_1;
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    return LD->encoding;
 | 
						|
  }
 | 
						|
 | 
						|
  return ENC_ANSI;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void
 | 
						|
initCharTypes(void)
 | 
						|
{ 
 | 
						|
  init_char_types();
 | 
						|
  initEncoding();
 | 
						|
}
 | 
						|
 | 
						|
#if __SWI_PROLOG__
 | 
						|
bool
 | 
						|
systemMode(bool accept)
 | 
						|
{ bool old = SYSTEM_MODE ? TRUE : FALSE;
 | 
						|
 | 
						|
  if ( accept )
 | 
						|
    debugstatus.styleCheck |= DOLLAR_STYLE;
 | 
						|
  else
 | 
						|
    debugstatus.styleCheck &= ~DOLLAR_STYLE;
 | 
						|
 | 
						|
  return old;
 | 
						|
}
 | 
						|
 | 
						|
#endif
 |