| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-06-20 10:47:44 -05:00
										 |  |  | /*  Part of SWI-Prolog
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Author:        Jan Wielemaker | 
					
						
							| 
									
										
										
										
											2013-06-20 10:47:44 -05:00
										 |  |  |     E-mail:        J.Wielemaker@vu.nl | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     WWW:           http://www.swi-prolog.org
 | 
					
						
							| 
									
										
										
										
											2013-06-20 10:47:44 -05:00
										 |  |  |     Copyright (C): 1985-2013, University of Amsterdam | 
					
						
							|  |  |  | 			      VU University Amsterdam | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     This library is free software; you can redistribute it and/or | 
					
						
							|  |  |  |     modify it under the terms of the GNU Lesser General Public | 
					
						
							|  |  |  |     License as published by the Free Software Foundation; either | 
					
						
							|  |  |  |     version 2.1 of the License, or (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This library is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
					
						
							|  |  |  |     Lesser General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     You should have received a copy of the GNU Lesser General Public | 
					
						
							|  |  |  |     License along with this library; if not, write to the Free Software | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "pl-incl.h"
 | 
					
						
							|  |  |  | #include <ctype.h>
 | 
					
						
							|  |  |  | #include "pl-ctype.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | //! @defgroup YAPChars Character Classification and Manipulation
 | 
					
						
							|  |  |  | //  @ingroup YAPBuiltins
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // This module defines routines to manipulate individual characters.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // @{
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | This module defines: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	char_type(?Char, ?Type) | 
					
						
							|  |  |  | 	code_type(?Char, ?Type) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | See manual for details. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define CTX_CHAR 0			/* Class(Char) */
 | 
					
						
							|  |  |  | #define CTX_CODE 1			/* Class(Int) */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { atom_t	name;			/* name of the class */ | 
					
						
							|  |  |  |   int (*test)(wint_t chr);		/* boolean */ | 
					
						
							|  |  |  |   int (*reverse)(wint_t chr);		/* reverse mapping */ | 
					
						
							|  |  |  |   short		arity;			/* arity of class (i.e. lower('A')) */ | 
					
						
							|  |  |  |   short		ctx_type;		/* CTX_* */ | 
					
						
							|  |  |  | } char_type; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define ENUM_NONE	0x00
 | 
					
						
							|  |  |  | #define ENUM_CHAR	0x01
 | 
					
						
							|  |  |  | #define ENUM_CLASS	0x02
 | 
					
						
							|  |  |  | #define ENUM_BOTH	0x03
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { int		current;		/* current character */ | 
					
						
							|  |  |  |   const char_type   *class;		/* current class */ | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |   int		do_enum;		/* what to enumerate */ | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | } generator; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | iswhite(wint_t chr) | 
					
						
							|  |  |  | { return chr == ' ' || chr == '\t'; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fiscsym(wint_t chr) | 
					
						
							|  |  |  | { return iswalnum(chr) || chr == '_'; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fiscsymf(wint_t chr) | 
					
						
							|  |  |  | { return iswalpha(chr) || chr == '_'; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | iseof(wint_t chr) | 
					
						
							|  |  |  | { return chr == (wint_t)-1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | iseol(wint_t chr) | 
					
						
							|  |  |  | { return chr >= 10 && chr <= 13; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | isnl(wint_t chr) | 
					
						
							|  |  |  | { return chr == '\n'; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | isperiod(wint_t chr) | 
					
						
							|  |  |  | { return chr && strchr(".?!", chr) != NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | isquote(wint_t chr) | 
					
						
							|  |  |  | { return chr && strchr("'`\"", chr) != NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fupper(wint_t chr) | 
					
						
							|  |  |  | { return iswlower(chr) ? (int)towupper(chr) : -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | flower(wint_t chr) | 
					
						
							|  |  |  | { return iswupper(chr) ? (int)towlower(chr) : -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | ftoupper(wint_t chr) | 
					
						
							|  |  |  | { return towupper(chr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | ftolower(wint_t chr) | 
					
						
							|  |  |  | { return towlower(chr); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fparen(wint_t chr) | 
					
						
							|  |  |  | { switch(chr) | 
					
						
							|  |  |  |   { case '(': | 
					
						
							|  |  |  |       return ')'; | 
					
						
							|  |  |  |     case '{': | 
					
						
							|  |  |  |       return '}'; | 
					
						
							|  |  |  |     case '[': | 
					
						
							|  |  |  |       return ']'; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       return -1; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | rparen(wint_t chr) | 
					
						
							|  |  |  | { switch(chr) | 
					
						
							|  |  |  |   { case ')': | 
					
						
							|  |  |  |       return '('; | 
					
						
							|  |  |  |     case '}': | 
					
						
							|  |  |  |       return '{'; | 
					
						
							|  |  |  |     case ']': | 
					
						
							|  |  |  |       return '['; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       return -1; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fdigit(wint_t chr) | 
					
						
							|  |  |  | { if ( chr <= 0xff && isdigit(chr) ) | 
					
						
							|  |  |  |     return chr - '0'; | 
					
						
							|  |  |  |   return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | rdigit(wint_t d) | 
					
						
							|  |  |  | { if ( (int)d >= 0 && d <= 9 ) | 
					
						
							|  |  |  |     return d+'0'; | 
					
						
							|  |  |  |   return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | fxdigit(wint_t chr) | 
					
						
							|  |  |  | { if ( chr > 0xff ) | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  |   if ( isdigit(chr) ) | 
					
						
							|  |  |  |     return chr - '0'; | 
					
						
							|  |  |  |   if ( chr >= 'a' && chr <= 'f' ) | 
					
						
							|  |  |  |     return chr - 'a' + 10; | 
					
						
							|  |  |  |   if ( chr >= 'A' && chr <= 'F' ) | 
					
						
							|  |  |  |     return chr - 'A' + 10; | 
					
						
							|  |  |  |   return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | rxdigit(wint_t d) | 
					
						
							|  |  |  | { if ( (int)d >= 0 && d <= 9 ) | 
					
						
							|  |  |  |     return d+'0'; | 
					
						
							|  |  |  |   if ( d >= 10 && d <= 15 ) | 
					
						
							|  |  |  |     return d-10+'a'; | 
					
						
							|  |  |  |   return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define mkfunction(name) \
 | 
					
						
							|  |  |  | 	static int f ## name(wint_t chr) { return name(chr); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | mkfunction(iswalnum) | 
					
						
							|  |  |  | mkfunction(iswalpha) | 
					
						
							|  |  |  | mkfunction(isascii) | 
					
						
							|  |  |  | mkfunction(iswcntrl) | 
					
						
							|  |  |  | mkfunction(iswdigit) | 
					
						
							|  |  |  | mkfunction(iswgraph) | 
					
						
							|  |  |  | mkfunction(iswlower) | 
					
						
							|  |  |  | mkfunction(iswupper) | 
					
						
							|  |  |  | mkfunction(iswpunct) | 
					
						
							|  |  |  | mkfunction(iswspace) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-05-03 18:52:56 +01:00
										 |  |  | static const char_type char_types[] = | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | { { ATOM_alnum,			     fiswalnum }, | 
					
						
							|  |  |  |   { ATOM_alpha,			     fiswalpha }, | 
					
						
							|  |  |  |   { ATOM_csym,			     fiscsym }, | 
					
						
							|  |  |  |   { ATOM_csymf,			     fiscsymf }, | 
					
						
							|  |  |  |   { ATOM_prolog_var_start,	     f_is_prolog_var_start }, | 
					
						
							|  |  |  |   { ATOM_prolog_atom_start,	     f_is_prolog_atom_start }, | 
					
						
							|  |  |  |   { ATOM_prolog_identifier_continue, f_is_prolog_identifier_continue }, | 
					
						
							|  |  |  |   { ATOM_prolog_symbol,		     f_is_prolog_symbol }, | 
					
						
							|  |  |  |   { ATOM_csymf,			     fiscsymf }, | 
					
						
							|  |  |  |   { ATOM_ascii,			     fisascii }, | 
					
						
							|  |  |  |   { ATOM_white,			     iswhite }, | 
					
						
							|  |  |  |   { ATOM_cntrl,			     fiswcntrl }, | 
					
						
							|  |  |  |   { ATOM_digit,			     fiswdigit }, | 
					
						
							|  |  |  |   { ATOM_graph,			     fiswgraph }, | 
					
						
							|  |  |  |   { ATOM_lower,			     fiswlower }, | 
					
						
							|  |  |  |   { ATOM_upper,			     fiswupper }, | 
					
						
							|  |  |  |   { ATOM_punct,			     fiswpunct }, | 
					
						
							|  |  |  |   { ATOM_space,			     fiswspace }, | 
					
						
							|  |  |  |   { ATOM_end_of_file,		     iseof }, | 
					
						
							|  |  |  |   { ATOM_end_of_line,		     iseol }, | 
					
						
							|  |  |  |   { ATOM_newline,		     isnl }, | 
					
						
							|  |  |  |   { ATOM_period,		     isperiod }, | 
					
						
							|  |  |  |   { ATOM_quote,			     isquote }, | 
					
						
							|  |  |  |   { ATOM_lower,			     fupper,	flower,   1, CTX_CHAR }, | 
					
						
							|  |  |  |   { ATOM_upper,			     flower,	fupper,   1, CTX_CHAR }, | 
					
						
							|  |  |  |   { ATOM_to_lower,		     ftoupper,	ftolower, 1, CTX_CHAR }, | 
					
						
							|  |  |  |   { ATOM_to_upper,		     ftolower,	ftoupper, 1, CTX_CHAR }, | 
					
						
							|  |  |  |   { ATOM_paren,			     fparen,	rparen,   1, CTX_CHAR }, | 
					
						
							|  |  |  |   { ATOM_digit,			     fdigit,	rdigit,   1, CTX_CODE  }, | 
					
						
							|  |  |  |   { ATOM_xdigit,		     fxdigit,	rxdigit,  1, CTX_CODE  }, | 
					
						
							|  |  |  |   { NULL_ATOM,			     NULL } | 
					
						
							| 
									
										
										
										
											2010-05-03 18:52:56 +01:00
										 |  |  | }; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2011-02-10 00:01:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static const char_type * | 
					
						
							|  |  |  | char_type_by_name(atom_t name, int arity) | 
					
						
							|  |  |  | { const char_type *cc; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for(cc = char_types; cc->name; cc++) | 
					
						
							|  |  |  |   { if ( cc->name == name && cc->arity == arity ) | 
					
						
							|  |  |  |       return cc; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | advanceGen(generator *gen) | 
					
						
							|  |  |  | { if ( gen->do_enum & ENUM_CHAR ) | 
					
						
							|  |  |  |   { if ( ++gen->current == 256 ) | 
					
						
							|  |  |  |       fail; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { gen->class++; | 
					
						
							|  |  |  |     if ( !gen->class->name ) | 
					
						
							|  |  |  |       fail; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   succeed; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | unify_char_type(term_t type, const char_type *ct, int context, int how) | 
					
						
							| 
									
										
										
										
											2011-02-10 00:01:19 +00:00
										 |  |  | { GET_LD | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( ct->arity == 0 ) | 
					
						
							|  |  |  |   { return PL_unify_atom(type, ct->name); | 
					
						
							|  |  |  |   } else /*if ( ct->arity == 1 )*/ | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   { if ( PL_unify_functor(type, PL_new_functor(ct->name, 1)) ) | 
					
						
							|  |  |  |     { term_t a = PL_new_term_ref(); | 
					
						
							| 
									
										
										
										
											2011-02-10 00:01:19 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       _PL_get_arg(1, type, a); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( ct->ctx_type == CTX_CHAR ) | 
					
						
							|  |  |  | 	return PL_unify_char(a, context, how); | 
					
						
							|  |  |  |       else | 
					
						
							|  |  |  | 	return PL_unify_integer(a, context); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   fail; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static foreign_t | 
					
						
							|  |  |  | do_char_type(term_t chr, term_t class, control_t h, int how) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { GET_LD | 
					
						
							|  |  |  |   generator *gen; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   fid_t fid; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   switch( ForeignControl(h) ) | 
					
						
							|  |  |  |   { case FRG_FIRST_CALL: | 
					
						
							|  |  |  |     { const char_type *cc = NULL; | 
					
						
							|  |  |  |       int c; | 
					
						
							|  |  |  |       int do_enum = ENUM_NONE; | 
					
						
							|  |  |  |       atom_t cn; | 
					
						
							|  |  |  |       int arity; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( PL_is_variable(chr) ) | 
					
						
							|  |  |  | 	do_enum |= ENUM_CHAR; | 
					
						
							|  |  |  |       if ( PL_is_variable(class) ) | 
					
						
							|  |  |  | 	do_enum |= ENUM_CLASS; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( do_enum == ENUM_BOTH ) | 
					
						
							|  |  |  | 	return PL_error("char_type", 2, NULL, ERR_INSTANTIATION); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( !(do_enum & ENUM_CHAR) ) | 
					
						
							|  |  |  |       { if ( !PL_get_char(chr, &c, TRUE) ) | 
					
						
							|  |  |  | 	  fail; | 
					
						
							|  |  |  | 	if ( c == -1 ) | 
					
						
							|  |  |  | 	  return PL_unify_atom(class, ATOM_end_of_file); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( !(do_enum & ENUM_CLASS) ) | 
					
						
							|  |  |  |       { if ( !PL_get_name_arity(class, &cn, &arity) || | 
					
						
							|  |  |  | 	     !(cc = char_type_by_name(cn, arity)) ) | 
					
						
							|  |  |  | 	  return PL_error("char_type", 2, NULL, | 
					
						
							|  |  |  | 			  ERR_TYPE, ATOM_char_type, class); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( do_enum == ENUM_NONE ) | 
					
						
							|  |  |  |       { if ( arity == 0 ) | 
					
						
							|  |  |  | 	  return (*cc->test)((wint_t)c) ? TRUE : FALSE; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	{ int rval = (*cc->test)((wint_t)c); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( rval >= 0 ) | 
					
						
							|  |  |  | 	  { term_t a = PL_new_term_ref(); | 
					
						
							|  |  |  | 	    int ok; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    _PL_get_arg(1, class, a); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( cc->ctx_type == CTX_CHAR ) | 
					
						
							|  |  |  | 	      ok = PL_unify_char(a, rval, how); | 
					
						
							|  |  |  | 	    else | 
					
						
							|  |  |  | 	      ok = PL_unify_integer(a, rval); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( ok ) | 
					
						
							|  |  |  | 	      return TRUE; | 
					
						
							|  |  |  | 	    else | 
					
						
							|  |  |  | 	      do_enum = ENUM_CHAR;	/* try the other way around */ | 
					
						
							|  |  |  | 	  } else | 
					
						
							|  |  |  | 	    fail; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( do_enum == ENUM_CHAR && arity == 1 ) | 
					
						
							|  |  |  |       {	term_t a = PL_new_term_ref();	/* char_type(X, lower('A')) */ | 
					
						
							|  |  |  | 	int ca; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	_PL_get_arg(1, class, a); | 
					
						
							|  |  |  | 	if ( !PL_is_variable(a) ) | 
					
						
							|  |  |  | 	{ if ( PL_get_char(a, &ca, FALSE) ) | 
					
						
							|  |  |  | 	  { int c = (*cc->reverse)((wint_t)ca); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    if ( c < 0 ) | 
					
						
							|  |  |  | 	      fail; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    return PL_unify_char(chr, c, how); | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	  fail;				/* error */ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |       gen = allocForeignState(sizeof(*gen)); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       gen->do_enum = do_enum; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( do_enum & ENUM_CHAR ) | 
					
						
							|  |  |  |       { gen->class      = cc; | 
					
						
							|  |  |  | 	gen->current    = -1; | 
					
						
							|  |  |  |       } else if ( do_enum & ENUM_CLASS ) | 
					
						
							|  |  |  |       { gen->class	= char_types; | 
					
						
							|  |  |  | 	gen->current    = c; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     case FRG_REDO: | 
					
						
							|  |  |  |       gen = ForeignContextPtr(h); | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     case FRG_CUTTED: | 
					
						
							|  |  |  |       gen = ForeignContextPtr(h); | 
					
						
							|  |  |  |       if ( gen ) | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  | 	freeForeignState(gen, sizeof(*gen)); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     default: | 
					
						
							|  |  |  |       succeed; | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |   if ( !(fid = PL_open_foreign_frame()) ) | 
					
						
							|  |  |  |     goto error; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   for(;;) | 
					
						
							|  |  |  |   { int rval; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( (rval = (*gen->class->test)((wint_t)gen->current)) ) | 
					
						
							|  |  |  |     { if ( gen->do_enum & ENUM_CHAR ) | 
					
						
							|  |  |  |       { if ( !PL_unify_char(chr, gen->current, how) ) | 
					
						
							|  |  |  | 	  goto next; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       if ( gen->class->arity > 0 ) | 
					
						
							|  |  |  |       { if ( rval < 0 || | 
					
						
							|  |  |  | 	     !unify_char_type(class, gen->class, rval, how) ) | 
					
						
							|  |  |  | 	  goto next; | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       } else if ( gen->do_enum & ENUM_CLASS ) | 
					
						
							|  |  |  |       { if ( !unify_char_type(class, gen->class, rval, how) ) | 
					
						
							|  |  |  | 	  goto next; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( advanceGen(gen) )		/* ok, found one */ | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |       { ForeignRedoPtr(gen); | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { freeForeignState(gen, sizeof(*gen));	/* the only one */ | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 	succeed; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   next: | 
					
						
							|  |  |  |     PL_rewind_foreign_frame(fid); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( !advanceGen(gen) ) | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | error: | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |   freeForeignState(gen, sizeof(*gen)); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   fail; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /** @pred  char_type(? _Char_, ? _Type_) 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Like code_type/2, tests or generates alternative _Types_ or | 
					
						
							|  |  |  | _Chars_. The character-types are inspired by the standard `C` | 
					
						
							|  |  |  | `<ctype.h>` primitives. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// @memberof code_type/2
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("char_type", 2, char_type, PL_FA_NONDETERMINISTIC) | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | { return do_char_type(A1, A2, PL__ctx, PL_CHAR); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /** @pred  code_type(? _Char_, ? _Type_) 
 | 
					
						
							| 
									
										
										
										
											2014-09-11 14:06:57 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Tests or generates alternative  _Types_ or  _Chars_. The | 
					
						
							|  |  |  | character-types are inspired by the standard `C` | 
					
						
							|  |  |  | `<ctype.h>` primitives. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | +  `alnum` | 
					
						
							|  |  |  |      _Char_ is a letter (upper- or lowercase) or digit. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `alpha` | 
					
						
							|  |  |  |     _Char_ is a letter (upper- or lowercase). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `csym` | 
					
						
							|  |  |  |     _Char_ is a letter (upper- or lowercase), digit or the underscore (_). These are valid C- and Prolog symbol characters. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `csymf` | 
					
						
							|  |  |  |     _Char_ is a letter (upper- or lowercase) or the underscore (_). These are valid first characters for C- and Prolog symbols | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `ascii` | 
					
						
							|  |  |  |     _Char_ is a 7-bits ASCII character (0..127). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `white` | 
					
						
							|  |  |  |     _Char_ is a space or tab. E.i. white space inside a line. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `cntrl` | 
					
						
							|  |  |  |     _Char_ is an ASCII control-character (0..31). | 
					
						
							|  |  |  |   | 
					
						
							|  |  |  | + `digit` | 
					
						
							|  |  |  |     _Char_ is a digit. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `digit( _Weight_)` | 
					
						
							|  |  |  |     _Char_ is a digit with value _Weight_. I.e. `char_type(X, digit(6))` yields X =  aaasaá'6'. Useful for parsing numbers. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `xdigit( _Weight_)` | 
					
						
							|  |  |  |     _Char_ is a hexa-decimal digit with value  _Weight_. I.e. char_type(a, xdigit(X) yields X = '10'. Useful for parsing numbers. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `graph` | 
					
						
							|  |  |  |     _Char_ produces a visible mark on a page when printed. Note that the space is not included! | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `lower` | 
					
						
							|  |  |  |     _Char_ is a lower-case letter. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `lower(Upper)` | 
					
						
							|  |  |  |     _Char_ is a lower-case version of  _Upper_. Only true if _Char_ is lowercase and  _Upper_ uppercase. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `to_lower(Upper)` | 
					
						
							|  |  |  |  _Char_ is a lower-case version of Upper. For non-letters, or letter without case,  _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `upper` | 
					
						
							|  |  |  |  _Char_ is an upper-case letter. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `upper(Lower)` | 
					
						
							|  |  |  |  _Char_ is an upper-case version of Lower. Only true if  _Char_ is uppercase and Lower lowercase. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `to_upper(Lower)` | 
					
						
							|  |  |  |  _Char_ is an upper-case version of Lower. For non-letters, or letter without case,  _Char_ and Lower are the same. See also upcase_atom/2 and downcase_atom/2. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `punct` | 
					
						
							|  |  |  |  _Char_ is a punctuation character. This is a graph character that is not a letter or digit. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `space` | 
					
						
							|  |  |  |  _Char_ is some form of layout character (tab, vertical-tab, newline, etc.). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `end_of_file` | 
					
						
							|  |  |  |  _Char_ is -1. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `end_of_line` | 
					
						
							|  |  |  |  _Char_ ends a line (ASCII: 10..13). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `newline` | 
					
						
							|  |  |  |  _Char_ is a the newline character (10). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `period` | 
					
						
							|  |  |  |  _Char_ counts as the end of a sentence (.,!,?). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `quote` | 
					
						
							|  |  |  |  _Char_ is a quote-character. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `paren(Close)` | 
					
						
							|  |  |  |  _Char_ is an open-parenthesis and Close is the corresponding close-parenthesis.  | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | + `code_type(? _Code_, ? _Type_)` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | As char_type/2, but uses character-codes rather than | 
					
						
							|  |  |  | one-character atoms. Please note that both predicates are as | 
					
						
							|  |  |  | flexible as possible. They handle either representation if the | 
					
						
							|  |  |  | argument is instantiated and only will instantiate with an integer | 
					
						
							|  |  |  | code or one-character atom depending of the version used. See also | 
					
						
							|  |  |  | the prolog-flag double_quotes and the built-in predicates  | 
					
						
							|  |  |  | atom_chars/2 and atom_codes/2. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /// @memberof code_type/2
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("code_type", 2, code_type, PL_FA_NONDETERMINISTIC) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { return do_char_type(A1, A2, PL__ctx, PL_CODE); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("iswctype", 2, iswctype, 0) | 
					
						
							|  |  |  | { char *s; | 
					
						
							|  |  |  |   int chr; | 
					
						
							|  |  |  |   wctype_t t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !PL_get_char_ex(A1, &chr, FALSE) || | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |        !PL_get_chars(A2, &s, CVT_ATOM|CVT_EXCEPTION) ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !(t=wctype(s)) ) | 
					
						
							|  |  |  |     return PL_error(NULL, 0, NULL, ERR_EXISTENCE, ATOM_type, A2); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return iswctype(chr, t) ? TRUE : FALSE; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | init_tout(PL_chars_t *t, size_t len) | 
					
						
							|  |  |  | { switch(t->encoding) | 
					
						
							|  |  |  |   { case ENC_ISO_LATIN_1: | 
					
						
							|  |  |  |       if ( len < sizeof(t->buf) ) | 
					
						
							|  |  |  |       { t->text.t = t->buf; | 
					
						
							|  |  |  | 	t->storage = PL_CHARS_LOCAL; | 
					
						
							|  |  |  |       } else | 
					
						
							| 
									
										
										
										
											2013-01-16 00:19:07 +00:00
										 |  |  |       { t->text.t = PL_malloc(len); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 	t->storage = PL_CHARS_MALLOC; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       succeed; | 
					
						
							|  |  |  |     case ENC_WCHAR: | 
					
						
							|  |  |  |       if ( len*sizeof(pl_wchar_t) < sizeof(t->buf) ) | 
					
						
							|  |  |  |       { t->text.w = (pl_wchar_t*)t->buf; | 
					
						
							|  |  |  | 	t->storage = PL_CHARS_LOCAL; | 
					
						
							|  |  |  |       } else | 
					
						
							| 
									
										
										
										
											2013-01-16 00:19:07 +00:00
										 |  |  |       { t->text.w = PL_malloc(len*sizeof(pl_wchar_t)); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 	t->storage = PL_CHARS_MALLOC; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |       succeed; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       assert(0); | 
					
						
							|  |  |  |       fail; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline wint_t | 
					
						
							|  |  |  | get_chr_from_text(const PL_chars_t *t, size_t index) | 
					
						
							|  |  |  | { switch(t->encoding) | 
					
						
							|  |  |  |   { case ENC_ISO_LATIN_1: | 
					
						
							|  |  |  |       return t->text.t[index]&0xff; | 
					
						
							|  |  |  |     case ENC_WCHAR: | 
					
						
							|  |  |  |       return t->text.w[index]; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       assert(0); | 
					
						
							|  |  |  |       return 0; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static foreign_t | 
					
						
							|  |  |  | modify_case_atom(term_t in, term_t out, int down) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { GET_LD | 
					
						
							|  |  |  |   PL_chars_t tin, tout; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( PL_get_text(out, &tout, CVT_ATOMIC) ) | 
					
						
							|  |  |  |   { unsigned int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( tin.length != tout.length ) | 
					
						
							|  |  |  |       fail; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for(i=0; i<tin.length; i++) | 
					
						
							|  |  |  |     { wint_t ci = get_chr_from_text(&tin, i); | 
					
						
							|  |  |  |       wint_t co = get_chr_from_text(&tout, i); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( down ) | 
					
						
							|  |  |  |       { if ( co != towlower(ci) ) | 
					
						
							|  |  |  | 	  fail; | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { if ( co != towupper(ci) ) | 
					
						
							|  |  |  | 	  fail; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     succeed; | 
					
						
							|  |  |  |   } else if ( PL_is_variable(out) ) | 
					
						
							|  |  |  |   { unsigned int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     tout.encoding  = tin.encoding; | 
					
						
							|  |  |  |     tout.length    = tin.length; | 
					
						
							|  |  |  |     tout.canonical = FALSE;		/* or TRUE? Can WCHAR map to ISO? */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     init_tout(&tout, tin.length); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ( tin.encoding == ENC_ISO_LATIN_1 ) | 
					
						
							|  |  |  |     { const unsigned char *in = (const unsigned char*)tin.text.t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( down ) | 
					
						
							|  |  |  |       { for(i=0; i<tin.length; i++) | 
					
						
							|  |  |  | 	{ wint_t c = towlower(in[i]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( c > 255 ) | 
					
						
							|  |  |  | 	  { PL_promote_text(&tout); | 
					
						
							|  |  |  | 	    for( ; i<tin.length; i++) | 
					
						
							|  |  |  | 	    { tout.text.w[i] = towlower(in[i]); | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	  } else | 
					
						
							|  |  |  | 	  { tout.text.t[i] = (char)c; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } else				/* upcase */ | 
					
						
							|  |  |  |       { for(i=0; i<tin.length; i++) | 
					
						
							|  |  |  | 	{ wint_t c = towupper(in[i]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  if ( c > 255 ) | 
					
						
							|  |  |  | 	  { PL_promote_text(&tout); | 
					
						
							|  |  |  | 	    for( ; i<tin.length; i++) | 
					
						
							|  |  |  | 	    { tout.text.w[i] = towupper(in[i]); | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	    break; | 
					
						
							|  |  |  | 	  } else | 
					
						
							|  |  |  | 	  { tout.text.t[i] = (char)c; | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |       } | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     } else | 
					
						
							|  |  |  |     { if ( down ) | 
					
						
							|  |  |  |       { for(i=0; i<tin.length; i++) | 
					
						
							|  |  |  | 	{ tout.text.w[i] = towlower(tin.text.w[i]); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } else | 
					
						
							|  |  |  |       { for(i=0; i<tin.length; i++) | 
					
						
							|  |  |  | 	{ tout.text.w[i] = towupper(tin.text.w[i]); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     PL_unify_text(out, 0, &tout, PL_ATOM); | 
					
						
							|  |  |  |     PL_free_text(&tout); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     succeed; | 
					
						
							|  |  |  |   } else | 
					
						
							|  |  |  |   { return PL_error(NULL, 0, NULL, ERR_TYPE, ATOM_atom, out); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /** @pred  downcase_atom(+ _Word_, - _LowerCaseWord_) 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If the first argument is bound to an atom _Word_, the | 
					
						
							|  |  |  | second argument shoud unify with an atom such that all alphabetic cdes | 
					
						
							|  |  |  | are lower case, _LowerCaseWord_. Non-alphabetic characers are | 
					
						
							|  |  |  | preserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// @memberof downcase_atom/2
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("downcase_atom", 2, downcase_atom, 0) | 
					
						
							|  |  |  | { return modify_case_atom(A1, A2, TRUE); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /** @pred  upcase_atom(+ _Word_, - _UpCaseWord_) 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | If the first argument is bound to an atom _Word_, the | 
					
						
							|  |  |  | second argument shoud unify with an atom such that all alphabetic cdes | 
					
						
							|  |  |  | are up case, _UpCaseWord_. Non-alphabetic characers are | 
					
						
							|  |  |  | preserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// @memberof upcase_atom/2
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("upcase_atom", 2, upcase_atom, 0) | 
					
						
							|  |  |  | { return modify_case_atom(A1, A2, FALSE); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	    WHITE SPACE		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | write_normalize_space(IOSTREAM *out, term_t in) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { GET_LD | 
					
						
							|  |  |  |   PL_chars_t tin; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   size_t i, end; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !PL_get_text(in, &tin, CVT_ATOMIC|CVT_EXCEPTION) ) | 
					
						
							|  |  |  |     return FALSE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   end = tin.length; | 
					
						
							|  |  |  |   i = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   while(i<end && unicode_separator(get_chr_from_text(&tin, i))) | 
					
						
							|  |  |  |     i++; | 
					
						
							|  |  |  |   while( i<end ) | 
					
						
							|  |  |  |   { wint_t c; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while(i<end && !unicode_separator((c=get_chr_from_text(&tin, i)))) | 
					
						
							|  |  |  |     { if ( Sputcode(c, out) < 0 ) | 
					
						
							|  |  |  | 	fail; | 
					
						
							|  |  |  |       i++; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     while(i<end && unicode_separator(get_chr_from_text(&tin, i))) | 
					
						
							|  |  |  |       i++; | 
					
						
							|  |  |  |     if ( i < end ) | 
					
						
							|  |  |  |     { if (  Sputcode(' ', out) < 0 ) | 
					
						
							|  |  |  | 	fail; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   succeed; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /** @pred  normalize_space(- _Out_, + _In_) 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Remove white space at the beginning an end of the word _In_, and replace | 
					
						
							|  |  |  | sequences of white space in the middle of _In_ by a single white | 
					
						
							|  |  |  | space. | 
					
						
							|  |  |  | ~~~ | 
					
						
							|  |  |  |  ?- normalize_space(atom(X), '  the white     fox jumped '). | 
					
						
							|  |  |  | X = 'the white fox jumped'. | 
					
						
							|  |  |  |  ?- normalize_space(string(X), '  over the           lazy dog '). | 
					
						
							|  |  |  | X = "over the lazy dog". | 
					
						
							|  |  |  | ~~~ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Notice that the first argument is bound to a stream descriptor in the | 
					
						
							|  |  |  | style of format/3. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// @memberof normalize_space/2
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("normalize_space", 2, normalize_space, 0) | 
					
						
							|  |  |  | { redir_context ctx; | 
					
						
							|  |  |  |   word rc; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |   if ( (rc = setupOutputRedirect(A1, &ctx, FALSE)) ) | 
					
						
							|  |  |  |   { if ( (rc = write_normalize_space(ctx.stream, A2)) ) | 
					
						
							|  |  |  |       rc = closeOutputRedirect(&ctx); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |       discardOutputRedirect(&ctx); | 
					
						
							|  |  |  |   } | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   return rc; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /// @addtogroup SetLocale
 | 
					
						
							|  |  |  | // @{
 | 
					
						
							|  |  |  | //
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	       LOCALE		* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  | #if O_LOCALE
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | #include <locale.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
					
						
							|  |  |  | Note: on some installations, locale doesn't   work correctly. Printing a | 
					
						
							|  |  |  | message isn't really cute. It would be better to use printMessage(), but | 
					
						
							|  |  |  | the system isn't yet initialised far enough.   Maybe we should store the | 
					
						
							|  |  |  | failure and print a message at the end of the initialisation? | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | We only return FALSE if LC_CTYPE  fails.   This  is a serious indication | 
					
						
							|  |  |  | that locale support is broken. We don't   depend too much on the others, | 
					
						
							|  |  |  | so we ignore possible problems. | 
					
						
							|  |  |  | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | init_locale(void) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | { int rc = TRUE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   if ( !setlocale(LC_CTYPE, "") ) | 
					
						
							|  |  |  |   { rc = FALSE; | 
					
						
							|  |  |  |     DEBUG(0, Sdprintf("Failed to set LC_CTYPE locale\n")); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( !setlocale(LC_TIME, "") ) | 
					
						
							|  |  |  |   { DEBUG(0, Sdprintf("Failed to set LC_TIME locale\n")); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   if ( !setlocale(LC_COLLATE, "") ) | 
					
						
							|  |  |  |   { DEBUG(0, Sdprintf("Failed to set LC_COLLATE locale\n")); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return rc; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { int category; | 
					
						
							|  |  |  |   const char *name; | 
					
						
							|  |  |  | } lccat; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static lccat lccats[] = | 
					
						
							|  |  |  | { { LC_ALL,      "all" }, | 
					
						
							|  |  |  |   { LC_COLLATE,  "collate" }, | 
					
						
							|  |  |  |   { LC_CTYPE,    "ctype" }, | 
					
						
							|  |  |  | #ifdef LC_MESSAGES
 | 
					
						
							|  |  |  |   { LC_MESSAGES, "messages" }, | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |   { LC_MONETARY, "monetary" }, | 
					
						
							|  |  |  |   { LC_NUMERIC,  "numeric" }, | 
					
						
							|  |  |  |   { LC_TIME,     "time" }, | 
					
						
							|  |  |  |   { 0,           NULL } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /// @}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /** @pred  setlocale( + _In_, - _Old_, -_New_) 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /// @memberof setlocale/3
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("setlocale", 3, setlocale, 0) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { PRED_LD | 
					
						
							|  |  |  |   char *what; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   char *locale; | 
					
						
							|  |  |  |   const lccat *lcp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |   if ( !PL_get_chars(A1, &what, CVT_ATOM|CVT_EXCEPTION) ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     fail; | 
					
						
							|  |  |  |   if ( PL_is_variable(A3) ) | 
					
						
							|  |  |  |     locale = NULL; | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |   else if ( !PL_get_chars(A3, &locale, CVT_ATOM|CVT_EXCEPTION) ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |     fail; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   for ( lcp = lccats; lcp->name; lcp++ ) | 
					
						
							|  |  |  |   { if ( streq(lcp->name, what) ) | 
					
						
							|  |  |  |     { char *old = setlocale(lcp->category, NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( !PL_unify_chars(A2, PL_ATOM, -1, old) ) | 
					
						
							|  |  |  | 	fail; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if ( PL_compare(A2, A3) != 0 ) | 
					
						
							|  |  |  |       { if ( !setlocale(lcp->category, locale) ) | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | 	{ if ( errno == ENOENT ) | 
					
						
							|  |  |  | 	    return PL_existence_error("locale", A3); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 	  return PL_error(NULL, 0, MSG_ERRNO, ERR_SYSCALL, "setlocale"); | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | #ifdef O_LOCALE
 | 
					
						
							|  |  |  |       updateLocale(lcp->category, locale); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       succeed; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |   return PL_domain_error("category", A1); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  | #define init_locale() 1
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | static | 
					
						
							|  |  |  | PRED_IMPL("setlocale", 3, setlocale, 0) | 
					
						
							|  |  |  | { return notImplemented("setlocale", 3); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /// @}
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *      PUBLISH PREDICATES	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | BeginPredDefs(ctype) | 
					
						
							| 
									
										
										
										
											2011-02-11 01:22:07 +00:00
										 |  |  |   PRED_DEF("char_type", 2, char_type, PL_FA_NONDETERMINISTIC) | 
					
						
							|  |  |  |   PRED_DEF("code_type", 2, code_type, PL_FA_NONDETERMINISTIC) | 
					
						
							|  |  |  |   PRED_DEF("setlocale", 3, setlocale, 0) | 
					
						
							|  |  |  |   PRED_DEF("downcase_atom", 2, downcase_atom, 0) | 
					
						
							|  |  |  |   PRED_DEF("upcase_atom", 2, upcase_atom, 0) | 
					
						
							| 
									
										
										
										
											2011-03-11 19:49:32 +00:00
										 |  |  |   PRED_DEF("normalize_space", 2, normalize_space, 0) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | EndPredDefs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		 /*******************************
 | 
					
						
							|  |  |  | 		 *	PROLOG CHARACTERS	* | 
					
						
							|  |  |  | 		 *******************************/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const char _PL_char_types[] = { | 
					
						
							|  |  |  | /* ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N  ^O    0-15 */ | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  |    CT, CT, CT, CT, CT, CT, CT, CT, CT, SP, SP, SP, SP, SP, CT, CT, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /* ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]  ^^  ^_   16-31 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   32-47 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    SP, SO, DQ, SY, SY, SO, SY, SQ, PU, PU, SY, SY, PU, SY, SY, SY, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   48-63 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, SY, SO, SY, SY, SY, SY, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   64-79 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    SY, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   80-95 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, SY, PU, SY, UC, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   96-111 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    SY, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | /*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  ^?   112-127 */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, PU, PU, PU, SY, CT, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 			  /* 128-159 (C1 controls) */ | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  |    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, | 
					
						
							|  |  |  |    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 			  /* 160-255 (G1 graphics) */ | 
					
						
							| 
									
										
										
										
											2012-02-01 20:52:13 +00:00
										 |  |  | 			  /* ISO Latin 1 (=Unicode) is assumed */ | 
					
						
							|  |  |  | /*  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */ | 
					
						
							|  |  |  |    SP, SY, SY, SY, SY, SY, SY, SY, SY, SY, LC, SY, SY, SO, SY, SY, /*00AX*/ | 
					
						
							|  |  |  |    SY, SY, SO, SO, SY, LC, SY, SY, SY, SO, LC, SY, SO, SO, SO, SY, /*00BX*/ | 
					
						
							|  |  |  |    UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, /*00CX*/ | 
					
						
							|  |  |  |    UC, UC, UC, UC, UC, UC, UC, SY, UC, UC, UC, UC, UC, UC, UC, LC, /*00DX*/ | 
					
						
							|  |  |  |    LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, /*00EX*/ | 
					
						
							|  |  |  |    LC, LC, LC, LC, LC, LC, LC, SY, LC, LC, LC, LC, LC, LC, LC, LC  /*00FX*/ | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { const char *name; | 
					
						
							|  |  |  |   IOENC encoding; | 
					
						
							|  |  |  | } enc_map; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const enc_map map[] = | 
					
						
							|  |  |  | { { "UTF-8",	  ENC_UTF8 }, | 
					
						
							|  |  |  |   { "utf8",	  ENC_UTF8 }, | 
					
						
							|  |  |  |   { "ISO8859-1",  ENC_ISO_LATIN_1 }, | 
					
						
							|  |  |  |   { "ISO8859_1",  ENC_ISO_LATIN_1 }, | 
					
						
							|  |  |  |   { "iso88591",   ENC_ISO_LATIN_1 }, | 
					
						
							|  |  |  |   { "iso_8859_1", ENC_ISO_LATIN_1 }, | 
					
						
							|  |  |  |   { NULL, ENC_UNKNOWN } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-20 16:22:47 -05:00
										 |  |  | IOENC | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | initEncoding(void) | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | { GET_LD | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  | #if HAVE_SETLOCALE
 | 
					
						
							|  |  |  |     char *enc; | 
					
						
							|  |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2010-02-22 09:35:47 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   if ( LD ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |   { if ( !LD->encoding ) | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  |       { | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-11-15 01:10:25 +00:00
										 |  |  |       if ( !init_locale() ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       { LD->encoding = ENC_ISO_LATIN_1; | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  | #if HAVE_SETLOCALE
 | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       } else if ( (enc = setlocale(LC_CTYPE, NULL)) ) | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  |       {  char *encp; | 
					
						
							|  |  |  | 	LD->encoding = ENC_ANSI;		/* text encoding */ | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-05 11:20:29 +00:00
										 |  |  | 	if ( (encp = strchr(enc, '.')) ) | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 	{ const enc_map *m; | 
					
						
							| 
									
										
										
										
											2013-12-05 11:20:29 +00:00
										 |  |  | 	  encp++;				/* skip '.' */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	  for ( m=map; m->name; m++ ) | 
					
						
							|  |  |  | 	  { if ( strcmp(encp, m->name) == 0 ) | 
					
						
							|  |  |  | 	    { LD->encoding = m->encoding; | 
					
						
							|  |  |  | 	      break; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 	  /* vsc: just test LC_CTYPE, works on Macs */ | 
					
						
							|  |  |  | 	  const enc_map *m; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	  for ( m=map; m->name; m++ ) | 
					
						
							|  |  |  | 	  { if ( strcmp(enc, m->name) == 0 ) | 
					
						
							|  |  |  | 	    { LD->encoding = m->encoding; | 
					
						
							|  |  |  | 	      break; | 
					
						
							|  |  |  | 	    } | 
					
						
							|  |  |  | 	  } | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-03-15 22:29:04 +00:00
										 |  |  | #endif
 | 
					
						
							|  |  |  |       } else { LD->encoding = ENC_ISO_LATIN_1; | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return LD->encoding; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   return ENC_ANSI; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | initCharTypes(void) | 
					
						
							| 
									
										
										
										
											2011-02-10 00:01:19 +00:00
										 |  |  | { initEncoding(); | 
					
						
							| 
									
										
										
										
											2008-12-22 12:02:22 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-15 03:13:50 -05:00
										 |  |  | /// @}
 | 
					
						
							|  |  |  | 
 |