yap-6.3/H/pl-utf8.h

/*  $Id$

    Part of SWI-Prolog

    Author:        Jan Wielemaker and Anjo Anjewierden
    E-mail:        jan@swi.psy.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 1985-2002, University of Amsterdam

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/


#ifndef UTF8_H_INCLUDED
#define UTF8_H_INCLUDED

#include <wchar.h>

/* NOTE(review): presumably the largest number of bytes one encoded
   character may occupy in a multibyte buffer -- confirm against the
   code that sizes buffers with it. */
#define PL_MB_LEN_MAX			16

/* U+FFFD, the Unicode REPLACEMENT CHARACTER.  Going by its name this is
   the code point substituted for malformed input; the substitution
   itself is done outside this header. */
#define UTF8_MALFORMED_REPLACEMENT	0xFFFD

/* Byte-classification macros for UTF-8 sequences.

   All of these are function-like macros that may evaluate their
   arguments more than once: do not pass expressions with side effects.
*/

/* True when the value of c lies in 0xc0..0xfd.  The argument is converted
   with a plain (unsigned) cast, so a negative value (for instance a signed
   char holding a byte >= 0x80) ends up far above 0xfd and tests false;
   pass the byte as unsigned char. */
#define ISUTF8_MB(c) ((unsigned)(c) >= 0xc0 && (unsigned)(c) <= 0xfd)

#define ISUTF8_CB(c)  (((c)&0xc0) == 0x80) /* 10xxxxxx: continuation byte */
#define ISUTF8_FB2(c) (((c)&0xe0) == 0xc0) /* 110xxxxx: first of 2 bytes */
#define ISUTF8_FB3(c) (((c)&0xf0) == 0xe0) /* 1110xxxx: first of 3 bytes */
#define ISUTF8_FB4(c) (((c)&0xf8) == 0xf0) /* 11110xxx: first of 4 bytes */
#define ISUTF8_FB5(c) (((c)&0xfc) == 0xf8) /* 111110xx: first of 5 bytes */
#define ISUTF8_FB6(c) (((c)&0xfe) == 0xfc) /* 1111110x: first of 6 bytes */

/* Number of continuation bytes announced by first byte c:
   0 when the top bit is clear, 1..5 for the FB2..FB6 patterns, and -1
   for anything else (a continuation byte, or 0xfe/0xff).
   The argument is parenthesised so that compound expressions such as
   `a | b` are classified as a whole. */
#define UTF8_FBN(c) (!((c)&0x80)   ? 0 : \
		     ISUTF8_FB2(c) ? 1 : \
		     ISUTF8_FB3(c) ? 2 : \
		     ISUTF8_FB4(c) ? 3 : \
		     ISUTF8_FB5(c) ? 4 : \
		     ISUTF8_FB6(c) ? 5 : -1)

/* Payload bits carried by first byte c, where n is the continuation count
   as produced by UTF8_FBN(): c itself when n is 0, otherwise the low
   (6-n) bits of c.  Both arguments are parenthesised so that expressions
   such as `k+1` for n keep their intended meaning. */
#define UTF8_FBV(c,n) ( (n) == 0 ? (c) : ((c) & ((0x01<<(6-(n)))-1)) )

/* Single-character access macros.

   Each macro handles a byte with the top bit clear inline and hands
   everything else to the matching _PL__utf8_*() function declared below.
   They are macros, so their arguments may be evaluated more than once:
   do not pass expressions with side effects.
*/

/* Read one character starting at `in`.  When the byte at `in` has its
   top bit clear it is stored in *chr and the result is (char *)in + 1;
   otherwise the result is whatever _PL__utf8_get_char(in, chr) returns. */
#define utf8_get_char(in, chr) \
	(*(in) & 0x80 ? _PL__utf8_get_char(in, chr) \
		      : (*(chr) = *(in), (char *)(in)+1))

/* Step over one character starting at `in`: (char *)in + 1 when the byte
   has its top bit clear, else the result of _PL__utf8_skip_char(in). */
#define utf8_skip_char(in) \
	(*(in) & 0x80 ? _PL__utf8_skip_char(in) \
		      : (char *)(in)+1)

/* Write chr at `out`.  For chr < 0x80 the value is stored as one char and
   the result is out + 1; otherwise the result is that of
   _PL__utf8_put_char(out, chr).  `out` is parenthesised so that pointer
   expressions such as `buf + k` can be passed directly. */
#define utf8_put_char(out, chr) \
	((chr) < 0x80 ? ((out)[0]=(char)(chr), (out)+1) \
		      : _PL__utf8_put_char((out), (chr)))

/* Out-of-line paths of the macros above; defined outside this header. */
extern char *_PL__utf8_get_char(const char *in, int *chr);
extern char *_PL__utf8_put_char(char *out, int chr);
extern char *_PL__utf8_skip_char(const char *out);

/* UTF-8 string utilities; all are defined outside this header.
   NOTE(review): the per-function notes marked "presumably" are read off
   the names and signatures only -- confirm against the implementation. */

/* presumably: length in characters of the `len` bytes starting at s */
extern size_t       utf8_strlen(const char *s, size_t len);
/* presumably: as utf8_strlen() for a NUL-terminated s */
extern size_t       utf8_strlen1(const char *s);
/* presumably: pointer into s after skipping n characters */
extern const char  *utf8_skip(const char *s, int n);
/* presumably: strncmp()-style comparison of s1 and s2 bounded by n */
extern int          utf8_strncmp(const char *s1, const char *s2, size_t n);
/* presumably: prefix test between s1 and s2 */
extern int          utf8_strprefix(const char *s1, const char *s2);
/// Copy the wide string s0 into sf as UTF-8.
extern char        *utf8_wcscpy(char *sf, const wchar_t *s0);

/* Result type of _PL__utf8_type().
   NOTE(review): going by the enumerator names, the values presumably
   tell apart text that is pure ASCII, text that fits Latin-1, and text
   that needs wide characters -- confirm against the implementation. */
typedef enum {
  S_ASCII = 0,
  S_LATIN = 1,
  S_WIDE  = 2
} unicode_type_t;

/* Classify the `len` bytes starting at in0; defined outside this header. */
extern unicode_type_t	_PL__utf8_type(const char *in0, size_t len);

#endif /*UTF8_H_INCLUDED*/
q# 2008-12-22 12:02:22 +00:00			`/* $Id$`

			`Part of SWI-Prolog`

			`Author: Jan Wielemaker and Anjo Anjewierden`
			`E-mail: jan@swi.psy.uva.nl`
			`WWW: http://www.swi-prolog.org`
			`Copyright (C): 1985-2002, University of Amsterdam`

			`This library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`

			`This library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
			`License along with this library; if not, write to the Free Software`
SWI update 2013-01-16 00:19:07 +00:00			`Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
q# 2008-12-22 12:02:22 +00:00			`*/`


			`#ifndef UTF8_H_INCLUDED`
			`#define UTF8_H_INCLUDED`

blanks and backslashed strings 2015-02-10 00:03:02 +00:00			`#include <wchar.h>`

SWI update 2013-01-16 00:19:07 +00:00			`#define PL_MB_LEN_MAX 16`

q# 2008-12-22 12:02:22 +00:00			`#define UTF8_MALFORMED_REPLACEMENT 0xfffd`

			`#define ISUTF8_MB(c) ((unsigned)(c) >= 0xc0 && (unsigned)(c) <= 0xfd)`

			`#define ISUTF8_CB(c) (((c)&0xc0) == 0x80) /* Is continuation byte */`
			`#define ISUTF8_FB2(c) (((c)&0xe0) == 0xc0)`
			`#define ISUTF8_FB3(c) (((c)&0xf0) == 0xe0)`
			`#define ISUTF8_FB4(c) (((c)&0xf8) == 0xf0)`
			`#define ISUTF8_FB5(c) (((c)&0xfc) == 0xf8)`
			`#define ISUTF8_FB6(c) (((c)&0xfe) == 0xfc)`

			`#define UTF8_FBN(c) (!(c&0x80) ? 0 : \`
			`ISUTF8_FB2(c) ? 1 : \`
			`ISUTF8_FB3(c) ? 2 : \`
			`ISUTF8_FB4(c) ? 3 : \`
			`ISUTF8_FB5(c) ? 4 : \`
			`ISUTF8_FB6(c) ? 5 : -1)`
			`#define UTF8_FBV(c,n) ( n == 0 ? c : (c & ((0x01<<(6-n))-1)) )`

			`#define utf8_get_char(in, chr) \`
			`(*(in) & 0x80 ? _PL__utf8_get_char(in, chr) \`
			`: (*(chr) = *(in), (char *)(in)+1))`
utf8 utilities 2013-12-06 23:22:42 +00:00			`#define utf8_skip_char(in) \`
			`(*(in) & 0x80 ? _PL__utf8_skip_char(in) \`
			`: (char *)(in)+1)`
q# 2008-12-22 12:02:22 +00:00			`#define utf8_put_char(out, chr) \`
			`((chr) < 0x80 ? out[0]=(char)(chr), out+1 \`
			`: _PL__utf8_put_char(out, (chr)))`

			`extern char *_PL__utf8_get_char(const char *in, int *chr);`
			`extern char *_PL__utf8_put_char(char *out, int chr);`
utf8 utilities 2013-12-06 23:22:42 +00:00			`extern char *_PL__utf8_skip_char(const char *out);`
q# 2008-12-22 12:02:22 +00:00
			`extern size_t utf8_strlen(const char *s, size_t len);`
keep trying to fix atoms 2013-12-04 23:01:30 +00:00			`extern size_t utf8_strlen1(const char *s);`
utf8 utilities 2013-12-06 23:22:42 +00:00			`extern const char * utf8_skip(const char *s, int n);`
keep trying to fix atoms 2013-12-04 23:01:30 +00:00			`extern int utf8_strncmp(const char *s1, const char *s2, size_t n);`
			`extern int utf8_strprefix(const char *s1, const char *s2);`
blanks and backslashed strings 2015-02-10 00:03:02 +00:00			`/// copy a wide string to an UTF-8 version.`
			`extern char *utf8_wcscpy(char *sf, const wchar_t *s0);`
q# 2008-12-22 12:02:22 +00:00
new text conversion and string code (big changes, take care please) 2013-12-02 14:49:41 +00:00			`typedef enum {`
			`S_ASCII,`
			`S_LATIN,`
			`S_WIDE`
			`} unicode_type_t;`

			`extern unicode_type_t _PL__utf8_type(const char *in0, size_t len);`

q# 2008-12-22 12:02:22 +00:00			`#endif /*UTF8_H_INCLUDED*/`