annoying small bugs

This commit is contained in:
Vitor Santos Costa 2016-05-26 22:47:19 +01:00
parent a9fe413dd0
commit cd7571da7a
3 changed files with 358 additions and 341 deletions

View File

@ -29,7 +29,6 @@ static char SccsId[] = "%W% %G%";
///{@ ///{@
/// @addtogroup CharProps /// @addtogroup CharProps
/** /**
* @defgroup CharIO Character-Based Input/Output * @defgroup CharIO Character-Based Input/Output
@ -42,10 +41,10 @@ static char SccsId[] = "%W% %G%";
*/ */
#include "Yap.h" #include "Yap.h"
#include "Yatom.h"
#include "YapHeap.h" #include "YapHeap.h"
#include "yapio.h"
#include "YapText.h" #include "YapText.h"
#include "Yatom.h"
#include "yapio.h"
#include <stdlib.h> #include <stdlib.h>
#if HAVE_UNISTD_H #if HAVE_UNISTD_H
#include <unistd.h> #include <unistd.h>
@ -75,8 +74,8 @@ static char SccsId[] = "%W% %G%";
#define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) #define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR)
#endif #endif
#endif #endif
#include "iopreds.h"
#include "eval.h" #include "eval.h"
#include "iopreds.h"
static Int p_change_type_of_char(USES_REGS1); static Int p_change_type_of_char(USES_REGS1);
@ -94,8 +93,7 @@ Term Yap_StringToNumberTerm(char *s, encoding_t *encp) {
GLOBAL_Stream[sno].encoding = LOCAL_encoding; GLOBAL_Stream[sno].encoding = LOCAL_encoding;
#ifdef __ANDROID__ #ifdef __ANDROID__
while (*s && isblank(*s) && Yap_wide_chtype(*s) == BS) while (*s && isblank(*s) && Yap_wide_chtype(*s) == BS)
s++ s++;
;
#else #else
while (*s && iswblank(*s++)) while (*s && iswblank(*s++))
; ;
@ -133,14 +131,15 @@ static enc_map_t ematches[] = {
#endif #endif
}; };
static encoding_t enc_os_default( encoding_t rc)\ static encoding_t enc_os_default(encoding_t rc) {
{
// by default, return UTF-8 // by default, return UTF-8
// note that we match the C locale to UTF8/16, as all Unix machines will work on UNICODE. // note that we match the C locale to UTF8/16, as all Unix machines will work
// on UNICODE.
// WIN32 we will rely on BOM // WIN32 we will rely on BOM
if (rc == ENC_ISO_ASCII) { if (rc == ENC_ISO_ASCII) {
return ENC_ISO_UTF8; } return ENC_ISO_UTF8;
}
return rc; return rc;
} }
@ -149,8 +148,7 @@ encoding_t Yap_SystemEncoding(void) {
while (i == -1 || encvs[i]) { while (i == -1 || encvs[i]) {
char *v; char *v;
if (i == -1) { if (i == -1) {
if ((v = setlocale(LC_CTYPE, NULL)) == NULL || if ((v = setlocale(LC_CTYPE, NULL)) == NULL || !strcmp(v, "C")) {
!strcmp(v,"C")) {
if ((v = getenv("LC_CTYPE"))) if ((v = getenv("LC_CTYPE")))
setlocale(LC_CTYPE, v); setlocale(LC_CTYPE, v);
else if ((v = getenv("LANG"))) else if ((v = getenv("LANG")))
@ -164,8 +162,7 @@ encoding_t Yap_SystemEncoding(void) {
const char *coding; const char *coding;
while ((coding = ematches[j].s) != NULL) { while ((coding = ematches[j].s) != NULL) {
char *v1; char *v1;
if ((v1 = strstr(v, coding)) && if ((v1 = strstr(v, coding)) && strlen(v1) == strlen(coding)) {
strlen(v1) == strlen(coding)) {
return ematches[j].e; return ematches[j].e;
} }
j++; j++;
@ -298,7 +295,7 @@ static Int toupper2(USES_REGS1) {
if (out < 128) if (out < 128)
uout = toupper(out); uout = toupper(out);
else else
uout = towupper(out); uout = utf8proc_toupper(out);
if (is_char) if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout)); return Yap_unify(ARG2, MkCharTerm(uout));
else else
@ -310,16 +307,14 @@ static Int toupper2(USES_REGS1) {
if (uout < 128) if (uout < 128)
out = tolower(uout); out = tolower(uout);
else else
out = towlower(uout); out = utf8proc_tolower(uout);
} else if (charp == LC) {
return false;
} else { } else {
out = uout; out = uout;
} }
if (is_char) if (is_char)
return Yap_unify(ARG2, MkCharTerm(out)); return Yap_unify(ARG1, MkCharTerm(out));
else else
return Yap_unify(ARG2, MkIntegerTerm(out)); return Yap_unify(ARG1, MkIntegerTerm(out));
} else { } else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL); Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
} }
@ -335,7 +330,7 @@ static Int tolower2(USES_REGS1) {
if (out < 128) if (out < 128)
uout = tolower(out); uout = tolower(out);
else else
uout = towlower(out); uout = utf8proc_tolower(out);
if (is_char) if (is_char)
return Yap_unify(ARG2, MkCharTerm(uout)); return Yap_unify(ARG2, MkCharTerm(uout));
else else
@ -347,16 +342,14 @@ static Int tolower2(USES_REGS1) {
if (uout < 128) if (uout < 128)
out = toupper(uout); out = toupper(uout);
else else
out = towupper(uout); out = utf8proc_toupper(uout);
} else if (charp == UC) {
return false;
} else { } else {
out = uout; out = uout;
} }
if (is_char) if (is_char)
return Yap_unify(ARG2, MkCharTerm(out)); return Yap_unify(ARG1, MkCharTerm(out));
else else
return Yap_unify(ARG2, MkIntegerTerm(out)); return Yap_unify(ARG1, MkIntegerTerm(out));
} else { } else {
Yap_Error(INSTANTIATION_ERROR, ARG1, NULL); Yap_Error(INSTANTIATION_ERROR, ARG1, NULL);
} }

View File

@ -140,135 +140,159 @@ Char is an uppercase version of Lower. Only true if Char is uppercase and Lower
:- discontiguous digit_weight/2, digit_weight/3. :- discontiguous digit_weight/2, digit_weight/3.
prolog:char_type( CH, TYPE) :- prolog:char_type( CH, TYPE) :-
( var( CH ) -> between(-1,+inf,CH) ), (nonvar( CH )
prolog:char_type( CH, TYPE) . ->
prolog:char_type( ALNUM, alnum) :- true
;
arg(1,TYPE, A),
atomic(A)
->
true
;
between(0,0x10FFFF,I),
atom_codes(CH, [I])
),
p_char_type( CH, TYPE).
p_char_type( ALNUM, alnum) :-
char_type_alnum( ALNUM ). char_type_alnum( ALNUM ).
prolog:char_type( ALPHA, alpha) :- p_char_type( ALPHA, alpha) :-
char_type_alpha( ALPHA). char_type_alpha( ALPHA).
prolog:char_type( CSYM, csym) :- p_char_type( CSYM, csym) :-
char_type_csym( CSYM ). char_type_csym( CSYM ).
prolog:char_type( CSYMF, csymf) :- p_char_type( CSYMF, csymf) :-
char_type_csymf( CSYMF). char_type_csymf( CSYMF).
prolog:char_type( ASCII, ascii ) :- p_char_type( ASCII, ascii ) :-
char_type_ascii( ASCII ). char_type_ascii( ASCII ).
prolog:char_type( WHITE, white) :- p_char_type( WHITE, white) :-
char_type_white( WHITE ). char_type_white( WHITE ).
prolog:char_type( CNTRL , cntrl) :- p_char_type( CNTRL , cntrl) :-
char_type_cntrl( CNTRL ). char_type_cntrl( CNTRL ).
prolog:char_type( DIGIT , digit) :- p_char_type( DIGIT , digit) :-
char_type_digit( DIGIT ). char_type_digit( DIGIT ).
prolog:char_type( DIGIT, digit(Weight) ) :- p_char_type( DIGIT, digit(Weight) ) :-
char_type_digit( DIGIT ), char_type_digit( DIGIT ),
digit_weight( DIGIT, Weight ). digit_weight( DIGIT, Weight ).
prolog:char_type( XDIGIT, xdigit(Weight) ) :- p_char_type( XDIGIT, xdigit(Weight) ) :-
char_type_digit( XDIGIT ), char_type_digit( XDIGIT ),
xdigit_weight( XDIGIT, Weight ). xdigit_weight( XDIGIT, Weight ).
prolog:char_type( GRAPH , graph) :- p_char_type( GRAPH , graph) :-
char_type_graph( GRAPH ). char_type_graph( GRAPH ).
prolog:char_type( LOWER , lower) :- p_char_type( LOWER , lower) :-
char_type_lower( LOWER ). char_type_lower( LOWER ).
prolog:char_type( LOWER, lower( Upper)) :- p_char_type( LOWER, lower( Upper)) :-
toupper( LOWER, Upper), toupper( LOWER, Upper),
char_type_lower( LOWER ). char_type_lower( LOWER ).
prolog:char_type( LOWER, to_lower( Upper)) :- p_char_type( LOWER, to_lower( Upper)) :-
toupper( LOWER, Upper). toupper( LOWER, Upper).
prolog:char_type( UPPER, upper ) :- p_char_type( UPPER, upper ) :-
char_type_upper( UPPER ). char_type_upper( UPPER ).
prolog:char_type( UPPER , upper( Lower)) :- p_char_type( UPPER , upper( Lower)) :-
char_type_upper( UPPER ), char_type_upper( UPPER ),
tolower( UPPER, Lower). tolower( UPPER, Lower).
prolog:char_type( UPPER, to_upper( Lower) ) :- p_char_type( UPPER, to_upper( Lower) ) :-
tolower( UPPER, Lower). tolower( UPPER, Lower).
prolog:char_type( PUNCT , punct) :- p_char_type( PUNCT , punct) :-
char_type_punct( PUNCT ). char_type_punct( PUNCT ).
prolog:char_type( SPACE , space) :- p_char_type( SPACE , space) :-
char_type_space( SPACE ). char_type_space( SPACE ).
prolog:char_type( END_OF_FILE , end_of_file) :- p_char_type( END_OF_FILE , end_of_file) :-
char_type_end_of_file( END_OF_FILE ). char_type_end_of_file( END_OF_FILE ).
prolog:char_type( END_OF_LINE , end_of_line) :- p_char_type( END_OF_LINE , end_of_line) :-
char_type_end_of_line( END_OF_LINE ). char_type_end_of_line( END_OF_LINE ).
prolog:char_type( NEWLINE , newline) :- p_char_type( NEWLINE , newline) :-
char_type_newline( NEWLINE ). char_type_newline( NEWLINE ).
prolog:char_type( PERIOD , period) :- p_char_type( PERIOD , period) :-
char_type_period( PERIOD ). char_type_period( PERIOD ).
prolog:char_type( QUOTE , quote) :- p_char_type( QUOTE , quote) :-
char_type_quote( QUOTE ). char_type_quote( QUOTE ).
prolog:char_type( Parent_Open, paren( PAREN_CLOSE) ) :- p_char_type( Parent_Open, paren( PAREN_CLOSE) ) :-
paren_paren(Parent_Open, PAREN_CLOSE). paren_paren(Parent_Open, PAREN_CLOSE).
prolog:char_type( PROLOG_VAR_START , prolog_var_start) :- p_char_type( PROLOG_VAR_START , prolog_var_start) :-
char_type_prolog_var_start( PROLOG_VAR_START ). char_type_prolog_var_start( PROLOG_VAR_START ).
prolog:char_type( PROLOG_ATOM_START , prolog_atom_start) :- p_char_type( PROLOG_ATOM_START , prolog_atom_start) :-
char_type_prolog_atom_start( PROLOG_ATOM_START ). char_type_prolog_atom_start( PROLOG_ATOM_START ).
prolog:char_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :- p_char_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :-
char_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ). char_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ).
prolog:char_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :- p_char_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :-
char_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ). char_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ).
prolog:code_type(CH, TYPE) :- prolog:code_type(CH, TYPE) :-
( var( CH ) -> between(-1,+inf,CH) ), (nonvar( CH )
prolog:code_type( CH, TYPE) . ->
prolog:code_type( ALNUM, alnum) :- true
;
arg(1,TYPE, A),
atomic(A)
->
true
;
between(0,0x10FFFF,CH)
),
p_code_type( CH, TYPE).
p_code_type( ALNUM, alnum) :-
code_type_alnum( ALNUM ). code_type_alnum( ALNUM ).
prolog:code_type( ALPHA, alpha) :- p_code_type( ALPHA, alpha) :-
code_type_alpha( ALPHA). code_type_alpha( ALPHA).
prolog:code_type( CSYM, csym) :- p_code_type( CSYM, csym) :-
code_type_csym( CSYM ). code_type_csym( CSYM ).
prolog:code_type( CSYMF, csymf) :- p_code_type( CSYMF, csymf) :-
code_type_csymf( CSYMF). code_type_csymf( CSYMF).
prolog:code_type( ASCII, ascii ) :- p_code_type( ASCII, ascii ) :-
code_type_ascii( ASCII ). code_type_ascii( ASCII ).
prolog:code_type( WHITE, white) :- p_code_type( WHITE, white) :-
code_type_white( WHITE ). code_type_white( WHITE ).
prolog:code_type( CNTRL , cntrl) :- p_code_type( CNTRL , cntrl) :-
code_type_cntrl( CNTRL ). code_type_cntrl( CNTRL ).
prolog:code_type( DIGIT , digit) :- p_code_type( DIGIT , digit) :-
code_type_digit( DIGIT ). code_type_digit( DIGIT ).
prolog:code_type( DIGIT, digit(Weight) ) :- p_code_type( DIGIT, digit(Weight) ) :-
code_type_digit( DIGIT ), code_type_digit( DIGIT ),
digit_weight( DIGIT, Weight ). digit_weight( DIGIT, Weight ).
prolog:code_type( XDIGIT, xdigit(Weight) ) :- p_code_type( XDIGIT, xdigit(Weight) ) :-
code_type_digit( XDIGIT ), code_type_digit( XDIGIT ),
xdigit_weight( XDIGIT, Weight ). xdigit_weight( XDIGIT, Weight ).
prolog:code_type( GRAPH , graph) :- p_code_type( GRAPH , graph) :-
code_type_graph( GRAPH ). code_type_graph( GRAPH ).
prolog:code_type( LOWER , lower) :- p_code_type( LOWER , lower) :-
code_type_lower( LOWER ). code_type_lower( LOWER ).
prolog:code_type( LOWER, lower( Upper)) :- p_code_type( LOWER, lower( Upper)) :-
toupper( LOWER, Upper), toupper( LOWER, Upper),
code_type_lower( LOWER ). code_type_lower( LOWER ).
prolog:code_type( LOWER, to_lower( Upper)) :- p_code_type( LOWER, to_lower( Upper)) :-
toupper( LOWER, Upper). toupper( LOWER, Upper).
prolog:code_type( UPPER, upper ) :- p_code_type( UPPER, upper ) :-
code_type_upper( UPPER ). code_type_upper( UPPER ).
prolog:code_type( UPPER , upper( Lower)) :- p_code_type( UPPER , upper( Lower)) :-
tolower( UPPER, Lower). tolower( UPPER, Lower).
prolog:code_type( UPPER, to_upper( Lower) ) :- p_code_type( UPPER, to_upper( Lower) ) :-
tolower( UPPER, Lower), tolower( UPPER, Lower),
char_type_upper( UPPER). char_type_upper( UPPER).
prolog:code_type( PUNCT , punct) :- p_code_type( PUNCT , punct) :-
code_type_punct( PUNCT ). code_type_punct( PUNCT ).
prolog:code_type( SPACE , space) :- p_code_type( SPACE , space) :-
code_type_space( SPACE ). code_type_space( SPACE ).
prolog:code_type( END_OF_FILE , end_of_file) :- p_code_type( END_OF_FILE , end_of_file) :-
code_type_end_of_file( END_OF_FILE ). code_type_end_of_file( END_OF_FILE ).
prolog:code_type( END_OF_LINE , end_of_line) :- p_code_type( END_OF_LINE , end_of_line) :-
code_type_end_of_line( END_OF_LINE ). code_type_end_of_line( END_OF_LINE ).
prolog:code_type( NEWLINE , newline) :- p_code_type( NEWLINE , newline) :-
code_type_newline( NEWLINE ). code_type_newline( NEWLINE ).
prolog:code_type( PERIOD , period) :- p_code_type( PERIOD , period) :-
code_type_period( PERIOD ). code_type_period( PERIOD ).
prolog:code_type( QUOTE , quote) :- p_code_type( QUOTE , quote) :-
code_type_quote( QUOTE ). code_type_quote( QUOTE ).
prolog:code_type( Parent_Open, paren( PAREN_CLOSE) ) :- p_code_type( Parent_Open, paren( PAREN_CLOSE) ) :-
paren_paren(Parent_Open, PAREN_CLOSE). paren_paren(Parent_Open, PAREN_CLOSE).
prolog:code_type( PROLOG_VAR_START , prolog_var_start) :- p_code_type( PROLOG_VAR_START , prolog_var_start) :-
code_type_prolog_var_start( PROLOG_VAR_START ). code_type_prolog_var_start( PROLOG_VAR_START ).
prolog:code_type( PROLOG_ATOM_START , prolog_atom_start) :- p_code_type( PROLOG_ATOM_START , prolog_atom_start) :-
code_type_prolog_atom_start( PROLOG_ATOM_START ). code_type_prolog_atom_start( PROLOG_ATOM_START ).
prolog:code_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :- p_code_type( PROLOG_IDENTIFIER_CONTINUE , prolog_identifier_continue) :-
code_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ). code_type_prolog_identifier_continue( PROLOG_IDENTIFIER_CONTINUE ).
prolog:code_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :- p_code_type( PROLOG_PROLOG_SYMBOL , prolog_prolog_symbol) :-
code_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ). code_type_prolog_prolog_symbol( PROLOG_PROLOG_SYMBOL ).