From eb79049bdb8385b4f5f31ab2cef3858927e5b598 Mon Sep 17 00:00:00 2001
From: Vitor Santos Costa
Date: Sun, 28 Oct 2012 18:22:09 +0000
Subject: [PATCH] more improvements to interface

---
 packages/pyswip/pl2py_examples/nltk.pl | 11 +++++++++
 packages/pyswip/python.c               | 32 ++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/packages/pyswip/pl2py_examples/nltk.pl b/packages/pyswip/pl2py_examples/nltk.pl
index 7650373af..789169e2c 100644
--- a/packages/pyswip/pl2py_examples/nltk.pl
+++ b/packages/pyswip/pl2py_examples/nltk.pl
@@ -1,3 +1,14 @@
+/*
+import nltk
+sentence = """At eight o'clock on Thursday morning
+... Arthur didn't feel very good."""
+tokens = nltk.word_tokenize(sentence)
+tagged = nltk.pos_tag(tokens)
+tagged[0:6]
+entities = nltk.chunk.ne_chunk(tagged)
+entities
+*/
+
 :- use_module(library(python)).
 :- use_module(library(maplist)).
 
diff --git a/packages/pyswip/python.c b/packages/pyswip/python.c
index 50f1ba8e2..204738ee1 100644
--- a/packages/pyswip/python.c
+++ b/packages/pyswip/python.c
@@ -20,6 +20,18 @@ static functor_t FUNCTOR_dollar1,
 
 static PyObject *py_Main;
 
+static inline int /* TRUE iff the NUL-terminated string s holds only 7-bit ASCII bytes */
+proper_ascii_string(const char *s)
+{
+  unsigned char c; /* bytes are read unsigned so the test below does not depend on char signedness */
+
+  while ((c = (unsigned char)*s++)) {
+    if (c > 127)
+      return FALSE;
+  }
+  return TRUE;
+}
+
 static PyObject *
 term_to_python(term_t t)
 {
@@ -31,9 +43,21 @@ term_to_python(term_t t)
     {
       char *s;
 
-      if (!PL_get_atom_chars(t, &s))
-        return NULL;
-      return PyString_FromStringAndSize(s, strlen(s) );
+      if (!PL_get_atom_chars(t, &s)) { /* no narrow text available: fall back to the wide-character form */
+        const wchar_t *w;
+        atom_t at;
+        size_t len;
+
+        if (!PL_get_atom(t, &at))
+          return NULL;
+        if (!(w = PL_atom_wchars(at, &len)))
+          return NULL;
+        return PyUnicode_FromWideChar(w, (Py_ssize_t)len); /* use the length reported by PL_atom_wchars, not wcslen(): no rescan, no truncation at an embedded NUL */
+      }
+      if (proper_ascii_string(s))
+        return PyString_FromStringAndSize(s, strlen(s) );
+      else
+        return PyUnicode_DecodeLatin1(s, strlen(s), NULL); /* bytes above 127 present: decode as Latin-1 into a unicode object */
     }
   case PL_INTEGER:
     {
@@ -49,7 +73,7 @@ term_to_python(term_t t)
 
       if (!PL_get_string_chars(t, &s, &len))
         return NULL;
-      return PyByteArray_FromStringAndSize(s, len );
+      return PyByteArray_FromStringAndSize(s, len);
     }
   case PL_FLOAT:
     {