From efa64ba6c4120d886b1a41edd182047cb94de909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADtor=20Santos=20Costa?= Date: Wed, 23 Jun 2010 11:49:34 +0100 Subject: [PATCH] more SWI emulation. --- LGPL/Makefile.in | 10 +- LGPL/base64.pl | 230 ++++++++++ LGPL/broadcast.pl | 177 ++++++++ LGPL/quintus.pl | 401 +++++++++++++++++ LGPL/settings.pl | 629 +++++++++++++++++++++++++++ LGPL/url.pl | 1048 +++++++++++++++++++++++++++++++++++++++++++++ LGPL/utf8.pl | 134 ++++++ 7 files changed, 2627 insertions(+), 2 deletions(-) create mode 100644 LGPL/base64.pl create mode 100644 LGPL/broadcast.pl create mode 100644 LGPL/quintus.pl create mode 100644 LGPL/settings.pl create mode 100644 LGPL/url.pl create mode 100644 LGPL/utf8.pl diff --git a/LGPL/Makefile.in b/LGPL/Makefile.in index a4ba16dee..9161b578a 100644 --- a/LGPL/Makefile.in +++ b/LGPL/Makefile.in @@ -24,13 +24,19 @@ INSTALL_PROGRAM=@INSTALL_PROGRAM@ srcdir=@srcdir@ YAP_EXTRAS=@YAP_EXTRAS@ -PROGRAMS= $(srcdir)/debug.pl \ +PROGRAMS= $(srcdir)/base64.pl \ + $(srcdir)/broadcast.pl \ + $(srcdir)/debug.pl \ $(srcdir)/maplist.pl \ $(srcdir)/operators.pl \ $(srcdir)/option.pl \ $(srcdir)/prolog_source.pl \ $(srcdir)/prolog_xref.pl \ - $(srcdir)/shlib.pl + $(srcdir)/quintus.pl \ + $(srcdir)/settings.pl \ + $(srcdir)/shlib.pl \ + $(srcdir)/url.pl \ + $(srcdir)/utf8.pl install: $(PROGRAMS) diff --git a/LGPL/base64.pl b/LGPL/base64.pl new file mode 100644 index 000000000..81b34ecdf --- /dev/null +++ b/LGPL/base64.pl @@ -0,0 +1,230 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: wielemak@science.uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2007, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(base64, + [ base64/2, % ?PlainText, ?Encoded + base64//1 % ?PlainText + ]). + +/** Base64 encoding and decoding + +Prolog-based base64 encoding using DCG rules. Encoding according to +rfc2045. For example: + +== +1 ?- base64('Hello World', X). + +X = 'SGVsbG8gV29ybGQ=' + +Yes +2 ?- base64(H, 'SGVsbG8gV29ybGQ='). + +H = 'Hello World' +== + +@tbd Stream I/O +@tbd White-space introduction and parsing +@author Jan Wielemaker +*/ + +%% base64(+Plain, -Encoded) is det. +%% base64(-Plain, +Encoded) is det. +% +% Translates between plaintext and base64 encoded atom or string. +% See also base64//1. + +base64(Plain, Encoded) :- + nonvar(Plain), !, + atom_codes(Plain, PlainCodes), + phrase(base64(PlainCodes), EncCodes), + atom_codes(Encoded, EncCodes). +base64(Plain, Encoded) :- + nonvar(Encoded), !, + atom_codes(Encoded, EncCodes), + phrase(base64(PlainCodes), EncCodes), + atom_codes(Plain, PlainCodes). +base64(_, _) :- + throw(error(instantiation_error, _)). + + +%% base64(+PlainText)// is det. +%% base64(-PlainText)// is det. +% +% Encode/decode list of character codes using _base64_. See also +% base64/2. + +base64(Input) --> + { nonvar(Input) }, !, + encode(Input). +base64(Output) --> + decode(Output). + + + /******************************* + * ENCODING * + *******************************/ + +encode([I0, I1, I2|Rest]) --> !, + [O0, O1, O2, O3], + { A is (I0<<16)+(I1<<8)+I2, + O00 is (A>>18) /\ 0x3f, + O01 is (A>>12) /\ 0x3f, + O02 is (A>>6) /\ 0x3f, + O03 is A /\ 0x3f, + base64_char(O00, O0), + base64_char(O01, O1), + base64_char(O02, O2), + base64_char(O03, O3) + }, + encode(Rest). +encode([I0, I1]) --> !, + [O0, O1, O2, 0'=], + { A is (I0<<16)+(I1<<8), + O00 is (A>>18) /\ 0x3f, + O01 is (A>>12) /\ 0x3f, + O02 is (A>>6) /\ 0x3f, + base64_char(O00, O0), + base64_char(O01, O1), + base64_char(O02, O2) + }. +encode([I0]) --> !, + [O0, O1, 0'=, 0'=], + { A is (I0<<16), + O00 is (A>>18) /\ 0x3f, + O01 is (A>>12) /\ 0x3f, + base64_char(O00, O0), + base64_char(O01, O1) + }. +encode([]) --> + []. + + + /******************************* + * DECODE * + *******************************/ + +decode(Text) --> + [C0, C1, C2, C3], !, + { base64_char(B0, C0), + base64_char(B1, C1) + }, !, + { C3 == 0'= + -> ( C2 == 0'= + -> A is (B0<<18) + (B1<<12), + I0 is (A>>16) /\ 0xff, + Text = [I0|Rest] + ; base64_char(B2, C2) + -> A is (B0<<18) + (B1<<12) + (B2<<6), + I0 is (A>>16) /\ 0xff, + I1 is (A>>8) /\ 0xff, + Text = [I0,I1|Rest] + ) + ; base64_char(B2, C2), + base64_char(B3, C3) + -> A is (B0<<18) + (B1<<12) + (B2<<6) + B3, + I0 is (A>>16) /\ 0xff, + I1 is (A>>8) /\ 0xff, + I2 is A /\ 0xff, + Text = [I0,I1,I2|Rest] + }, + decode(Rest). +decode([]) --> + []. + + + /******************************* + * BASIC CHARACTER ENCODING * + *******************************/ + +base64_char(00, 0'A). +base64_char(01, 0'B). +base64_char(02, 0'C). +base64_char(03, 0'D). +base64_char(04, 0'E). +base64_char(05, 0'F). +base64_char(06, 0'G). +base64_char(07, 0'H). +base64_char(08, 0'I). +base64_char(09, 0'J). +base64_char(10, 0'K). +base64_char(11, 0'L). +base64_char(12, 0'M). +base64_char(13, 0'N). +base64_char(14, 0'O). +base64_char(15, 0'P). +base64_char(16, 0'Q). +base64_char(17, 0'R). +base64_char(18, 0'S). +base64_char(19, 0'T). +base64_char(20, 0'U). +base64_char(21, 0'V). +base64_char(22, 0'W). +base64_char(23, 0'X). +base64_char(24, 0'Y). +base64_char(25, 0'Z). +base64_char(26, 0'a). +base64_char(27, 0'b). +base64_char(28, 0'c). +base64_char(29, 0'd). +base64_char(30, 0'e). +base64_char(31, 0'f). +base64_char(32, 0'g). +base64_char(33, 0'h). +base64_char(34, 0'i). +base64_char(35, 0'j). +base64_char(36, 0'k). +base64_char(37, 0'l). +base64_char(38, 0'm). +base64_char(39, 0'n). +base64_char(40, 0'o). +base64_char(41, 0'p). +base64_char(42, 0'q). +base64_char(43, 0'r). +base64_char(44, 0's). +base64_char(45, 0't). +base64_char(46, 0'u). +base64_char(47, 0'v). +base64_char(48, 0'w). +base64_char(49, 0'x). +base64_char(50, 0'y). +base64_char(51, 0'z). +base64_char(52, 0'0). +base64_char(53, 0'1). +base64_char(54, 0'2). +base64_char(55, 0'3). +base64_char(56, 0'4). +base64_char(57, 0'5). +base64_char(58, 0'6). +base64_char(59, 0'7). +base64_char(60, 0'8). +base64_char(61, 0'9). +base64_char(62, 0'+). +base64_char(63, 0'/). diff --git a/LGPL/broadcast.pl b/LGPL/broadcast.pl new file mode 100644 index 000000000..238f048a9 --- /dev/null +++ b/LGPL/broadcast.pl @@ -0,0 +1,177 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: wielemak@science.uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2006, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(broadcast, + [ listen/3, % Listener x Templ x Goal + listen/2, % Templ x Goal + unlisten/1, % Listener + unlisten/2, % Listener x Templ + unlisten/3, % Listener x Templ x Goal + listening/3, % Listener x Templ x Goal + broadcast/1, % Templ + broadcast_request/1 % Templ + ]). +:- meta_predicate + listen(+, :), + listen(+, +, :), + unlisten(+, +, :). + +:- dynamic + listener/4. + +/** Event service + +Generic broadcasting service. Broadcasts are made using the predicate +broadcast(+Templ). All registered `listeners' will have their goal +called. Success or failure of this is ignored. The listener can not bind +arguments. + +This library is particulary useful for disconnecting modules in an +application. Modules can broadcast events such as changes, anticipating +other modules need to react on such changes. For example, settings.pl +broadcasts changes to settings, allowing dependent modules to react on +changes: + +== +:- listing(setting(changed(http:workers, New)), + change_workers(New)). + +change_workers(New) :- + setting(http:port, Port), + http_workers(Port, New). +== +*/ + +%% listen(+Listener, +Templ, :Goal) is det. +%% listen(+Templ, :Goal) is det. +% +% Open a channel for listening for events of the given `Templ'. + +listen(Listener0, Templ, Goal) :- + canonical_listener(Listener0, Listener), + strip_module(Goal, Module, TheGoal), + assert_listener(Templ, Listener, Module, TheGoal). + +listen(Templ, Goal) :- + strip_module(Goal, Module, TheGoal), + assert_listener(Templ, Module, Module, TheGoal). + + +%% unlisten(+Listener) is det. +%% unlisten(+Listener, +Templ) is det. +%% unlisten(+Listener, +Templ, :Goal) is det. +% +% Destroy a channel. All arguments may be variables, removing the +% all matching listening channals. + +unlisten(Listener0) :- + canonical_listener(Listener0, Listener), + retractall(listener(_, Listener, _, _)). +unlisten(Listener0, Templ) :- + canonical_listener(Listener0, Listener), + retractall(listener(Templ, Listener, _, _)). +unlisten(Listener0, Templ, Goal) :- + canonical_listener(Listener0, Listener), + ( var(Goal) + -> true + ; strip_module(Goal, Module, TheGoal) + ), + retract_listener(Templ, Listener, Module, TheGoal). + + +%% listening(?Listener, ?Templ, ?Goal) is nondet. +% +% returns currently open channels + +listening(Listener0, Templ, Module:Goal) :- + canonical_listener(Listener0, Listener), + listener(Templ, Listener, Module, Goal). + + +%% broadcast(+Templ) is det. +% +% Broadcast given event. + +broadcast(Templ) :- + ( listener(Templ, _Listener, Module, Goal), + ( Module:Goal + -> fail + ) + ; true + ). + + +%% broadcast_request(+Templ) is nonet. +% +% Broadcast given event till accepted. Succeeds then, fail if no +% listener accepts the call. Bindings made by the listener goal +% are maintained. May be used to make broadcast requests. + +broadcast_request(Templ) :- + listener(Templ, _Listener, Module, Goal), + Module:Goal. + + +% {assert,retract}_listener(+Templ, +Listener, +Module, +Goal) +% +% Implemented as sub-predicate to ensure storage in this module. +% Second registration is ignored. Is this ok? It avoids problems +% using multiple registration of global listen channels. + +assert_listener(Templ, Listener, Module, TheGoal) :- + listener(Templ, Listener, Module, TheGoal), !. +assert_listener(Templ, Listener, Module, TheGoal) :- + asserta(listener(Templ, Listener, Module, TheGoal)). + +retract_listener(Templ, Listener, Module, TheGoal) :- + retractall(listener(Templ, Listener, Module, TheGoal)). + +%% canonical_listener(+Raw, -Canonical) +% +% Entry for later optimization. + +canonical_listener(Templ, Templ). + + + /******************************* + * GOAL EXPANSION * + *******************************/ + +:- multifile + user:goal_expansion/2. + +user:goal_expansion(listen(L,T,G0), listen(L,T,G)) :- + expand_goal(G0, G). +user:goal_expansion(listen(T,G0), listen(T,G)) :- + expand_goal(G0, G). +user:goal_expansion(unlisten(L,T,G0), unlisten(L,T,G)) :- + expand_goal(G0, G). + diff --git a/LGPL/quintus.pl b/LGPL/quintus.pl new file mode 100644 index 000000000..96b7401c9 --- /dev/null +++ b/LGPL/quintus.pl @@ -0,0 +1,401 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: J.Wielemaker@uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2008, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(quintus, + [ % unix/1, +% file_exists/1, + + abs/2, + sin/2, + cos/2, + tan/2, + log/2, + log10/2, + pow/3, + ceiling/2, + floor/2, + round/2, + acos/2, + asin/2, + atan/2, + atan2/3, + sign/2, + sqrt/2, + + genarg/3, + + (mode)/1, + (public)/1, + no_style_check/1, + otherwise/0, + simple/1, +% statistics/2, % Please access as quintus:statistics/2 + prolog_flag/2, + + date/1, % -date(Year, Month, Day) + + current_stream/3, % ?File, ?Mode, ?Stream + stream_position/3, % +Stream, -Old, +New + skip_line/0, + skip_line/1, % +Stream + + compile/1, % +File(s) + + atom_char/2, + midstring/3, % ABC, B, AC + midstring/4, % ABC, B, AC, LenA + midstring/5, % ABC, B, AC, LenA, LenB + midstring/6, % ABC, B, AC, LenA, LenB, LenC + + raise_exception/1, % +Exception + on_exception/3 % +Ball, :Goal, :Recover + ]). +:- use_module(library(lists), [member/2]). + +/** Quintus compatibility + +This module defines several predicates from the Quintus Prolog +libraries. Note that our library structure is totally different. If this +library were complete, Prolog code could be ported by removing the +use_module/1 declarations, relying on the SWI-Prolog autoloader. + +Bluffers guide to porting: + + * Remove =|use_module(library(...))|= + * Run =|?- list_undefined.|= + * Fix problems + +Of course, this library is incomplete ... +*/ + + /******************************** + * SYSTEM INTERACTION * + *********************************/ + +% %% unix(+Action) +% % interface to Unix. + +% unix(system(Command)) :- +% shell(Command). +% unix(shell(Command)) :- +% shell(Command). +% unix(shell) :- +% shell. +% unix(access(File, 0)) :- +% access_file(File, read). +% unix(cd) :- +% expand_file_name(~, [Home]), +% working_directory(_, Home). +% unix(cd(Dir)) :- +% working_directory(_, Dir). +% unix(args(L)) :- +% current_prolog_flag(argv, L). +% unix(argv(L)) :- +% current_prolog_flag(argv, S), +% maplist(to_prolog, S, L). + +% to_prolog(S, A) :- +% name(S, L), +% name(A, L). + + + /******************************** + * META PREDICATES * + *********************************/ + +%% otherwise +% +% For (A -> B ; otherwise -> C) + +% otherwise. + + + /******************************** + * ARITHMETIC * + *********************************/ + +%% abs(+Number, -Absolute) +% Unify `Absolute' with the absolute value of `Number'. + +abs(Number, Absolute) :- + Absolute is abs(Number). + +%% sin(+Angle, -Sine) is det. +%% cos(+Angle, -Cosine) is det. +%% tan(+Angle, -Tangent) is det. +%% log(+X, -NatLog) is det. +%% log10(+X, -Log) is det. +% +% Math library predicates. SWI-Prolog (and ISO) support these as +% functions under is/2, etc. + +sin(A, V) :- V is sin(A). +cos(A, V) :- V is cos(A). +tan(A, V) :- V is tan(A). +log(A, V) :- V is log(A). +log10(X, V) :- V is log10(X). +pow(X,Y,V) :- V is X**Y. +ceiling(X, V) :- V is ceil(X). +floor(X, V) :- V is floor(X). +round(X, V) :- V is round(X). +sqrt(X, V) :- V is sqrt(X). +acos(X, V) :- V is acos(X). +asin(X, V) :- V is asin(X). +atan(X, V) :- V is atan(X). +atan2(Y, X, V) :- V is atan(Y, X). +sign(X, V) :- V is sign(X). + + + /******************************* + * TERM MANIPULATION * + *******************************/ + +%% genarg(?Index, +Term, ?Arg) is nondet. +% +% Generalised version of ISO arg/3. SWI-Prolog's arg/3 is already +% genarg/3. + +genarg(N, T, A) :- % SWI-Prolog arg/3 is generic + arg(N, T, A). + + + /******************************* + * FLAGS * + *******************************/ + +%% prolog_flag(?Flag, ?Value) is nondet. +% +% Same as ISO current_prolog_flag/2. Maps =version=. +% +% @bug Should map relevant Quintus flag identifiers. + +% prolog_flag(version, Version) :- !, +% current_prolog_flag(version_data, swi(Major, Minor, Patch, _)), +% current_prolog_flag(arch, Arch), +% current_prolog_flag(compiled_at, Compiled), +% atomic_list_concat(['SWI-Prolog ', +% Major, '.', Minor, '.', Patch, +% ' (', Arch, '): ', Compiled], Version). +% prolog_flag(Flag, Value) :- +% current_prolog_flag(Flag, Value). + + + /******************************* + * STATISTICS * + *******************************/ + +% Here used to be a definition of Quintus statistics/2 in traditional +% SWI-Prolog statistics/2. The current built-in emulates Quintus +% almost completely. + + + /******************************* + * DATE/TIME * + *******************************/ + +%% date(-Date) is det. +% +% Get current date as date(Y,M,D) + +date(Date) :- + get_time(T), + stamp_date_time(T, DaTime, local), + date_time_value(date, DaTime, Date). + + + /******************************** + * STYLE CHECK * + *********************************/ + +%% no_style_check(Style) is det. +% +% Same as SWI-Prolog =|style_check(-Style)|=. The Quintus option +% =single_var= is mapped to =singleton=. +% +% @see style_check/1. + +q_style_option(single_var, singleton) :- !. +q_style_option(Option, Option). + +% no_style_check(QOption) :- +% q_style_option(QOption, SWIOption), +% style_check(-SWIOption). + + + /******************************** + * DIRECTIVES * + *********************************/ + +% :- op(1150, fx, [(mode), (public)]). + +% mode(_). +% public(_). + + + /******************************* + * TYPES * + *******************************/ + +%% simple(@Term) is semidet. +% +% Term is atomic or a variable. + +% simple(X) :- +% ( atomic(X) +% -> true +% ; var(X) +% ). + + + /******************************* + * STREAMS * + *******************************/ + +%% current_stream(?Object, ?Mode, ?Stream) +% +% SICStus/Quintus and backward compatible predicate. New code should +% be using the ISO compatible stream_property/2. + +% current_stream(Object, Mode, Stream) :- +% stream_property(Stream, mode(FullMode)), +% stream_mode(FullMode, Mode), +% ( stream_property(Stream, file_name(Object0)) +% -> true +% ; stream_property(Stream, file_no(Object0)) +% -> true +% ; Object0 = [] +% ), +% Object = Object0. + +% stream_mode(read, read). +% stream_mode(write, write). +% stream_mode(append, write). +% stream_mode(update, write). + +% %% stream_position(+Stream, -Old, +New) + +% stream_position(Stream, Old, New) :- +% stream_property(Stream, position(Old)), +% set_stream_position(Stream, New). + + +%% skip_line is det. +%% skip_line(Stream) is det. +% +% Skip the rest of the current line (on Stream). Same as +% =|skip(0'\n)|=. + +skip_line :- + skip(10). +skip_line(Stream) :- + skip(Stream, 10). + + + /******************************* + * COMPILATION * + *******************************/ + +%% compile(+Files) is det. +% +% Compile files. SWI-Prolog doesn't distinguish between +% compilation and consult. +% +% @see load_files/2. + +% :- meta_predicate +% compile(:). + +% compile(Files) :- +% consult(Files). + + /******************************* + * ATOM-HANDLING * + *******************************/ + +%% atom_char(+Char, -Code) is det. +%% atom_char(-Char, +Code) is det. +% +% Same as ISO char_code/2. + +atom_char(Char, Code) :- + char_code(Char, Code). + +%% midstring(?ABC, ?B, ?AC) is nondet. +%% midstring(?ABC, ?B, ?AC, LenA) is nondet. +%% midstring(?ABC, ?B, ?AC, LenA, LenB) is nondet. +%% midstring(?ABC, ?B, ?AC, LenA, LenB, LenC) is nondet. +% +% Too difficult to explain. See the Quintus docs. As far as I +% understand them the code below emulates this function just fine. + +midstring(ABC, B, AC) :- + midstring(ABC, B, AC, _, _, _). +midstring(ABC, B, AC, LenA) :- + midstring(ABC, B, AC, LenA, _, _). +midstring(ABC, B, AC, LenA, LenB) :- + midstring(ABC, B, AC, LenA, LenB, _). +midstring(ABC, B, AC, LenA, LenB, LenC) :- % -ABC, +B, +AC + var(ABC), !, + atom_length(AC, LenAC), + ( nonvar(LenA) ; nonvar(LenC) + -> plus(LenA, LenC, LenAC) + ; true + ), + sub_atom(AC, 0, LenA, _, A), + LenC is LenAC - LenA, + sub_atom(AC, _, LenC, 0, C), + atom_length(B, LenB), + atomic_list_concat([A,B,C], ABC). +midstring(ABC, B, AC, LenA, LenB, LenC) :- + sub_atom(ABC, LenA, LenB, LenC, B), + sub_atom(ABC, 0, LenA, _, A), + sub_atom(ABC, _, LenC, 0, C), + atom_concat(A, C, AC). + + + /******************************* + * EXCEPTIONS * + *******************************/ + +%% raise_exception(+Term) +% +% Quintus compatible exception handling + +% raise_exception(Term) :- +% throw(Term). + +%% on_exception(+Template, :Goal, :Recover) + +:- meta_predicate + on_exception(+, 0, 0). + +% on_exception(Except, Goal, Recover) :- +% catch(Goal, Except, Recover). diff --git a/LGPL/settings.pl b/LGPL/settings.pl new file mode 100644 index 000000000..d3386bf39 --- /dev/null +++ b/LGPL/settings.pl @@ -0,0 +1,629 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: wielemak@science.uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2007, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(settings, + [ setting/4, % :Name, +Type, +Default, +Comment + setting/2, % :Name, ?Value + set_setting/2, % :Name, +Value + set_setting_default/2, % :Name, +Value + restore_setting/1, % :Name + load_settings/1, % +File + load_settings/2, % +File, +Options + save_settings/0, + save_settings/1, % +File + current_setting/1, % Module:Name + setting_property/2, % ?Setting, ?Property + list_settings/0, + + convert_setting_text/3 % +Type, +Text, -Value + ]). +:- use_module(library(error)). +:- use_module(library(broadcast)). +:- use_module(library(debug)). +:- use_module(library(option)). + +/** Setting management + +This library allows management of configuration settings for Prolog +applications. Applications define settings in one or multiple files +using the directive setting/4 as illustrated below: + +== +:- use_module(library(setting)). + +:- setting(version, atom, '1.0', 'Current version'). +:- setting(timeout, number, 20, 'Timeout in seconds'). +== + +The directive is subject to term_expansion/2, which guarantees proper +synchronisation of the database if source-files are reloaded. This +implies it is *not* possible to call setting/4 as a predicate. + +Settings are local to a module. This implies they are defined in a +two-level namespace. Managing settings per module greatly simplifies +assembling large applications from multiple modules that configuration +through settings. This settings management library ensures proper +access, loading and saving of settings. + +@see library(config) distributed with XPCE provides an alternative + aimed at graphical applications. +@author Jan Wielemaker +*/ + +:- dynamic + st_value/3, % Name, Module, Value + st_default/3, % Name, Module, Value + local_file/1. % Path + +:- multifile + current_setting/6. % Name, Module, Type, Default, Comment, Source + +:- meta_predicate + setting(:, +, +, +), + setting(:, ?), + set_setting(:, +), + set_setting_default(:, +), + current_setting(:), + restore_setting(:). + +curr_setting(Name, Module, Type, Default, Comment) :- + current_setting(Name, Module, Type, Default0, Comment, _Src), + ( st_default(Name, Module, Default1) + -> Default = Default1 + ; Default = Default0 + ). + +%% setting(Name, Type, Default, Comment) is det. +% +% Define a setting. Name denotes the name of the setting, Type its +% type. Default is the value before it is modified. Default refer +% to environment variables and use arithmetic expressions as +% defined by eval_default/4. +% +% @param Name Name of the setting (an atom) +% @param Type Type for setting. One of =any= or a type defined +% by must_be/2. +% @param Default Default value for the setting. +% @param Comment Atom containing a (short) descriptive note. + + +setting(Name, Type, Default, Comment) :- + throw(error(context_error(nodirective, + setting(Name, Type, Default, Comment)), + _)). + +:- multifile + system:term_expansion/2. + +system:term_expansion((:- setting(QName, Type, Default, Comment)), + Expanded) :- + prolog_load_context(module, M0), + strip_module(M0:QName, Module, Name), + must_be(atom, Name), + to_atom(Comment, CommentAtom), + eval_default(Default, Module, Type, Value), + check_type(Type, Value), + ( current_setting(Name, Module, _, _, _, OldLoc) + -> format(string(Message), + 'Already defined at: ~w', [OldLoc]), + throw(error(permission_error(redefine, setting, Module:Name), + context(Message, _))) + ; source_location(File, Line) + -> Expanded = settings:current_setting(Name, Module, Type, Default, + CommentAtom, File:Line) + ). + +to_atom(Atom, Atom) :- + atom(Atom), !. +to_atom(String, Atom) :- + format(atom(Atom), '~s', String). + +%% setting(:Name, ?Value) is nondet. +% +% True if Name is a currently defined setting with Value. +% +% @error existence_error(setting, Name) + +setting(QName, Value) :- + strip_module(QName, Module, Name), + ( ground(Name) + -> ( st_value(Name, Module, Value0) + -> Value = Value0 + ; curr_setting(Name, Module, Type, Default, _) + -> eval_default(Default, Module, Type, Value) + ; existence_error(setting, Module:Name) + ) + ; current_setting(Name, Module, _, _, _, _), + setting(Module:Name, Value) + ). + + +:- dynamic + setting_cache/3. +:- volatile + setting_cache/3. + +%% clear_setting_cache is det. +% +% Clear the cache for evaluation of default values. + +clear_setting_cache :- + retractall(setting_cache(_,_,_)). + +%% eval_default(+Default, +Module, +Type, -Value) is det. +% +% Convert the settings default value. The notation allows for some +% `function-style' notations to make the library more generic: +% +% * env(Name) +% Get value from the given environment variable. The value +% is handed to convert_setting_text/3 to convert the +% textual representation into a Prolog term. Raises an +% existence_error of the variable is not defined. +% +% * env(Name, Default) +% As env(Name), but uses the value Default if the variable +% is not defined. +% +% * setting(Name) +% Ask the value of another setting. +% +% * Expression +% If Type is numeric, evaluate the expression. env(Var) +% evaluates to the value of an environment variable. +% If Type is =atom=, concatenate A+B+.... Elements of the +% expression can be env(Name). + +:- multifile + eval_default/3. % +Default, +Type, -Value + +eval_default(Default, _, Type, Value) :- + eval_default(Default, Type, Val), !, + Value = Val. +eval_default(Default, _, _, Value) :- + atomic(Default), !, + Value = Default. +eval_default(Default, _, Type, Value) :- + setting_cache(Default, Type, Val), !, + Value = Val. +eval_default(env(Name), _, Type, Value) :- !, + ( getenv(Name, TextValue) + -> convert_setting_text(Type, TextValue, Val), + assert(setting_cache(env(Name), Type, Val)), + Value = Val + ; existence_error(environment_variable, Name) + ). +eval_default(env(Name, Default), _, Type, Value) :- !, + ( getenv(Name, TextValue) + -> convert_setting_text(Type, TextValue, Val) + ; Value = Default + ), + assert(setting_cache(env(Name), Type, Val)), + Value = Val. +eval_default(setting(Name), Module, Type, Value) :- !, + strip_module(Module:Name, M, N), + setting(M:N, Value), + must_be(Type, Value). +eval_default(Expr, _, Type, Value) :- + numeric_type(Type, Basic), !, + Val0 is Expr, + ( Basic == float + -> Val is float(Val0) + ; Basic = integer + -> Val is round(Val0) + ; Val = Val0 + ), + assert(setting_cache(Expr, Type, Val)), + Value = Val. +eval_default(A+B, Module, atom, Value) :- !, + phrase(expr_to_list(A+B, Module), L), + atomic_list_concat(L, Val), + assert(setting_cache(A+B, atom, Val)), + Value = Val. +eval_default(List, Module, list(Type), Value) :- !, + eval_list_default(List, Module, Type, Val), + assert(setting_cache(List, list(Type), Val)), + Value = Val. +eval_default(Default, _, _, Default). + + +%% eval_list_default(+List, +Module, +ElementType, -DefaultList) +% +% Evaluate the default for a list of values. + +eval_list_default([], _, _, []). +eval_list_default([H0|T0], Module, Type, [H|T]) :- + eval_default(H0, Module, Type, H), + eval_list_default(T0, Module, Type, T). + +%% expr_to_list(+Expression, +Module)// is det. +% +% Process the components to create an atom. Atom concatenation is +% expressed as A+B. Components may refer to envrionment variables. + +expr_to_list(A+B, Module) --> !, + expr_to_list(A, Module), + expr_to_list(B, Module). +expr_to_list(env(Name), _) --> !, + ( { getenv(Name, Text) } + -> [Text] + ; { existence_error(environment_variable, Name) } + ). +expr_to_list(env(Name, Default), _) --> !, + ( { getenv(Name, Text) } + -> [Text] + ; [Default] + ). +expr_to_list(setting(Name), Module) --> !, + { strip_module(Module:Name, M, N), + setting(M:N, Value) + }, + [ Value ]. +expr_to_list(A, _) --> + [A]. + +%% env(+Name:atom, -Value:number) is det. +%% env(+Name:atom, +Default:number, -Value:number) is det +% +% Evaluate environment variables on behalf of arithmetic +% expressions. + +:- arithmetic_function(env/1). +:- arithmetic_function(env/2). + +env(Name, Value) :- + ( getenv(Name, Text) + -> convert_setting_text(number, Text, Value) + ; existence_error(environment_variable, Name) + ). +env(Name, Default, Value) :- + ( getenv(Name, Text) + -> convert_setting_text(number, Text, Value) + ; Value = Default + ). + + +%% numeric_type(+Type, -BaseType) +% +% True if Type is a numeric type and BaseType is the associated +% basic Prolog type. BaseType is one of =integer=, =float= or +% =number=. + +numeric_type(integer, integer). +numeric_type(nonneg, integer). +numeric_type(float, float). +numeric_type(between(L,_), Type) :- + ( integer(L) -> Type = integer ; Type = float ). + + +%% set_setting(:Name, +Value) is det. +% +% Change a setting. Performs existence and type-checking for the +% setting. If the effective value of the setting is changed it +% broadcasts the event below. +% +% settings(changed(Module:Name, Old, New)) +% +% @error existence_error(setting, Name) +% @error type_error(Type, Value) + +set_setting(QName, Value) :- + strip_module(QName, Module, Name), + must_be(atom, Name), + ( curr_setting(Name, Module, Type, Default0, _Comment), + eval_default(Default0, Module, Type, Default) + -> ( Value == Default + -> retract_setting(Module:Name) + ; st_value(Name, Module, Value) + -> true + ; check_type(Type, Value) + -> setting(Module:Name, Old), + retract_setting(Module:Name), + assert_setting(Module:Name, Value), + broadcast(settings(changed(Module:Name, Old, Value))), + clear_setting_cache % might influence dependent settings. + ) + ; existence_error(setting, Name) + ). + +retract_setting(Module:Name) :- + retractall(st_value(Name, Module, _)). + +assert_setting(Module:Name, Value) :- + assert(st_value(Name, Module, Value)). + +%% restore_setting(:Name) is det. +% +% Restore the value of setting Name to its default. Broadcast a +% change like set_setting/2 if the current value is not the +% default. + +restore_setting(QName) :- + strip_module(QName, Module, Name), + must_be(atom, Name), + ( st_value(Name, Module, Old) + -> retract_setting(Module:Name), + setting(Module:Name, Value), + ( Old \== Value + -> broadcast(settings(changed(Module:Name, Old, Value))) + ; true + ) + ; true + ). + +%% set_setting_default(:Name, +Default) is det. +% +% Change the default for a setting. The effect is the same as +% set_setting/2, but the new value is considered the default when +% saving and restoring a setting. It is intended to change +% application defaults in a particular context. + +set_setting_default(QName, Default) :- + strip_module(QName, Module, Name), + must_be(atom, Name), + ( current_setting(Name, Module, Type, Default0, _Comment, _Src) + -> retractall(settings:st_default(Name, Module, _)), + retract_setting(Module:Name), + ( Default == Default0 + -> true + ; assert(settings:st_default(Name, Module, Default)) + ), + eval_default(Default, Module, Type, Value), + set_setting(Module:Name, Value) + ; existence_error(setting, Module:Name) + ). + + + /******************************* + * TYPES * + *******************************/ + +%% check_type(+Type, +Term) +% +% Type checking for settings. Currently simply forwarded to +% must_be/2. + +check_type(Type, Term) :- + must_be(Type, Term). + + + /******************************* + * FILE * + *******************************/ + +%% load_settings(File) is det. +%% load_settings(File, +Options) is det. +% +% Load local settings from File. Succeeds if File does not exist, +% setting the default save-file to File. Options are: +% +% * undefined(+Action) +% Define how to handle settings that are not defined. When +% =error=, an error is printed and the setting is ignored. +% when =load=, the setting is loaded anyway, waiting for a +% definition. + +load_settings(File) :- + load_settings(File, []). + +load_settings(File, Options) :- + absolute_file_name(File, Path, + [ access(read), + file_errors(fail) + ]), !, + assert(local_file(Path)), + open(Path, read, In, [encoding(utf8)]), + read_setting(In, T0), + call_cleanup(load_settings(T0, In, Options), close(In)), + clear_setting_cache. +load_settings(File, _) :- + absolute_file_name(File, Path, + [ access(write), + file_errors(fail) + ]), !, + assert(local_file(Path)). +load_settings(_, _). + +load_settings(end_of_file, _, _) :- !. +load_settings(Setting, In, Options) :- + catch(store_setting(Setting, Options), E, + print_message(warning, E)), + read_setting(In, Next), + load_settings(Next, In, Options). + +read_setting(In, Term) :- + read_term(In, Term, + [ errors(dec10) + ]). + +%% store_setting(Term, +Options) +% +% Store setting loaded from file in the Prolog database. + +store_setting(setting(Module:Name, Value), _) :- + curr_setting(Name, Module, Type, Default0, _Commentm), !, + eval_default(Default0, Module, Type, Default), + ( Value == Default + -> true + ; check_type(Type, Value) + -> retractall(st_value(Name, Module, _)), + assert(st_value(Name, Module, Value)), + broadcast(settings(changed(Module:Name, Default, Value))) + ). +store_setting(setting(Module:Name, Value), Options) :- !, + ( option(undefined(load), Options, load) + -> retractall(st_value(Name, Module, _)), + assert(st_value(Name, Module, Value)) + ; existence_error(setting, Module:Name) + ). +store_setting(Term, _) :- + type_error(setting, Term). + +%% save_settings is det. +%% save_settings(+File) is det. +% +% Save modified settings to File. + +save_settings :- + local_file(File), !, + save_settings(File). + +save_settings(File) :- + absolute_file_name(File, Path, + [ access(write) + ]), !, + open(Path, write, Out, + [ encoding(utf8), + bom(true) + ]), + write_setting_header(Out), + forall(current_setting(Name, Module, _, _, _, _), + save_setting(Out, Module:Name)), + close(Out). + + +write_setting_header(Out) :- + get_time(Now), + format_time(string(Date), '%+', Now), + format(Out, '/* Saved settings~n', []), + format(Out, ' Date: ~w~n', [Date]), + format(Out, '*/~n~n', []). + +save_setting(Out, Module:Name) :- + curr_setting(Name, Module, Type, Default, Comment), + ( st_value(Name, Module, Value), + \+ ( eval_default(Default, Module, Type, DefValue), + debug(setting, '~w <-> ~w~n', [DefValue, Value]), + DefValue =@= Value + ) + -> format(Out, '~n% ~w~n', [Comment]), + format(Out, 'setting(~q:~q, ~q).~n', [Module, Name, Value]) + ; true + ). + +%% current_setting(?Setting) is nondet. +% +% True if Setting is a currently defined setting + +current_setting(Setting) :- + ground(Setting), !, + strip_module(Setting, Module, Name), + current_setting(Name, Module, _, _, _, _). +current_setting(Module:Name) :- + current_setting(Name, Module, _, _, _, _). + +%% setting_property(+Setting, +Property) is det. +%% setting_property(?Setting, ?Property) is nondet. +% +% Query currently defined settings. Property is one of +% +% * comment(-Atom) +% * type(-Type) +% Type of the setting. +% * default(-Default) +% Default value. If this is an expression, it is +% evaluated. + +setting_property(Setting, Property) :- + ground(Setting), !, + Setting = Module:Name, + curr_setting(Name, Module, Type, Default, Comment), !, + setting_property(Property, Module, Type, Default, Comment). +setting_property(Setting, Property) :- + Setting = Module:Name, + curr_setting(Name, Module, Type, Default, Comment), + setting_property(Property, Module, Type, Default, Comment). + +setting_property(type(Type), _, Type, _, _). +setting_property(default(Default), M, Type, Default0, _) :- + eval_default(Default0, M, Type, Default). +setting_property(comment(Comment), _, _, _, Comment). + +%% list_settings +% +% List settings to =current_output=. + +list_settings :- + format('~`=t~72|~n'), + format('~w~t~20| ~w~w~t~40| ~w~n', ['Name', 'Value (*=modified)', '', 'Comment']), + format('~`=t~72|~n'), + forall(current_setting(Module:Setting), + list_setting(Module:Setting)). + +list_setting(Module:Name) :- + curr_setting(Name, Module, Type, Default0, Comment), + eval_default(Default0, Module, Type, Default), + setting(Module:Name, Value), + ( Value \== Default + -> Modified = (*) + ; Modified = '' + ), + format('~w~t~20| ~q~w~t~40| ~w~n', [Module:Name, Value, Modified, Comment]). + + + /******************************* + * TYPES * + *******************************/ + +%% convert_setting_text(+Type, +Text, -Value) +% +% Converts from textual form to Prolog Value. Used to convert +% values obtained from the environment. Public to provide support +% in user-interfaces to this library. +% +% @error type_error(Type, Value) + +:- multifile + convert_text/3. % +Type, +Text, -Value + +convert_setting_text(Type, Text, Value) :- + convert_text(Type, Text, Value), !. +convert_setting_text(atom, Value, Value) :- !, + must_be(atom, Value). +convert_setting_text(boolean, Value, Value) :- !, + must_be(boolean, Value). +convert_setting_text(integer, Atom, Number) :- !, + term_to_atom(Term, Atom), + Number is round(Term). +convert_setting_text(float, Atom, Number) :- !, + term_to_atom(Term, Atom), + Number is float(Term). +convert_setting_text(between(L,U), Atom, Number) :- !, + ( integer(L) + -> convert_setting_text(integer, Atom, Number) + ; convert_setting_text(float, Atom, Number) + ), + must_be(between(L,U), Number). +convert_setting_text(Type, Atom, Term) :- + term_to_atom(Term, Atom), + must_be(Type, Term). + + diff --git a/LGPL/url.pl b/LGPL/url.pl new file mode 100644 index 000000000..08a0f4f28 --- /dev/null +++ b/LGPL/url.pl @@ -0,0 +1,1048 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: J.Wielemaker@uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2009, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(url, + [ parse_url/2, % +URL, -Parts | -URL +Parts + parse_url/3, % +URL|URI, +BaseURL, -Parts + % -URL, +BaseURL, +Parts + is_absolute_url/1, % +URL + global_url/3, % +Local, +Base, -Global + http_location/2, % ?Parts, ?Location + www_form_encode/2, % Value <-> Encoded + parse_url_search/2, % Form-data <-> Form fields + + url_iri/2, % ?URL, ?IRI + + file_name_to_url/2, % ?FileName, ?URL + + set_url_encoding/2 % ?Old, +New + ]). +:- use_module(library(lists)). +:- use_module(library(error)). +:- use_module(library(utf8)). + +/** Analysing and constructing URL + +This library deals with the analysis and construction of a URL, +Universal Resource Locator. URL is the basis for communicating locations +of resources (data) on the web. A URL consists of a protocol identifier +(e.g. HTTP, FTP, and a protocol-specific syntax further defining the +location. URLs are standardized in RFC-1738. + +The implementation in this library covers only a small portion of the +defined protocols. Though the initial implementation followed RFC-1738 +strictly, the current is more relaxed to deal with frequent violations +of the standard encountered in practical use. + +@author Jan Wielemaker +@author Lukas Faulstich +@deprecated New code should use library(uri), provided by the =clib= + package. +*/ + + /******************************* + * GLOBALISE * + *******************************/ + +%% global_url(+URL, +Base, -Global) is det. +% +% Translate a possibly relative URL into an absolute one. +% +% @error syntax_error(illegal_url) if URL is not legal. + +global_url(URL, BaseURL, Global) :- + ( is_absolute_url(URL), + \+ sub_atom(URL, _, _, _, '%') % may have escape, use general + -> Global = URL + ; sub_atom(URL, 0, _, _, '//') + -> parse_url(BaseURL, [], Attributes), + memberchk(protocol(Proto), Attributes), + atomic_list_concat([Proto, :, URL], Global) + ; sub_atom(URL, 0, _, _, #) + -> ( sub_atom(BaseURL, _, _, 0, #) + -> sub_atom(URL, 1, _, 0, NoHash), + atom_concat(BaseURL, NoHash, Global) + ; atom_concat(BaseURL, URL, Global) + ) + ; parse_url(URL, BaseURL, Attributes) + -> phrase(curl(Attributes), Chars), + atom_codes(Global, Chars) + ; throw(error(syntax_error(illegal_url), URL)) + ). + +%% is_absolute_url(+URL) +% +% True if URL is an absolute URL. That is, a URL that starts with +% a protocol identifier. + +is_absolute_url(URL) :- + sub_atom(URL, 0, _, _, 'http://'), !. +is_absolute_url(URL) :- + sub_atom(URL, 0, _, _, 'https://'), !. +is_absolute_url(URL) :- + sub_atom(URL, 0, _, _, 'ftp://'), !. +is_absolute_url(URL) :- + sub_atom(URL, 0, _, _, 'file://'), !. +is_absolute_url(URL) :- + atom_codes(URL, Codes), + phrase(absolute_url, Codes, _), !. + + + /******************************* + * CREATE URL/URI * + *******************************/ + +%% http_location(?Parts, ?Location) +% +% Construct or analyze an HTTP location. This is similar to +% parse_url/2, but only deals with the location part of an HTTP +% URL. That is, the path, search and fragment specifiers. In the +% HTTP protocol, the first line of a message is +% +% == +% HTTP/ +% == +% +% @param Location Atom or list of character codes. + +http_location(Parts, Location) :- % Parts --> Location + nonvar(Parts), !, + phrase(curi(Parts), String), !, + atom_codes(Location, String). +http_location(Parts, Location) :- % Location --> Parts + atom(Location), !, + atom_codes(Location, Codes), + phrase(http_location(Parts), Codes). +http_location(Parts, Codes) :- % LocationCodes --> Parts + is_list(Codes), + phrase(http_location(Parts), Codes). + + +curl(A) --> + { memberchk(protocol(Protocol), A) + }, !, + catomic(Protocol), + ":", + curl(Protocol, A). +curl(A) --> + curl(http, A). + +curl(file, A) --> !, + ( "//" + -> cpath(A) + ; cpath(A) + ). +curl(_, A) --> + "//", + cuser(A), + chost(A), + cport(A), + cpath(A), + csearch(A), + cfragment(A). + +curi(A) --> + cpath(A), + csearch(A). + +cpath(A) --> + ( { memberchk(path(Path), A) } + -> { atom_codes(Path, Codes) }, + www_encode(Codes, "/+:,") + ; "" + ). + +cuser(A) --> + ( { memberchk(user(User), A) } + -> { atom_codes(User, Codes) }, + www_encode(Codes, ":"), + "@" + ; "" + ). + +chost(A) --> + ( { memberchk(host(Host), A) } + -> { atom_codes(Host, Codes) }, + www_encode(Codes, "") + ; "" + ). + +cport(A) --> + ( { memberchk(port(Port), A), Port \== 80 } + -> { number_codes(Port, Codes) }, + ":", + www_encode(Codes, "") + ; "" + ). + + +catomic(A, In, Out) :- + atom_codes(A, Codes), + append(Codes, Out, In). + +%% csearch(+Attributes)// + +csearch(A)--> + ( { memberchk(search(Parameters), A) } + -> csearch(Parameters, "?") + ; [] + ). + +csearch([], _) --> + []. +csearch([Parameter|Parameters], Sep) --> !, + codes(Sep), + cparam(Parameter), + csearch(Parameters, "&"). + +cparam(Name=Value) --> !, + cname(Name), + "=", + cvalue(Value). +cparam(NameValue) --> % allow to feed Name(Value) + { compound(NameValue), !, + NameValue =.. [Name,Value] + }, + cname(Name), + "=", + cvalue(Value). +cparam(Name)--> + cname(Name). + +codes([]) --> []. +codes([H|T]) --> [H], codes(T). + +cname(Atom) --> + { atom_codes(Atom, Codes) }, + www_encode(Codes, ""). + +%% cvalue(+Value)// is det. +% +% Construct a string from Value. Value is either atomic or a +% code-list. + +cvalue(Value) --> + { atomic(Value), !, + atom_codes(Value, Codes) + }, + www_encode(Codes, ""). +cvalue(Codes) --> + { must_be(codes, Codes) + }, + www_encode(Codes, ""). + + +%% cfragment(+Attributes)// + +cfragment(A) --> + { memberchk(fragment(Frag), A), !, + atom_codes(Frag, Codes) + }, + "#", + www_encode(Codes, ""). +cfragment(_) --> + "". + + + /******************************* + * PARSING * + *******************************/ + +%% parse_url(+URL, -Attributes) is det. +% +% Construct or analyse a URL. URL is an atom holding a URL or a +% variable. Parts is a list of components. Each component is of +% the format Name(Value). Defined components are: +% +% * protocol(Protocol) +% The used protocol. This is, after the optional =|url:|=, an +% identifier separated from the remainder of the URL using :. +% parse_url/2 assumes the =http= protocol if no protocol is +% specified and the URL can be parsed as a valid HTTP url. In +% addition to the RFC-1738 specified protocols, the =file= +% protocol is supported as well. +% +% * host(Host) +% Host-name or IP-address on which the resource is located. +% Supported by all network-based protocols. +% +% * port(Port) +% Integer port-number to access on the \arg{Host}. This only +% appears if the port is explicitly specified in the URL. +% Implicit default ports (e.g. 80 for HTTP) do \emph{not} +% appear in the part-list. +% +% * path(Path) +% (File-) path addressed by the URL. This is supported for the +% =ftp=, =http= and =file= protocols. If no path appears, the +% library generates the path =|/|=. +% +% * search(ListOfNameValue) +% Search-specification of HTTP URL. This is the part after the +% =|?|=, normally used to transfer data from HTML forms that +% use the =GET= protocol. In the URL it consists of a +% www-form-encoded list of Name=Value pairs. This is mapped to +% a list of Prolog Name=Value terms with decoded names and +% values. +% +% * fragment(Fragment) +% Fragment specification of HTTP URL. This is the part after +% the =|#|= character. +% +% The example below illustrates the all this for an HTTP URL. +% +% == +% ?- parse_url('http://swi.psy.uva.nl/message.cgi?msg=Hello+World%21#x', P). +% +% P = [ protocol(http), +% host('swi.psy.uva.nl'), +% fragment(x), +% search([ msg = 'Hello World!' +% ]), +% path('/message.cgi') +% ] +% == +% +% By instantiating the parts-list this predicate can be used to +% create a URL. + +parse_url(URL, Attributes) :- + nonvar(URL), !, + atom_codes(URL, Codes), + phrase(url(Attributes), Codes). +parse_url(URL, Attributes) :- + phrase(curl(Attributes), Codes), !, + atom_codes(URL, Codes). + +%% parse_url(+URL, +BaseURL, -Attributes) is det. +% +% Similar to parse_url/2 for relative URLs. If URL is relative, +% it is resolved using the absolute URL BaseURL. + +parse_url(URL, BaseURL, Attributes) :- + nonvar(URL), !, + atom_codes(URL, Codes), + ( phrase(absolute_url, Codes, _) + -> phrase(url(Attributes), Codes) + ; ( atomic(BaseURL) + -> parse_url(BaseURL, BaseA0) + ; BaseA0 = BaseURL + ), + select(path(BasePath), BaseA0, BaseA1), + delete(BaseA1, search(_), BaseA2), + delete(BaseA2, fragment(_), BaseA3), + phrase(relative_uri(URIA0), Codes), + select(path(LocalPath), URIA0, URIA1), !, + globalise_path(LocalPath, BasePath, Path), + append(BaseA3, [path(Path)|URIA1], Attributes) + ). +parse_url(URL, BaseURL, Attributes) :- + parse_url(BaseURL, BaseAttributes), + memberchk(path(BasePath), BaseAttributes), + ( memberchk(path(LocalPath), Attributes) + -> globalise_path(LocalPath, BasePath, Path) + ; Path = BasePath + ), + append([path(Path)|Attributes], BaseAttributes, GlobalAttributes), + phrase(curl(GlobalAttributes), Chars), + atom_codes(URL, Chars). + + +%% globalise_path(+LocalPath, +RelativeTo, -FullPath) is det. +% +% The first clause deals with the standard URL /... global paths. +% The second with file://drive:path on MS-Windows. This is a bit +% of a cludge, but unfortunately common practice is -especially on +% Windows- not always following the standard + +globalise_path(LocalPath, _, LocalPath) :- + sub_atom(LocalPath, 0, _, _, /), !. +globalise_path(LocalPath, _, LocalPath) :- + is_absolute_file_name(LocalPath), !. +globalise_path(Local, Base, Path) :- + base_dir(Base, BaseDir), + make_path(BaseDir, Local, Path). + +base_dir(BasePath, BaseDir) :- + ( atom_concat(BaseDir, /, BasePath) + -> true + ; file_directory_name(BasePath, BaseDir) + ). + +make_path(Dir, Local, Path) :- + atom_concat('../', L2, Local), + file_directory_name(Dir, Parent), + Parent \== Dir, !, + make_path(Parent, L2, Path). +make_path(/, Local, Path) :- !, + atom_concat(/, Local, Path). +make_path(Dir, Local, Path) :- + atomic_list_concat([Dir, /, Local], Path). + + +%% absolute_url// +% +% True if the input describes an absolute URL. This means it +% starts with a URL schema. We demand a schema of length > 1 to +% avoid confusion with Windows drive letters. + +absolute_url --> + lwalpha(_First), + schema_chars(Rest), + { Rest \== [] }, + ":", !. + + + /******************************* + * SEQUENCES * + *******************************/ + +digits(L) --> + digits(L, []). + +digits([C|T0], T) --> + digit(C), !, + digits(T0, T). +digits(T, T) --> + []. + + +digit(C, [C|T], T) :- code_type(C, digit). + + /******************************* + * RFC-3986 * + *******************************/ + +%% uri(-Parts)// + +url([protocol(Schema)|Parts]) --> + schema(Schema), + ":", !, + hier_part(Schema, Parts, P2), + query(P2, P3), + fragment(P3, []). +url([protocol(http)|Parts]) --> % implicit HTTP + authority(Parts, [path(Path)]), + path_abempty(Path). + +relative_uri(Parts) --> + relative_part(Parts, P2), + query(P2, P3), + fragment(P3, []). + +relative_part(Parts, Tail) --> + "//", !, + authority(Parts, [path(Path)|Tail]), + path_abempty(Path). +relative_part([path(Path)|T], T) --> + ( path_absolute(Path) + ; path_noschema(Path) + ; path_empty(Path) + ), !. + +http_location([path(Path)|P2]) --> + path_abempty(Path), + query(P2, P3), + fragment(P3, []). + +%% schema(-Atom)// +% +% Schema is case-insensitive and the canonical version is +% lowercase. +% +% == +% Schema ::= ALPHA *(ALPHA|DIGIT|"+"|"-"|".") +% == + +schema(Schema) --> + lwalpha(C0), + schema_chars(Codes), + { atom_codes(Schema, [C0|Codes]) }. + +schema_chars([H|T]) --> + schema_char(H), !, + schema_chars(T). +schema_chars([]) --> + []. + +schema_char(H) --> + [C], + { C < 128, + ( code_type(C, alpha) + -> code_type(H, to_lower(C)) + ; code_type(C, digit) + -> H = C + ; schema_extra(C) + -> H = C + ) + }. + +schema_extra(0'+). +schema_extra(0'-). +schema_extra(0'.). % 0' + + +%% hier_part(+Schema, -Parts, ?Tail)// + +hier_part(file, [path(Path)|Tail], Tail) --> !, + "//", + ( win_drive_path(Path) + ; path_absolute(Path) + ; path_rootless(Path) + ; path_empty(Path) + ), !. +hier_part(_, Parts, Tail) --> + "//", !, + authority(Parts, [path(Path)|Tail]), + path_abempty(Path). +hier_part(_, [path(Path)|T], T) --> + ( path_absolute(Path) + ; path_rootless(Path) + ; path_empty(Path) + ), !. + +authority(Parts, Tail) --> + user_info_chars(UserChars), + "@", !, + { atom_codes(User, UserChars), + Parts = [user(User),host(Host)|T0] + }, + host(Host), + port(T0,Tail). +authority([host(Host)|T0], Tail) --> + host(Host), + port(T0, Tail). + +user_info_chars([H|T]) --> + user_info_char(H), !, + user_info_chars(T). +user_info_chars([]) --> + []. + +user_info_char(_) --> "@", !, {fail}. +user_info_char(C) --> pchar(C). + +%host(Host) --> ip_literal(Host), !. % TBD: IP6 addresses +host(Host) --> ip4_address(Host), !. +host(Host) --> reg_name(Host). + +ip4_address(Atom) --> + i256_chars(Chars, [0'.|T0]), + i256_chars(T0, [0'.|T1]), + i256_chars(T1, [0'.|T2]), + i256_chars(T2, []), + { atom_codes(Atom, Chars) }. + +i256_chars(Chars, T) --> + digits(Chars, T), + \+ \+ { T = [], + Chars \== [], + number_codes(I, Chars), + I < 256 + }. + +reg_name(Host) --> + reg_name_chars(Chars), + { atom_codes(Host, Chars) }. + +reg_name_chars([H|T]) --> + reg_name_char(H), !, + reg_name_chars(T). +reg_name_chars([]) --> + []. + +reg_name_char(C) --> + pchar(C), + { C \== 0':, + C \== 0'@ + }. + +port([port(Port)|T], T) --> + ":", !, + digit(D0), + digits(Ds), + { number_codes(Port, [D0|Ds]) }. +port(T, T) --> + []. + +path_abempty(Path) --> + segments_chars(Chars, []), + { Chars == [] + -> Path = '/' + ; atom_codes(Path, Chars) + }. + + +win_drive_path(Path) --> + drive_letter(C0), + ":", + ( "/" + -> {Codes = [C0, 0':, 0'/|Chars]} + ; {Codes = [C0, 0':|Chars]} + ), + segment_nz_chars(Chars, T0), + segments_chars(T0, []), + { atom_codes(Path, Codes) }. + + +path_absolute(Path) --> + "/", + segment_nz_chars(Chars, T0), + segments_chars(T0, []), + { atom_codes(Path, [0'/| Chars]) }. + +path_noschema(Path) --> + segment_nz_nc_chars(Chars, T0), + segments_chars(T0, []), + { atom_codes(Path, Chars) }. + +path_rootless(Path) --> + segment_nz_chars(Chars, T0), + segments_chars(T0, []), + { atom_codes(Path, Chars) }. + +path_empty('/') --> + "". + +segments_chars([0'/|Chars], T) --> % 0' + "/", !, + segment_chars(Chars, T0), + segments_chars(T0, T). +segments_chars(T, T) --> + []. + +segment_chars([H|T0], T) --> + pchar(H), !, + segment_chars(T0, T). +segment_chars(T, T) --> + []. + +segment_nz_chars([H|T0], T) --> + pchar(H), + segment_chars(T0, T). + +segment_nz_nc_chars([H|T0], T) --> + segment_nz_nc_char(H), !, + segment_nz_nc_chars(T0, T). +segment_nz_nc_chars(T, T) --> + []. + +segment_nz_nc_char(_) --> ":", !, {fail}. +segment_nz_nc_char(C) --> pchar(C). + + +%% query(-Parts, ?Tail)// is det. +% +% Extract &Name=Value, ... + +query([search(Params)|T], T) --> + "?", !, + search(Params). +query(T,T) --> + []. + +search([Parameter|Parameters])--> + parameter(Parameter), !, + ( search_sep + -> search(Parameters) + ; { Parameters = [] } + ). +search([]) --> + []. + +parameter(Param)--> !, + search_chars(NameS), + { atom_codes(Name, NameS) + }, + ( "=" + -> search_value_chars(ValueS), + { atom_codes(Value, ValueS), + Param = (Name = Value) + } + ; { Param = Name + } + ). + +search_chars([C|T]) --> + search_char(C), !, + search_chars(T). +search_chars([]) --> + []. + +search_char(_) --> search_sep, !, { fail }. +search_char(_) --> "=", !, { fail }. +search_char(C) --> fragment_char(C). + +search_value_chars([C|T]) --> + search_value_char(C), !, + search_value_chars(T). +search_value_chars([]) --> + []. + +search_value_char(_) --> search_sep, !, { fail }. +search_value_char(C) --> fragment_char(C). + +%% search_sep// is semidet. +% +% Matches a search-parameter separator. Traditonally, this is the +% &-char, but these days there are `newstyle' ;-char separators. +% +% @see http://perldoc.perl.org/CGI.html +% @tbd This should be configurable + +search_sep --> "&", !. +search_sep --> ";". + + +%% fragment(-Fragment, ?Tail)// +% +% Extract the fragment (after the =#=) + +fragment([fragment(Fragment)|T], T) --> + "#", !, + fragment_chars(Codes), + { atom_codes(Fragment, Codes) }. +fragment(T, T) --> + []. + +fragment_chars([H|T]) --> + fragment_char(H), !, + fragment_chars(T). +fragment_chars([]) --> + []. + + +%% fragment_char(-Char) +% +% Find a fragment character. + +fragment_char(C) --> pchar(C), !. +fragment_char(0'/) --> "/", !. +fragment_char(0'?) --> "?", !. +fragment_char(0'[) --> "[", !. % Not according RDF3986! +fragment_char(0']) --> "]", !. + + + /******************************* + * CHARACTER CLASSES * + *******************************/ + +%% pchar(-Code)// +% +% unreserved|pct_encoded|sub_delim|":"|"@" +% +% Performs UTF-8 decoding of percent encoded strings. + +pchar(0' ) --> "+", !. %' ? +pchar(C) --> + [C], + { unreserved(C) + ; sub_delim(C) + ; C == 0': + ; C == 0'@ + }, !. +pchar(C) --> + percent_coded(C). + +%% lwalpha(-C)// +% +% Demand alpha, return as lowercase + +lwalpha(H) --> + [C], + { C < 128, + code_type(C, alpha), + code_type(H, to_lower(C)) + }. + +drive_letter(C) --> + [C], + { C < 128, + code_type(C, alpha) + }. + + + /******************************* + * RESERVED CHARACTERS * + *******************************/ + +%% sub_delim(?Code) +% +% Sub-delimiters + +sub_delim(0'!). +sub_delim(0'$). +sub_delim(0'&). +sub_delim(0''). +sub_delim(0'(). +sub_delim(0')). +sub_delim(0'*). +sub_delim(0'+). +sub_delim(0',). +sub_delim(0';). +sub_delim(0'=). + + +%% unreserved(+C) +% +% Characters that can be represented without procent escaping +% RFC 3986, section 2.3 + +term_expansion(unreserved(map), Clauses) :- + findall(unreserved(C), unreserved_(C), Clauses). + +unreserved_(C) :- + between(1, 128, C), + code_type(C, alnum). +unreserved_(0'-). +unreserved_(0'.). +unreserved_(0'_). +unreserved_(0'~). % 0' + +unreserved(map). % Expanded + + + /******************************* + * FORMS * + *******************************/ + +/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Encoding/decoding of form-fields using the popular www-form-encoded +encoding used with the HTTP GET. +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ + +%% www_form_encode(+Value, -XWWWFormEncoded) is det. +%% www_form_encode(-Value, +XWWWFormEncoded) is det. +% +% En/Decode between native value and +% application/x-www-form-encoded. Maps space to +, keeps alnum, +% maps anything else to =|%XX|= and newlines to =|%OD%OA|=. When +% decoding, newlines appear as a single newline (10) character. + +www_form_encode(Value, Encoded) :- + atomic(Value), !, + atom_codes(Value, Codes), + phrase(www_encode(Codes, ""), EncCodes), + atom_codes(Encoded, EncCodes). +www_form_encode(Value, Encoded) :- + atom_codes(Encoded, EncCodes), + phrase(www_decode(Codes), EncCodes), + atom_codes(Value, Codes). + +%% www_encode(+Codes, +ExtraUnescaped)// + +www_encode([0'\r, 0'\n|T], Extra) --> !, + "%0D%0A", + www_encode(T, Extra). +www_encode([0'\n|T], Extra) --> !, + "%0D%0A", + www_encode(T, Extra). +www_encode([H|T], Extra) --> + percent_encode(H, Extra), + www_encode(T, Extra). +www_encode([], _) --> + "". + +percent_encode(C, _Extra) --> + { unreserved(C) }, !, + [C]. +percent_encode(C, Extra) --> + { memberchk(C, Extra) }, !, + [C]. +%percent_encode(0' , _) --> !, "+". % Deprecated: use %20 +percent_encode(C, _) --> + { C =< 127 }, !, + percent_byte(C). +percent_encode(C, _) --> % Unicode characters + { current_prolog_flag(url_encoding, utf8), !, + phrase(utf8_codes([C]), Bytes) + }, + percent_bytes(Bytes). +percent_encode(C, _) --> + { C =< 255 }, !, + percent_byte(C). +percent_encode(_C, _) --> + { representation_error(url_character) + }. + +percent_bytes([]) --> + "". +percent_bytes([H|T]) --> + percent_byte(H), + percent_bytes(T). + +percent_byte(C) --> + [0'%, D1, D2], + { nonvar(C) + -> Dv1 is (C>>4 /\ 0xf), + Dv2 is (C /\ 0xf), + code_type(D1, xdigit(Dv1)), + code_type(D2, xdigit(Dv2)) + ; code_type(D1, xdigit(Dv1)), + code_type(D2, xdigit(Dv2)), + C is ((Dv1)<<4) + Dv2 + }. + +percent_coded(C) --> + percent_byte(C0), !, + ( { C0 == 13 % %0D%0A --> \n + }, + "%0", + ( "A" ; "a" ) + -> { C = 10 + } + ; { C0 >= 0xc0 }, % UTF-8 lead-in + utf8_cont(Cs), + { phrase(utf8_codes([C]), [C0|Cs]) } + -> [] + ; { C = C0 + } + ). + +%% www_decode(-Codes)// + +www_decode([0' |T]) --> + "+", !, + www_decode(T). +www_decode([C|T]) --> + percent_coded(C), !, + www_decode(T). +www_decode([C|T]) --> + [C], !, + www_decode(T). +www_decode([]) --> + []. + +utf8_cont([H|T]) --> + percent_byte(H), + { between(0x80, 0xbf, H) }, !, + utf8_cont(T). +utf8_cont([]) --> + []. + + +%% set_url_encoding(?Old, +New) is semidet. +% +% Query and set the encoding for URLs. The default is =utf8=. +% The only other defined value is =iso_latin_1=. +% +% @tbd Having a global flag is highly inconvenient, but a +% work-around for old sites using ISO Latin 1 encoding. + +:- create_prolog_flag(url_encoding, utf8, [type(atom)]). + +set_url_encoding(Old, New) :- + current_prolog_flag(url_encoding, Old), + ( Old == New + -> true + ; must_be(oneof([utf8, iso_latin_1]), New), + set_prolog_flag(url_encoding, New) + ). + + + /******************************* + * IRI PROCESSING * + *******************************/ + +%% url_iri(+Encoded, -Decoded) is det. +%% url_iri(-Encoded, +Decoded) is det. +% +% Convert between a URL, encoding in US-ASCII and an IRI. An IRI +% is a fully expanded Unicode string. Unicode strings are first +% encoded into UTF-8, after which %-encoding takes place. + +url_iri(Encoded, Decoded) :- + nonvar(Encoded), !, + ( sub_atom(Encoded, _, _, _, '%') + -> atom_codes(Encoded, Codes), + unescape_precent(Codes, UTF8), + phrase(utf8_codes(Unicodes), UTF8), + atom_codes(Decoded, Unicodes) + ; Decoded = Encoded + ). +url_iri(URL, IRI) :- + atom_codes(IRI, IRICodes), + phrase(percent_encode(IRICodes, "/:?#&="), UrlCodes), + atom_codes(URL, UrlCodes). + + +unescape_precent([], []). +unescape_precent([0'%,C1,C2|T0], [H|T]) :- !, %' + code_type(C1, xdigit(D1)), + code_type(C2, xdigit(D2)), + H is D1*16 + D2, + unescape_precent(T0, T). +unescape_precent([H|T0], [H|T]) :- + unescape_precent(T0, T). + + + /******************************* + * FORM DATA * + *******************************/ + +%% parse_url_search(?Spec, ?Fields:list(Name=Value)) is det. +% +% Construct or analyze an HTTP search specification. This deals +% with form data using the MIME-type +% =application/x-www-form-urlencoded= as used in HTTP GET +% requests. + +parse_url_search(Spec, Fields) :- + atomic(Spec), !, + atom_codes(Spec, Codes), + phrase(search(Fields), Codes). +parse_url_search(Codes, Fields) :- + is_list(Codes), !, + phrase(search(Fields), Codes). +parse_url_search(Codes, Fields) :- + must_be(list, Fields), + phrase(csearch(Fields, ""), Codes). + + + /******************************* + * FILE URLs * + *******************************/ + +%% file_name_to_url(+File, -URL) is det. +%% file_name_to_url(-File, +URL) is semidet. +% +% Translate between a filename and a file:// URL. +% +% @tbd Current implementation does not deal with paths that +% need special encoding. + +file_name_to_url(File, FileURL) :- + nonvar(File), !, + absolute_file_name(File, Path), + atom_concat('file://', Path, FileURL), !. +file_name_to_url(File, FileURL) :- + atom_concat('file://', File, FileURL), !. + diff --git a/LGPL/utf8.pl b/LGPL/utf8.pl new file mode 100644 index 000000000..030d7ba26 --- /dev/null +++ b/LGPL/utf8.pl @@ -0,0 +1,134 @@ +/* $Id$ + + Part of SWI-Prolog + + Author: Jan Wielemaker + E-mail: wielemak@science.uva.nl + WWW: http://www.swi-prolog.org + Copyright (C): 1985-2005, University of Amsterdam + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + As a special exception, if you link this library with other files, + compiled with a Free Software compiler, to produce an executable, this + library does not by itself cause the resulting executable to be covered + by the GNU General Public License. This exception does not however + invalidate any other reasons why the executable file might be covered by + the GNU General Public License. +*/ + +:- module(utf8, + [ utf8_codes//1 % ?String + ]). + +%% utf8_codes(?Codes)// is det. +% +% DCG translating between a Unicode code-list and its UTF-8 +% encoded byte-string. The DCG works two ways. Encoding a +% code-list to a UTF-8 byte string is achieved using +% +% phrase(utf8_codes(Codes), UTF8) +% +% The algorithm is a close copy of the C-algorithm used +% internally and defined in src/pl-utf8.c +% +% NOTE: in many cases you can avoid this library and leave +% encoding and decoding to I/O streams. If only part of the data +% is to be encoded the encoding of a stream can be switched +% temporary using set_stream(Stream, encoding(utf8)) + +utf8_codes([H|T]) --> + utf8_code(H), !, + utf8_codes(T). +utf8_codes([]) --> + []. + +utf8_code(C) --> + [C0], + { nonvar(C0) }, !, % decoding version + ( {C0 < 0x80} + -> {C = C0} + ; {C0/\0xe0 =:= 0xc0} + -> utf8_cont(C1, 0), + {C is (C0/\0x1f)<<6\/C1} + ; {C0/\0xf0 =:= 0xe0} + -> utf8_cont(C1, 6), + utf8_cont(C2, 0), + {C is ((C0/\0xf)<<12)\/C1\/C2} + ; {C0/\0xf8 =:= 0xf0} + -> utf8_cont(C1, 12), + utf8_cont(C2, 6), + utf8_cont(C3, 0), + {C is ((C0/\0x7)<<18)\/C1\/C2\/C3} + ; {C0/\0xfc =:= 0xf8} + -> utf8_cont(C1, 18), + utf8_cont(C2, 12), + utf8_cont(C3, 6), + utf8_cont(C4, 0), + {C is ((C0/\0x3)<<24)\/C1\/C2\/C3\/C4} + ; {C0/\0xfe =:= 0xfc} + -> utf8_cont(C1, 24), + utf8_cont(C2, 18), + utf8_cont(C3, 12), + utf8_cont(C4, 6), + utf8_cont(C5, 0), + {C is ((C0/\0x1)<<30)\/C1\/C2\/C3\/C4\/C5} + ). +utf8_code(C) --> + { nonvar(C) }, !, % encoding version + ( { C < 0x80 } + -> [C] + ; { C < 0x800 } + -> { C0 is 0xc0\/((C>>6)/\0x1f), + C1 is 0x80\/(C/\0x3f) + }, + [C0,C1] + ; { C < 0x10000 } + -> { C0 is 0xe0\/((C>>12)/\0x0f), + C1 is 0x80\/((C>>6)/\0x3f), + C2 is 0x80\/(C/\0x3f) + }, + [C0,C1,C2] + ; { C < 0x200000 } + -> { C0 is 0xf0\/((C>>18)/\0x07), + C1 is 0x80\/((C>>12)/\0x3f), + C2 is 0x80\/((C>>6)/\0x3f), + C3 is 0x80\/(C/\0x3f) + }, + [C0,C1,C2,C3] + ; { C < 0x4000000 } + -> { C0 is 0xf8\/((C>>24)/\0x03), + C1 is 0x80\/((C>>18)/\0x3f), + C2 is 0x80\/((C>>12)/\0x3f), + C3 is 0x80\/((C>>6)/\0x3f), + C4 is 0x80\/(C/\0x3f) + }, + [C0,C1,C2,C3,C4] + ; { C < 0x80000000 } + -> { C0 is 0xfc\/((C>>30)/\0x01), + C1 is 0x80\/((C>>24)/\0x3f), + C2 is 0x80\/((C>>18)/\0x3f), + C3 is 0x80\/((C>>12)/\0x3f), + C4 is 0x80\/((C>>6)/\0x3f), + C5 is 0x80\/(C/\0x3f) + }, + [C0,C1,C2,C3,C4,C5] + ). + +utf8_cont(Val, Shift) --> + [C], + { C/\0xc0 =:= 0x80, + Val is (C/\0x3f)<