129 lines
4.2 KiB
Perl
129 lines
4.2 KiB
Perl
|
/* $Id$
|
||
|
|
||
|
Part of SWI-Prolog
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: J.Wielemaker@cs.vu.nl
|
||
|
WWW: http://www.swi-prolog.org
|
||
|
Copyright (C): 1985-2010, University of Amsterdam, VU University Amsterdam
|
||
|
|
||
|
This program is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU General Public License
|
||
|
as published by the Free Software Foundation; either version 2
|
||
|
of the License, or (at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
||
|
As a special exception, if you link this library with other files,
|
||
|
compiled with a Free Software compiler, to produce an executable, this
|
||
|
library does not by itself cause the resulting executable to be covered
|
||
|
by the GNU General Public License. This exception does not however
|
||
|
invalidate any other reasons why the executable file might be covered by
|
||
|
the GNU General Public License.
|
||
|
*/
|
||
|
|
||
|
:- module(http_sgml_plugin, []).
|
||
|
:- use_module(http_client).
|
||
|
:- use_module(library(memfile)).
|
||
|
:- use_module(library(sgml)).
|
||
|
:- use_module(library(lists)).
|
||
|
:- use_module(library(debug)).
|
||
|
|
||
|
:- multifile
|
||
|
http_client:http_convert_data/4.
|
||
|
|
||
|
:- multifile
|
||
|
markup_type/2. % +MimeType, -ParseOptions
|
||
|
|
||
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||
|
This module provides a plugin for the HTTP client to handle xml, html
|
||
|
and sgml files using the SWI-Prolog sgml-parser from library(sgml).
|
||
|
Using this library avoids unnecessary copying of data as the sgml-parser
|
||
|
reads directly from the stream that established the HTTP connection.
|
||
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
||
|
|
||
|
http_client:http_convert_data(In, Fields, Data, Options) :-
|
||
|
memberchk(content_type(Type), Fields),
|
||
|
debug(sgml_plugin, 'Content type: ~w', [Type]),
|
||
|
( markup_type(Type, ParseOptions)
|
||
|
-> true
|
||
|
; type_major_props(Type, Major, Props),
|
||
|
default_markup_type(Major, ParseOptions0),
|
||
|
type_props(Props, ParseOptions0, ParseOptions)
|
||
|
),
|
||
|
merge_options([ max_errors(-1),
|
||
|
syntax_errors(quiet)
|
||
|
| ParseOptions
|
||
|
], Options, Merged),
|
||
|
markup_options(Fields, Merged, MarkupOptions),
|
||
|
debug(sgml_plugin, 'Markup options: ~p', [MarkupOptions]),
|
||
|
load_structure(stream(In), Data, MarkupOptions).
|
||
|
|
||
|
|
||
|
type_major_props(Type0, Type, Props) :-
|
||
|
sub_atom(Type0, B, _, A, ;), !,
|
||
|
sub_atom(Type0, 0, B, _, Major),
|
||
|
sub_atom(Type0, _, A, 0, Props),
|
||
|
normalize_space(atom(Type), Major).
|
||
|
type_major_props(Type, Type, '').
|
||
|
|
||
|
type_props('', L, L).
|
||
|
type_props(Props, L0, L) :-
|
||
|
sub_atom(Props, _, _, A, 'charset='),
|
||
|
sub_atom(Props, _, A, 0, CharSet0),
|
||
|
downcase_atom(CharSet0, CharSet),
|
||
|
known_charset(CharSet),
|
||
|
L = [encoding(CharSet)|L0].
|
||
|
type_props(_, L, L).
|
||
|
|
||
|
known_charset('iso-8859-1').
|
||
|
known_charset('us-ascii').
|
||
|
known_charset('utf-8').
|
||
|
|
||
|
|
||
|
%% default_markup_type(+MimeType, -ParseOptions)
|
||
|
%
|
||
|
% State that the HTTP contents should be parsed with
|
||
|
% load_structure/3 using the returned options. This predicate may
|
||
|
% be hooked using the multifile predicate markup_type/2.
|
||
|
|
||
|
default_markup_type('text/xml',
|
||
|
[ dialect(xmlns)
|
||
|
]).
|
||
|
default_markup_type('text/html',
|
||
|
[ dtd(DTD),
|
||
|
dialect(sgml),
|
||
|
shorttag(false)
|
||
|
]) :-
|
||
|
dtd(html, DTD).
|
||
|
default_markup_type('text/x-sgml',
|
||
|
[ dialect(sgml)
|
||
|
]).
|
||
|
|
||
|
markup_options(Fields, Opt0, Options) :-
|
||
|
( memberchk(content_length(Bytes), Fields)
|
||
|
-> Options = [content_length(Bytes)|Opt0]
|
||
|
; Options = Opt0
|
||
|
).
|
||
|
|
||
|
%% merge_options(+Defaults, +GivenOptions, -Options)
|
||
|
%
|
||
|
% If an option is not in GivenOptions, use the one from
|
||
|
% Defaults.
|
||
|
|
||
|
merge_options([], Options, Options).
|
||
|
merge_options([H|T], Options0, Options) :-
|
||
|
functor(H, Name, Arity),
|
||
|
functor(H0, Name, Arity),
|
||
|
memberchk(H0, Options0), !,
|
||
|
merge_options(T, Options0, Options).
|
||
|
merge_options([H|T], Options0, Options) :-
|
||
|
merge_options(T, [H|Options0], Options).
|