167 lines
5.3 KiB
Perl
167 lines
5.3 KiB
Perl
|
/* $Id$
|
||
|
|
||
|
Part of SWI-Prolog
|
||
|
|
||
|
Author: Jan Wielemaker
|
||
|
E-mail: J.Wielemaker@cs.vu.nl
|
||
|
WWW: http://www.swi-prolog.org
|
||
|
Copyright (C): 1985-2010, University of Amsterdam
|
||
|
VU University Amsterdam
|
||
|
|
||
|
This program is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU General Public License
|
||
|
as published by the Free Software Foundation; either version 2
|
||
|
of the License, or (at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public
|
||
|
License along with this library; if not, write to the Free Software
|
||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
||
|
As a special exception, if you link this library with other files,
|
||
|
compiled with a Free Software compiler, to produce an executable, this
|
||
|
library does not by itself cause the resulting executable to be covered
|
||
|
by the GNU General Public License. This exception does not however
|
||
|
invalidate any other reasons why the executable file might be covered by
|
||
|
the GNU General Public License.
|
||
|
*/
|
||
|
|
||
|
|
||
|
:- module(rdf_http_plugin, []).
|
||
|
:- use_module(library(http/http_open)).
|
||
|
:- use_module(library(http/http_header)).
|
||
|
:- use_module(library(semweb/rdf_db)).
|
||
|
:- use_module(library(date)).
|
||
|
:- use_module(library(error)).
|
||
|
|
||
|
|
||
|
/** <module> RDF HTTP Plugin
|
||
|
|
||
|
This module allows loading data into the semantic web library directly
|
||
|
from an HTTP server. The following example loads the RDF core data into
|
||
|
the RDF database.
|
||
|
|
||
|
==
|
||
|
:- use_module(library(semweb/rdf_db)).
|
||
|
:- use_module(library(semweb/rdf_http_plugin)).
|
||
|
|
||
|
...,
|
||
|
rdf_load('http://www.w3.org/1999/02/22-rdf-syntax-ns')
|
||
|
==
|
||
|
*/
|
||
|
|
||
|
% This plugin adds clauses to two predicates that live in module
% rdf_db, so both must be declared multifile before the clauses
% below are loaded.

:- multifile rdf_db:rdf_open_hook/8.
:- multifile rdf_db:url_protocol/1.

% Add `http' to the rdf_db:url_protocol/1 table.  Presumably this is
% how rdf_db learns that http URLs are handled by a hook -- confirm
% against rdf_db.

rdf_db:url_protocol(http).
|
||
|
|
||
|
|
||
|
%%  rdf_extra_headers(-List) is det.
%
%   List is a list of http_open/3 options that is sent along with
%   every request.  It currently holds a single =Accept= request
%   header that prefers RDF/XML and Turtle and accepts anything else
%   with a low quality value.
%
%   Note that, although we also process RDF embedded in HTML, we do
%   not explicitly ask for it.  Doing so causes some servers (e.g.,
%   http://w3.org/2004/02/skos/core) to reply with the HTML
%   description rather than the RDF.
%
%   The header value is split over several source lines using the
%   =|\c|= escape, which skips all layout up to the next non-layout
%   character.  The plain backslash-newline continuation is
%   deprecated in SWI-Prolog and, in ISO mode, would leave the
%   indentation of the following line inside the header value.

rdf_extra_headers(
    [ request_header('Accept' = 'application/rdf+xml, \c
                                 text/rdf+xml; q=0.9, \c
                                 text/turtle, \c
                                 application/x-turtle; q=0.8, \c
                                 */*; q=0.1')
    ]).
|
||
|
|
||
|
|
||
|
%%  rdf_db:rdf_open_hook(+Scheme, +SourceURL, ?HaveModified, -Stream,
%%                       -Cleanup, -Modified, ?Format, +Options)
%
%   Clause for the rdf_db open hook that handles Scheme =http= by
%   opening SourceURL with http_open/3.
%
%   The options passed to http_open/3 are, in this order: the result
%   of rdf_extra_headers/1, two header(Name, Value) options that
%   capture the =|Content-Type|= and =|Last-Modified|= reply headers,
%   the conditional request header produced by
%   modified_since_header/2 from HaveModified, and finally the
%   caller's Options.
%
%   Outcomes:
%
%     * http_open/3 succeeds and open_envelope/5 succeeds:
%       Stream is the stream returned by open_envelope/5, Cleanup is
%       close(Stream), and Modified is last_modified(Stamp) if the
%       =|Last-Modified|= header could be parsed by parse_time/2 or
%       =unknown= otherwise.
%     * http_open/3 succeeds but open_envelope/5 fails:
%       the HTTP stream is closed and domain_error/2 is raised for
%       =content_type= with the received ContentType.
%     * http_open/3 raises an error whose context is status(304, _):
%       Modified is =not_modified= and Cleanup is =true=.  Stream is
%       left unbound.
%     * Any other exception from http_open/3 is re-thrown.
%
%   @error domain_error(content_type, ContentType) if no format can
%          be deduced for the reply.

rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
                     Modified, Format, Options) :-
    modified_since_header(HaveModified, Header),
    TypeHdr = [ header(content_type, ContentType),
                header(last_modified, ModifiedText)
              ],
    rdf_extra_headers(Extra),
    append([Extra, TypeHdr, Header, Options], OpenOptions),
    % E stays unbound if and only if http_open/3 completed normally.
    catch(http_open(SourceURL, Stream0, OpenOptions), E, true),
    (   var(E)
    ->  (   % Do not leak the HTTP connection if opening the
            % envelope raises an exception: close it and re-throw.
            catch(open_envelope(ContentType, SourceURL,
                                Stream0, Stream, Format),
                  EnvelopeError,
                  ( close(Stream0, [force(true)]),
                    throw(EnvelopeError)
                  ))
        ->  Cleanup = close(Stream),
            (   nonvar(ModifiedText),
                parse_time(ModifiedText, ModifiedStamp)
            ->  Modified = last_modified(ModifiedStamp)
            ;   Modified = unknown
            )
        ;   close(Stream0),
            domain_error(content_type, ContentType)
        )
    ;   % subsumes_term/2 is the ISO replacement for the deprecated
        % subsumes_chk/2; it tests the pattern without binding E.
        subsumes_term(error(_, context(_, status(304, _))), E)
    ->  Modified = not_modified,
        Cleanup = true
    ;   throw(E)
    ).
|
||
|
|
||
|
|
||
|
%%  modified_since_header(+LastModified, -ExtraHeaders) is det.
%
%   ExtraHeaders is the list of request options needed to make the
%   request conditional on LastModified.  If LastModified is unbound
%   there is nothing to compare against and ExtraHeaders is the
%   empty list.  Otherwise ExtraHeaders holds one
%   =|If-modified-since|= request header whose value is LastModified
%   converted by http_timestamp/2.

modified_since_header(LastModified, Headers) :-
    var(LastModified),
    Headers = [], !.
modified_since_header(LastModified, [request_header(Field = HttpDate)]) :-
    Field = 'If-modified-since',
    http_timestamp(LastModified, HttpDate).
|
||
|
|
||
|
%%  open_envelope(+ContentType, +SourceURL, +Stream0, -Stream,
%%                ?Format) is semidet.
%
%   Unwrap a possible envelope around the RDF data and settle the
%   Format.  Three cases are tried in order:
%
%     1. ContentType is exactly =|application/x-gzip|= and
%        rdf_db:rdf_storage_encoding/2 lists =gzip=.  Stream is
%        Stream0 wrapped by rdf_zlib_plugin:zopen/3.  If Format is
%        unbound, it is looked up with rdf_db:rdf_file_type/2 from
%        the extension that remains after dropping the last
%        extension of SourceURL.
%     2. Format is already known: Stream is Stream0.
%     3. Otherwise Format is deduced from the media type of
%        ContentType using content_type_format/2.  If that fails and
%        the media type is =|text/plain|=, the extension of SourceURL
%        is used instead.
%
%   Fails if none of these yields a format.

open_envelope('application/x-gzip', SourceURL, Raw, Stream, Format) :-
    rdf_db:rdf_storage_encoding(_, gzip), !,
    (   nonvar(Format)
    ->  true
    ;   % e.g. the first call strips the compression extension,
        % the second extracts the extension of what is left.
        file_name_extension(Inner, _GzExt, SourceURL),
        file_name_extension(_, Ext, Inner),
        rdf_db:rdf_file_type(Ext, Format)
    ),
    rdf_zlib_plugin:zopen(Raw, Stream, []).
open_envelope(_ContentType, _SourceURL, Stream, Stream, Format) :-
    nonvar(Format), !.
open_envelope(ContentType, SourceURL, Stream, Stream, Format) :-
    major_content_type(ContentType, MimeType),
    (   content_type_format(MimeType, Format)
    ->  true
    ;   MimeType == 'text/plain',       % server is not properly configured
        file_name_extension(_, Ext, SourceURL),
        rdf_db:rdf_file_type(Ext, Format)
    ).
|
||
|
|
||
|
%%  major_content_type(+ContentType, -Major) is det.
%
%   Major is the media type part of the =|Content-Type|= value
%   ContentType: the text before the first =|;|=, or all of
%   ContentType if it has no parameters.  White space around the
%   media type is removed and the result is converted to lower case,
%   because media types are case-insensitive and white space may
%   precede the =|;|=.  Without this, a value such as
%   =|Text/Turtle ; charset=utf-8|= would not match the lower-case
%   keys of content_type_format/2.

major_content_type(ContentType, Major) :-
    (   sub_atom(ContentType, Before, _, _, (;))   % first `;' only
    ->  sub_atom(ContentType, 0, Before, _, Major0)
    ;   Major0 = ContentType
    ),
    normalize_space(atom(Trimmed), Major0),
    downcase_atom(Trimmed, Major).
|
||
|
|
||
|
%%  content_type_format(+ContentType, -Format) is semidet.
%
%   True when Format is the RDF encoding associated with the
%   mime-type ContentType.  ContentType must be the bare media type,
%   without parameters.
%
%   @bug The turtle parser only parses a subset of n3.

% RDF/XML
content_type_format('text/rdf',              xml).
content_type_format('text/rdf+xml',          xml).
content_type_format('application/rdf+xml',   xml).
% Turtle
content_type_format('application/x-turtle',  turtle).
content_type_format('application/turtle',    turtle).
content_type_format('text/turtle',           turtle).
content_type_format('text/rdf+n3',           turtle).   % Bit dubious
% RDF embedded in (X)HTML
content_type_format('text/html',             xhtml).
content_type_format('application/xhtml+xml', xhtml).
% Compressed data
content_type_format('application/x-gzip',    gzip).
|