167 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			167 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
/*  $Id$
 | 
						|
 | 
						|
    Part of SWI-Prolog
 | 
						|
 | 
						|
    Author:        Jan Wielemaker
 | 
						|
    E-mail:        J.Wielemaker@cs.vu.nl
 | 
						|
    WWW:           http://www.swi-prolog.org
 | 
						|
    Copyright (C): 1985-2010, University of Amsterdam
 | 
						|
			      VU University Amsterdam
 | 
						|
 | 
						|
    This program is free software; you can redistribute it and/or
 | 
						|
    modify it under the terms of the GNU General Public License
 | 
						|
    as published by the Free Software Foundation; either version 2
 | 
						|
    of the License, or (at your option) any later version.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
    GNU General Public License for more details.
 | 
						|
 | 
						|
    You should have received a copy of the GNU General Public
 | 
						|
    License along with this library; if not, write to the Free Software
 | 
						|
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
 | 
						|
    As a special exception, if you link this library with other files,
 | 
						|
    compiled with a Free Software compiler, to produce an executable, this
 | 
						|
    library does not by itself cause the resulting executable to be covered
 | 
						|
    by the GNU General Public License. This exception does not however
 | 
						|
    invalidate any other reasons why the executable file might be covered by
 | 
						|
    the GNU General Public License.
 | 
						|
*/
 | 
						|
 | 
						|
 | 
						|
:- module(rdf_http_plugin, []).
 | 
						|
:- use_module(library(http/http_open)).
 | 
						|
:- use_module(library(http/http_header)).
 | 
						|
:- use_module(library(semweb/rdf_db)).
 | 
						|
:- use_module(library(date)).
 | 
						|
:- use_module(library(error)).
 | 
						|
 | 
						|
 | 
						|
/** <module> RDF HTTP Plugin
 | 
						|
 | 
						|
This module allows loading data into   the semantic web library directly
 | 
						|
from an HTTP server. The following example  loads the RDF core data into
 | 
						|
the RDF database.
 | 
						|
 | 
						|
    ==
 | 
						|
    :- use_module(library(semweb/rdf_db)).
 | 
						|
    :- use_module(library(semweb/rdf_http_plugin)).
 | 
						|
 | 
						|
	...,
 | 
						|
	rdf_load('http://www.w3.org/1999/02/22-rdf-syntax-ns')
 | 
						|
    ==
 | 
						|
*/
 | 
						|
 | 
						|
% This plugin adds clauses to two predicates that are owned by the
% rdf_db module, so both must be declared multifile here.
:- multifile rdf_db:rdf_open_hook/8.
:- multifile rdf_db:url_protocol/1.

% Tell rdf_db that `http` is a URL protocol.
rdf_db:url_protocol(http).
 | 
						|
 | 
						|
 | 
						|
%%	rdf_extra_headers(-List)
%
%	List is a list of extra options that is passed to http_open/3 in
%	order to send additional headers with the request.  Currently it
%	holds a single =Accept= request header.
%
%	Note that, although we also process RDF embedded in HTML, we do
%	not explicitly ask for it. Doing so causes some servers (e.g.,
%	http://w3.org/2004/02/skos/core) to reply with the HTML
%	description rather than the RDF.
%
%	The header value is one long atom that is split over several
%	source lines using the =|\c|= escape, which skips all layout up
%	to the next non-layout character. The space that precedes each
%	=|\c|= is part of the value.

rdf_extra_headers(
    [ request_header('Accept' = 'application/rdf+xml, \c
                                 text/rdf+xml; q=0.9, \c
                                 text/turtle, \c
                                 application/x-turtle; q=0.8, \c
                                 */*; q=0.1')
    ]).
 | 
						|
 | 
						|
 | 
						|
%%	rdf_db:rdf_open_hook(+Scheme, +SourceURL, ?HaveModified, -Stream,
%%			     -Cleanup, -Modified, ?Format, +Options)
%
%	Hook clause for the =http= scheme. Opens SourceURL using
%	http_open/3, passing the extra request headers, an optional
%	=|If-modified-since|= header derived from HaveModified and the
%	caller's Options.
%
%	@param HaveModified	If nonvar, it is turned into an
%				=|If-modified-since|= header by
%				modified_since_header/2.
%	@param Stream		Stream to read the data from. This is
%				the result of open_envelope/5 and left
%				unbound if the server replies 304.
%	@param Cleanup		Goal to run when done: =|close(Stream)|=
%				or =true= if nothing was opened.
%	@param Modified		One of =|last_modified(Stamp)|= if the
%				=|Last-modified|= reply header could be
%				parsed, =unknown= if not, or
%				=not_modified= on a 304 reply.
%	@param Format		Data format. If unbound, open_envelope/5
%				tries to deduce it.
%	@error domain_error(content_type, ContentType) if
%	       open_envelope/5 cannot handle the reply.

rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
                     Modified, Format, Options) :-
    modified_since_header(HaveModified, Header),
    TypeHdr = [ header(content_type, ContentType),
                header(last_modified, ModifiedText)
              ],
    rdf_extra_headers(Extra),
    append([Extra, TypeHdr, Header, Options], OpenOptions),
    % Catch everything: a 304 reply arrives as an exception and is
    % told apart from real errors below.
    catch(http_open(SourceURL, Stream0, OpenOptions), E, true),
    (   var(E)
    ->  (   % Do not leak the connection if unwrapping raises rather
            % than fails.
            catch(open_envelope(ContentType, SourceURL,
                                Stream0, Stream, Format),
                  EnvelopeError,
                  (   close(Stream0),
                      throw(EnvelopeError)
                  ))
        ->  Cleanup = close(Stream),
            (   nonvar(ModifiedText),
                parse_time(ModifiedText, ModifiedStamp)
            ->  Modified = last_modified(ModifiedStamp)
            ;   Modified = unknown
            )
        ;   close(Stream0),
            domain_error(content_type, ContentType)
        )
    ;   % subsumes_term/2 is the ISO replacement for the deprecated
        % subsumes_chk/2; neither binds variables in E.
        subsumes_term(error(_, context(_, status(304, _))), E)
    ->  Modified = not_modified,
        Cleanup = true
    ;   throw(E)
    ).
 | 
						|
 | 
						|
 | 
						|
%%	modified_since_header(+LastModified, -ExtraHeaders) is det.
%
%	ExtraHeaders is the list of http_open/3 options needed to make
%	the request conditional. If LastModified is unbound we have no
%	earlier version and the list is empty. Otherwise it holds one
%	=|If-modified-since|= request header whose value is
%	LastModified converted by http_timestamp/2.

modified_since_header(HaveModified, ExtraHeaders) :-
    (   var(HaveModified)
    ->  ExtraHeaders = []
    ;   ExtraHeaders = [ request_header('If-modified-since' = HTTPDate) ],
        http_timestamp(HaveModified, HTTPDate)
    ).
 | 
						|
 | 
						|
%%	open_envelope(+ContentType, +SourceURL, +Stream0, -Stream,
%%		      ?Format) is semidet.
%
%	Open possible envelope formats and establish Format. The
%	clauses are tried in this order:
%
%	    1. ContentType is exactly =|application/x-gzip|= and
%	       rdf_db:rdf_storage_encoding/2 lists =gzip=: Stream is
%	       Stream0 wrapped by rdf_zlib_plugin:zopen/3. If Format is
%	       unbound it is taken from the extension that remains
%	       after stripping the last extension from SourceURL.
%	    2. Format is already known: Stream is Stream0.
%	    3. Otherwise Format is deduced from the major content type
%	       using content_type_format/2. If that fails and the type
%	       is =|text/plain|=, the extension of SourceURL decides.
%
%	Fails if no Format can be established.

open_envelope('application/x-gzip', SourceURL, Compressed, Plain, Format) :-
    rdf_db:rdf_storage_encoding(_, gzip),
    !,
    (   nonvar(Format)
    ->  true
    ;   % e.g. strip `.gz' first, then look at what is left
        file_name_extension(InnerURL, _GzExt, SourceURL),
        file_name_extension(_, InnerExt, InnerURL),
        rdf_db:rdf_file_type(InnerExt, Format)
    ),
    rdf_zlib_plugin:zopen(Compressed, Plain, []).
open_envelope(_, _, Stream, Stream, Format) :-
    nonvar(Format),
    !.
open_envelope(ContentType, SourceURL, Stream, Stream, Format) :-
    major_content_type(ContentType, MimeType),
    (   content_type_format(MimeType, Format)
    ->  true
    ;   MimeType == 'text/plain'        % server is not properly configured
    ->  file_name_extension(_, URLExt, SourceURL),
        rdf_db:rdf_file_type(URLExt, Format)
    ).
 | 
						|
 | 
						|
%%	major_content_type(+ContentType, -Major) is det.
%
%	Major is the media type of ContentType without parameters:
%	everything before the first =|;|=, or all of ContentType if it
%	holds no =|;|=. Media types are case-insensitive and white space
%	may surround the =|;|=, so the result is trimmed and converted
%	to lower case. This ensures that, e.g.,
%	=|'Text/Turtle ; charset=utf-8'|= yields =|'text/turtle'|=.

major_content_type(ContentType, Major) :-
    (   sub_atom(ContentType, Pre, _, _, (;))   % first `;' wins
    ->  sub_atom(ContentType, 0, Pre, _, Type)
    ;   Type = ContentType
    ),
    normalize_space(atom(Trimmed), Type),
    downcase_atom(Trimmed, Major).
 | 
						|
 | 
						|
%%	content_type_format(+ContentType, -Format)
%
%	Deduce the RDF encoding from the mime-type. ContentType is the
%	media type without parameters; Format is one of =xml=,
%	=turtle=, =xhtml= or =gzip=.
%
%	@bug	The turtle parser only parses a subset of n3.

content_type_format('text/rdf',              xml).
content_type_format('text/rdf+xml',          xml).
content_type_format('application/rdf+xml',   xml).
content_type_format('application/x-turtle',  turtle).
content_type_format('application/turtle',    turtle).
content_type_format('text/turtle',           turtle).
content_type_format('text/rdf+n3',           turtle).   % Bit dubious
content_type_format('text/html',             xhtml).
content_type_format('application/xhtml+xml', xhtml).
content_type_format('application/x-gzip',    gzip).
 |