167 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			167 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | /*  $Id$ | ||
|  | 
 | ||
|  |     Part of SWI-Prolog | ||
|  | 
 | ||
|  |     Author:        Jan Wielemaker | ||
|  |     E-mail:        J.Wielemaker@cs.vu.nl | ||
|  |     WWW:           http://www.swi-prolog.org | ||
|  |     Copyright (C): 1985-2010, University of Amsterdam | ||
|  | 			      VU University Amsterdam | ||
|  | 
 | ||
|  |     This program is free software; you can redistribute it and/or | ||
|  |     modify it under the terms of the GNU General Public License | ||
|  |     as published by the Free Software Foundation; either version 2 | ||
|  |     of the License, or (at your option) any later version. | ||
|  | 
 | ||
|  |     This program is distributed in the hope that it will be useful, | ||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||
|  |     GNU General Public License for more details. | ||
|  | 
 | ||
|  |     You should have received a copy of the GNU General Public | ||
|  |     License along with this library; if not, write to the Free Software | ||
|  |     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | ||
|  | 
 | ||
|  |     As a special exception, if you link this library with other files, | ||
|  |     compiled with a Free Software compiler, to produce an executable, this | ||
|  |     library does not by itself cause the resulting executable to be covered | ||
|  |     by the GNU General Public License. This exception does not however | ||
|  |     invalidate any other reasons why the executable file might be covered by | ||
|  |     the GNU General Public License. | ||
|  | */ | ||
|  | 
 | ||
|  | 
 | ||
|  | :- module(rdf_http_plugin, []). | ||
|  | :- use_module(library(http/http_open)). | ||
|  | :- use_module(library(http/http_header)). | ||
|  | :- use_module(library(semweb/rdf_db)). | ||
|  | :- use_module(library(date)). | ||
|  | :- use_module(library(error)). | ||
|  | 
 | ||
|  | 
 | ||
|  | /** <module> RDF HTTP Plugin | ||
|  | 
 | ||
|  | This module allows loading data into   the semantic web library directly | ||
|  | from an HTTP server. The following example  loads the RDF core data into | ||
|  | the RDF database. | ||
|  | 
 | ||
|  |     == | ||
|  |     :- use_module(library(semweb/rdf_db)). | ||
|  |     :- use_module(library(semweb/rdf_http_plugin)). | ||
|  | 
 | ||
|  | 	..., | ||
|  | 	rdf_load('http://www.w3.org/1999/02/22-rdf-syntax-ns') | ||
|  |     == | ||
|  | */ | ||
|  | 
 | ||
% Hooks contributed to library(semweb/rdf_db).  Both predicates live in
% module rdf_db; they are declared multifile here so that this plugin
% may add its own clauses to them.

:- multifile rdf_db:rdf_open_hook/8.
:- multifile rdf_db:url_protocol/1.

% Add `http` to the url_protocol/1 table of rdf_db.
% NOTE(review): presumably rdf_db consults this table to decide that a
% source must be opened through rdf_open_hook/8 -- confirm in rdf_db.

rdf_db:url_protocol(http).
|  | 
 | ||
|  | 
 | ||
%%  rdf_extra_headers(-List) is det.
%
%   List holds the extra http_open/3 options that are sent with every
%   request: a single `Accept` request header that prefers RDF/XML and
%   Turtle and accepts anything else with a low priority.  Note that,
%   although we also process RDF embedded in HTML, we do not explicitly
%   ask for it.  Doing so causes some servers (e.g.,
%   http://w3.org/2004/02/skos/core) to reply with the HTML description
%   rather than the RDF.
%
%   The header value is written over several lines using the `\c`
%   escape, which drops the line break and the layout that follows it.
%   A plain backslash-newline keeps the indentation of the next line in
%   ISO mode and is reported as deprecated by SWI-Prolog 7.

rdf_extra_headers(
    [ request_header('Accept' = 'application/rdf+xml, \c
                                 text/rdf+xml; q=0.9, \c
                                 text/turtle, \c
                                 application/x-turtle; q=0.8, \c
                                 */*; q=0.1')
    ]).
|  | 
 | ||
|  | 
 | ||
%%  rdf_db:rdf_open_hook(+Scheme, +SourceURL, ?HaveModified, -Stream,
%%                       -Cleanup, -Modified, ?Format, +Options)
%
%   Hook clause for Scheme = `http`.  Opens SourceURL using http_open/3,
%   passing the headers from rdf_extra_headers/1, an optional
%   =|If-modified-since|= header derived from HaveModified and the
%   caller's Options.
%
%     * On success, open_envelope/5 selects Stream and Format, Cleanup
%       is close(Stream) and Modified is last_modified(Stamp) if the
%       =|Last-modified|= reply header can be parsed by parse_time/2,
%       or `unknown` otherwise.
%     * If http_open/3 raises an error whose context is
%       status(304, _), Modified is `not_modified` and Cleanup is
%       `true`; Stream and Format are left untouched.
%     * Any other exception from http_open/3 is re-thrown.
%
%   @error domain_error(content_type, ContentType) if open_envelope/5
%          cannot determine a format.  The HTTP stream is closed first.

rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
                     Modified, Format, Options) :-
    modified_since_header(HaveModified, Header),
    TypeHdr = [ header(content_type, ContentType),
                header(last_modified, ModifiedText)
              ],
    rdf_extra_headers(Extra),
    append([Extra, TypeHdr, Header, Options], OpenOptions),
    catch(http_open(SourceURL, Stream0, OpenOptions), E, true),
    (   var(E)
    ->  (   % Do not leak the HTTP stream if unwrapping the envelope
            % raises an exception instead of failing.
            catch(open_envelope(ContentType, SourceURL,
                                Stream0, Stream, Format),
                  EnvelopeError,
                  ( close(Stream0),
                    throw(EnvelopeError)
                  ))
        ->  Cleanup = close(Stream),
            (   nonvar(ModifiedText),
                parse_time(ModifiedText, ModifiedStamp)
            ->  Modified = last_modified(ModifiedStamp)
            ;   Modified = unknown
            )
        ;   close(Stream0),
            domain_error(content_type, ContentType)
        )
    ;   % subsumes_term/2 is the ISO replacement for the deprecated
        % subsumes_chk/2; like it, it leaves E unbound-variable free
        % of new bindings.
        subsumes_term(error(_, context(_, status(304, _))), E)
    ->  Modified = not_modified,
        Cleanup = true
    ;   throw(E)
    ).
|  | 
 | ||
|  | 
 | ||
%%  modified_since_header(+LastModified, -ExtraHeaders) is det.
%
%   ExtraHeaders is the list of http_open/3 options that make the
%   request conditional.  It is empty when LastModified is unbound;
%   otherwise it holds one =|If-modified-since|= request header whose
%   value is LastModified as formatted by http_timestamp/2.

modified_since_header(LastModified, ExtraHeaders) :-
    (   var(LastModified)
    ->  ExtraHeaders = []
    ;   http_timestamp(LastModified, Stamp),
        ExtraHeaders = [ request_header('If-modified-since' = Stamp) ]
    ).
|  | 
 | ||
%%  open_envelope(+ContentType, +SourceURL, +Stream0, -Stream,
%%                ?Format) is semidet.
%
%   Unwrap a possible envelope around the data on Stream0.
%
%     * If ContentType is `application/x-gzip` and
%       rdf_db:rdf_storage_encoding/2 lists `gzip`, Stream is obtained
%       from rdf_zlib_plugin:zopen/3 on Stream0 (presumably a
%       decompressing stream -- confirm in the zlib plugin).  An
%       unbound Format is looked up from the extension that remains
%       after stripping the last extension from SourceURL.
%     * Otherwise Stream is Stream0.  A Format supplied by the caller
%       is kept as is; else Format is looked up from the content type
%       with its parameters removed or, for `text/plain`, from the
%       extension of SourceURL.
%
%   Fails if no format can be determined.

open_envelope('application/x-gzip', SourceURL, Stream0, Stream, Format) :-
    rdf_db:rdf_storage_encoding(_, gzip),
    !,
    (   nonvar(Format)
    ->  true
    ;   file_name_extension(Plain, _GzExt, SourceURL),
        file_name_extension(_, PlainExt, Plain),
        rdf_db:rdf_file_type(PlainExt, Format)
    ),
    rdf_zlib_plugin:zopen(Stream0, Stream, []).
open_envelope(_ContentType, _SourceURL, Stream, Stream, Format) :-
    nonvar(Format),
    !.
open_envelope(ContentType, SourceURL, Stream, Stream, Format) :-
    major_content_type(ContentType, MimeType),
    (   content_type_format(MimeType, Format)
    ->  true
    ;   MimeType == 'text/plain'        % server is not properly configured
    ->  file_name_extension(_, Ext, SourceURL),
        rdf_db:rdf_file_type(Ext, Format)
    ).
|  | 
 | ||
%%  major_content_type(+ContentType, -Major) is det.
%
%   Major is the media type of the =|Content-type|= value ContentType:
%   the text before the first `;`, or all of ContentType if there is
%   no `;`.  Surrounding white space is removed and the result is
%   converted to lower case because media types are case-insensitive
%   and white space may precede the parameter separator; without this
%   a reply such as `Text/HTML ; charset=UTF-8` is not recognised by
%   the lower-case table content_type_format/2.

major_content_type(ContentType, Major) :-
    (   sub_atom(ContentType, Pre, _, _, (;))
    ->  sub_atom(ContentType, 0, Pre, _, Raw)
    ;   Raw = ContentType
    ),
    normalize_space(atom(Trimmed), Raw),
    downcase_atom(Trimmed, Major).
|  | 
 | ||
%%  content_type_format(?ContentType, ?Format) is nondet.
%
%   True when data served with media type ContentType is to be parsed
%   as RDF serialization Format.  ContentType is the bare media type as
%   produced by major_content_type/2, i.e. without parameters.
%
%   @bug    The turtle parser only parses a subset of n3.

content_type_format('text/rdf',              xml).
content_type_format('text/rdf+xml',          xml).
content_type_format('application/rdf+xml',   xml).
content_type_format('application/x-turtle',  turtle).
content_type_format('application/turtle',    turtle).
content_type_format('text/turtle',           turtle).
content_type_format('text/rdf+n3',           turtle).   % Bit dubious
content_type_format('text/html',             xhtml).
content_type_format('application/xhtml+xml', xhtml).
content_type_format('application/x-gzip',    gzip).