218 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			218 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
|   | :- module(rdf_cache, | ||
|  | 	  [ rdf_set_cache_options/1,	% +Options | ||
|  | 	    rdf_cache_file/3		% +URL, +RW, -File | ||
|  | 	  ]). | ||
|  | :- use_module(library(error)). | ||
|  | 
 | ||
|  | /** <module> Cache RDF triples | ||
|  | 
 | ||
|  | Triples may be cached to reduce load time   as well as access to network | ||
|  | resources (e.g. HTTP). We use two caching locations: typically files may | ||
|  | be cached locally (i.e. in a  .cache   sub-directory  of  the file). All | ||
|  | objects can be cached  in  a  global   cache  directory.  The  policy is | ||
|  | determined by rdf_set_cache_options/1. | ||
|  | */ | ||
|  | 
 | ||
|  | %	cache_option(?Option) | ||
|  | % | ||
|  | %	Current cache settings, one fact per option, stored as a unary | ||
|  | %	term such as enabled(true). Facts are added by | ||
|  | %	set_setfault_options/0 and replaced by rdf_set_cache_options/1. | ||
|  | :- dynamic | ||
|  | 	cache_option/1. | ||
|  | 
 | ||
|  | %	set_setfault_options | ||
|  | % | ||
|  | %	Install the initial cache policy: caching is enabled and the | ||
|  | %	local cache directory is named =|_cache|= on Windows and | ||
|  | %	=|.cache|= elsewhere. | ||
|  | 
 | ||
|  | set_setfault_options :- | ||
|  | 	assertz(cache_option(enabled(true))), | ||
|  | 	(   current_prolog_flag(windows, true) | ||
|  | 	->  LocalDir = '_cache' | ||
|  | 	;   LocalDir = '.cache' | ||
|  | 	), | ||
|  | 	assertz(cache_option(local_directory(LocalDir))). | ||
|  | 
 | ||
|  | :- set_setfault_options.		% _only_ when loading! | ||
|  | 
 | ||
|  | %%	rdf_set_cache_options(+Options) | ||
|  | % | ||
|  | %	Change the cache policy. Options is a single option term or a | ||
|  | %	(possibly nested) list of option terms. Provided options are: | ||
|  | % | ||
|  | %	  * enabled(Boolean) | ||
|  | %	  If =true=, caching is enabled. | ||
|  | % | ||
|  | %	  * local_directory(Name) | ||
|  | %	  Plain name of local directory.  Default =|.cache|= | ||
|  | %	  (=|_cache|= on Windows). | ||
|  | % | ||
|  | %	  * create_local_directory(Bool) | ||
|  | %	  If =true=, try to create local cache directories | ||
|  | % | ||
|  | %	  * global_directory(Dir) | ||
|  | %	  Writeable directory for storing cached parsed files.  The | ||
|  | %	  name is made absolute before it is stored. | ||
|  | % | ||
|  | %	  * create_global_directory(Bool) | ||
|  | %	  If =true=, try to create the global cache directory. | ||
|  | % | ||
|  | %	@error instantiation_error if Options (or a list tail) is unbound. | ||
|  | %	@error domain_error(cache_option, Opt) if Opt is not one of the | ||
|  | %	       unary option terms listed above. | ||
|  | %	@error type_error if an option value has the wrong type. | ||
|  | 
 | ||
|  | rdf_set_cache_options(Options) :- | ||
|  | 	var(Options), !,		% do not let a variable match [] | ||
|  | 	instantiation_error(Options). | ||
|  | rdf_set_cache_options([]) :- !. | ||
|  | rdf_set_cache_options([H|T]) :- !, | ||
|  | 	rdf_set_cache_options(H), | ||
|  | 	rdf_set_cache_options(T). | ||
|  | rdf_set_cache_options(Opt) :- | ||
|  | 	(   compound(Opt), | ||
|  | 	    Opt =.. [Name, Value],	% exactly one argument | ||
|  | 	    cache_option(Name, Type) | ||
|  | 	->  must_be(Type, Value) | ||
|  | 	;   domain_error(cache_option, Opt) | ||
|  | 	), | ||
|  | 	expand_option(Opt, EOpt),	% before retracting: keep old value if this fails | ||
|  | 	functor(Gen, Name, 1), | ||
|  | 	retractall(cache_option(Gen)), | ||
|  | 	assert(cache_option(EOpt)). | ||
|  | 
 | ||
|  | %	cache_option(?Name, ?Type) | ||
|  | % | ||
|  | %	Table of the known option names and the must_be/2 type that | ||
|  | %	rdf_set_cache_options/1 demands for the option's argument. | ||
|  | cache_option(enabled,		      boolean). | ||
|  | cache_option(local_directory,	      atom). | ||
|  | cache_option(create_local_directory,  boolean). | ||
|  | cache_option(global_directory,	      atom). | ||
|  | cache_option(create_global_directory, boolean). | ||
|  | 
 | ||
|  | %	expand_option(+Option, -Expanded) | ||
|  | % | ||
|  | %	Expanded is Option as it is stored: the argument of | ||
|  | %	global_directory/1 is turned into an absolute file name, any | ||
|  | %	other option is passed through unchanged. | ||
|  | expand_option(Option, Expanded) :- | ||
|  | 	(   Option = global_directory(Local), | ||
|  | 	    Expanded = global_directory(Global) | ||
|  | 	->  absolute_file_name(Local, Global) | ||
|  | 	;   Expanded = Option | ||
|  | 	). | ||
|  | 
 | ||
|  | 
 | ||
|  | %%	rdf_cache_file(+URL, +ReadWrite, -File) is semidet. | ||
|  | % | ||
|  | %	File is the cache file for URL. If ReadWrite is =read=, File is | ||
|  | %	an existing cache file that can be read. If =write=, File is a | ||
|  | %	location where a cache file can be created or overwritten. For | ||
|  | %	=|file://|= URLs the local cache directory next to the file is | ||
|  | %	tried first; the global cache directory is the fallback. Fails | ||
|  | %	if caching is disabled or no usable file is found. | ||
|  | 
 | ||
|  | rdf_cache_file(_URL, _RW, _File) :- | ||
|  | 	cache_option(enabled(false)), !, | ||
|  | 	fail. | ||
|  | rdf_cache_file(URL, read, File) :- !, | ||
|  | 	read_cache_candidate(URL, File), | ||
|  | 	access_file(File, read), !. | ||
|  | rdf_cache_file(URL, write, File) :- !, | ||
|  | 	write_cache_candidate(URL, File), | ||
|  | 	access_file(File, write), !. | ||
|  | 
 | ||
|  | %	read_cache_candidate(+URL, -File) is nondet. | ||
|  | % | ||
|  | %	Enumerate places to look for an existing cache file: first the | ||
|  | %	local cache directory (file URLs only), then the global one. | ||
|  | 
 | ||
|  | read_cache_candidate(URL, File) :- | ||
|  | 	atom_concat('file://', Path, URL), | ||
|  | 	cache_option(local_directory(Local)), | ||
|  | 	file_directory_name(Path, Dir), | ||
|  | 	local_cache_file(URL, LocalFile), | ||
|  | 	atomic_list_concat([Dir, Local, LocalFile], /, File). | ||
|  | read_cache_candidate(URL, File) :- | ||
|  | 	cache_option(global_directory(Dir)), | ||
|  | 	url_cache_file(URL, Dir, trp, read, File). | ||
|  | 
 | ||
|  | %	write_cache_candidate(+URL, -File) is nondet. | ||
|  | % | ||
|  | %	Enumerate places where a cache file may be written, in the same | ||
|  | %	order. The local directory is only created if the option | ||
|  | %	create_local_directory(true) is set. | ||
|  | 
 | ||
|  | write_cache_candidate(URL, File) :- | ||
|  | 	atom_concat('file://', Path, URL), | ||
|  | 	cache_option(local_directory(Local)), | ||
|  | 	file_directory_name(Path, Dir), | ||
|  | 	(   cache_option(create_local_directory(true)) | ||
|  | 	->  RWDir = write | ||
|  | 	;   RWDir = read | ||
|  | 	), | ||
|  | 	ensure_dir(Dir, Local, RWDir, CacheDir), | ||
|  | 	local_cache_file(URL, LocalFile), | ||
|  | 	atomic_list_concat([CacheDir, LocalFile], /, File). | ||
|  | write_cache_candidate(URL, File) :- | ||
|  | 	cache_option(global_directory(Dir)), | ||
|  | 	ensure_global_cache(Dir), | ||
|  | 	url_cache_file(URL, Dir, trp, write, File). | ||
|  | 
 | ||
|  | 
 | ||
|  | %	ensure_global_cache(+Dir) is semidet. | ||
|  | % | ||
|  | %	True if Dir exists as a directory, or if it could be created | ||
|  | %	because the option create_global_directory(true) is set. A | ||
|  | %	message is printed when the directory is created. Errors from | ||
|  | %	make_directory/1 are mapped to failure, as ensure_dir/4 does, so | ||
|  | %	a cache lookup fails quietly instead of raising. | ||
|  | 
 | ||
|  | ensure_global_cache(Dir) :- | ||
|  | 	exists_directory(Dir), !. | ||
|  | ensure_global_cache(Dir) :- | ||
|  | 	cache_option(create_global_directory(true)), | ||
|  | 	catch(make_directory(Dir), error(_, _), fail), | ||
|  | 	print_message(informational, rdf(cache_created(Dir))). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   LOCAL CACHE		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	local_cache_file(+FileURL, -File) is det. | ||
|  | % | ||
|  | %	File is the name of the cache file for FileURL: the plain file | ||
|  | %	name with the .trp extension. As the URL is a file URL, its base | ||
|  | %	name is assumed to be a valid file name. This also assumes the | ||
|  | %	hosting OS can handle multiple extensions (=|.x.y|=). These days | ||
|  | %	that is even true on Windows. | ||
|  | 
 | ||
|  | local_cache_file(URL, File) :- | ||
|  | 	file_base_name(URL, BaseName), | ||
|  | 	file_name_extension(BaseName, trp, File). | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	   GLOBAL CACHE		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %%	url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet. | ||
|  | % | ||
|  | %	Path is the location of the cache file for URL below Dir. Files | ||
|  | %	are spread over two directory levels, each named after two hex | ||
|  | %	digits of the hash of URL. If Ext is not '', Path is ensured to | ||
|  | %	have that extension. | ||
|  | % | ||
|  | %	@param RW	If =write=, missing directories are created. | ||
|  | %			Otherwise no directories are created and the call | ||
|  | %			fails if they do not exist. | ||
|  | 
 | ||
|  | url_cache_file(URL, Dir, Ext, RW, Path) :- | ||
|  | 	term_hash(URL, Hash0), | ||
|  | 	Hash is Hash0 + 100000,		% make sure > 4 characters | ||
|  | 	format(string(Hex), '~16r', [Hash]), | ||
|  | 	sub_atom(Hex, _, 2, 0, Level1),	% last two hex digits | ||
|  | 	sub_atom(Hex, _, 2, 2, Level2),	% the two digits before those | ||
|  | 	ensure_dir(Dir, Level1, RW, Dir1), | ||
|  | 	ensure_dir(Dir1, Level2, RW, Dir2), | ||
|  | 	url_to_file(URL, BaseName), | ||
|  | 	ensure_ext(BaseName, Ext, FileName), | ||
|  | 	atomic_list_concat([Dir2, FileName], /, Path). | ||
|  | 
 | ||
|  | %	ensure_dir(+Parent, +Sub, +RW, -Dir) is semidet. | ||
|  | % | ||
|  | %	Dir is Parent/Sub. Succeeds if Dir exists as a directory. If it | ||
|  | %	does not exist, it is created when RW == write; otherwise the | ||
|  | %	call fails. Errors from make_directory/1 are turned into | ||
|  | %	failure, unless the directory turns out to exist afterwards | ||
|  | %	(e.g. it was created concurrently by another process). Only | ||
|  | %	error(_,_) terms are caught, so aborts and similar exceptions | ||
|  | %	still propagate. | ||
|  | 
 | ||
|  | ensure_dir(D0, Sub, RW, Dir) :- | ||
|  | 	atomic_list_concat([D0, /, Sub], Dir), | ||
|  | 	(   exists_directory(Dir) | ||
|  | 	->  true | ||
|  | 	;   RW == write, | ||
|  | 	    (   catch(make_directory(Dir), error(_, _), fail) | ||
|  | 	    ->  true | ||
|  | 	    ;   exists_directory(Dir)	% lost a creation race | ||
|  | 	    ) | ||
|  | 	). | ||
|  | 
 | ||
|  | %	ensure_ext(+File, +Ext, -FileExt) | ||
|  | % | ||
|  | %	FileExt is File with extension Ext. File is returned unchanged | ||
|  | %	if Ext is '' or File already has extension Ext. | ||
|  | 
 | ||
|  | ensure_ext(File, Ext, FileExt) :- | ||
|  | 	(   Ext = '', | ||
|  | 	    FileExt = File | ||
|  | 	->  true | ||
|  | 	;   FileExt = File, | ||
|  | 	    file_name_extension(_, Ext, File) | ||
|  | 	->  true | ||
|  | 	;   file_name_extension(File, Ext, FileExt) | ||
|  | 	). | ||
|  | 
 | ||
|  | %%	url_to_file(+URL, -File) | ||
|  | % | ||
|  | %	File is URL turned into something that fits in a file name, i.e. | ||
|  | %	without characters such as / and :. Each code accepted by | ||
|  | %	replace//1 is simply rewritten to -. We could also use | ||
|  | %	www_form_encode/2, but confusion about when to replace, as well | ||
|  | %	as the fact that we lose the '.' (extension), makes this a less | ||
|  | %	ideal choice. We could also consider base64 encoding of the | ||
|  | %	name. | ||
|  | 
 | ||
|  | url_to_file(URL, File) :- | ||
|  | 	atom_codes(URL, URLCodes), | ||
|  | 	phrase(safe_file_name(URLCodes), SafeCodes), | ||
|  | 	atom_codes(File, SafeCodes). | ||
|  | 
 | ||
|  | %	safe_file_name(+Codes)// | ||
|  | % | ||
|  | %	Emit Codes, substituting what replace//1 produces for every | ||
|  | %	code it accepts and copying all other codes unchanged. | ||
|  | 
 | ||
|  | safe_file_name([]) --> | ||
|  | 	[]. | ||
|  | safe_file_name([Code|Codes]) --> | ||
|  | 	(   replace(Code) | ||
|  | 	->  [] | ||
|  | 	;   [Code] | ||
|  | 	), | ||
|  | 	safe_file_name(Codes). | ||
|  | 
 | ||
|  | %%	replace(+Code)// | ||
|  | % | ||
|  | %	Emit "-" for a character code that cannot safely be put in a | ||
|  | %	filename and fail for any other code. Covers the directory | ||
|  | %	separator and the characters Windows reserves in file names. | ||
|  | %	Should we use %XX? | ||
|  | 
 | ||
|  | replace(0'/)  --> "-".			% directory separator | ||
|  | replace(0'\\) --> "-".			% not allowed in Windows filename | ||
|  | replace(0':)  --> "-".			% idem | ||
|  | replace(0'?)  --> "-".			% idem | ||
|  | replace(0'*)  --> "-".			% idem | ||
|  | replace(0'<)  --> "-".			% idem | ||
|  | replace(0'>)  --> "-".			% idem | ||
|  | replace(0'|)  --> "-".			% idem | ||
|  | replace(0'")  --> "-".			% idem | ||
|  | 
 | ||
|  | 
 | ||
|  | 		 /******************************* | ||
|  | 		 *	       MESSAGES		* | ||
|  | 		 *******************************/ | ||
|  | 
 | ||
|  | %	Hook into print_message/2: text for the rdf(cache_created(Dir)) | ||
|  | %	message that ensure_global_cache/1 prints after creating Dir. | ||
|  | :- multifile prolog:message/3. | ||
|  | 
 | ||
|  | prolog:message(rdf(cache_created(Dir))) --> | ||
|  | 	[ 'Created RDF cache directory ~w'-[Dir] ]. |