218 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			218 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| 
								 | 
							
								:- module(rdf_cache,
							 | 
						||
| 
								 | 
							
									  [ rdf_set_cache_options/1,	% +Options
							 | 
						||
| 
								 | 
							
									    rdf_cache_file/3		% +URL, +RW, -File
							 | 
						||
| 
								 | 
							
									  ]).
							 | 
						||
| 
								 | 
							
								:- use_module(library(error)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/** <module> Cache RDF triples
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								Triples may be cached to reduce load time   as well as access to network
							 | 
						||
| 
								 | 
							
								resources (e.g. HTTP). We use two caching locations: typically files may
							 | 
						||
| 
								 | 
							
								be cached locally (i.e. in a  .cache   sub-directory  of  the file). All
							 | 
						||
| 
								 | 
							
								objects can be cached  in  a  global   cache  directory.  The  policy is
							 | 
						||
| 
								 | 
							
								determined by rdf_cache_options/1.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- dynamic
							 | 
						||
| 
								 | 
							
									cache_option/1.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								set_setfault_options :-
							 | 
						||
| 
								 | 
							
									assert(cache_option(enabled(true))),
							 | 
						||
| 
								 | 
							
									(   current_prolog_flag(windows, true)
							 | 
						||
| 
								 | 
							
									->  assert(cache_option(local_directory('_cache')))
							 | 
						||
| 
								 | 
							
									;   assert(cache_option(local_directory('.cache')))
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- set_setfault_options.		% _only_ when loading!
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_set_cache_options(+Options)
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Change the cache policy.  Provided options are:
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	  * enabled(Boolean)
							 | 
						||
| 
								 | 
							
								%	  If =true=, caching is enabled.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	  * local_directory(Name).
							 | 
						||
| 
								 | 
							
								%	  Plain name of local directory.  Default =|.cache|=
							 | 
						||
| 
								 | 
							
								%	  (=|_cache|= on Windows).
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	  * create_local_directory(Bool)
							 | 
						||
| 
								 | 
							
								%	  If =true=, try to create local cache directories
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	  * global_directory(Dir)
							 | 
						||
| 
								 | 
							
								%	  Writeable directory for storing cached parsed files.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	  * create_global_directory(Bool)
							 | 
						||
| 
								 | 
							
								%	  If =true=, try to create the global cache directory.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								rdf_set_cache_options([]) :- !.
							 | 
						||
| 
								 | 
							
								rdf_set_cache_options([H|T]) :- !,
							 | 
						||
| 
								 | 
							
									rdf_set_cache_options(H),
							 | 
						||
| 
								 | 
							
									rdf_set_cache_options(T).
							 | 
						||
| 
								 | 
							
								rdf_set_cache_options(Opt) :-
							 | 
						||
| 
								 | 
							
									functor(Opt, Name, Arity),
							 | 
						||
| 
								 | 
							
									arg(1, Opt, Value),
							 | 
						||
| 
								 | 
							
									(   cache_option(Name, Type)
							 | 
						||
| 
								 | 
							
									->  must_be(Type, Value)
							 | 
						||
| 
								 | 
							
									;   domain_error(cache_option, Opt)
							 | 
						||
| 
								 | 
							
									),
							 | 
						||
| 
								 | 
							
									functor(Gen, Name, Arity),
							 | 
						||
| 
								 | 
							
									retractall(cache_option(Gen)),
							 | 
						||
| 
								 | 
							
									expand_option(Opt, EOpt),
							 | 
						||
| 
								 | 
							
									assert(cache_option(EOpt)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								cache_option(enabled,		      boolean).
							 | 
						||
| 
								 | 
							
								cache_option(local_directory,	      atom).
							 | 
						||
| 
								 | 
							
								cache_option(create_local_directory,  boolean).
							 | 
						||
| 
								 | 
							
								cache_option(global_directory,	      atom).
							 | 
						||
| 
								 | 
							
								cache_option(create_global_directory, boolean).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								expand_option(global_directory(Local), global_directory(Global)) :- !,
							 | 
						||
| 
								 | 
							
									absolute_file_name(Local, Global).
							 | 
						||
| 
								 | 
							
								expand_option(Opt, Opt).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_cache_location(+URL, +ReadWrite, -File) is semidet.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	File is the cache file  for  URL.   If  ReadWrite  is =read=, it
							 | 
						||
| 
								 | 
							
								%	returns the name of an existing file.  If =write= it returns the
							 | 
						||
| 
								 | 
							
								%	where a new cache file can be overwritten or created.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								rdf_cache_file(_URL, _, _File) :-
							 | 
						||
| 
								 | 
							
									cache_option(enabled(false)), !,
							 | 
						||
| 
								 | 
							
									fail.
							 | 
						||
| 
								 | 
							
								rdf_cache_file(URL, read, File) :- !,
							 | 
						||
| 
								 | 
							
									(   atom_concat('file://', Path, URL),
							 | 
						||
| 
								 | 
							
									    cache_option(local_directory(Local)),
							 | 
						||
| 
								 | 
							
									    file_directory_name(Path, Dir),
							 | 
						||
| 
								 | 
							
									    local_cache_file(URL, LocalFile),
							 | 
						||
| 
								 | 
							
									    atomic_list_concat([Dir, Local, LocalFile], /, File)
							 | 
						||
| 
								 | 
							
									;   cache_option(global_directory(Dir)),
							 | 
						||
| 
								 | 
							
									    url_cache_file(URL, Dir, trp, read, File)
							 | 
						||
| 
								 | 
							
									),
							 | 
						||
| 
								 | 
							
									access_file(File, read), !.
							 | 
						||
| 
								 | 
							
								rdf_cache_file(URL, write, File) :- !,
							 | 
						||
| 
								 | 
							
									(   atom_concat('file://', Path, URL),
							 | 
						||
| 
								 | 
							
									    cache_option(local_directory(Local)),
							 | 
						||
| 
								 | 
							
									    file_directory_name(Path, Dir),
							 | 
						||
| 
								 | 
							
									    (	cache_option(create_local_directory(true))
							 | 
						||
| 
								 | 
							
									    ->	RWDir = write
							 | 
						||
| 
								 | 
							
									    ;	RWDir = read
							 | 
						||
| 
								 | 
							
									    ),
							 | 
						||
| 
								 | 
							
									    ensure_dir(Dir, Local, RWDir, CacheDir),
							 | 
						||
| 
								 | 
							
									    local_cache_file(URL, LocalFile),
							 | 
						||
| 
								 | 
							
									    atomic_list_concat([CacheDir, LocalFile], /, File)
							 | 
						||
| 
								 | 
							
									;   cache_option(global_directory(Dir)),
							 | 
						||
| 
								 | 
							
									    ensure_global_cache(Dir),
							 | 
						||
| 
								 | 
							
									    url_cache_file(URL, Dir, trp, write, File)
							 | 
						||
| 
								 | 
							
									),
							 | 
						||
| 
								 | 
							
									access_file(File, write), !.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ensure_global_cache(Dir) :-
							 | 
						||
| 
								 | 
							
									exists_directory(Dir), !.
							 | 
						||
| 
								 | 
							
								ensure_global_cache(Dir) :-
							 | 
						||
| 
								 | 
							
									cache_option(create_global_directory(true)),
							 | 
						||
| 
								 | 
							
									make_directory(Dir),
							 | 
						||
| 
								 | 
							
									print_message(informational, rdf(cache_created(Dir))).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	   LOCAL CACHE		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	local_cache_file(+FileURL, -File) is det.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Return the name of the cache file   for FileURL. The name is the
							 | 
						||
| 
								 | 
							
								%	plain filename with the .trp extension.  As   the  URL is a file
							 | 
						||
| 
								 | 
							
								%	URL, it is guaranteed  to  be   a  valid  filename.  Assumes the
							 | 
						||
| 
								 | 
							
								%	hosting OS can handle  multiple   exensions  (=|.x.y|=)  though.
							 | 
						||
| 
								 | 
							
								%	These days thats even true on Windows.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								local_cache_file(URL, File) :-
							 | 
						||
| 
								 | 
							
									file_base_name(URL, Name),
							 | 
						||
| 
								 | 
							
									file_name_extension(Name, trp, File).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	   GLOBAL CACHE		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Determine location of cache-file for the   given  URL in Dir. If
							 | 
						||
| 
								 | 
							
								%	Ext is provided, the  returned  Path   is  ensured  to  have the
							 | 
						||
| 
								 | 
							
								%	specified extension.
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	@param RW	If =read=, no directories are created and the call
							 | 
						||
| 
								 | 
							
								%			fails if URL is not in the cache.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								url_cache_file(URL, Dir, Ext, RW, Path) :-
							 | 
						||
| 
								 | 
							
									term_hash(URL, Hash0),
							 | 
						||
| 
								 | 
							
									Hash is Hash0 + 100000,		% make sure > 4 characters
							 | 
						||
| 
								 | 
							
									format(string(Hex), '~16r', [Hash]),
							 | 
						||
| 
								 | 
							
									sub_atom(Hex, _, 2, 0, L1),
							 | 
						||
| 
								 | 
							
									ensure_dir(Dir, L1, RW, Dir1),
							 | 
						||
| 
								 | 
							
									sub_atom(Hex, _, 2, 2, L2),
							 | 
						||
| 
								 | 
							
									ensure_dir(Dir1, L2, RW, Dir2),
							 | 
						||
| 
								 | 
							
									url_to_file(URL, File),
							 | 
						||
| 
								 | 
							
									ensure_ext(File, Ext, FileExt),
							 | 
						||
| 
								 | 
							
									atomic_list_concat([Dir2, /, FileExt], Path).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ensure_dir(D0, Sub, RW, Dir) :-
							 | 
						||
| 
								 | 
							
									atomic_list_concat([D0, /, Sub], Dir),
							 | 
						||
| 
								 | 
							
									(   exists_directory(Dir)
							 | 
						||
| 
								 | 
							
									->  true
							 | 
						||
| 
								 | 
							
									;   RW == write
							 | 
						||
| 
								 | 
							
									->  catch(make_directory(Dir), _, fail)
							 | 
						||
| 
								 | 
							
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ensure_ext(File, '', File) :- !.
							 | 
						||
| 
								 | 
							
								ensure_ext(File, Ext, File) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(_, Ext, File), !.
							 | 
						||
| 
								 | 
							
								ensure_ext(File, Ext, FileExt) :-
							 | 
						||
| 
								 | 
							
									file_name_extension(File, Ext, FileExt).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	url_to_file(+URL, -File)
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Convert a URL in something that fits  in a file, i.e. avoiding /
							 | 
						||
| 
								 | 
							
								%	and :. We  simply  replace  these  by   -.  We  could  also  use
							 | 
						||
| 
								 | 
							
								%	www_form_encode/2, but confusion when to replace  as well as the
							 | 
						||
| 
								 | 
							
								%	fact that we loose the '.' (extension)   makes this a less ideal
							 | 
						||
| 
								 | 
							
								%	choice.  We could also consider base64 encoding of the name.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								url_to_file(URL, File) :-
							 | 
						||
| 
								 | 
							
									atom_codes(URL, Codes),
							 | 
						||
| 
								 | 
							
									phrase(safe_file_name(Codes), FileCodes),
							 | 
						||
| 
								 | 
							
									atom_codes(File, FileCodes).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								safe_file_name([]) -->
							 | 
						||
| 
								 | 
							
									[].
							 | 
						||
| 
								 | 
							
								safe_file_name([H|T]) -->
							 | 
						||
| 
								 | 
							
									replace(H), !,
							 | 
						||
| 
								 | 
							
									safe_file_name(T).
							 | 
						||
| 
								 | 
							
								safe_file_name([H|T]) -->
							 | 
						||
| 
								 | 
							
									[H],
							 | 
						||
| 
								 | 
							
									safe_file_name(T).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	replace(+Code)//
							 | 
						||
| 
								 | 
							
								%
							 | 
						||
| 
								 | 
							
								%	Replace a character  code  that  cannot   safely  be  put  in  a
							 | 
						||
| 
								 | 
							
								%	filename. Should we use %XX?
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								replace(0'/)  --> "-".			% directory separator
							 | 
						||
| 
								 | 
							
								replace(0'\\) --> "-".			% not allowed in Windows filename
							 | 
						||
| 
								 | 
							
								replace(0':)  --> "-".			% idem
							 | 
						||
| 
								 | 
							
								replace(0'?)  --> "-".			% idem
							 | 
						||
| 
								 | 
							
								replace(0'*)  --> "-".			% idem
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	       MESSAGES		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- multifile prolog:message/3.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								prolog:message(rdf(cache_created(Dir))) -->
							 | 
						||
| 
								 | 
							
									[ 'Created RDF cache directory ~w'-[Dir] ].
							 |