:- module(rdf_cache,
	  [ rdf_set_cache_options/1,	% +Options
	    rdf_cache_file/3		% +URL, +RW, -File
	  ]).
:- use_module(library(error)).

/** <module> Cache RDF triples

Triples may be cached to reduce load time   as well as access to network
resources (e.g. HTTP). We use two caching locations: typically files may
be cached locally (i.e. in a  .cache   sub-directory  of  the file). All
objects can be cached  in  a  global   cache  directory.  The  policy is
determined by rdf_cache_options/1.
*/

:- dynamic
	cache_option/1.

set_setfault_options :-
	assert(cache_option(enabled(true))),
	(   current_prolog_flag(windows, true)
	->  assert(cache_option(local_directory('_cache')))
	;   assert(cache_option(local_directory('.cache')))
	).

:- set_setfault_options.		% _only_ when loading!

%%	rdf_set_cache_options(+Options)
%
%	Change the cache policy.  Provided options are:
%
%	  * enabled(Boolean)
%	  If =true=, caching is enabled.
%
%	  * local_directory(Name).
%	  Plain name of local directory.  Default =|.cache|=
%	  (=|_cache|= on Windows).
%
%	  * create_local_directory(Bool)
%	  If =true=, try to create local cache directories
%
%	  * global_directory(Dir)
%	  Writeable directory for storing cached parsed files.
%
%	  * create_global_directory(Bool)
%	  If =true=, try to create the global cache directory.

rdf_set_cache_options([]) :- !.
rdf_set_cache_options([H|T]) :- !,
	rdf_set_cache_options(H),
	rdf_set_cache_options(T).
rdf_set_cache_options(Opt) :-
	functor(Opt, Name, Arity),
	arg(1, Opt, Value),
	(   cache_option(Name, Type)
	->  must_be(Type, Value)
	;   domain_error(cache_option, Opt)
	),
	functor(Gen, Name, Arity),
	retractall(cache_option(Gen)),
	expand_option(Opt, EOpt),
	assert(cache_option(EOpt)).

cache_option(enabled,		      boolean).
cache_option(local_directory,	      atom).
cache_option(create_local_directory,  boolean).
cache_option(global_directory,	      atom).
cache_option(create_global_directory, boolean).

expand_option(global_directory(Local), global_directory(Global)) :- !,
	absolute_file_name(Local, Global).
expand_option(Opt, Opt).


%%	rdf_cache_location(+URL, +ReadWrite, -File) is semidet.
%
%	File is the cache file  for  URL.   If  ReadWrite  is =read=, it
%	returns the name of an existing file.  If =write= it returns the
%	where a new cache file can be overwritten or created.

rdf_cache_file(_URL, _, _File) :-
	cache_option(enabled(false)), !,
	fail.
rdf_cache_file(URL, read, File) :- !,
	(   atom_concat('file://', Path, URL),
	    cache_option(local_directory(Local)),
	    file_directory_name(Path, Dir),
	    local_cache_file(URL, LocalFile),
	    atomic_list_concat([Dir, Local, LocalFile], /, File)
	;   cache_option(global_directory(Dir)),
	    url_cache_file(URL, Dir, trp, read, File)
	),
	access_file(File, read), !.
rdf_cache_file(URL, write, File) :- !,
	(   atom_concat('file://', Path, URL),
	    cache_option(local_directory(Local)),
	    file_directory_name(Path, Dir),
	    (	cache_option(create_local_directory(true))
	    ->	RWDir = write
	    ;	RWDir = read
	    ),
	    ensure_dir(Dir, Local, RWDir, CacheDir),
	    local_cache_file(URL, LocalFile),
	    atomic_list_concat([CacheDir, LocalFile], /, File)
	;   cache_option(global_directory(Dir)),
	    ensure_global_cache(Dir),
	    url_cache_file(URL, Dir, trp, write, File)
	),
	access_file(File, write), !.


ensure_global_cache(Dir) :-
	exists_directory(Dir), !.
ensure_global_cache(Dir) :-
	cache_option(create_global_directory(true)),
	make_directory(Dir),
	print_message(informational, rdf(cache_created(Dir))).


		 /*******************************
		 *	   LOCAL CACHE		*
		 *******************************/

%%	local_cache_file(+FileURL, -File) is det.
%
%	Return the name of the cache file   for FileURL. The name is the
%	plain filename with the .trp extension.  As   the  URL is a file
%	URL, it is guaranteed  to  be   a  valid  filename.  Assumes the
%	hosting OS can handle  multiple   exensions  (=|.x.y|=)  though.
%	These days thats even true on Windows.

local_cache_file(URL, File) :-
	file_base_name(URL, Name),
	file_name_extension(Name, trp, File).


		 /*******************************
		 *	   GLOBAL CACHE		*
		 *******************************/

%%	url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
%
%	Determine location of cache-file for the   given  URL in Dir. If
%	Ext is provided, the  returned  Path   is  ensured  to  have the
%	specified extension.
%
%	@param RW	If =read=, no directories are created and the call
%			fails if URL is not in the cache.

url_cache_file(URL, Dir, Ext, RW, Path) :-
	term_hash(URL, Hash0),
	Hash is Hash0 + 100000,		% make sure > 4 characters
	format(string(Hex), '~16r', [Hash]),
	sub_atom(Hex, _, 2, 0, L1),
	ensure_dir(Dir, L1, RW, Dir1),
	sub_atom(Hex, _, 2, 2, L2),
	ensure_dir(Dir1, L2, RW, Dir2),
	url_to_file(URL, File),
	ensure_ext(File, Ext, FileExt),
	atomic_list_concat([Dir2, /, FileExt], Path).

ensure_dir(D0, Sub, RW, Dir) :-
	atomic_list_concat([D0, /, Sub], Dir),
	(   exists_directory(Dir)
	->  true
	;   RW == write
	->  catch(make_directory(Dir), _, fail)
	).

ensure_ext(File, '', File) :- !.
ensure_ext(File, Ext, File) :-
	file_name_extension(_, Ext, File), !.
ensure_ext(File, Ext, FileExt) :-
	file_name_extension(File, Ext, FileExt).

%%	url_to_file(+URL, -File)
%
%	Convert a URL in something that fits  in a file, i.e. avoiding /
%	and :. We  simply  replace  these  by   -.  We  could  also  use
%	www_form_encode/2, but confusion when to replace  as well as the
%	fact that we loose the '.' (extension)   makes this a less ideal
%	choice.  We could also consider base64 encoding of the name.

url_to_file(URL, File) :-
	atom_codes(URL, Codes),
	phrase(safe_file_name(Codes), FileCodes),
	atom_codes(File, FileCodes).

safe_file_name([]) -->
	[].
safe_file_name([H|T]) -->
	replace(H), !,
	safe_file_name(T).
safe_file_name([H|T]) -->
	[H],
	safe_file_name(T).

%%	replace(+Code)//
%
%	Replace a character  code  that  cannot   safely  be  put  in  a
%	filename. Should we use %XX?

replace(0'/)  --> "-".			% directory separator
replace(0'\\) --> "-".			% not allowed in Windows filename
replace(0':)  --> "-".			% idem
replace(0'?)  --> "-".			% idem
replace(0'*)  --> "-".			% idem


		 /*******************************
		 *	       MESSAGES		*
		 *******************************/

:- multifile prolog:message/3.

prolog:message(rdf(cache_created(Dir))) -->
	[ 'Created RDF cache directory ~w'-[Dir] ].