218 lines
6.3 KiB
Perl
218 lines
6.3 KiB
Perl
|
:- module(rdf_cache,
|
||
|
[ rdf_set_cache_options/1, % +Options
|
||
|
rdf_cache_file/3 % +URL, +RW, -File
|
||
|
]).
|
||
|
:- use_module(library(error)).
|
||
|
|
||
|
/** <module> Cache RDF triples
|
||
|
|
||
|
Triples may be cached to reduce load time as well as access to network
|
||
|
resources (e.g. HTTP). We use two caching locations: typically files may
|
||
|
be cached locally (i.e. in a .cache sub-directory of the file). All
|
||
|
objects can be cached in a global cache directory. The policy is
|
||
|
determined by rdf_set_cache_options/1.
|
||
|
*/
|
||
|
|
||
|
:- dynamic
    cache_option/1.             % cache_option(Option): currently active settings

%%  set_setfault_options is det.
%
%   Install the built-in default cache options: caching is enabled
%   and the local cache directory is named =|_cache|= on Windows and
%   =|.cache|= elsewhere.
%
%   A default is only installed when no option with the same name is
%   stored yet.  Calling this predicate more than once therefore does
%   not create duplicate cache_option/1 facts and does not add a
%   default next to a value that was set explicitly.
%
%   (The predicate name is a historic misspelling of "default"; it is
%   kept unchanged because the load directive below refers to it.)

set_setfault_options :-
    set_default_cache_option(enabled(true)),
    (   current_prolog_flag(windows, true)
    ->  set_default_cache_option(local_directory('_cache'))
    ;   set_default_cache_option(local_directory('.cache'))
    ).

%   set_default_cache_option(+Option) is det.
%
%   Store Option unless an option with the same name and arity is
%   already present in cache_option/1.

set_default_cache_option(Option) :-
    functor(Option, Name, Arity),
    functor(Generic, Name, Arity),
    (   cache_option(Generic)
    ->  true
    ;   assertz(cache_option(Option))
    ).

:- set_setfault_options.        % _only_ when loading!
|
||
|
|
||
|
%%  rdf_set_cache_options(+Options) is det.
%
%   Change the cache policy.  Options is either a single option or a
%   list of options.  Provided options are:
%
%     * enabled(Boolean)
%     If =true=, caching is enabled.
%
%     * local_directory(Name).
%     Plain name of local directory.  Default =|.cache|=
%     (=|_cache|= on Windows).
%
%     * create_local_directory(Bool)
%     If =true=, try to create local cache directories
%
%     * global_directory(Dir)
%     Writeable directory for storing cached parsed files.  The
%     name is converted to an absolute file name when it is set.
%
%     * create_global_directory(Bool)
%     If =true=, try to create the global cache directory.
%
%   Setting an option replaces any previously stored value for the
%   same option.
%
%   @error instantiation_error if Options is unbound.
%   @error type_error(compound, Option) if an option is not compound.
%   @error domain_error(cache_option, Option) if an option is not a
%          known option with exactly one argument.
%   @error type_error/2 (from must_be/2) if the value of an option
%          is not of the declared type.

rdf_set_cache_options(Options) :-
    var(Options),
    !,                          % would otherwise unify with [] and succeed
    instantiation_error(Options).
rdf_set_cache_options([]) :-
    !.
rdf_set_cache_options([H|T]) :-
    !,
    rdf_set_cache_options(H),
    rdf_set_cache_options(T).
rdf_set_cache_options(Opt) :-
    must_be(compound, Opt),
    (   Opt =.. [Name, Value],  % exactly one argument
        cache_option(Name, Type)
    ->  must_be(Type, Value)
    ;   domain_error(cache_option, Opt)
    ),
    % Expand before touching the database, so that an error or failure
    % while expanding leaves the previous setting in place.
    expand_option(Opt, EOpt),
    functor(Gen, Name, 1),
    retractall(cache_option(Gen)),
    assertz(cache_option(EOpt)).

%   cache_option(?Name, ?Type) is nondet.
%
%   Table of known options: Name is the option functor and Type is
%   the must_be/2 type that its single argument must satisfy.

cache_option(enabled,                 boolean).
cache_option(local_directory,         atom).
cache_option(create_local_directory,  boolean).
cache_option(global_directory,        atom).
cache_option(create_global_directory, boolean).

%   expand_option(+Option, -Expanded) is det.
%
%   Normalise Option before it is stored.  The global directory is
%   made absolute using absolute_file_name/2; all other options are
%   stored as given.

expand_option(global_directory(Local), global_directory(Global)) :-
    !,
    absolute_file_name(Local, Global).
expand_option(Opt, Opt).
|
||
|
|
||
|
|
||
|
%%  rdf_cache_file(+URL, +ReadWrite, -File) is semidet.
%
%   File is the cache file for URL.  If ReadWrite is =read=, File is
%   the name of an existing, readable cache file.  If ReadWrite is
%   =write=, File is a location where a cache file can be created or
%   overwritten.
%
%   For =|file://|= URLs the local cache directory next to the file is
%   tried first; the global cache directory (if one is configured) is
%   tried next.  Fails if caching is disabled, if ReadWrite is neither
%   =read= nor =write=, or if no usable location is found.

rdf_cache_file(_URL, _ReadWrite, _File) :-
    cache_option(enabled(false)),
    !,
    fail.
rdf_cache_file(URL, read, File) :-
    !,
    cache_file_candidate(read, URL, File),
    access_file(File, read),
    !.
rdf_cache_file(URL, write, File) :-
    !,
    cache_file_candidate(write, URL, File),
    access_file(File, write),
    !.

%   cache_file_candidate(+ReadWrite, +URL, -File) is nondet.
%
%   Enumerate candidate cache locations for URL on backtracking: the
%   local cache first (file URLs only), then the global cache.  The
%   caller verifies that File is actually accessible.

cache_file_candidate(read, URL, File) :-
    atom_concat('file://', Path, URL),
    cache_option(local_directory(Local)),
    file_directory_name(Path, Dir),
    local_cache_file(URL, LocalFile),
    atomic_list_concat([Dir, Local, LocalFile], /, File).
cache_file_candidate(read, URL, File) :-
    cache_option(global_directory(Dir)),
    url_cache_file(URL, Dir, trp, read, File).
cache_file_candidate(write, URL, File) :-
    atom_concat('file://', Path, URL),
    cache_option(local_directory(Local)),
    file_directory_name(Path, Dir),
    % Only create the local cache directory if the policy allows it;
    % otherwise it must already exist.
    (   cache_option(create_local_directory(true))
    ->  RWDir = write
    ;   RWDir = read
    ),
    ensure_dir(Dir, Local, RWDir, CacheDir),
    local_cache_file(URL, LocalFile),
    atomic_list_concat([CacheDir, LocalFile], /, File).
cache_file_candidate(write, URL, File) :-
    cache_option(global_directory(Dir)),
    ensure_global_cache(Dir),
    url_cache_file(URL, Dir, trp, write, File).
|
||
|
|
||
|
|
||
|
%   ensure_global_cache(+Dir) is semidet.
%
%   True if the global cache directory Dir exists.  If it does not
%   exist, it is created provided the option
%   create_global_directory(true) is set, and an informational
%   message is printed.  Fails if Dir is missing and creating it is
%   not enabled.

ensure_global_cache(Dir) :-
    (   exists_directory(Dir)
    ->  true
    ;   cache_option(create_global_directory(true)),
        make_directory(Dir),
        print_message(informational, rdf(cache_created(Dir)))
    ).
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* LOCAL CACHE *
|
||
|
*******************************/
|
||
|
|
||
|
%%  local_cache_file(+FileURL, -File) is det.
%
%   File is the plain name (no directory) of the cache file for
%   FileURL: the base name of the URL with the =|.trp|= extension
%   added.  As the URL is a file URL, its base name is assumed to be
%   a valid file name.  This also assumes the hosting OS can handle
%   multiple extensions (=|.x.y|=); these days that is even true on
%   Windows.

local_cache_file(URL, File) :-
    file_base_name(URL, BaseName),
    file_name_extension(BaseName, trp, File).
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* GLOBAL CACHE *
|
||
|
*******************************/
|
||
|
|
||
|
%%  url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
%
%   Determine the location of the cache file for URL below Dir.  The
%   file is placed two directory levels deep; the names of these
%   levels are two-character slices taken from the end of a
%   hexadecimal hash of URL.  The file name itself is derived from
%   URL using url_to_file/2.  If Ext is provided, the returned Path is
%   ensured to have the specified extension.
%
%   @param RW If =read=, no directories are created and the call
%             fails if the directories for URL do not exist.  If
%             =write=, missing directories are created.

url_cache_file(URL, Dir, Ext, RW, Path) :-
    term_hash(URL, RawHash),
    % 100000 is 0x186A0, so the hex text has at least five digits and
    % both two-character slices below exist (assuming term_hash/2
    % yields a non-negative integer).
    Hash is RawHash + 100000,
    format(string(HexDigits), '~16r', [Hash]),
    sub_atom(HexDigits, _, 2, 0, Level1),       % last two digits
    ensure_dir(Dir, Level1, RW, Level1Dir),
    sub_atom(HexDigits, _, 2, 2, Level2),       % the two digits before
    ensure_dir(Level1Dir, Level2, RW, Level2Dir),
    url_to_file(URL, BaseName),
    ensure_ext(BaseName, Ext, FileName),
    atomic_list_concat([Level2Dir, FileName], /, Path).
|
||
|
|
||
|
%   ensure_dir(+Parent, +Sub, +RW, -Dir) is semidet.
%
%   Dir is the directory Parent/Sub.  Succeeds if Dir exists.  If it
%   does not exist and RW is =write=, try to create it; the call
%   fails if the directory cannot be created.  For any other value of
%   RW a missing directory makes the call fail.
%
%   Only error(_,_) exceptions from make_directory/1 are suppressed,
%   so other exceptions (e.g. an abort or a timeout) still propagate.
%   Existence is re-checked after the attempt, which makes the call
%   succeed when the directory was created concurrently by someone
%   else between the existence test and make_directory/1.

ensure_dir(D0, Sub, RW, Dir) :-
    atomic_list_concat([D0, /, Sub], Dir),
    (   exists_directory(Dir)
    ->  true
    ;   RW == write
    ->  catch(make_directory(Dir), error(_, _), true),
        exists_directory(Dir)
    ).
|
||
|
|
||
|
%   ensure_ext(+File, +Ext, -FileExt) is det.
%
%   FileExt is File, guaranteed to carry the extension Ext.  File is
%   returned unchanged if Ext is the empty atom or if File already
%   has extension Ext; otherwise Ext is added.

ensure_ext(File, Ext, FileExt) :-
    (   Ext == ''
    ->  FileExt = File
    ;   file_name_extension(_, Ext, File)
    ->  FileExt = File
    ;   file_name_extension(File, Ext, FileExt)
    ).
|
||
|
|
||
|
%%  url_to_file(+URL, -File) is det.
%
%   Convert a URL into something that can be used as a file name,
%   i.e. avoiding the directory separator and characters that are not
%   allowed in Windows file names.  We simply replace each of these by
%   =|-|=.  We could also use www_form_encode/2, but confusion about
%   when to replace, as well as the fact that we lose the '.'
%   (extension), makes this a less ideal choice.  We could also
%   consider base64 encoding of the name.

url_to_file(URL, File) :-
    atom_codes(URL, Codes),
    phrase(safe_file_name(Codes), FileCodes),
    atom_codes(File, FileCodes).

%   safe_file_name(+Codes)// is det.
%
%   Emit Codes, where each code for which replace//1 has a rule is
%   substituted by the replacement and all other codes are copied
%   unchanged.

safe_file_name([]) -->
    [].
safe_file_name([H|T]) -->
    replace(H),
    !,
    safe_file_name(T).
safe_file_name([H|T]) -->
    [H],
    safe_file_name(T).

%%  replace(+Code)//
%
%   Replace a character code that cannot safely be put in a
%   filename.  Covers the directory separator and the characters
%   that Windows reserves in file names: =|\ : ? * " < > ||=.
%   Should we use %XX?

replace(0'/)  --> "-".          % directory separator
replace(0'\\) --> "-".          % not allowed in Windows filename
replace(0':)  --> "-".          % idem
replace(0'?)  --> "-".          % idem
replace(0'*)  --> "-".          % idem
replace(0'")  --> "-".          % idem
replace(0'<)  --> "-".          % idem
replace(0'>)  --> "-".          % idem
replace(0'|)  --> "-".          % idem
|
||
|
|
||
|
|
||
|
/*******************************
|
||
|
* MESSAGES *
|
||
|
*******************************/
|
||
|
|
||
|
:- multifile
    prolog:message/3.

%   prolog:message(+Term)// is semidet.
%
%   Hook into the message system that provides the text for the
%   message term rdf(cache_created(Dir)), which is printed when the
%   global cache directory has been created.

prolog:message(rdf(cache_created(Dir))) -->
    [ 'Created RDF cache directory ~w'-[Dir] ].
|