/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: wielemak@science.uva.nl WWW: http://www.swi-prolog.org Copyright (C): 2007, University of Amsterdam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA As a special exception, if you link this library with other files, compiled with a Free Software compiler, to produce an executable, this library does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. */ :- module(rdf_library, [ rdf_attach_library/1, % +Dir rdf_load_library/1, % +Ontology rdf_load_library/2, % +Ontology, +Options rdf_list_library/0, rdf_list_library/1, % +Ontology rdf_list_library/2, % +Ontology, +Options rdf_library_index/2 % ?Id, ?Facet ]). :- use_module(library('semweb/rdf_db')). :- use_module(library('semweb/rdf_turtle')). :- use_module(library(rdf)). :- use_module(library(lists)). :- use_module(library(option)). :- use_module(library(debug)). :- use_module(library(error)). :- use_module(library(pairs)). :- use_module(library(date)). :- use_module(library(uri)). :- use_module(library(http/http_open)). :- use_module(library(thread)). /** RDF Library Manager This module manages an ontology library. Such a library consists of a directory with manifest files named =manifest.rdf= or =manifest.ttl= (Turtle). The manifest files define ontologies appearing in the library as well as namespace mnemonics and dependencies. The typical usage scenario is == ?- rdf_attach_library('/some/directory'). ?- rdf_load_library(my_ontology). == @tbd Add caching info @author Jan Wielemaker */ :- rdf_register_ns(lib, 'http://www.swi-prolog.org/rdf/library/'). :- dynamic manifest/2, % Path, Time library_db/3. % Name, URL, Facets % Force compile-time namespace expansion :- rdf_meta edge(+, r,r,o). /******************************* * LOADING * *******************************/ %% rdf_load_library(+Id) is det. %% rdf_load_library(+Id, +Options) is det. % % Load ontologies from the library. A library must first be % attached using rdf_attach_library/1. Defined Options are: % % * import(Bool) % If =true= (default), also load ontologies that are % explicitely imported. % % * base_uri(URI) % BaseURI used for loading RDF. Local definitions in % ontologies overrule this option. % % * claimed_source(URL) % URL from which we claim to have loaded the data. % % * not_found(+Level) % The system does a pre-check for the existence of % all references RDF databases. If Level is =error= % it reports missing databases as an error and fails. % If =warning= it prints them, but continues. If % =silent=, no checks are preformed. Default is =error=. % % * concurrent(Threads) % Perform the load concurrently using N threads. If not % specified, the number is determined by % guess_concurrency/2. % % * load(+Bool) % If =false=, to all the preparation, but do not execute % the actual loading. See also rdf_list_library/2. rdf_load_library(Id) :- rdf_load_library(Id, []). rdf_load_library(Id, Options) :- load_commands(Id, Options, Pairs), pairs_values(Pairs, Commands), list_to_set(Commands, Cmds2), delete_virtual(Cmds2, Cmds3), find_conflicts(Cmds3), check_existence(Cmds3, Cmds, Options), ( option(concurrent(Threads), Options) -> true ; guess_concurrency(Cmds, Threads) ), length(Cmds, NSources), print_message(informational, rdf(loading(NSources, Threads))), ( option(load(true), Options, true) -> concurrent(Threads, Cmds, []) ; true ). delete_virtual([], []). delete_virtual([virtual(_)|T0], T) :- !, delete_virtual(T0, T). delete_virtual([H|T0], [H|T]) :- delete_virtual(T0, T). %% find_conflicts(+LoadCommands) is semidet. % % Find possibly conflicting options for loading the same source find_conflicts(Commands) :- sort(Commands, Cmds), conflicts(Cmds, Conflicts), report_conflics(Conflicts), Conflicts == []. conflicts([], []). conflicts([C1, C2|T0], [C1-C2|T]) :- conflict(C1, C2), !, conflicts([C2|T0], T). conflicts([_|T0], T) :- conflicts(T0, T). conflict(rdf_load(Src, Options1), rdf_load(Src, Options2)) :- sort(Options1, S1), sort(Options2, S2), S1 \== S2. report_conflics([]). report_conflics([C1-C2|T]) :- print_message(warning, rdf(load_conflict(C1,C2))), report_conflics(T). %% check_existence(+CommandsIn, -Commands, +Options) is det. % % Report existence errors. Fail if at least one source does not % exist. and the not_found level is not =silent=. % % @error existence_error(urls, ListOfUrls) check_existence(CommandsIn, Commands, Options) :- option(not_found(Level), Options, error), must_be(oneof([error,warning,silent]), Level), ( Level == silent -> true ; missing_urls(CommandsIn, Commands, Missing), ( Missing == [] -> true ; Level == warning -> report_missing(Missing, Level) ; existence_error(urls, Missing) ) ). missing_urls([], [], []). missing_urls([H|T0], Cmds, Missing) :- H = rdf_load(URL, _), ( catch(exists_url(URL), error(existence_error(_,_), _), fail) -> Cmds = [H|T], missing_urls(T0, T, Missing) ; Missing = [URL|T], missing_urls(T0, Cmds, T) ). report_missing([], _). report_missing([H|T], Level) :- print_message(Level, error(existence_error(url, H), _)), report_missing(T, Level). %% guess_concurrency(+Commands, -Threads) is det. % % How much concurrency to use? Set to the number of CPUs if all % input comes from files or 5 if network based loading is % demanded. guess_concurrency(Commands, Threads) :- count_uris(Commands, FileURLs, OtherURLs), ( FileURLs > 0 -> ( current_prolog_flag(cpu_count, CPUs) -> true ; CPUs = 1 ), FileThreads is min(FileURLs, CPUs) ; FileThreads = 0 ), ( OtherURLs > 0 -> OtherThreads is min(5, OtherURLs) ; OtherThreads = 0 ), Threads is FileThreads + OtherThreads. count_uris([], 0, 0). count_uris([rdf_load(URL, _)|T], F, NF) :- count_uris(T, F0, NF0), ( sub_atom(URL, 0, _, _, 'file://') -> F is F0 + 1, NF = NF0 ; NF is NF0 + 1, F = F0 ). %% load_commands(+Id, +Options, -Pairs:list(Level-Command)) is det. % % Commands are the RDF commands to execute for rdf_load_library/2. % Splitting in command collection and execution allows for % concurrent execution as well as forward checking of possible % problems. % % @tbd Fix poor style; avoid assert/retract. :- thread_local command/2. load_commands(Id, Options, Commands) :- retractall(command(_,_)), rdf_update_library_index, dry_load(Id, 1, Options), findall(Level-Cmd, retract(command(Level, Cmd)), Commands). dry_load(Id, Level, Options) :- ( library(Id, File, Facets) -> merge_base_uri(Facets, Options, Options1), merge_source(Facets, Options1, Options2), merge_blanks(Facets, Options2, Options3), ( \+ memberchk(virtual, Facets) -> load_options(Options3, File, RdfOptions), assert(command(Level, rdf_load(File, RdfOptions))) ; assert(command(Level, virtual(File))) ), ( option(import(true), Options, true) -> Level1 is Level + 1, forall(member(imports(_, Import), Facets), import(Import, Level1, Options3)) ; true ) ; existence_error(ontology, Id) ). merge_base_uri(Facets, Options0, Options) :- ( option(base_uri(Base), Facets) -> delete(Options0, base_uri(_), Options1), Options = [base_uri(Base)|Options1] ; Options = Options0 ). merge_source(Facets, Options0, Options) :- ( option(claimed_source(Base), Facets) -> delete(Options0, claimed_source(_), Options1), Options = [claimed_source(Base)|Options1] ; Options = Options0 ). merge_blanks(Facets, Options0, Options) :- ( option(blank_nodes(Share), Facets) -> delete(Options0, blank_nodes(_), Options1), Options = [blank_nodes(Share)|Options1] ; Options = Options0 ). load_options(Options, File, RDFOptions) :- findall(O, load_option(Options, File, O), RDFOptions). load_option(Options, File, db(Source)) :- option(claimed_source(Source0), Options), ( sub_atom(Source0, _, _, 0, /) -> file_base_name(File, Base), atom_concat(Source0, Base, Source) ; atom_concat(Source, #, Source0) -> true ). load_option(Options, File, base_uri(BaseURI)) :- option(base_uri(Base0), Options), sub_atom(/, _, _, 0, Base0), atom_concat(Base0, File, BaseURI). load_option(Options, _File, blank_nodes(Share)) :- option(blank_nodes(Share), Options). %% import(+URL, +Level, +Options) is det. import(Path, Level, Options) :- ( ( library(Id, Path, _) -> true ; manifest_for_path(Path, Manifest), catch(exists_url(Manifest), _, fail) -> process_manifest(Manifest), library(Id, Path, _) ) -> dry_load(Id, Level, Options) ; load_options(Options, Path, RdfOptions), assert(command(Level, rdf_load(Path, RdfOptions))) ). manifest_for_path(URL, Manifest) :- file_directory_name(URL, Parent), manifest_file(Base), rdf_extension(Ext), concat_atom([Parent, /, Base, '.', Ext], Manifest). %% rdf_list_library(+Id) is det. %% rdf_list_library(+Id, +Options) is det. % % Print library dependency tree to the terminal. Options include % options for rdf_load_library/2 and % % * show_source(+Boolean) % If =true= (default), show location we are loading % % * show_graph(+Boolean) % If =true= (default =false=), show name of graph % % * show_virtual(+Boolean) % If =false= (default =true=), do not show virtual % repositories. % % * indent(Atom) % Atom repeated for indentation levels rdf_list_library(Id) :- rdf_list_library(Id, []). rdf_list_library(Id, Options) :- load_commands(Id, Options, Commands), maplist(print_load(Options), Commands). print_load(Options, _Level-virtual(_)) :- option(show_virtual(false), Options), !. print_load(Options, Level-Command) :- option(indent(Indent), Options, '. '), forall(between(2, Level, _), format(Indent)), print_command(Command, Options), format('~N'). print_command(virtual(URL), _Options) :- format('<~w>', [URL]). print_command(rdf_load(URL), Options) :- print_command(rdf_load(URL, []), Options). print_command(rdf_load(URL, RDFOptions), Options) :- ( option(show_source(true), Options, true) -> format('~w', [URL]), ( option(blank_nodes(noshare), RDFOptions) -> format(' ') ; true ), ( exists_url(URL) -> true ; format(' [NOT FOUND]') ) ; true ), ( option(show_graph(true), Options, false), option(db(Base), RDFOptions) -> format('~N\tSource: ~w', [Base]) ; true ). exists_url(URL) :- rdf_db:rdf_input(URL, Source, _BaseURI), existing_url_source(Source). existing_url_source(file(Path)) :- !, access_file(Path, read). existing_url_source(url(http, URL)) :- !, catch(http_open(URL, Stream, [ method(head) ]), _, fail), close(Stream). %% rdf_list_library % % Prints known RDF library identifiers to current output. rdf_list_library :- rdf_update_library_index, ( rdf_library_index(Id, title(Title)), format('~w ~t~20|~w', [Id, Title]), ( rdf_library_index(Id, version(Version)) -> format(' (version ~w)', [Version]) ; true ), nl, fail ; true ). %% rdf_library_index(?Id, ?Facet) is nondet. % % Query the content of the library. Defined facets are: % % * source(URL) % Location from which to load the ontology % % * title(Atom) % Title used for the ontology % % * comment(Atom) % Additional comments for the ontology % % * version(Atom) % Version information on the ontology % % * imports(Type, URL) % URLs needed by this ontology. May succeed multiple % times. Type is one of =ontology=, =schema= or =instances=. % % * base_uri(BaseURI) % Base URI to use when loading documents. If BaseURI % ends in =|/|=, the actual filename is attached. % % * claimed_source(Source) % URL from which we claim to have loaded the RDF. If % Source ends in =|/|=, the actual filename is % attached. % % * blank_nodes(Share) % Defines how equivalent blank nodes are handled, where % Share is one of =share= or =noshare=. Default is to % share. % % * provides_ns(URL) % Ontology provides definitions in the namespace URL. % The formal definition of this is troublesome, but in % practice it means the ontology has triples whose % subjects are in the given namespace. % % * uses_ns(URL) % The ontology depends on the given namespace. Normally % means it contains triples that have predicates or % objects in the given namespace. % % * manifest(Path) % Manifest file this ontology is defined in % % * virtual % Entry is virtual (cannot be loaded) rdf_library_index(Id, Facet) :- library(Id, Path, Facets), ( Facet = source(Path) ; member(Facet, Facets) ). /******************************* * MANIFEST PROCESSING * *******************************/ %% rdf_attach_library(+Source) % % Attach manifest from Source. Source is one of % % * URL % Load single manifest from this URL % * File % Load single manifest from this file % * Directory % Scan all subdirectories and load all =|Manifest.ttl|= or % =|Manifest.rdf|= found. % % Encountered namespaces are registered using rdf_register_ns/2. % Encountered ontologies are added to the index. If a manifest was % already loaded it will be reloaded if the modification time has % changed. rdf_attach_library(URL) :- atom(URL), uri_is_global(URL), \+ is_absolute_file_name(URL), !, % avoid interpreting C: as a schema process_manifest(URL). rdf_attach_library(File) :- absolute_file_name(File, Path, [ extensions([rdf,ttl]), access(read), file_errors(fail) ]), !, process_manifest(Path). rdf_attach_library(Dir) :- absolute_file_name(Dir, Path, [ file_type(directory), access(read) ]), attach_dir(Path, []). %% rdf_update_library_index % % Reload all Manifest files. rdf_update_library_index :- forall(manifest(Location, _Time), process_manifest(Location)). attach_dir(Path, Visited) :- memberchk(Path, Visited), !. attach_dir(Path, Visited) :- atom_concat(Path, '/*', Pattern), expand_file_name(Pattern, Members), ( member(Manifest, Members), is_manifest_file(Manifest) -> process_manifest(Manifest) ; print_message(silent, rdf(no_manifest(Path))) ), ( member(Dir, Members), exists_directory(Dir), file_base_name(Dir, Base), \+ hidden_base(Base), attach_dir(Dir, [Path|Visited]), fail ; true ). hidden_base('CVS'). hidden_base('cvs'). % Windows %% process_manifest(+Location) is det. % % Process a manifest file, registering encountered namespaces and % creating clauses for library/3. No op if manifest was loaded and % not changed. Removes old data if the manifest was changed. % % @param Location is either a path name or a URL. process_manifest(Source) :- ( uri_file_name(Source, Manifest0) -> absolute_file_name(Manifest0, Manifest) ; absolute_file_name(Source, Manifest) ), % Manifest is a canonical filename source_time(Manifest, MT), ( manifest(Manifest, Time), ( MT =< Time -> ! ; retractall(manifest(Manifest, Time)), library_db(Id, URL, Facets), memberchk(manifest(Manifest), Facets), retractall(library_db(Id, URL, Facets)), fail ) ; read_triples(Manifest, Triples), process_triples(Manifest, Triples), print_message(informational, rdf(manifest(loaded, Manifest))), assert(manifest(Manifest, MT)) ). process_triples(Manifest, Triples) :- findall(ns(Mnemonic, NameSpace), extract_namespace(Triples, Mnemonic, NameSpace), NameSpaces), findall(Ontology, extract_ontology(Triples, Ontology), Ontologies), maplist(define_namespace, NameSpaces), maplist(assert_ontology(Manifest), Ontologies). %% extract_namespace(+Triples, -Mnemonic, -NameSpace) % % True if Mnemonic is an abbreviation of NameSpace. extract_namespace(Triples, Mnemonic, Namespace) :- edge(Triples, Decl, lib:mnemonic, literal(Mnemonic)), edge(Triples, Decl, lib:namespace, Namespace). %% extract_ontology(+Triples, -Ontology) is nondet. % % Extract definition of an ontology extract_ontology(Triples, library(Name, URL, Options)) :- edge(Triples, URL, rdf:type, Type), ( ontology_type(Type) -> file_base_name(URL, BaseName), file_name_extension(Name, _, BaseName), findall(Facet, facet(Triples, URL, Facet), Options) ). ontology_type(X) :- ( rdf_equal(X, lib:'Ontology') ; rdf_equal(X, lib:'Schema') ; rdf_equal(X, lib:'Instances') ). %% facet(+Triples, +File, -Facet) is nondet. % % Enumerate facets about File from Triples. Facets are described % with rdf_library_index/2. facet(Triples, File, title(Title)) :- edge(Triples, File, dc:title, literal(Title)). :- if(rdf_current_ns(dcterms, _)). facet(Triples, File, title(Title)) :- edge(Triples, File, dcterms:title, literal(Title)). :- endif. facet(Triples, File, version(Version)) :- edge(Triples, File, owl:versionInfo, literal(Version)). facet(Triples, File, comment(Comment)) :- edge(Triples, File, rdfs:comment, literal(Comment)). facet(Triples, File, base_uri(BaseURI)) :- edge(Triples, File, lib:baseURI, BaseURI). facet(Triples, File, claimed_source(Source)) :- edge(Triples, File, lib:source, Source). facet(Triples, File, blank_nodes(Mode)) :- edge(Triples, File, lib:blankNodes, literal(Mode)), must_be(oneof([share,noshare]), Mode). facet(Triples, File, imports(ontology, Path)) :- edge(Triples, File, owl:imports, Path). facet(Triples, File, imports(schema, Path)) :- edge(Triples, File, lib:schema, Path). facet(Triples, File, imports(instances, Path)) :- edge(Triples, File, lib:instances, Path). facet(Triples, File, provides_ns(NS)) :- edge(Triples, File, lib:providesNamespace, NSDecl), edge(Triples, NSDecl, lib:namespace, NS). facet(Triples, File, uses_ns(NS)) :- edge(Triples, File, lib:usesNamespace, NSDecl), edge(Triples, NSDecl, lib:namespace, NS). facet(Triples, File, virtual) :- edge(Triples, File, rdf:type, lib:'Virtual'). %% edge(+Triples, ?S, ?P, ?O) is nondet. % % Like rdf/3 over a list of Triples. edge(Triples, S, P, O) :- member(rdf(S,P,O), Triples). %% source_time(+Source, -Modified) is semidet. % % Modified is the last modification time of Source. % % @error existence_error(Type, Source). source_time(URL, Modified) :- sub_atom(URL, 0, _, _, 'http://'), !, http_open(URL, Stream, [ header(last_modified, Date), method(head) ]), close(Stream), Date \== '', parse_time(Date, Modified). source_time(URL, Modified) :- uri_file_name(URL, File), !, time_file(File, Modified). source_time(File, Modified) :- time_file(File, Modified). %% read_triples(+File, -Triples) is det. % % Read RDF/XML or Turtle file into a list of triples. read_triples(File, Triples) :- file_name_extension(_, rdf, File), !, load_rdf(File, Triples). read_triples(File, Triples) :- file_name_extension(_, ttl, File), !, rdf_load_turtle(File, Triples, []). %% is_manifest_file(+Path) % % True if Path is the name of a manifest file. is_manifest_file(Path) :- file_base_name(Path, File), downcase_atom(File, Lwr), file_name_extension(Base, Ext, Lwr), manifest_file(Base), rdf_extension(Ext), !. manifest_file('Manifest'). manifest_file('manifest'). rdf_extension(ttl). rdf_extension(rdf). %% assert_ontology(+Manifest, +Term:library(Name, File, Facets)) is det. % % Add ontology to our library. % % @tbd Proper behaviour of re-definition? assert_ontology(Manifest, Term) :- Term = library(Name, URL, Facets), ( library(Name, _URL2, Facets2) -> memberchk(manifest(Manifest2), Facets2), print_message(warning, rdf(redefined(Manifest, Name, Manifest2))) ; true ), assert(library_db(Name, URL, [ manifest(Manifest) | Facets ])). %% library(?Id, ?URL, ?Facets) % % Access DB for library information. library(Id, URL, Facets) :- nonvar(URL), normalize_url(URL, CanonicalURL), library_db(Id, CanonicalURL, Facets). library(Id, URL, Facets) :- library_db(Id, URL, Facets). %% normalize_url(+URL, -Normalized) % % Like uri_normalized/2, but we also need (platform dependent) % filename canonization. normalize_url(URL, CanonicalURL) :- uri_file_name(URL, File), !, absolute_file_name(File, CanFile), uri_file_name(CanonicalURL, CanFile). normalize_url(URL, CanonicalURL) :- uri_normalized(URL, CanonicalURL). %% define_namespace(NS:ns(Mnemonic, Namespace)) is det. % % Add namespace declaration for Mnemonic. define_namespace(ns(Mnemonic, Namespace)) :- debug(rdf_library, 'Adding NS ~w = ~q', [Mnemonic, Namespace]), rdf_register_ns(Mnemonic, Namespace, [ ]). /******************************* * MESSAGES * *******************************/ :- multifile prolog:message/3. prolog:message(rdf(no_manifest(Path))) --> [ 'Directory ~w has no Manifest.{ttl,rdf} file'-[Path] ]. prolog:message(rdf(redefined(Manifest, Name, Manifest2))) --> [ '~w: Ontology ~w already defined in ~w'- [Manifest, Name, Manifest2] ]. prolog:message(rdf(manifest(loaded, Manifest))) --> [ 'Loaded RDF manifest ~w'-[Manifest] ]. prolog:message(rdf(load_conflict(C1, C2))) --> [ 'Conflicting loads: ~p <-> ~p'-[C1, C2] ]. prolog:message(rdf(loading(Files, Threads))) --> [ 'Loading ~D files using ~D threads ...'-[Files, Threads] ].