703 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			703 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| 
								 | 
							
								/*  $Id$
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Part of SWI-Prolog
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    Author:        Jan Wielemaker
							 | 
						||
| 
								 | 
							
								    E-mail:        wielemak@science.uva.nl
							 | 
						||
| 
								 | 
							
								    WWW:           http://www.swi-prolog.org
							 | 
						||
| 
								 | 
							
								    Copyright (C): 2006, University of Amsterdam
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This program is free software; you can redistribute it and/or
							 | 
						||
| 
								 | 
							
								    modify it under the terms of the GNU General Public License
							 | 
						||
| 
								 | 
							
								    as published by the Free Software Foundation; either version 2
							 | 
						||
| 
								 | 
							
								    of the License, or (at your option) any later version.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    This program is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								    but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						||
| 
								 | 
							
								    GNU General Public License for more details.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    You should have received a copy of the GNU General Public
							 | 
						||
| 
								 | 
							
								    License along with this library; if not, write to the Free Software
							 | 
						||
| 
								 | 
							
								    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    As a special exception, if you link this library with other files,
							 | 
						||
| 
								 | 
							
								    compiled with a Free Software compiler, to produce an executable, this
							 | 
						||
| 
								 | 
							
								    library does not by itself cause the resulting executable to be covered
							 | 
						||
| 
								 | 
							
								    by the GNU General Public License. This exception does not however
							 | 
						||
| 
								 | 
							
								    invalidate any other reasons why the executable file might be covered by
							 | 
						||
| 
								 | 
							
								    the GNU General Public License.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- module(rdf_litindex,
							 | 
						||
| 
								 | 
							
									  [ rdf_set_literal_index_option/1,	% +Options
							 | 
						||
| 
								 | 
							
									    rdf_tokenize_literal/2,		% +Literal, -Tokens
							 | 
						||
| 
								 | 
							
									    rdf_find_literals/2,		% +Spec, -ListOfLiterals
							 | 
						||
| 
								 | 
							
									    rdf_token_expansions/2		% +Spec, -Expansions
							 | 
						||
| 
								 | 
							
									  ]).
							 | 
						||
| 
								 | 
							
								:- use_module(rdf_db).
							 | 
						||
| 
								 | 
							
								:- use_module(library(debug)).
							 | 
						||
| 
								 | 
							
								:- use_module(library(lists)).
							 | 
						||
| 
								 | 
							
								:- use_module(library(error)).
							 | 
						||
| 
								 | 
							
								:- use_module(library(porter_stem)).
							 | 
						||
| 
								 | 
							
								:- use_module(library(double_metaphone)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/** <module> Search literals
							 | 
						||
| 
								 | 
							
								This module finds literals of the RDF database based on stemming and
							 | 
						||
| 
								 | 
							
								being flexible to ordering of tokens.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:- dynamic
							 | 
						||
| 
								 | 
							
									literal_map/2,			% Type, -Map
							 | 
						||
| 
								 | 
							
									new_token/1,			% Hook
							 | 
						||
| 
								 | 
							
									setting/1.
							 | 
						||
| 
								 | 
							
								:- volatile
							 | 
						||
| 
								 | 
							
									literal_map/2.
							 | 
						||
| 
								 | 
							
								:- multifile
							 | 
						||
| 
								 | 
							
									tokenization/2,			% +Literal, -Tokens
							 | 
						||
| 
								 | 
							
									exclude_from_index/2.		% +Which, +Token
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% Default values of the package options.  Each option is stored as
								% one setting/1 fact and replaced as a whole by set_option/1.
								setting(verbose(true)).			% progress messages enabled
								setting(index_threads(1)).		% threads used to build the initial index
								setting(index(default)).		% update mode is chosen when the index is created
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_set_literal_index_option(+Options)
								%
								%	Set options for the literal package.  Options is either a list
								%	of option terms or a single option term.  Each option is
								%	validated and stored by set_option/1.  Currently defined
								%	options:
								%
								%		* verbose(Bool)
								%		If =true=, print progress messages while building the
								%		index tables.
								%
								%		* index_threads(+Count)
								%		Number of threads to use for initial indexing of
								%		literals.
								%
								%		* index(+How)
								%		How to deal with indexing new literals.  How is one of
								%		=self= (execute in the same thread), thread(N) (execute
								%		in N concurrent threads) or =default= (depends on number
								%		of cores).
								%
								%	@error	instantiation_error if Options (or the tail of an
								%		option list) is unbound.  Without this guard an
								%		unbound argument unified with [] and the call
								%		silently succeeded without setting anything.

								rdf_set_literal_index_option(Options) :-
									var(Options), !,
									instantiation_error(Options).
								rdf_set_literal_index_option([]) :- !.
								rdf_set_literal_index_option([H|T]) :- !,
									set_option(H),
									rdf_set_literal_index_option(T).
								rdf_set_literal_index_option(Option) :-
									set_option(Option).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% set_option(+Option)
								%
								% Validate Option with check_option/1 and then make it the only
								% setting/1 fact with the same name and arity.

								set_option(Option) :-
									check_option(Option),
									functor(Option, Name, Arity),
									functor(Template, Name, Arity),	% most general term Name/Arity
									retractall(setting(Template)),	% drop any previous value
									assertz(setting(Option)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% check_option(+Option)
								%
								% Succeed if Option is a valid option term.  Otherwise raise an
								% instantiation error (unbound Option), the error produced by
								% must_be/2 for a bad argument, or
								% domain_error(literal_option, Option) for an unknown option.

								check_option(Option) :-
									(   var(Option)
									->  instantiation_error(Option)
									;   Option = verbose(Bool)
									->  must_be(boolean, Bool)
									;   Option = index_threads(Count)
									->  must_be(nonneg, Count)
									;   Option = index(How)
									->  must_be(oneof([default,thread(_),self]), How)
									;   domain_error(literal_option, Option)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	      QUERY		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_find_literals(+Spec, -Literals)
								%
								%	Literals is the sorted, duplicate-free list of literals in the
								%	RDF database that match Spec.  Spec is defined as:
								%
								%	==
								%	Spec ::= and(Spec,Spec)
								%	Spec ::= or(Spec,Spec)
								%	Spec ::= not(Spec)
								%	Spec ::= sounds(Like)
								%	Spec ::= stem(Like)
								%	Spec ::= prefix(Prefix)
								%	Spec ::= case(String)		% passed to rdf_keys_in_literal_map/3
								%	Spec ::= between(Low, High)	% Numerical between
								%	Spec ::= ge(Low)		% Numerical greater-equal
								%	Spec ::= le(High)		% Numerical less-equal
								%	Spec ::= Token
								%	==
								%
								%	The fuzzy forms (sounds, stem, prefix, case, between, ge, le)
								%	are first expanded into a disjunction of the matching tokens.
								%	The result is compiled into a disjunction of conjunctions on
								%	elementary tokens by compile_spec/2.  Every conjunction is then
								%	looked up in the token index and the per-conjunction results
								%	are merged with flatten/2 followed by sort/2.

								rdf_find_literals(Spec, Literals) :-
									compile_spec(Spec, Disjunction),
									token_index(TokenMap),
									lookup(Disjunction, TokenMap, _Conditions, PerConjunction),
									flatten(PerConjunction, Unsorted),
									sort(Unsorted, Literals).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_token_expansions(+Spec, -Expansions)
								%
								%	Determine which expansions of a token contribute to finding
								%	literals.  For the plain specs prefix(Prefix), sounds(Like) and
								%	stem(Like) the first solution is a one-element list holding the
								%	spec extended with the list of tokens it expands to.  For any
								%	Spec (including these three, on backtracking) Spec is compiled
								%	and looked up, and the tagged tokens that took part in a
								%	non-empty conjunction are grouped per tag by join_expansions/2.
								%
								%	The sounds/stem clauses look the word up through its
								%	double-metaphone resp. Porter-stem key, exactly as
								%	expand_fuzzy/2 does for the same maps.  The previous code
								%	passed the raw word as key.
								%	NOTE(review): this relies on the metaphone/porter maps being
								%	keyed as expand_fuzzy/2 assumes; their definitions are outside
								%	this part of the file.

								rdf_token_expansions(prefix(Prefix), [prefix(Prefix, Tokens)]) :-
									token_index(Map),
									rdf_keys_in_literal_map(Map, prefix(Prefix), Tokens).
								rdf_token_expansions(sounds(Like), [sounds(Like, Tokens)]) :-
									metaphone_index(Map),
									double_metaphone(Like, Key),
									rdf_find_literal_map(Map, [Key], Tokens).
								rdf_token_expansions(stem(Like), [stem(Like, Tokens)]) :-
									porter_index(Map),
									porter_stem(Like, Key),
									rdf_find_literal_map(Map, [Key], Tokens).
								rdf_token_expansions(Spec, Expansions) :-
									compile_spec(Spec, DNF),
									token_index(Map),
									lookup(DNF, Map, SCS, _),
									flatten(SCS, CS),
									sort(CS, Expansions0),		% equal tags become adjacent
									join_expansions(Expansions0, Expansions).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% join_expansions(+Tagged, -Joined)
								%
								% Tagged is a list of tagged tokens such as prefix(P, Token).
								% Consecutive elements that carry the same tag are merged into a
								% single term consisting of the tag extended with one extra
								% argument: the list of their tokens, e.g. prefix(P, [T1,T2]).

								join_expansions([], []).
								join_expansions([First|Others], [Joined|JoinedRest]) :-
									untag(First, Tag, Value),
									join_expansions_by_tag(Others, Tag, Remaining, MoreValues),
									Tag =.. TagParts,
									append(TagParts, [[Value|MoreValues]], JoinedParts),
									Joined =.. JoinedParts,
									join_expansions(Remaining, JoinedRest).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% join_expansions_by_tag(+List, +Tag, -Rest, -Values)
								%
								% Values are the tokens of the leading elements of List that
								% untag/3 relates to Tag.  Rest is what remains of List from the
								% first element that does not carry Tag.

								join_expansions_by_tag(List, Tag, Rest, Values) :-
									(   List = [Head|Tail],
									    untag(Head, Tag, Value)
									->  Values = [Value|MoreValues],
									    join_expansions_by_tag(Tail, Tag, Rest, MoreValues)
									;   Rest = List,
									    Values = []
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% lookup(+DNF, +Map, -Conditions, -Literals)
								%
								% Walk the disjunction DNF.  @(false) contributes nothing; for
								% or(A,B) both sides are looked up; anything else is treated as
								% a single conjunction and handed to lookup1/4.  Conditions and
								% Literals are (nested) lists with one entry per conjunction;
								% callers flatten them.

								lookup(DNF, Map, Conditions, Literals) :-
									(   DNF = @(false)
									->  Conditions = [],
									    Literals = []
									;   DNF = or(Left, Right)
									->  Conditions = [LeftConds|RightConds],
									    Literals = [LeftLits|RightLits],
									    lookup(Left, Map, LeftConds, LeftLits),
									    lookup(Right, Map, RightConds, RightLits)
									;   Conditions = [Cond],
									    Literals = [Lits],
									    lookup1(DNF, Map, Cond, Lits)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% lookup1(+Conj, +Map, -Cond, -Literals)
								%
								% Look up a single conjunction.  Conj is turned into a list of
								% keys by conj_to_list//1 and passed to rdf_find_literal_map/3.
								% If that yields literals, Cond is the list of tagged tokens of
								% Conj (see conj_to_cond//1); otherwise Cond is [].
								%
								% If Conj cannot be converted (conj_to_list//1 fails, e.g. on
								% @(false)), both Cond and Literals are [].  Cond used to be left
								% unbound in that case, which put a free variable into the list
								% that rdf_token_expansions/2 flattens, sorts and untags.

								lookup1(Conj, Map, Cond, Literals) :-
									phrase(conj_to_list(Conj), List), !,
									rdf_find_literal_map(Map, List, Literals),
									(   Literals \== []
									->  phrase(conj_to_cond(Conj), Cond)
									;   Cond = []
									).
								lookup1(_, _, [], []).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% conj_to_list(+Conj)//
								%
								% Emit the elements of the (nested) and/2 term Conj as a flat
								% list.  A tagged token is replaced by the plain token it carries
								% (untag/2); any other element is emitted unchanged.  Fails if
								% Conj contains @(false).

								conj_to_list(and(Left, Right)) --> !,
									conj_to_list(Left),
									conj_to_list(Right).
								conj_to_list(@(false)) --> !,
									{ fail }.
								conj_to_list(Element) -->
									{ (   untag(Element, Token)
									  ->  true
									  ;   Token = Element
									  )
									},
									[Token].
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% conj_to_cond(+Conj)//
								%
								% Emit the tagged tokens (those recognised by untag/2) that occur
								% in the (nested) and/2 term Conj, in order.  Elements that are
								% not tagged produce nothing.

								conj_to_cond(and(Left, Right)) --> !,
									conj_to_cond(Left),
									conj_to_cond(Right).
								conj_to_cond(Element) -->
									(   { untag(Element, _) }
									->  [Element]
									;   []
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	compile_spec(+Spec, -Compiled)
								%
								%	Compile a search specification (see rdf_find_literals/2) into
								%	disjunctive normal form.  This is a pipeline of three steps:
								%	expand the fuzzy sub-specifications into disjunctions of
								%	tokens, push negations inwards and finally distribute and/2
								%	over or/2.

								compile_spec(Spec, Compiled) :-
									expand_fuzzy(Spec, Expanded),
									nnf(Expanded, Negations_inside),
									dnf(Negations_inside, Compiled).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% expand_fuzzy(+Spec, -Expanded)
								%
								% Replace every fuzzy sub-specification of Spec by a disjunction
								% of tagged tokens (see list_to_or/3), or by @(false) if no token
								% matches:
								%
								%   - sounds(Like): tokens found in the metaphone index under
								%     the double-metaphone key of Like.
								%   - stem(Like): tokens found in the porter index under the
								%     Porter stem of Like.
								%   - prefix/1, case/1, between/2, le/1, ge/1: keys of the token
								%     index selected by rdf_keys_in_literal_map/3.
								%
								% or/2 and and/2 are expanded recursively and simplified with
								% respect to @(false); not/1 is expanded recursively; atomic
								% tokens are passed unchanged.
								%
								% @error instantiation_error if (a sub-term of) Spec is unbound.
								% @error type_error(boolean_expression, Term) for anything else.
								%	 The arguments used to be swapped with respect to the ISO
								%	 convention type_error(Type, Culprit).

								expand_fuzzy(Var, _) :-
									var(Var), !,
									instantiation_error(Var).
								expand_fuzzy(sounds(Like), Or) :- !,
									metaphone_index(Map),
									double_metaphone(Like, Key),
									rdf_find_literal_map(Map, [Key], Tokens),
									list_to_or(Tokens, sounds(Like), Or).
								expand_fuzzy(stem(Like), Or) :- !,
									porter_index(Map),
									porter_stem(Like, Key),
									rdf_find_literal_map(Map, [Key], Tokens),
									list_to_or(Tokens, stem(Like), Or).
								expand_fuzzy(or(A0, B0), E) :- !,
									expand_fuzzy(A0, A),
									expand_fuzzy(B0, B),
									simplify(or(A,B), E).
								expand_fuzzy(and(A0, B0), E) :- !,
									expand_fuzzy(A0, A),
									expand_fuzzy(B0, B),
									simplify(and(A,B), E).
								expand_fuzzy(not(A0), not(A)) :- !,
									expand_fuzzy(A0, A).
								expand_fuzzy(Spec, Or) :-
									token_key_spec(Spec), !,
									token_index(Map),
									rdf_keys_in_literal_map(Map, Spec, Tokens),
									list_to_or(Tokens, Spec, Or).
								expand_fuzzy(Token, Token) :-
									atomic(Token), !.
								expand_fuzzy(Token, _) :-
									type_error(boolean_expression, Token).

								% token_key_spec(+Spec)
								%
								% True if Spec is resolved by handing it unchanged to
								% rdf_keys_in_literal_map/3 on the token index.

								token_key_spec(prefix(_)).
								token_key_spec(case(_)).
								token_key_spec(between(_, _)).
								token_key_spec(le(_)).
								token_key_spec(ge(_)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% simplify(+Expr0, -Expr)
								%
								% Apply the first applicable rewrite rule of simple/2 to the top
								% of Expr0.  If no rule applies, Expr is Expr0.

								simplify(Expr0, Expr) :-
									(   simple(Expr0, Expr)
									->  true
									;   Expr = Expr0
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% simple(+Expr0, -Expr)
								%
								% Rewrite rules that eliminate the constant @(false): a
								% conjunction with @(false) is @(false); a disjunction with
								% @(false) is its other operand.

								simple(and(@(false), _Right), @(false)).
								simple(and(_Left, @(false)), @(false)).
								simple(or(@(false), Right),  Right).
								simple(or(Left, @(false)),   Left).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% list_to_or(+Tokens, +How, -Or)
								%
								% Turn the list Tokens into a right-nested or/2 term in which
								% every token is tagged with How using tag/3.  The empty list
								% yields @(false); a one-element list yields the tagged token
								% itself.

								list_to_or([], _, @(false)) :- !.
								list_to_or([Token|Rest], How, Or) :-
									(   Rest = []
									->  tag(How, Token, Or)
									;   Or = or(Tagged, RestOr),
									    tag(How, Token, Tagged),
									    list_to_or(Rest, How, RestOr)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% tag(+How, +Token, -Tagged)
								%
								% Tagged is the fuzzy specification How extended with one extra
								% last argument, the Token it was expanded to.

								tag(sounds(Like),	Token, sounds(Like, Token)).
								tag(stem(Like),		Token, stem(Like, Token)).
								tag(prefix(Prefix),	Token, prefix(Prefix, Token)).
								tag(case(String),	Token, case(String, Token)).
								tag(between(Low, High),	Token, between(Low, High, Token)).
								tag(ge(Low),		Token, ge(Low, Token)).
								tag(le(High),		Token, le(High, Token)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% untag(+Tagged, -Token)
								%
								% Token is the last argument of a tagged token as created by
								% tag/3.  Fails for terms that are not tagged tokens.

								untag(sounds(_Like, Token),	 Token).
								untag(stem(_Like, Token),	 Token).
								untag(prefix(_Prefix, Token),	 Token).
								untag(case(_String, Token),	 Token).
								untag(between(_Low, _High, Token), Token).
								untag(le(_High, Token),		 Token).
								untag(ge(_Low, Token),		 Token).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% untag(+Tagged, -How, -Token)
								%
								% Split a tagged token as created by tag/3 into the original
								% fuzzy specification How and the Token it was expanded to.

								untag(sounds(Like, Token),	  sounds(Like),	      Token).
								untag(stem(Like, Token),	  stem(Like),	      Token).
								untag(prefix(Prefix, Token),	  prefix(Prefix),     Token).
								untag(case(String, Token),	  case(String),	      Token).
								untag(between(Low, High, Token),  between(Low, High), Token).
								untag(ge(Low, Token),		  ge(Low),	      Token).
								untag(le(High, Token),		  le(High),	      Token).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	nnf(+Formula, -NNF)
								%
								%	Rewrite Formula to Negation Normal Form, meaning negations only
								%	appear around literals.  Double negations are removed and
								%	negated and/2 and or/2 terms are rewritten using De Morgan's
								%	laws.
								%
								%	Positive and/2 and or/2 terms are traversed as well.  They
								%	used to be returned unchanged, so a negation nested below a
								%	positive connective, e.g. and(not(not(a)), b), was never
								%	normalised.

								nnf(not(not(A0)), A) :- !,
									nnf(A0, A).
								nnf(not(and(A0,B0)), or(A,B)) :- !,
									nnf(not(A0), A),
									nnf(not(B0), B).
								nnf(not(or(A0,B0)), and(A,B)) :- !,
									nnf(not(A0), A),
									nnf(not(B0), B).
								nnf(and(A0,B0), and(A,B)) :- !,
									nnf(A0, A),
									nnf(B0, B).
								nnf(or(A0,B0), or(A,B)) :- !,
									nnf(A0, A),
									nnf(B0, B).
								nnf(A, A).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	dnf(+NNF, -DNF)
								%
								%	Convert a formula in negation normal form into Disjunctive
								%	Normal Form (DNF).  Disjunctions are converted operand by
								%	operand; for a conjunction both operands are converted first
								%	and dnf1/2 then distributes the conjunction over any
								%	disjunction that results.  Other terms are already in DNF.

								dnf(Formula, DNF) :-
									(   Formula = or(Left0, Right0)
									->  DNF = or(Left, Right),
									    dnf(Left0, Left),
									    dnf(Right0, Right)
									;   Formula = and(Left0, Right0)
									->  dnf(Left0, Left),
									    dnf(Right0, Right),
									    dnf1(and(Left, Right), DNF)
									;   DNF = Formula
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% dnf1(+Conj, -DNF)
								%
								% Distribute a conjunction whose operands are already in DNF over
								% a disjunction in either operand:
								%
								%	and(X, or(L,R)) and and(or(L,R), X)  ==>
								%	or(and(X,L), and(X,R))
								%
								% and repeat this on the new conjunctions.  The right operand is
								% inspected first.  Terms without such a disjunction are returned
								% unchanged.

								dnf1(Formula, DNF) :-
									(   Formula = and(Other, or(Left, Right))
									->  DNF = or(WithLeft, WithRight),
									    dnf1(and(Other, Left), WithLeft),
									    dnf1(and(Other, Right), WithRight)
									;   Formula = and(or(Left, Right), Other)
									->  DNF = or(WithLeft, WithRight),
									    dnf1(and(Other, Left), WithLeft),
									    dnf1(and(Other, Right), WithRight)
									;   DNF = Formula
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	    TOKEN INDEX		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	token_index(-Map)
								%
								%	Map is the literal map holding the token index, i.e. the map
								%	recorded as literal_map(tokens, Map).  If it does not exist
								%	yet, a new map is created and recorded, filled from the
								%	current database by make_literal_index/0 and from then on
								%	kept up-to-date through an rdf_monitor/2 hook on the events
								%	=reset=, =new_literal= and =old_literal=.
								%
								%	The hook is monitor_literal, or thread_monitor_literal after
								%	calling create_update_literal_thread/1, depending on the
								%	index(How) setting (see token_index_update_strategy/1).

								token_index(Map) :-
									literal_map(tokens, Map), !.
								token_index(Map) :-
									rdf_new_literal_map(Map),
									% The map is recorded before it is filled; this order is
									% kept from the original code.
									assert(literal_map(tokens, Map)),
									make_literal_index,
									verbose('~N', []),
									token_index_update_strategy(Strategy),
									Events = [reset, new_literal, old_literal],
									(   Strategy = threaded(Count)
									->  create_update_literal_thread(Count),
									    rdf_monitor(thread_monitor_literal, Events)
									;   rdf_monitor(monitor_literal, Events)
									).

								% token_index_update_strategy(-Strategy)
								%
								% Strategy is threaded(Count) or same_thread:
								%
								%   - index(default): threaded(1) if the Prolog flag cpu_count
								%     exists and is greater than 1, same_thread otherwise.
								%   - index(thread(N)): threaded(N).
								%   - anything else: same_thread.

								token_index_update_strategy(Strategy) :-
									(   setting(index(default))
									->  (   current_prolog_flag(cpu_count, Cores),
										Cores > 1
									    ->  Strategy = threaded(1)
									    ;   Strategy = same_thread
									    )
									;   setting(index(thread(Count)))
									->  Strategy = threaded(Count)
									;   Strategy = same_thread
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	make_literal_index
								%
								%	Create the initial literal index using
								%	threaded_literal_index/1.  The number of threads is taken from
								%	the index_threads(N) setting; only if that setting is absent
								%	is the Prolog flag cpu_count used instead.

								make_literal_index :-
									(   setting(index_threads(Threads))
									->  true
									;   current_prolog_flag(cpu_count, Threads)
									),
									threaded_literal_index(Threads).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% threaded_literal_index(+N)
								%
								% Register all literals enumerated by rdf_current_literal/1.
								%
								% If N > 1, N worker threads (see create_index_threads/3) read
								% the literals from a message queue that is bounded to 1000
								% entries.  After the last literal one done(true) message is
								% sent per worker to make it stop, all workers are joined and
								% the queue is destroyed.  The queue used to be left behind
								% after each index build.
								%
								% Otherwise the literals are registered in the calling thread
								% using register_literal/1.

								threaded_literal_index(N) :-
									N > 1, !,
									message_queue_create(Q, [max_size(1000)]),
									create_index_threads(N, Q, Ids),
									forall(rdf_current_literal(Literal),
									       thread_send_message(Q, Literal)),
									forall(between(1, N, _),
									       thread_send_message(Q, done(true))),
									maplist(thread_join, Ids, _),
									% All workers have terminated: nobody uses Q any longer.
									message_queue_destroy(Q).
								threaded_literal_index(_) :-
									forall(rdf_current_literal(Literal),
									       register_literal(Literal)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	create_index_threads(+Count, +Queue, -Ids)
								%
								%	Start Count threads that each run index_worker(Queue) and
								%	unify Ids with the list of their thread identifiers.  A
								%	Count that is not positive yields the empty list.
								
								create_index_threads(Count, Queue, Ids) :-
									(   Ids = [Id|Rest],
									    Count > 0
									->  thread_create(index_worker(Queue), Id,
											  [ local(1000),
											    global(1000),
											    trail(1000)
											  ]),
									    Remaining is Count - 1,
									    create_index_threads(Remaining, Queue, Rest)
									;   Ids = []
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	index_worker(+Queue)
								%
								%	Thread body used by create_index_threads/3.  Failure-driven
								%	loop: fetch a message from Queue and pass it to work/1, which
								%	fails for ordinary literals (fetching the next message) and
								%	succeeds on done(true), ending the loop.
								
								index_worker(Q) :-
									repeat,
									thread_get_message(Q, Message),
									work(Message).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	work(+Message)
								%
								%	Process one queue message for index_worker/1.  Succeeds only
								%	for the done(true) sentinel.  Any other message is treated as
								%	a literal: it is registered and the predicate then fails so
								%	the caller's repeat-loop picks up the next message.
								
								work(Message) :-
									(   Message = done(true)
									->  true
									;   register_literal(Message),
									    fail
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	clean_token_index
								%
								%	Called after the RDF database is reset: empties every
								%	literal map recorded in literal_map/2 using
								%	rdf_reset_literal_map/1.
								
								clean_token_index :-
									forall(literal_map(_Name, LiteralMap),
									       rdf_reset_literal_map(LiteralMap)).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	  THREADED UPDATE	*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	create_update_literal_thread(+Threads)
								%
								%	Set up threaded literal monitoring.  While loading databases
								%	through rdf_attach_db/2 from rdf_persistency.pl, most of the
								%	time is spent updating the literal token database: loading the
								%	triples is dominated by updating the AVL tree holding the
								%	literals, and updating the token index is dominated by updating
								%	the AVL trees holding the tokens.  These two tasks can run
								%	concurrently.
								%
								%	Creates the message queue rdf_literal_monitor_queue (at most
								%	10000 pending messages) and starts Threads threads running
								%	monitor_literals/0, aliased rdf_literal_monitor_1 ..
								%	rdf_literal_monitor_<Threads>.
								
								create_update_literal_thread(Threads) :-
									message_queue_create(_Queue,
											     [ alias(rdf_literal_monitor_queue),
											       max_size(10000)
											     ]),
									forall(between(1, Threads, Index),
									       (   atom_concat(rdf_literal_monitor_, Index, ThreadAlias),
										   thread_create(monitor_literals, _ThreadId,
												 [ alias(ThreadAlias),
												   local(1000),
												   global(1000),
												   trail(1000)
												 ])
									       )).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	monitor_literals
								%
								%	Body of the threads started by create_update_literal_thread/1.
								%	Endless failure-driven loop that takes literals from
								%	rdf_literal_monitor_queue and registers them.  This predicate
								%	never succeeds.
								
								monitor_literals :-
									set_prolog_flag(agc_margin, 0),	% this thread creates no garbage
									repeat,
									thread_get_message(rdf_literal_monitor_queue, NewLiteral),
									register_literal(NewLiteral),
									fail.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	thread_monitor_literal(+Action)
								%
								%	Monitor callback for threaded updating.  A new_literal(L)
								%	event only posts L on rdf_literal_monitor_queue, leaving the
								%	registration to the threads running monitor_literals/0.  All
								%	other events are handled synchronously by monitor_literal/1.
								
								thread_monitor_literal(Action) :-
									(   Action = new_literal(Literal)
									->  thread_send_message(rdf_literal_monitor_queue, Literal)
									;   monitor_literal(Action)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	 MONITORED UPDATE	*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	monitor_literal(+Action)
								%
								%	Monitor callback that keeps the token index up to date in the
								%	calling thread:
								%
								%	  * new_literal(L): register L.
								%	  * old_literal(L): unregister L.
								%	  * transaction(begin, reset): pass [-old_literal] to
								%	    rdf_monitor/2 for this callback and wipe the token index.
								%	  * transaction(end, reset): pass [+old_literal] to
								%	    rdf_monitor/2 for this callback.
								
								monitor_literal(new_literal(New)) :-
									register_literal(New).
								monitor_literal(old_literal(Old)) :-
									unregister_literal(Old).
								monitor_literal(transaction(begin, reset)) :-
									% The whole index is cleaned below, so per-literal
									% old_literal events are switched off for the reset.
									rdf_monitor(monitor_literal, [-old_literal]),
									clean_token_index.
								monitor_literal(transaction(end, reset)) :-
									rdf_monitor(monitor_literal, [+old_literal]).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	register_literal(+Literal)
								%
								%	Add Literal to the token index: each token produced by
								%	rdf_tokenize_literal/2 is associated with the text of the
								%	literal in the `tokens' literal map.  Literals that cannot be
								%	tokenized are silently skipped.
								
								register_literal(Literal) :-
									rdf_tokenize_literal(Literal, Tokens), !,
									text_of(Literal, Text),
									literal_map(tokens, Map),
									add_tokens(Tokens, Text, Map).
								register_literal(_).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	add_tokens(+Tokens, +Literal, +Map)
								%
								%	Insert Literal into Map under each token in Tokens.  When
								%	rdf_insert_literal_map/4 binds its last argument (presumably
								%	only for a key that is new to the map -- confirm against the
								%	rdf_db documentation), all new_token/1 hooks are run for the
								%	token and progress is reported each time the bound count is
								%	a multiple of 1000.
								
								add_tokens([], _, _).
								add_tokens([Token|Rest], Literal, Map) :-
									rdf_insert_literal_map(Map, Token, Literal, KeyCount),
									(   nonvar(KeyCount)
									->  forall(new_token(Token), true),
									    (	KeyCount mod 1000 =:= 0
									    ->	progress(Map, 'Tokens')
									    ;	true
									    )
									;   true
									),
									add_tokens(Rest, Literal, Map).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	unregister_literal(+Literal)
								%
								%	Literal is removed from the database.  The index abstracts
								%	from language and type qualifiers, so the tokens are only
								%	deleted when no triple with the same literal text remains.
								
								unregister_literal(Literal) :-
									text_of(Literal, Text),
									(   \+ rdf(_, _, literal(Text))	% nothing left with this text
									->  rdf_tokenize_literal(Literal, Tokens),
									    literal_map(tokens, Map),
									    del_tokens(Tokens, Text, Map)
									;   true
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	del_tokens(+Tokens, +Literal, +Map)
								%
								%	Remove the association between each token in Tokens and
								%	Literal from Map using rdf_delete_literal_map/3.
								
								del_tokens([], _, _).
								del_tokens([Token|Rest], Literal, Map) :-
									rdf_delete_literal_map(Map, Token, Literal),
									del_tokens(Rest, Literal, Map).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	rdf_tokenize_literal(+Literal, -Tokens) is semidet.
								%
								%	Split a literal into tokens.  Tokenization is generally domain
								%	dependent, so the hook tokenization/2 is tried first and its
								%	first answer is used.  Without an answer from the hook, the
								%	text of the literal must be an atom; it is split by
								%	tokenize_atom/2 and filtered by select_tokens/2.
								
								rdf_tokenize_literal(Literal, Tokens) :-
									(   tokenization(Literal, Tokens)		% Hook
									->  true
									;   text_of(Literal, Text),
									    atom(Text),
									    tokenize_atom(Text, RawTokens),
									    select_tokens(RawTokens, Tokens)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	select_tokens(+RawTokens, -Tokens)
								%
								%	Filter the tokens that are worth indexing.  A token is
								%	dropped if
								%
								%	  * the hook exclude_from_index(token, Token) succeeds, or
								%	  * it is a number other than an integer in the range
								%	    -1073741824..1073741823, or
								%	  * it has length 1, or
								%	  * it is listed by no_index_token/1.
								
								select_tokens([], []).
								select_tokens([Token|Rest], Selected) :-
									(   exclude_from_index(token, Token)
									->  Selected = Kept
									;   number(Token)
									->  (   integer(Token),
									        between(-1073741824, 1073741823, Token)
									    ->	Selected = [Token|Kept]
									    ;   Selected = Kept
									    )
									;   atom_length(Token, 1)
									->  Selected = Kept
									;   no_index_token(Token)
									->  Selected = Kept
									;   Selected = [Token|Kept]
									),
									select_tokens(Rest, Kept).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	no_index_token(?Token)
								%
								%	Tokens we do not wish to index because they create huge
								%	amounts of data with little or no value.  Is there a more
								%	general way to describe this?  Experience shows that a simple
								%	word count is not a good criterion, as it often rules out
								%	popular domain terms.
								
								no_index_token(and).
								no_index_token(an).
								no_index_token(or).
								no_index_token(of).
								no_index_token(on).
								no_index_token(in).
								no_index_token(this).
								no_index_token(the).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%	text_of(+LiteralArg, -Text)
								%
								%	Text is the textual or integer content of a literal value:
								%	the second argument of type(_, Text) or lang(_, Text), or the
								%	value itself when it is an atom or an integer.  Fails for
								%	anything else.
								
								text_of(type(_, Text), Text) :- !.
								text_of(lang(_, Text), Text) :- !.
								text_of(Text, Text) :-
									(   atom(Text)
									->  true
									;   integer(Text)
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	   PORTER INDEX		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	porter_index(-Map)
								%
								%	Map is the literal map holding the Porter-stem index.  It is
								%	created on first use: the map is recorded as
								%	literal_map(porter, Map), filled from the existing tokens, and
								%	a new_token/1 clause is asserted that calls add_stem/2 with
								%	this map.
								
								porter_index(Map) :-
									(   literal_map(porter, Map)
									->  true
									;   rdf_new_literal_map(Map),
									    assert(literal_map(porter, Map)),
									    fill_porter_index(Map),
									    assert((new_token(Token) :- add_stem(Token, Map)))
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	fill_porter_index(+PorterMap)
								%
								%	Add the stem of every key of the token index to PorterMap.
								
								fill_porter_index(StemMap) :-
									token_index(Tokens),
									rdf_keys_in_literal_map(Tokens, all, Keys),
									stem(Keys, StemMap).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	stem(+Tokens, +Map)
								%
								%	For each atom in Tokens, insert it into Map under its
								%	porter_stem/2 stem.  Non-atom tokens are skipped.  Progress is
								%	reported whenever rdf_insert_literal_map/4 returns an integer
								%	count that is a multiple of 1000.
								
								stem([], _).
								stem([Token|Rest], Map) :-
									(   \+ atom(Token)
									->  true
									;   porter_stem(Token, Stem),
									    rdf_insert_literal_map(Map, Stem, Token, KeyCount),
									    (	integer(KeyCount),
										KeyCount mod 1000 =:= 0
									    ->  progress(Map, 'Porter')
									    ;	true
									    )
									),
									stem(Rest, Map).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	add_stem(+Token, +Map)
								%
								%	Insert Token into Map under its porter_stem/2 stem.  Called
								%	through the new_token/1 clause asserted by porter_index/1.
								
								add_stem(Token, StemMap) :-
									porter_stem(Token, Stem),
									rdf_insert_literal_map(StemMap, Stem, Token, _KeyCount).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	  METAPHONE INDEX	*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	metaphone_index(-Map)
								%
								%	Map is the literal map holding the metaphone index.  It is
								%	created on first use: the map is recorded as
								%	literal_map(metaphone, Map), filled from the existing tokens,
								%	and a new_token/1 clause is asserted that calls
								%	add_metaphone/2 with this map.
								
								metaphone_index(Map) :-
									(   literal_map(metaphone, Map)
									->  true
									;   rdf_new_literal_map(Map),
									    assert(literal_map(metaphone, Map)),
									    fill_metaphone_index(Map),
									    assert((new_token(Token) :- add_metaphone(Token, Map)))
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	fill_metaphone_index(+MetaphoneMap)
								%
								%	Add the metaphone key of every key of the token index to
								%	MetaphoneMap.
								
								fill_metaphone_index(MetaphoneMap) :-
									token_index(Tokens),
									rdf_keys_in_literal_map(Tokens, all, Keys),
									metaphone(Keys, MetaphoneMap).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	metaphone(+Tokens, +Map)
								%
								%	For each atom in Tokens, insert it into Map under the key
								%	computed by double_metaphone/2.  Non-atom tokens are skipped.
								%	Progress is reported whenever rdf_insert_literal_map/4
								%	returns an integer count that is a multiple of 1000.
								
								metaphone([], _).
								metaphone([Token|Rest], Map) :-
									(   \+ atom(Token)
									->  true
									;   double_metaphone(Token, Sound),
									    rdf_insert_literal_map(Map, Sound, Token, KeyCount),
									    (	integer(KeyCount),
										KeyCount mod 1000 =:= 0
									    ->	progress(Map, 'Metaphone')
									    ;	true
									    )
									),
									metaphone(Rest, Map).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	add_metaphone(+Token, +Map)
								%
								%	Insert Token into Map under the key computed by
								%	double_metaphone/2.  Called through the new_token/1 clause
								%	asserted by metaphone_index/1.
								
								add_metaphone(Token, MetaphoneMap) :-
									double_metaphone(Token, Sound),
									rdf_insert_literal_map(MetaphoneMap, Sound, Token).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
										 /*******************************
							 | 
						||
| 
								 | 
							
										 *	       UTIL		*
							 | 
						||
| 
								 | 
							
										 *******************************/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	verbose(+Format, +Args)
								%
								%	Write a formatted message to user_error if the setting
								%	verbose(true) is active; otherwise succeed silently.
								
								verbose(Format, Arguments) :-
									(   setting(verbose(true))
									->  format(user_error, Format, Arguments)
									;   true
									).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%	progress(+Map, +Which)
								%
								%	If the setting verbose(true) is active, print a one-line
								%	progress report for Map on user_error.  The line starts with
								%	a carriage return and shows the label Which together with the
								%	key and value counts reported by
								%	rdf_statistics_literal_map/2.  Otherwise succeed silently.
								
								progress(Map, Label) :-
									(   setting(verbose(true))
									->  rdf_statistics_literal_map(Map, size(KeyCount, ValueCount)),
									    format(user_error,
										   '\r~t~w: ~12|Keys: ~t~D~15+; Values: ~t~D~20+',
										   [Label, KeyCount, ValueCount])
									;   true
									).
							 |