git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@1905 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
		
			
				
	
	
		
			1395 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			1395 lines
		
	
	
		
			66 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
| %==============================================================================
 | ||
| \documentclass{llncs} 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \usepackage[latin1]{inputenc}
 | ||
| \usepackage{float}
 | ||
| \usepackage{alltt}
 | ||
| \usepackage{xspace}
 | ||
| \usepackage{epsfig} % NOTE(review): obsolete package; prefer graphicx (\includegraphics) -- confirm no \epsfig calls remain before swapping
 | ||
| \usepackage{wrapfig}
 | ||
| \usepackage{subfigure} % NOTE(review): deprecated; subcaption is the modern replacement, but migrating requires rewriting all \subfigure uses
 | ||
| 
 | ||
| \renewcommand{\rmdefault}{ptm}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \newcommand{\Paragraph}[1]{\vspace*{-.5em}\paragraph{#1}}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \floatstyle{ruled}
 | ||
| \newfloat{Algorithm}{ht}{lop}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \newcommand{\wamcodesize}{scriptsize}
 | ||
| \newcommand{\code}[1]{\texttt{#1}}
 | ||
| \newcommand{\instr}[1]{\textsf{#1}}
 | ||
| \newcommand{\try}{\instr{try}\xspace}
 | ||
| \newcommand{\retry}{\mbox{\instr{retry}}\xspace}
 | ||
| \newcommand{\trust}{\instr{trust}\xspace}
 | ||
| \newcommand{\TryRetryTrust}{\mbox{\instr{try-retry-trust}}\xspace}
 | ||
| \newcommand{\fail}{\instr{fail}\xspace}
 | ||
| \newcommand{\jump}{\instr{jump}\xspace}
 | ||
| \newcommand{\jitiSTAR}{\mbox{\instr{dindex\_on\_*}}\xspace}
 | ||
| \newcommand{\switchSTAR}{\mbox{\instr{switch\_on\_*}}\xspace}
 | ||
| \newcommand{\jitiONterm}{\mbox{\instr{dindex\_on\_term}}\xspace}
 | ||
| \newcommand{\jitiONconstant}{\mbox{\instr{dindex\_on\_constant}}\xspace}
 | ||
| \newcommand{\jitiONstructure}{\mbox{\instr{dindex\_on\_structure}}\xspace}
 | ||
| \newcommand{\switchONterm}{\mbox{\instr{switch\_on\_term}}\xspace}
 | ||
| \newcommand{\switchONconstant}{\mbox{\instr{switch\_on\_constant}}\xspace}
 | ||
| \newcommand{\switchONstructure}{\mbox{\instr{switch\_on\_structure}}\xspace}
 | ||
| \newcommand{\getcon}{\mbox{\instr{get\_constant}}\xspace}
 | ||
| \newcommand{\proceed}{\instr{proceed}\xspace}
 | ||
| \newcommand{\Cline}{\cline{2-3}}
 | ||
| \newcommand{\JITI}{demand-driven indexing\xspace}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \newcommand{\bench}[1]{\textbf{\textsf{#1}}}
 | ||
| \newcommand{\tcLio}{\bench{tc\_l\_io}\xspace}
 | ||
| \newcommand{\tcRio}{\bench{tc\_r\_io}\xspace}
 | ||
| \newcommand{\tcDio}{\bench{tc\_d\_io}\xspace}
 | ||
| \newcommand{\tcLoo}{\bench{tc\_l\_oo}\xspace}
 | ||
| \newcommand{\tcRoo}{\bench{tc\_r\_oo}\xspace}
 | ||
| \newcommand{\tcDoo}{\bench{tc\_d\_oo}\xspace}
 | ||
| \newcommand{\compress}{\bench{compress}\xspace}
 | ||
| \newcommand{\sgCyl}{\bench{sg\_cyl}\xspace}
 | ||
| \newcommand{\muta}{\bench{muta}\xspace}
 | ||
| \newcommand{\pta}{\bench{pta}\xspace}
 | ||
| \newcommand{\tea}{\bench{tea}\xspace}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \newcommand{\BreastCancer}{\bench{BreastCancer}\xspace}
 | ||
| \newcommand{\Carcino}{\bench{Carcinogenesis}\xspace}
 | ||
| \newcommand{\Choline}{\bench{Choline}\xspace}
 | ||
| \newcommand{\GeneExpr}{\bench{GeneExpression}\xspace}
 | ||
| \newcommand{\IEProtein}{\bench{IE-Protein\_Extraction}\xspace}
 | ||
| %\newcommand{\Krki}{\bench{Krki}\xspace}
 | ||
| %\newcommand{\KrkiII}{\bench{Krki~II}\xspace}
 | ||
| \newcommand{\Mesh}{\bench{Mesh}\xspace}
 | ||
| \newcommand{\Pyrimidines}{\bench{Pyrimidines}\xspace}
 | ||
| \newcommand{\Susi}{\bench{Susi}\xspace}
 | ||
| \newcommand{\Thermolysin}{\bench{Thermolysin}\xspace}
 | ||
| %------------------------------------------------------------------------------
 | ||
| \newenvironment{SmallProg}{\begin{tt}\begin{small}\begin{tabular}[b]{l}}{\end{tabular}\end{small}\end{tt}}
 | ||
| \newenvironment{ScriptProg}{\begin{tt}\begin{scriptsize}\begin{tabular}[b]{l}}{\end{tabular}\end{scriptsize}\end{tt}}
 | ||
| \newenvironment{FootProg}{\begin{tt}\begin{footnotesize}\begin{tabular}[c]{l}}{\end{tabular}\end{footnotesize}\end{tt}}
 | ||
| 
 | ||
| \newcommand{\TODOcomment}[2]{%
 | ||
|   \stepcounter{TODOcounter#1}%
 | ||
|   {\scriptsize\bf$^{(\arabic{TODOcounter#1})}$}%
 | ||
|   \marginpar[\fbox{
 | ||
|     \parbox{2cm}{\raggedleft
 | ||
|       \scriptsize$^{({\bf{\arabic{TODOcounter#1}{#1}}})}$%
 | ||
|       \scriptsize #2}}]%
 | ||
|   {\fbox{\parbox{2cm}{\raggedright
 | ||
|       \scriptsize$^{({\bf{\arabic{TODOcounter#1}{#1}}})}$%
 | ||
|       \scriptsize #2}}}
 | ||
| }%
 | ||
| \newcounter{TODOcounter}
 | ||
| \newcommand{\TODO}[1]{\TODOcomment{}{#1}}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| \title{Demand-Driven Indexing of Prolog Clauses\thanks{Dedicated to
 | ||
|     the memory of our friend, colleague and co-author Ricardo Lopes.
 | ||
|     We miss you!}}
 | ||
| \titlerunning{Demand-Driven Indexing of Prolog Clauses}
 | ||
| 
 | ||
| \author{V\'{\i}tor Santos Costa\inst{1} \and Konstantinos
 | ||
|   Sagonas\inst{2} \and Ricardo Lopes}
 | ||
| \authorrunning{V. Santos Costa, K. Sagonas and R. Lopes}
 | ||
| 
 | ||
| \institute{
 | ||
|   LIACC--DCC/FCUP, University of Porto, Portugal
 | ||
|   \and
 | ||
|   National Technical University of Athens, Greece
 | ||
| }
 | ||
| 
 | ||
| \begin{document}
 | ||
| \maketitle
 | ||
| 
 | ||
| \begin{abstract}
 | ||
|   As logic programming applications grow in size, Prolog systems need
 | ||
|   to efficiently access larger and larger data sets and the need for
 | ||
|   any- and multi-argument indexing becomes more and more profound.
 | ||
|   Static generation of multi-argument indexing is one alternative, but
 | ||
|   applications often rely on features that are inherently dynamic
 | ||
|   which makes static techniques inapplicable or inaccurate. Another
 | ||
|   alternative is to employ dynamic schemes for flexible demand-driven
 | ||
|   indexing of Prolog clauses. We propose such schemes and discuss
 | ||
|   issues that need to be addressed for their efficient implementation
 | ||
|   in the context of WAM-based Prolog systems. We have implemented
 | ||
|   demand-driven indexing in two different Prolog systems and have been
 | ||
|   able to obtain non-negligible performance speedups: from a few
 | ||
|   percent up to orders of magnitude. Given these results, we see very
 | ||
|   little reason for Prolog systems not to incorporate some form of
 | ||
|   dynamic indexing based on actual demand. In fact, we see
 | ||
|   demand-driven indexing as only the first step towards effective
 | ||
|   runtime optimization of Prolog programs.
 | ||
| \end{abstract}
 | ||
| 
 | ||
| 
 | ||
| \section{Introduction}
 | ||
| %=====================
 | ||
| The WAM~\cite{Warren83} has mostly been a blessing but occasionally
 | ||
| also a curse for Prolog systems. Its ingenious design has allowed
 | ||
| implementors to get byte code compilers with decent performance --- it
 | ||
| is not a fluke that most Prolog systems are still based on the WAM. On
 | ||
| the other hand, \emph{because} the WAM gives good performance in many
 | ||
| cases, implementors have not incorporated in their systems many
 | ||
| features that drastically depart from WAM's basic characteristics.
 | ||
| %
 | ||
| For example, first argument indexing is sufficient for many Prolog
 | ||
| applications. However, it is clearly sub-optimal for applications
 | ||
| accessing large data sets; for a long time now, the database community
 | ||
| has recognized that good indexing is the basis for fast query
 | ||
| processing.
 | ||
| 
 | ||
| As logic programming applications grow in size, Prolog systems need to
 | ||
| efficiently access larger and larger data sets and the need for any-
 | ||
| and multi-argument indexing becomes more and more profound. Static
 | ||
| generation of multi-argument indexing is one alternative. The problem
 | ||
| is that this alternative is often unattractive because it may
 | ||
| drastically increase the size of the generated byte code and do so
 | ||
| unnecessarily. Static analysis can partly address this concern, but in
 | ||
| applications that rely on features which are inherently dynamic (e.g.,
 | ||
| generating hypotheses for inductive logic programming data sets during
 | ||
| runtime) static analysis is inapplicable or grossly inaccurate.
 | ||
| Another alternative, which has not been investigated so far, is to do
 | ||
| flexible indexing on demand during program execution.
 | ||
| 
 | ||
| This is precisely what we advocate with this paper. More specifically,
 | ||
| we present a small extension to the WAM that allows for flexible
 | ||
| indexing of Prolog clauses during runtime based on actual demand. For
 | ||
| static predicates, the scheme we propose is partly guided by the
 | ||
| compiler; for dynamic code, besides being demand-driven by queries,
 | ||
| the method needs to cater for code updates during runtime. Where our
 | ||
| schemes radically depart from current practice is that they generate
 | ||
| new byte code during runtime, in effect doing a form of just-in-time
 | ||
| compilation. In our experience these schemes pay off. We have
 | ||
| implemented \JITI in two different Prolog systems (YAP and XXX) and
 | ||
| have obtained non-trivial speedups, ranging from a few percent to
 | ||
| orders of magnitude, across a wide range of applications. Given these
 | ||
| results, we see very little reason for Prolog systems not to
 | ||
| incorporate some form of indexing based on actual demand from queries.
 | ||
| In fact, we see \JITI as only the first step towards effective runtime
 | ||
| optimization of Prolog programs.
 | ||
| 
 | ||
| \Paragraph{Organization.}
 | ||
| %------------------------
 | ||
| After commenting on the state of the art and related work concerning
 | ||
| indexing in Prolog systems (Sect.~\ref{sec:related}) we briefly review
 | ||
| indexing in the WAM (Sect.~\ref{sec:prelims}). We then present \JITI
 | ||
| schemes for static (Sect.~\ref{sec:static}) and dynamic
 | ||
| (Sect.~\ref{sec:dynamic}) predicates, their implementation in two
 | ||
| Prolog systems (Sect.~\ref{sec:impl}) and the performance benefits
 | ||
| they bring (Sect.~\ref{sec:perf}). The paper ends with some concluding
 | ||
| remarks.
 | ||
| 
 | ||
| 
 | ||
| \section{State of the Art and Related Work} \label{sec:related}
 | ||
| %==============================================================
 | ||
| % Indexing in Prolog systems:
 | ||
| Many Prolog systems still only support
 | ||
| indexing on the main functor symbol of the first argument. Some
 | ||
| others, such as YAP version 4, can look inside some compound
 | ||
| terms~\cite{YAP}. SICStus Prolog supports \emph{shallow
 | ||
|   backtracking}~\cite{ShallowBacktracking@ICLP-89}; choice points are
 | ||
| fully populated only when it is certain that execution will enter the
 | ||
| clause body. While shallow backtracking avoids some of the performance
 | ||
| problems of unnecessary choice point creation, it does not offer the
 | ||
| full benefits that indexing can provide. Other systems such as
 | ||
| BIM-Prolog~\cite{IndexingProlog@NACLP-89}, SWI-Prolog~\cite{SWI} and
 | ||
| XSB~\cite{XSB} allow for user-controlled multi-argument indexing.
 | ||
| Notably, ilProlog~\cite{ilProlog} uses compile-time heuristics and
 | ||
| generates code for multi-argument indexing automatically. In all these
 | ||
| systems, this support comes with various implementation restrictions.
 | ||
| For example, in SWI-Prolog at most four arguments can be indexed; in
 | ||
| XSB the compiler does not offer multi-argument indexing and the
 | ||
| predicates need to be asserted instead; we know of no system where
 | ||
| multi-argument indexing looks inside compound terms. More importantly,
 | ||
| requiring users to specify arguments to index on is neither
 | ||
| user-friendly nor guarantees good performance results.
 | ||
| 
 | ||
| % Trees, tries and unification factoring:
 | ||
| Recognizing the need for better indexing, researchers have proposed
 | ||
| more flexible indexing mechanisms for Prolog. For example, Hickey and
 | ||
| Mudambi proposed \emph{switching trees}~\cite{HickeyMudambi@JLP-89},
 | ||
| which rely on the presence of mode information. Similar proposals were
 | ||
| put forward by Van Roy, Demoen and Willems who investigated indexing
 | ||
| on several arguments in the form of a \emph{selection tree}~\cite{VRDW87}
 | ||
| and by Zhou et al.\ who implemented a \emph{matching tree} oriented
 | ||
| abstract machine for Prolog~\cite{TOAM@ICLP-90}. For static
 | ||
| predicates, the XSB compiler offers support for \emph{unification
 | ||
| factoring}~\cite{UnifFact@POPL-95}; for asserted code, XSB can
 | ||
| represent databases of facts using \emph{tries}~\cite{Tries@JLP-99}
 | ||
| which provide left-to-right multi-argument indexing. However, in XSB
 | ||
| none of these mechanisms is used automatically; instead the user has
 | ||
| to specify appropriate directives.
 | ||
| 
 | ||
| % Comparison with static analysis techniques and Mercury:
 | ||
| Long ago, Kliger and Shapiro argued that such tree-based indexing
 | ||
| schemes are not cost effective for the compilation of Prolog
 | ||
| programs~\cite{KligerShapiro@ICLP-88}. Some of their arguments make
 | ||
| sense for certain applications, but, as we shall show, in general 
 | ||
| they underestimate the benefits of indexing on EDB predicates.
 | ||
| Nevertheless, it is true that unless the modes of
 | ||
| predicates are known we run the risk of doing indexing on output
 | ||
| arguments, whose only effect is an unnecessary increase in compilation
 | ||
| times and, more importantly, in code size. In a programming language
 | ||
| such as Mercury~\cite{Mercury@JLP-96} where modes are known the compiler
 | ||
| can of course avoid this risk; indeed in Mercury modes (and types) are
 | ||
| used to guide the compiler generate good indexing tables. However, the
 | ||
| situation is different for a language like Prolog. Getting accurate
 | ||
| information about the set of all possible modes of predicates requires
 | ||
| a global static analyzer in the compiler --- and most Prolog systems
 | ||
| do not come with one. More importantly, it requires a lot of
 | ||
| discipline from the programmer (e.g., that applications use the module
 | ||
| system religiously and never bypass it). As a result, most Prolog
 | ||
| systems currently do not provide the type of indexing that
 | ||
| applications require. Even in systems such as Ciao~\cite{Ciao@SCP-05},
 | ||
| which do come with a built-in static analyzer and more or less force
 | ||
| such a discipline on the programmer, mode information is not used for
 | ||
| multi-argument indexing.
 | ||
| 
 | ||
| % The grand finale:
 | ||
| The situation is actually worse for certain types of Prolog
 | ||
| applications. For example, consider applications in the area of
 | ||
| inductive logic programming. These applications on the one hand have
 | ||
| high demands for effective indexing since they need to efficiently
 | ||
| access big datasets and on the other they are unfit for static
 | ||
| analysis since queries are often ad hoc and generated only during
 | ||
| runtime as new hypotheses are formed or refined.
 | ||
| %
 | ||
| Our thesis is that the abstract machine should be able to adapt
 | ||
| automatically to the runtime requirements of such or, even better, of
 | ||
| all applications by employing increasingly aggressive forms of dynamic
 | ||
| compilation. As a concrete example of what this means in practice, in
 | ||
| this paper we will attack the problem of satisfying the indexing needs
 | ||
| of applications during runtime. Naturally, we will base our technique
 | ||
| on the existing support for indexing that the WAM provides, but we
 | ||
| will extend this support with the technique of \JITI that we describe
 | ||
| in the next sections.
 | ||
| 
 | ||
| 
 | ||
| \section{Indexing in the WAM} \label{sec:prelims}
 | ||
| %================================================
 | ||
| To make the paper relatively self-contained we review the indexing
 | ||
| instructions of the WAM and their use. In the WAM, the first level of
 | ||
| dispatching involves a test on the type of the argument. The
 | ||
| \switchONterm instruction checks the tag of the dereferenced value in
 | ||
| the first argument register and implements a four-way branch where one
 | ||
| branch is for the dereferenced register being an unbound variable, one
 | ||
| for being atomic, one for (non-empty) list, and one for structure. In
 | ||
| any case, control goes to a bucket of clauses. In the buckets for
 | ||
| constants and structures the second level of dispatching involves the
 | ||
| value of the register. The \switchONconstant and \switchONstructure
 | ||
| instructions implement this dispatching: typically with a \fail
 | ||
| instruction when the bucket is empty, with a \jump instruction for
 | ||
| only one clause, with a sequential scan when the number of clauses is
 | ||
| small, and with a hash table lookup when the number of clauses exceeds
 | ||
| a threshold. For this reason the \switchONconstant and
 | ||
| \switchONstructure instructions take as arguments the hash table
 | ||
| \instr{T} and the number of clauses \instr{N} the table contains. In
 | ||
| each bucket of this hash table and also in the bucket for the variable
 | ||
| case of \switchONterm the code sequentially backtracks through the
 | ||
| clauses using a \TryRetryTrust chain of instructions. The \try
 | ||
| instruction sets up a choice point, the \retry instructions (if~any)
 | ||
| update certain fields of this choice point, and the \trust instruction
 | ||
| removes it.
 | ||
| 
 | ||
| The WAM has additional indexing instructions (\instr{try\_me\_else}
 | ||
| and friends) that allow indexing to be interspersed with the code of
 | ||
| clauses. We will not consider them here. This is not a problem since
 | ||
| the above scheme handles all programs. Also, we will feel free to do
 | ||
| some minor modifications and optimizations when this simplifies
 | ||
| things.
 | ||
| 
 | ||
| Let's see an example. Consider the Prolog code shown in
 | ||
| Fig.~\ref{fig:carc:facts}, a fragment of the machine learning dataset
 | ||
| \textit{Carcinogenesis}.
 | ||
| %
 | ||
| These clauses get compiled to the WAM code shown in
 | ||
| Fig.~\ref{fig:carc:clauses}. The first argument indexing code that a
 | ||
| Prolog compiler generates is shown in Fig.~\ref{fig:carc:index}. This
 | ||
| code is typically placed before the code for the clauses and the
 | ||
| \switchONconstant is the entry point of the predicate. Note that compared
 | ||
| with vanilla WAM this instruction has an extra argument: the register
 | ||
| on the value of which we index ($r_1$). This extra argument will allow
 | ||
| us to go beyond first argument indexing. Another departure from the
 | ||
| WAM is that if this argument register contains an unbound variable
 | ||
| instead of a constant then execution will continue with the next
 | ||
| instruction; in effect we have merged part of the functionality of
 | ||
| \switchONterm into the \switchONconstant instruction. This small
 | ||
| change in the behavior of \switchONconstant will allow us to get
 | ||
| \JITI. Let's see how.
 | ||
| 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{figure}[t]
 | ||
| \centering
 | ||
| \begin{tabular}[b]{c}
 | ||
|   \subfigure[Some Prolog clauses\label{fig:carc:facts}]{%
 | ||
|     \begin{ScriptProg}
 | ||
|       has\_property(d1,salmonella,p).\\
 | ||
|       has\_property(d1,salmonella\_n,p).\\
 | ||
|       has\_property(d2,salmonella,p). \\
 | ||
|       has\_property(d2,cytogen\_ca,n).\\
 | ||
|       has\_property(d3,cytogen\_ca,p).\\[5pt]
 | ||
|     \end{ScriptProg}
 | ||
|   }\\ \hline\hline%
 | ||
|   \subfigure[WAM indexing\label{fig:carc:index}]{%
 | ||
|     \begin{sf}
 | ||
|       \begin{\wamcodesize}
 | ||
| 	\begin{tabular}[b]{l}
 | ||
|           \switchONconstant $r_1$ 5 $T_1$  \\
 | ||
|           \try   $L_1$ \\
 | ||
|           \retry $L_2$ \\
 | ||
|           \retry $L_3$ \\
 | ||
|           \retry $L_4$ \\
 | ||
|           \trust $L_5$ \\
 | ||
| 	  \\
 | ||
| 	  \begin{tabular}[b]{r|c@{\ }|l|}
 | ||
| 	    \Cline
 | ||
| 	    $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	    \      & d1 & \try   $L_1$ \\
 | ||
| 	    \      &    & \trust $L_2$ \\ \Cline
 | ||
|             \      & d2 & \try   $L_3$ \\
 | ||
| 	    \      &    & \trust $L_4$ \\ \Cline
 | ||
| 	    \      & d3 & \jump  $L_5$ \\
 | ||
| 	    \Cline
 | ||
| 	  \end{tabular}\\[3pt]
 | ||
| 	\end{tabular}
 | ||
|     \end{\wamcodesize}
 | ||
|     \end{sf}
 | ||
|   }%
 | ||
| \end{tabular}%
 | ||
| \subfigure[Code for the clauses\label{fig:carc:clauses}]{%
 | ||
|   \begin{sf}
 | ||
|     \begin{\wamcodesize}
 | ||
|       \begin{tabular}[b]{rl}
 | ||
| 	$L_1$: & \getcon $r_1$ d1            \\
 | ||
| 	\      & \getcon $r_2$ salmonella    \\
 | ||
| 	\      & \getcon $r_3$ p             \\
 | ||
|         \      & \proceed                    \\
 | ||
| 	$L_2$: & \getcon $r_1$ d1            \\
 | ||
|         \      & \getcon $r_2$ salmonella\_n \\
 | ||
|         \      & \getcon $r_3$ p             \\
 | ||
|         \      & \proceed                    \\
 | ||
| 	$L_3$: & \getcon $r_1$ d2            \\
 | ||
|         \      & \getcon $r_2$ salmonella    \\
 | ||
|         \      & \getcon $r_3$ p             \\
 | ||
|         \      & \proceed                    \\
 | ||
| 	$L_4$: & \getcon $r_1$ d2            \\
 | ||
| 	\      & \getcon $r_2$ cytogen\_ca   \\
 | ||
| 	\      & \getcon $r_3$ n             \\
 | ||
| 	\      & \proceed                    \\
 | ||
| 	$L_5$: & \getcon $r_1$ d3            \\
 | ||
| 	\      & \getcon $r_2$ cytogen\_ca   \\
 | ||
| 	\      & \getcon $r_3$ p             \\
 | ||
| 	\      & \proceed
 | ||
|       \end{tabular}
 | ||
|     \end{\wamcodesize}
 | ||
|   \end{sf}
 | ||
| }%
 | ||
| \subfigure[Any arg indexing\label{fig:carc:jiti_single:before}]{%
 | ||
|   \begin{sf}
 | ||
|     \begin{\wamcodesize}
 | ||
|       \begin{tabular}[b]{l}
 | ||
|         \switchONconstant $r_1$ 5 $T_1$  \\
 | ||
|         \jitiONconstant $r_2$   5 3    \\
 | ||
|         \jitiONconstant $r_3$   5 3    \\
 | ||
|         \try   $L_1$ \\
 | ||
|         \retry $L_2$ \\
 | ||
|         \retry $L_3$ \\
 | ||
|         \retry $L_4$ \\
 | ||
|         \trust $L_5$ \\
 | ||
| 	\\
 | ||
| 	\begin{tabular}[b]{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{d1} & \try   $L_1$ \\
 | ||
| 	  \      &           & \trust $L_2$ \\ \Cline
 | ||
|           \      & \code{d2} & \try   $L_3$ \\
 | ||
| 	  \      &           & \trust $L_4$ \\ \Cline
 | ||
| 	  \      & \code{d3} & \jump  $L_5$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
|       \end{tabular}
 | ||
|     \end{\wamcodesize}
 | ||
|   \end{sf}
 | ||
| }%
 | ||
| \caption{Part of the Carcinogenesis dataset and WAM code that a byte
 | ||
|   code compiler generates}
 | ||
| \label{fig:carc}
 | ||
| \vspace*{-1em}
 | ||
| \end{figure}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| 
 | ||
| \section{Demand-Driven Indexing of Static Predicates} \label{sec:static}
 | ||
| %=======================================================================
 | ||
| For static predicates the compiler has complete information about all
 | ||
| clauses and shapes of their head arguments. It is both desirable and
 | ||
| possible to take advantage of this information at compile time and so
 | ||
| we treat the case of static predicates separately.
 | ||
| %
 | ||
| We will do so with schemes of increasing effectiveness and
 | ||
| implementation complexity.
 | ||
| 
 | ||
| \subsection{A simple WAM extension for any argument indexing}
 | ||
| %------------------------------------------------------------
 | ||
| Let us initially consider the case where the predicates to index
 | ||
| consist only of Datalog facts. This is commonly the case for all
 | ||
| extensional database predicates where indexing is most effective and
 | ||
| called for.
 | ||
| 
 | ||
| Refer to the example in Fig.~\ref{fig:carc}.
 | ||
| %
 | ||
| The indexing code of Fig.~\ref{fig:carc:index} incurs a small cost for
 | ||
| a call where the first argument is a variable (namely, executing the
 | ||
| \switchONconstant instruction) but the instruction pays off for calls
 | ||
| where the first argument is bound. On the other hand, for calls where
 | ||
| the first argument is a free variable and some other argument is
 | ||
| bound, a choice point will be created, the \TryRetryTrust chain will
 | ||
| be used, and execution will go through the code of all clauses. This
 | ||
| is clearly inefficient, more so for larger data sets.
 | ||
| %
 | ||
| We can do much better with the relatively simple scheme shown in
 | ||
| Fig.~\ref{fig:carc:jiti_single:before}. Immediately after the
 | ||
| \switchONconstant instruction, we can statically generate
 | ||
| \jitiONconstant (demand indexing) instructions, one for each remaining
 | ||
| argument. Recall that the entry point of the predicate is the
 | ||
| \switchONconstant instruction. The \jitiONconstant $r_i$ \instr{N A}
 | ||
| instruction works as follows:
 | ||
| \begin{itemize}
 | ||
| \item if the argument $r_i$ is a free variable,
 | ||
|   execution continues with the next instruction;
 | ||
| \item otherwise, \JITI kicks in as follows. The abstract machine
 | ||
|   scans the WAM code of the clauses and creates an index table for the
 | ||
|   values of the corresponding argument. It can do so because the
 | ||
|   instruction takes as arguments the number of clauses \instr{N} to
 | ||
|   index and the arity \instr{A} of the predicate. (In our example, the
 | ||
|   numbers 5 and 3.) For Datalog facts, this information is sufficient.
 | ||
|   Because the WAM byte code for the clauses has a very regular
 | ||
|   structure, the index table can be created very quickly. Upon its
 | ||
|   creation, the \jitiONconstant instruction gets transformed to a
 | ||
| \switchONconstant. Again this is straightforward because the two
 | ||
|   instructions have similar layouts in memory. Execution of the
 | ||
|   abstract machine then continues with the \switchONconstant
 | ||
|   instruction.
 | ||
| \end{itemize}
 | ||
| Figure~\ref{fig:carg:jiti_single:after} shows the index table $T_2$
 | ||
| which is created for our example and how the indexing code looks after
 | ||
| the execution of a call with mode \code{(out,in,?)}. Note that the
 | ||
| \jitiONconstant instruction for argument register $r_2$ has been
 | ||
| appropriately patched. The call that triggered \JITI and subsequent
 | ||
| calls of the same mode will use table $T_2$. The index for the second
 | ||
| argument has been created.
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{figure}[t]
 | ||
|   \centering
 | ||
|   \begin{sf}
 | ||
|     \begin{\wamcodesize}
 | ||
|       \begin{tabular}{c@{\hspace*{2em}}c@{\hspace*{2em}}c}
 | ||
| 	\begin{tabular}{l}
 | ||
|           \switchONconstant $r_1$ 5 $T_1$ \\
 | ||
|           \switchONconstant $r_2$ 5 $T_2$ \\
 | ||
|           \jitiONconstant $r_3$   5 3     \\
 | ||
|           \try $L_1$   \\
 | ||
|           \retry $L_2$ \\
 | ||
|           \retry $L_3$ \\
 | ||
|           \retry $L_4$ \\
 | ||
|           \trust $L_5$ \\
 | ||
| 	\end{tabular}
 | ||
| 	&
 | ||
| 	\begin{tabular}{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{d1} & \try   $L_1$ \\
 | ||
| 	  \      &           & \trust $L_2$ \\ \Cline
 | ||
|           \      & \code{d2} & \try   $L_3$ \\
 | ||
| 	  \      &           & \trust $L_4$ \\ \Cline
 | ||
| 	  \      & \code{d3} & \jump  $L_5$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
| 	&
 | ||
| 	\begin{tabular}{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{salmonella}    & \try $L_1$   \\
 | ||
| 	  \      &                      & \trust $L_3$ \\ \Cline
 | ||
| 	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline
 | ||
| 	  \      & \code{cytogen\_ca}  & \try $L_4$   \\
 | ||
| 	  \      &                      & \trust $L_5$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
|       \end{tabular}
 | ||
|     \end{\wamcodesize}
 | ||
|   \end{sf}
 | ||
|   \caption{WAM code after demand-driven indexing for argument 2;
 | ||
|     $T_2$ is generated dynamically}
 | ||
|   \label{fig:carg:jiti_single:after}
 | ||
| \end{figure}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| The main advantage of this scheme is its simplicity. The compiled code
 | ||
| (Fig.~\ref{fig:carc:jiti_single:before}) is not significantly bigger
 | ||
| than the code which a WAM-based compiler would generate
 | ||
| (Fig.~\ref{fig:carc:index}) and, if \JITI turns out unnecessary
 | ||
| during runtime (e.g., execution encounters only open calls or calls with only
 | ||
| the first argument bound), the extra overhead is minimal: the
 | ||
| execution of some \jitiONconstant instructions for the open call only.
 | ||
| %
 | ||
| In short, this is a simple scheme that allows for indexing on \emph{any
 | ||
| single} argument. At least for big sets of Datalog facts, we see
 | ||
| little reason not to use it.
 | ||
| 
 | ||
| \Paragraph{Optimizations.}
 | ||
| Because we are dealing with static code, there are opportunities for
 | ||
| some easy optimizations. Suppose we statically determine that there
 | ||
| will never be any calls with \code{in} mode for some arguments or that
 | ||
| these arguments are not discriminating enough.\footnote{In our
 | ||
| example, suppose the third argument of \code{has\_property/3} was the
 | ||
| atom \code{p} throughout.} Then we can avoid generating
 | ||
| \jitiONconstant instructions for them. Also, suppose we know that some
 | ||
| arguments are more likely than others to be used in the \code{in}
 | ||
| mode. Then we can simply place the \jitiONconstant instructions for
 | ||
| them before the instructions for other arguments. This is possible
 | ||
| since all indexing instructions take the argument register number as
 | ||
| an argument; their order does not matter.
 | ||
| 
 | ||
| \subsection{From any argument indexing to multi-argument indexing}
 | ||
| %-----------------------------------------------------------------
 | ||
| The scheme of the previous section gives us only single argument
 | ||
| indexing. However, all the infrastructure we need is already in place.
 | ||
| We can use it to obtain any fixed-order multi-argument \JITI in a
 | ||
| straightforward way.
 | ||
| 
 | ||
| Note that the compiler knows exactly the set of clauses that need to
 | ||
| be tried for each query with a specific symbol in the first argument.
 | ||
| % This information is needed in order to construct, at compile time, the
 | ||
| % hash table $T_1$ of Fig.~\ref{fig:carc:index}.
 | ||
| For multi-argument \JITI, instead of generating for each hash bucket
 | ||
| only \TryRetryTrust instructions, the compiler can prepend appropriate
 | ||
| demand indexing instructions. We illustrate this on our running
 | ||
| example. The table $T_1$ contains four \jitiONconstant instructions:
 | ||
| two for each of the remaining two arguments of hash buckets with more
 | ||
| than one alternative. For hash buckets with none or only one
 | ||
| alternative (e.g., for \code{d3}'s bucket) there is obviously no need
 | ||
| to resort to \JITI for the remaining arguments.
 | ||
| Figure~\ref{fig:carc:jiti_multi} shows the state of the hash tables
 | ||
| after the execution of queries \code{has\_property(C,salmonella,T)},
 | ||
| which creates $T_2$, and \code{has\_property(d2,P,n)} which creates
 | ||
| the $T_3$ table and transforms the \jitiONconstant instruction for
 | ||
| \code{d2} and register $r_3$ to the appropriate \switchONconstant
 | ||
| instruction.
 | ||
| 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{figure}[t]
 | ||
|   \centering
 | ||
|   \begin{sf}
 | ||
|     \begin{\wamcodesize}
 | ||
|       \begin{tabular}{@{}c@{}c@{}c@{}}
 | ||
| 	\begin{tabular}{l}
 | ||
|           \switchONconstant $r_1$ 5 $T_1$ \\
 | ||
|           \switchONconstant $r_2$ 5 $T_2$ \\
 | ||
|           \jitiONconstant $r_3$   5 3     \\
 | ||
|           \try $L_1$   \\
 | ||
|           \retry $L_2$ \\
 | ||
|           \retry $L_3$ \\
 | ||
|           \retry $L_4$ \\
 | ||
|           \trust $L_5$ \\
 | ||
| 	\end{tabular}
 | ||
| 	&
 | ||
| 	\begin{tabular}{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{d1} & \jitiONconstant $r_2$ 2 3 \\
 | ||
| 	  \      &           & \jitiONconstant $r_3$ 2 3 \\
 | ||
| 	  \      &           & \try   $L_1$ \\
 | ||
| 	  \      &           & \trust $L_2$ \\ \Cline
 | ||
|           \      & \code{d2} & \jitiONconstant $r_2$ 2 3 \\
 | ||
| 	  \      &           & \switchONconstant $r_3$ 2 $T_3$ \\
 | ||
| 	  \      &           & \try   $L_3$ \\
 | ||
| 	  \      &           & \trust $L_4$ \\ \Cline
 | ||
| 	  \      & \code{d3} & \jump  $L_5$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
| 	&
 | ||
| 	\begin{tabular}{c}
 | ||
| 	\begin{tabular}{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{salmonella}    & \jitiONconstant $r_3$ 2 3 \\
 | ||
| 	  \      &                      & \try $L_1$   \\
 | ||
| 	  \      &                      & \trust $L_3$ \\ \Cline
 | ||
| 	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline
 | ||
| 	  \      & \code{cytrogen\_ca}  & \jitiONconstant $r_3$ 2 3 \\
 | ||
| 	  \      &                      & \try $L_4$   \\
 | ||
| 	  \      &                      & \trust $L_5$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
| 	\\
 | ||
| 	\ \\
 | ||
| 	\begin{tabular}{r|c@{\ }|l|}
 | ||
| 	  \Cline
 | ||
| 	  $T_3$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
 | ||
| 	  \      & \code{p} & \jump $L_3$ \\ \Cline
 | ||
| 	  \      & \code{n} & \jump $L_4$ \\
 | ||
| 	  \Cline
 | ||
| 	\end{tabular}
 | ||
| 	\end{tabular}
 | ||
|       \end{tabular}
 | ||
|     \end{\wamcodesize}
 | ||
|   \end{sf}
 | ||
|   \caption{\JITI for all arguments;
 | ||
|     $T_1$ is static; $T_2$ and $T_3$ are created dynamically}
 | ||
|   \label{fig:carc:jiti_multi}
 | ||
| \end{figure}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| \Paragraph{Implementation issues.}
 | ||
| In the \jitiONconstant instructions of Fig.~\ref{fig:carc:jiti_multi}
 | ||
| notice the integer 2 which denotes the number of clauses that the
 | ||
| instruction will index. Using this number an index table of
 | ||
| appropriate size will be created, such as $T_3$. To fill this table we
 | ||
| need information about the clauses to index and the symbols to hash
 | ||
| on. The clauses can be obtained by scanning the labels of the
 | ||
| \TryRetryTrust instructions following \jitiONconstant; the symbols by
 | ||
| looking at appropriate byte code offsets (based on the argument
 | ||
| register number) from these labels. In our running example, the
 | ||
| symbols can be obtained by looking at the second argument of the
 | ||
| \getcon instruction whose argument register is $r_2$. In the loaded
 | ||
| bytecode, assuming the argument register is represented in one byte,
 | ||
| these symbols are found $sizeof(\getcon) + sizeof(opcode) + 1$ bytes
 | ||
| away from the clause label; see Fig.~\ref{fig:carc:clauses}. Thus,
 | ||
| multi-argument \JITI is easy to get and the creation of index tables
 | ||
| can be extremely fast when indexing Datalog facts.
 | ||
| 
 | ||
| \subsection{Beyond Datalog and other implementation issues}
 | ||
| %----------------------------------------------------------
 | ||
| Indexing on demand clauses with function symbols is not significantly
 | ||
| more difficult. The scheme we have described is applicable but
 | ||
| requires the following extensions:
 | ||
| \begin{enumerate}
 | ||
| \item Besides \jitiONconstant we also need \jitiONterm and
 | ||
|   \jitiONstructure instructions. These are the \JITI counterparts of
 | ||
|   the WAM's \switchONterm and \switchONstructure.
 | ||
| \item Because the byte code for the clause heads does not necessarily
 | ||
|   have a regular structure, the abstract machine needs to be able to
 | ||
|   ``walk'' the byte code instructions and recover the symbols on which
 | ||
|   indexing will be based. Writing such a code walking procedure is not
 | ||
|   hard.
 | ||
| \item Indexing on a position that contains unconstrained variables
 | ||
|   for some clauses is tricky. The WAM needs to group clauses in this
 | ||
|   case and without special treatment creates two choice points for
 | ||
|   this argument (one for the variables and one per group of
 | ||
|   clauses). However, this issue and how to deal with it is well-known
 | ||
|   by now. Possible solutions to it are described in a paper by
 | ||
|   Carlsson~\cite{FreezeIndexing@ICLP-87} and can be readily adapted to
 | ||
|   \JITI. Alternatively, in a simple implementation, we can skip \JITI
 | ||
|   for positions with variables in some clauses.
 | ||
| \end{enumerate}
 | ||
| Before describing \JITI more formally, we remark on the following
 | ||
| design decisions whose rationale may not be immediately obvious:
 | ||
| \begin{itemize}
 | ||
| \item By default, only table $T_1$ is generated at compile time (as in
 | ||
|   the WAM) and the additional index tables $T_2, T_3, \ldots$ are
 | ||
|   generated dynamically. This is because we do not want to increase
 | ||
|   compiled code size unnecessarily (i.e., when there is no demand for
 | ||
|   these indices).
 | ||
| \item On the other hand, we generate \jitiSTAR instructions at compile
 | ||
|   time for the head arguments.\footnote{The \jitiSTAR instructions for
 | ||
|   $T_1$ can be generated either by the compiler or the loader.} This
 | ||
|   does not noticeably increase the generated byte code but it greatly
 | ||
|   simplifies code loading. Notice that a nice property of the scheme
 | ||
|   we have described is that the loaded byte code can be patched
 | ||
|   \emph{without} the need to move any instructions.
 | ||
| % The indexing tables are typically not intersperced with the byte code.
 | ||
| \item Finally, one may wonder why the \jitiSTAR instructions create
 | ||
|   the dynamic index tables with an additional code walking pass
 | ||
|   instead of piggy-backing on the pass which examines all clauses via
 | ||
|   the main \TryRetryTrust chain. Main reasons are: 1) in many cases
 | ||
|   the code walking can be selective and guided by offsets and 2) by
 | ||
|   first creating the index table and then using it we speed up the
 | ||
|   execution of the queries and often avoid unnecessary choice point
 | ||
|   creations.
 | ||
| \end{itemize}
 | ||
| Note that all these decisions are orthogonal to the main idea and are
 | ||
| under compiler control. For example, if analysis determines that some
 | ||
| argument sequences will never demand indexing we can simply avoid
 | ||
| generation of \jitiSTAR instructions for them. Similarly, if some
 | ||
| argument sequences will definitely demand indexing we can speed up
 | ||
| execution by generating the appropriate tables at compile time instead
 | ||
| of dynamically.
 | ||
| 
 | ||
| \subsection{Demand-driven index construction and its properties}
 | ||
| %---------------------------------------------------------------
 | ||
| The idea behind \JITI can be captured in a single sentence: \emph{we
 | ||
| can generate every index we need during program execution when this
 | ||
| index is demanded}. Subsequent uses of these indices can speed up
 | ||
| execution considerably more than the time it takes to construct them
 | ||
| (more on this below) so this runtime action makes sense.%\footnote{In
 | ||
| %fact, because choice points are expensive in the WAM, \JITI can speed
 | ||
| %up even the execution of the query that triggers the process, not only
 | ||
| %subsequent queries.}
 | ||
| %
 | ||
| %We describe the process of demand-driven index construction.
 | ||
| 
 | ||
| % \subsubsection{Demand-driven index construction}
 | ||
| %-------------------------------------------------
 | ||
| Let $p/k$ be a predicate with $n$ clauses.
 | ||
| %
 | ||
| At a high level, its indices form a tree whose root is the entry point
 | ||
| of the predicate. For simplicity, assume that the root node of the
 | ||
| tree and the interior nodes corresponding to the index table for the
 | ||
| first argument have been constructed at compile time. Leaves of this
 | ||
| tree are the nodes containing the code for the clauses of the
 | ||
| predicate and each clause is identified by a unique label \mbox{$L_i,
 | ||
| 1 \leq i \leq n$}. Execution always starts at the first instruction of
 | ||
| the root node and follows Algorithm~\ref{alg:construction}. The
 | ||
| algorithm might look complicated but is actually quite simple.
 | ||
| %
 | ||
| Each non-leaf node contains a sequence of byte code instructions with
 | ||
| groups of the form \mbox{$\langle I_1, \ldots, I_m, T_1, \ldots, T_l
 | ||
| \rangle, 0 \leq m \leq k, 1 \leq l \leq n$} where each of the $I$
 | ||
| instructions, if any, is either a \switchSTAR or a \jitiSTAR
 | ||
| instruction and each of the $T$ instructions either forms a sequence
 | ||
| of \TryRetryTrust instructions (if $l > 1$) or is a \jump instruction
 | ||
| (if \mbox{$l = 1$}). Step~2.2 dynamically constructs an index table
 | ||
| $\cal T$ whose buckets are the newly created interior nodes in the
 | ||
| tree. Each bucket associated with a single clause contains a \jump to
 | ||
| the label of that clause. Each bucket associated with many clauses
 | ||
| starts with the $I$ instructions which are yet to be visited and
 | ||
| continues with a \TryRetryTrust chain pointing to the clauses. When
 | ||
| the index construction is done, the instruction mutates to a
 | ||
| \switchSTAR WAM instruction.
 | ||
| %-------------------------------------------------------------------------
 | ||
| \begin{Algorithm}[t]
 | ||
|   \caption{Actions of the abstract machine with \JITI}
 | ||
|   \label{alg:construction}
 | ||
|   \begin{enumerate}
 | ||
|   \item if the current instruction $I$ is a \switchSTAR, \try, \retry,
 | ||
|     \trust or \jump, act as in the WAM;
 | ||
|   \item if the current instruction $I$ is a \jitiSTAR with arguments $r,
 | ||
|     l$, and $k$ ($r$ is a register) then
 | ||
|     \begin{enumerate}
 | ||
|     \item[2.1] if register $r$ contains a variable, the action is a
 | ||
|       \instr{goto} the next instruction in the node;
 | ||
|     \item[2.2] if register $r$ contains a value $v$, the action is to
 | ||
|       dynamically construct the index:
 | ||
|       \begin{itemize}
 | ||
|       \item[2.2.1] collect the subsequent instructions in a list $\cal I$
 | ||
| 	until the next instruction is a \try;
 | ||
|       \item[2.2.2] for each label $L$ in the \TryRetryTrust chain
 | ||
| 	inspect the code of the clause with label $L$ to find the
 | ||
| 	symbol~$c$ associated with register $r$ in the clause; (This
 | ||
| 	step creates a list of $\langle c, L \rangle$ pairs.)
 | ||
|       \item[2.2.3] create an index table $\cal T$ out of these pairs as
 | ||
| 	follows:
 | ||
| 	\begin{itemize}
 | ||
| 	\item if $I$ is a \jitiONconstant or a \jitiONstructure then
 | ||
| 	  create an index table for the symbols in the list of pairs;
 | ||
| 	  each entry of the table is identified by a symbol $c$ and
 | ||
| 	  contains:
 | ||
| 	  \begin{itemize}
 | ||
| 	  \item the instruction \jump $L_c$ if $L_c$ is the only label
 | ||
| 	    associated with $c$;
 | ||
| 	  \item the sequence of instructions obtained by appending to
 | ||
| 	    $\cal I$ a \TryRetryTrust chain for the sequence of labels
 | ||
| 	    $L'_1, \ldots, L'_l$ that are associated with $c$
 | ||
| 	  \end{itemize}
 | ||
| 	\item if $I$ is a \jitiONterm then
 | ||
| 	  \begin{itemize}
 | ||
| 	  \item partition the sequence of labels $\cal L$ in the list
 | ||
| 	    of pairs into sequences of labels ${\cal L}_c, {\cal L}_l$
 | ||
| 	    and ${\cal L}_s$ for constants, lists and structures,
 | ||
| 	    respectively;
 | ||
| 	  \item for each of the four sequences ${\cal L}, {\cal L}_c,
 | ||
| 	    {\cal L}_l, {\cal L}_s$ of labels create code:
 | ||
| 	    \begin{itemize}
 | ||
| 	    \item the instruction \fail if the sequence is empty;
 | ||
| 	    \item the instruction \jump $L$ if $L$ is the only label in
 | ||
| 	      the sequence;
 | ||
| 	    \item the sequence of instructions obtained by appending to
 | ||
| 	      $\cal I$ a \TryRetryTrust chain for the current sequence
 | ||
| 	      of labels;
 | ||
| 	    \end{itemize}
 | ||
| 	  \end{itemize}
 | ||
| 	\end{itemize}
 | ||
|       \item[2.2.4] transform the \jitiSTAR $r, l, k$ instruction to
 | ||
| 	a \switchSTAR $r, l, {\cal T}$ instruction;
 | ||
|       \item[2.2.5] continue execution with this instruction.
 | ||
|       \end{itemize}
 | ||
|     \end{enumerate}
 | ||
|   \end{enumerate}
 | ||
| \vspace*{-.7em}
 | ||
| \end{Algorithm}
 | ||
| %-------------------------------------------------------------------------
 | ||
| 
 | ||
| \Paragraph{Complexity properties.}
 | ||
| Index construction during runtime does not change the complexity of
 | ||
| query execution. First, note that each demanded index table will be
 | ||
| constructed at most once. Also, a \jitiSTAR instruction will be
 | ||
| encountered only in cases where execution would examine all clauses in
 | ||
| the \TryRetryTrust chain.\footnote{This statement is possibly not
 | ||
| valid in the presence of Prolog cuts.} The construction visits these
 | ||
| clauses \emph{once} and then creates the index table in time linear in
 | ||
| the number of clauses as one pass over the list of $\langle c, L
 | ||
| \rangle$ pairs suffices. After index construction, execution will
 | ||
| visit a subset of these clauses as the index table will be consulted.
 | ||
| %% Finally, note that the maximum number of \jitiSTAR instructions
 | ||
| %% that will be visited for each query is bounded by the maximum
 | ||
| %% number of index positions (symbols) in the clause heads of the
 | ||
| %% predicate.
 | ||
| Thus, in cases where \JITI is not effective, execution of a query will
 | ||
| at most double due to dynamic index construction. In fact, this worst
 | ||
| case is pessimistic and unlikely in practice. On the other hand, \JITI
 | ||
| can change the complexity of query evaluation from $O(n)$ to $O(1)$
 | ||
| where $n$ is the number of clauses.
 | ||
| 
 | ||
| \subsection{More implementation choices}
 | ||
| %---------------------------------------
 | ||
| The observant reader has no doubt noticed that
 | ||
| Algorithm~\ref{alg:construction} provides multi-argument indexing but
 | ||
| only for the main functor symbol. For clauses with compound terms that
 | ||
| require indexing in their sub-terms we can either employ a program
 | ||
| transformation such as \emph{unification
 | ||
| factoring}~\cite{UnifFact@POPL-95} at compile time or modify the
 | ||
| algorithm to consider index positions inside compound terms. This is
 | ||
| relatively easy to do but requires support from the register allocator
 | ||
| (passing the sub-terms of compound terms in appropriate registers)
 | ||
| and/or a new set of instructions. Due to space limitations we omit
 | ||
| further details.
 | ||
| 
 | ||
| Algorithm~\ref{alg:construction} relies on a procedure that inspects
 | ||
| the code of a clause and collects the symbols associated with some
 | ||
| particular index position (step~2.2.2). If we are satisfied with
 | ||
| looking only at clause heads, this procedure needs to understand only
 | ||
| the structure of \instr{get} and \instr{unify} instructions. Thus, it
 | ||
| is easy to write. At the cost of increased implementation complexity,
 | ||
| this step can of course take into account other information that may
 | ||
| exist in the body of the clause (e.g., type tests such as
 | ||
| \code{var(X)}, \code{atom(X)}, aliasing constraints such as \code{X =
 | ||
| Y}, numeric constraints such as \code{X > 0}, etc.).
 | ||
| 
 | ||
| A reasonable concern for \JITI is increased memory consumption. In our
 | ||
| experience, this does not seem to be a problem in practice since most
 | ||
| applications do not have demand for indexing on many argument
 | ||
| combinations. In applications where it does become a problem or when
 | ||
| running in an environment with limited memory, we can easily put a
 | ||
| bound on the size of index tables, either globally or for each
 | ||
| predicate separately. For example, the \jitiSTAR instructions can
 | ||
| either become inactive when this limit is reached, or better yet we
 | ||
| can recover the space of some tables. To do so, we can employ any
 | ||
| standard recycling algorithm (e.g., LRU) and reclaim the memory of
 | ||
| index tables that are no longer in use. This is easy to do by
 | ||
| reverting the corresponding \switchSTAR instructions back to \jitiSTAR
 | ||
| instructions. If the indices are demanded again at a time when memory
 | ||
| is available, they can simply be regenerated.
 | ||
| 
 | ||
| 
 | ||
| \section{Demand-Driven Indexing of Dynamic Predicates} \label{sec:dynamic}
 | ||
| %=========================================================================
 | ||
| We have so far lived in the comfortable world of static predicates,
 | ||
| where the set of clauses to index is fixed and the compiler can take
 | ||
| advantage of this knowledge. Dynamic code introduces several
 | ||
| complications:
 | ||
| \begin{itemize}
 | ||
| \item We need mechanisms to update multiple indices when new clauses
 | ||
|   are asserted or retracted. In particular, we need the ability to
 | ||
|   expand and possibly shrink multiple code chunks after code updates.
 | ||
| \item We do not know a priori which are the best index positions and
 | ||
|   cannot determine whether indexing on some arguments is avoidable.
 | ||
| \item Supporting the logical update (LU) semantics of ISO Prolog
 | ||
|   becomes harder.
 | ||
| \end{itemize}
 | ||
| We briefly discuss possible ways of addressing these issues.
 | ||
| However, note that Prolog systems typically provide indexing for
 | ||
| dynamic predicates and thus already deal in some way or another with
 | ||
| these issues; \JITI makes the problems more involved but not
 | ||
| fundamentally different than with only first argument indexing.
 | ||
| 
 | ||
| The first complication suggests that we should allocate memory for
 | ||
| dynamic indices in separate chunks, so that these can be expanded and
 | ||
| deallocated independently. Indeed, this is what we do.
 | ||
| %
 | ||
| Regarding the second complication, in the absence of any other
 | ||
| information, the only alternative is to generate indices for all
 | ||
| arguments. As optimizations, we can avoid indexing predicates with
 | ||
| only one clause and exclude arguments where some clause has a
 | ||
| variable.
 | ||
| 
 | ||
| Under LU semantics, calls to dynamic predicates execute in a
 | ||
| ``snapshot'' of the corresponding predicate. Each call sees the
 | ||
| clauses that existed at the time when the call was made, even if some
 | ||
| of the clauses were later retracted or new clauses were asserted. If
 | ||
| several calls are alive in the stack, several snapshots will be alive
 | ||
| at the same time. The standard solution to this problem is to use time
 | ||
| stamps to tell which clauses are \emph{live} for which calls.
 | ||
| %
 | ||
| This solution complicates freeing index tables because: (1) an index
 | ||
| table holds references to clauses, and (2) the table may be in use
 | ||
| (i.e., may be accessible from the execution stacks). An index
 | ||
| table thus is killed in several steps:
 | ||
| \begin{enumerate}
 | ||
| \item Detach the index table from the indexing tree.
 | ||
| \item Recursively \emph{kill} every child of the current table; if a
 | ||
|   table is killed so are its children.
 | ||
| \item Wait until the table is not in use, that is, it is not pointed
 | ||
|   to from anywhere.
 | ||
| \item Walk the table and release any references it may hold.
 | ||
| \item Physically recover space.
 | ||
| \end{enumerate}
 | ||
| 
 | ||
| 
 | ||
| \section{Implementation in XXX and in YAP} \label{sec:impl}
 | ||
| %==========================================================
 | ||
| The implementation of \JITI in XXX follows a variant of the scheme
 | ||
| presented in Sect.~\ref{sec:static}. The compiler uses heuristics to
 | ||
| determine the best argument to index on (i.e., this argument is not
 | ||
| necessarily the first) and employs \switchSTAR instructions for this
 | ||
| task. It also statically generates \jitiONconstant instructions for
 | ||
| other arguments that are good candidates for \JITI. Currently, an
 | ||
| argument is considered a good candidate if it has only constants or
 | ||
| only structure symbols in all clauses. Thus, XXX uses only
 | ||
| \jitiONconstant and \jitiONstructure instructions, never a
 | ||
| \jitiONterm. Also, XXX does not perform \JITI inside structure
 | ||
| symbols. For dynamic predicates, \JITI is employed only if they
 | ||
| consist of Datalog facts; if a clause which is not a Datalog fact is
 | ||
| asserted, all dynamically created index tables for the predicate are
 | ||
| simply removed and the \jitiONconstant instruction becomes a
 | ||
| \instr{noop}. All this is done automatically, but the user can disable
 | ||
| \JITI in compiled code using an option.
 | ||
| 
 | ||
| YAP implements \JITI since version 5. The current implementation
 | ||
| supports static code, dynamic code, and the internal database. It
 | ||
| differs from the algorithm presented in Sect.~\ref{sec:static} in that
 | ||
| \emph{all indexing code is generated on demand}. Thus, YAP cannot
 | ||
| assume that a \jitiSTAR instruction is followed by a \TryRetryTrust
 | ||
| chain. Instead, by default YAP has to search the whole predicate for
 | ||
| clauses that match the current position in the indexing code. Doing so
 | ||
| for every index expansion was found to be very inefficient for larger
 | ||
| relations: in such cases YAP will maintain a list of matching clauses
 | ||
| at each \jitiSTAR node. Indexing dynamic predicates in YAP follows
 | ||
| very much the same algorithm as static indexing: the key idea is that
 | ||
| most nodes in the index tree must be allocated separately so that they
 | ||
| can grow or shrink independently. YAP can index arguments where some
 | ||
| clauses have unconstrained variables, but only for static predicates,
 | ||
| as in dynamic code this would complicate support for LU semantics.
 | ||
| 
 | ||
| YAP uses the term JITI (Just-In-Time Indexing) to refer to \JITI. In
 | ||
| the next section we will take the liberty to use this term as a
 | ||
| convenient abbreviation.
 | ||
| 
 | ||
| 
 | ||
| \section{Performance Evaluation} \label{sec:perf}
 | ||
| %================================================
 | ||
| We evaluate JITI on a set of benchmarks and applications.
 | ||
| Throughout, we compare performance of JITI with first argument
 | ||
| indexing. For the benchmarks of Sect.~\ref{sec:perf:ineffective}
 | ||
| and~\ref{sec:perf:effective} which involve both systems, we used a
 | ||
| 2.4~GHz P4-based laptop with 512~MB of memory.
 | ||
| % and report times in milliseconds.
 | ||
| For the benchmarks of Sect.~\ref{sec:perf:ILP} which involve
 | ||
| YAP~5.1.2 only, we used an 8-node cluster, where each node is a
 | ||
| dual-core AMD~2600+ machine with 2~GB of memory.
 | ||
| % and report times in seconds.
 | ||
| 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{table}[t]
 | ||
|   \centering
 | ||
|   \caption{Performance of some benchmarks with 1st vs. \JITI (times in msecs)}
 | ||
|   \vspace*{-1em}
 | ||
|   \subfigure[When JITI is ineffective]{%
 | ||
|     \label{tab:ineffective}
 | ||
|     \begin{tabular}[b]{|l||r|r||r|r|} \hline
 | ||
|       & \multicolumn{2}{|c||}{\bf YAP} & \multicolumn{2}{|c|}{\bf XXX} \\
 | ||
|       \cline{2-5}
 | ||
|       Benchmark     &   1st  &  JITI         &   1st  &  JITI          \\
 | ||
|       \hline
 | ||
|       \tcLio (8000) &     13 &    14         &      4 &     4          \\
 | ||
|       \tcRio (2000) &   1445 &  1469         &    614 &   615          \\
 | ||
|       \tcDio ( 400) &   3208 &  3260         &   2338 &  2300          \\
 | ||
|       \tcLoo (2000) &   3935 &  3987         &   2026 &  2105          \\
 | ||
|       \tcRoo (2000) &   2841 &  2952         &   1502 &  1512          \\
 | ||
|       \tcDoo ( 400) &   3735 &  3805         &   4976 &  4978          \\
 | ||
|       \compress     &   3614 &  3595         &   2875 &  2848          \\
 | ||
|       \hline
 | ||
|     \end{tabular}
 | ||
|   }%
 | ||
|   \subfigure[When JITI is effective]{
 | ||
|     \label{tab:effective}
 | ||
|     \begin{tabular}[b]{|l||r|r|r||r|r|r|} \hline
 | ||
|       & \multicolumn{3}{|c||}{\bf YAP} & \multicolumn{3}{|c|}{\bf XXX} \\
 | ||
|       \cline{2-7}
 | ||
|                 &   1st  &  JITI &{\bf ratio}&  1st  &  JITI &{\bf ratio}\\
 | ||
|       \hline
 | ||
|       \sgCyl    &    2,864 &    24 & $119\times$& 2,390 &    28 &  $85\times$\\
 | ||
|       \muta     &   30,057 &16,782 &$1.79\times$&26,314 &21,574 &$1.22\times$\\
 | ||
|       \pta      &    5,131 &   188 &  $27\times$& 4,442 &   279 &  $16\times$\\
 | ||
|       \tea      &1,478,813 &54,616 &  $27\times$&   --- &   --- &      ---   \\
 | ||
|       \hline
 | ||
|     \end{tabular}
 | ||
|   }%
 | ||
|   \vspace*{-1em}
 | ||
| \end{table}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| \subsection{Performance of \JITI when ineffective} \label{sec:perf:ineffective}
 | ||
| %------------------------------------------------------------------------------
 | ||
| In some programs, \JITI does not trigger\footnote{In XXX only; even
 | ||
| 1st argument indexing is generated on demand when JITI is used in
 | ||
| YAP.} or might trigger but have no effect other than an overhead due
 | ||
| to runtime index construction. We therefore wanted to measure this
 | ||
| overhead.
 | ||
| %
 | ||
| As both systems support tabling, we decided to use tabling benchmarks
 | ||
| because they are small and easy to understand, and because they are a
 | ||
| bad case for JITI in the following sense: tabling avoids generating
 | ||
| repetitive queries and the benchmarks operate over extensional
 | ||
| database (EDB) predicates of size approximately equal to the size of
 | ||
| the program. We used \compress, a tabled program that solves a puzzle
 | ||
| from an ICLP Prolog programming competition. The other benchmarks are
 | ||
| different variants of tabled left, right and doubly recursive
 | ||
| transitive closure over an EDB predicate forming a chain of size shown
 | ||
| in Table~\ref{tab:ineffective} in parentheses. For each variant of
 | ||
| transitive closure, we issue two queries: one with mode
 | ||
| \code{(in,out)} and one with mode \code{(out,out)}.
 | ||
| %
 | ||
| For YAP, indices on the first argument and \TryRetryTrust chains are
 | ||
| built on all benchmarks under \JITI.
 | ||
| %
 | ||
| For XXX, \JITI triggers on no benchmark but the \jitiONconstant
 | ||
| instructions are executed for the three \bench{tc\_?\_oo} benchmarks.
 | ||
| %
 | ||
| As can be seen in Table~\ref{tab:ineffective}, \JITI, even when
 | ||
| ineffective, incurs a runtime overhead that is at the level of noise
 | ||
| and goes mostly unnoticed.
 | ||
| %
 | ||
| We also note that our aim here is \emph{not} to compare the two
 | ||
| systems, so the \textbf{YAP} and \textbf{XXX} columns should be read
 | ||
| separately.
 | ||
| 
 | ||
| \vspace*{-0.5em}
 | ||
| \subsection{Performance of \JITI when effective} \label{sec:perf:effective}
 | ||
| %--------------------------------------------------------------------------
 | ||
| On the other hand, when \JITI is effective, it can significantly
 | ||
| improve runtime performance. We use the following programs and
 | ||
| applications:
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{small}
 | ||
| \begin{description}
 | ||
| \item[\sgCyl] The same generation DB benchmark on a $24 \times 24
 | ||
|   \times 2$ cylinder. We issue the open query.
 | ||
| \item[\muta] A computationally intensive application where most
 | ||
|   predicates are defined intensionally.
 | ||
| \item[\pta] A tabled logic program implementing Andersen's points-to
 | ||
|   analysis. A medium-sized imperative program is encoded as a set of
 | ||
|   facts (about 16,000) and properties of interest are encoded using
 | ||
|   rules. Program properties are then determined by the closure of
 | ||
|   these rules.
 | ||
| \item[\tea] Another implementation of Andersen's points-to analysis.
 | ||
|   The analyzed program, the \texttt{javac} benchmark, is encoded in a
 | ||
|   file of 411,696 facts (62,759,581 bytes in total). Its compilation
 | ||
|   exceeds the limits of the XXX compiler (w/o JITI). So we run this
 | ||
|   benchmark only in YAP.
 | ||
| \end{description}
 | ||
| \end{small}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| As can be seen in Table~\ref{tab:effective}, \JITI significantly
 | ||
| improves the performance of these applications. In \muta, which spends
 | ||
| most of its time in recursive predicates, the speed up is only $79\%$
 | ||
| in YAP and~$22\%$ in XXX. The remaining benchmarks execute several
 | ||
| times (from~$16$ up to~$119$) faster. It is important to realize that
 | ||
| \emph{these speedups are obtained automatically}, i.e., without any
 | ||
| programmer intervention or by using any compiler directives, in all
 | ||
| these applications.
 | ||
| 
 | ||
| \subsection{Performance of \JITI on ILP applications} \label{sec:perf:ILP}
 | ||
| %-------------------------------------------------------------------------
 | ||
| The need for \JITI was originally noticed in inductive logic
 | ||
| programming applications. These applications tend to issue ad hoc
 | ||
| queries during execution and thus their indexing requirements cannot
 | ||
| be determined at compile time. On the other hand, they operate on lots
 | ||
| of data, so memory consumption is a reasonable concern. We evaluate
 | ||
| JITI's time and space performance on some learning tasks using the
 | ||
| Aleph system~\cite{ALEPH} and the datasets of
 | ||
| Fig.~\ref{fig:ilp:datasets} which issue simple queries in an
 | ||
| extensional database. Several of these datasets are standard in the
 | ||
| ILP literature.
 | ||
| 
 | ||
| \Paragraph{Time performance.}
 | ||
| We compare times for 10 runs of the saturation/refinement cycle of the
 | ||
| ILP system; see Table~\ref{tab:ilp:time}.
 | ||
| %% The \Krki datasets have small search spaces and small databases, so
 | ||
| %% they achieve the same performance under both versions: there is no
 | ||
| %% slowdown. 
 | ||
| The \Mesh and \Pyrimidines applications are the only ones that do not
 | ||
|   benefit much from indexing in the database; they do benefit, though,
 | ||
| from indexing in the dynamic representation of the search space, as
 | ||
| their running times improve somewhat with \JITI.
 | ||
| 
 | ||
| The \BreastCancer and \GeneExpr applications use unstructured data.
 | ||
| The speedup here is mostly from multiple argument indexing.
 | ||
| \BreastCancer is particularly interesting. It consists of 40 binary
 | ||
| relations with 65k elements each, where the first argument is the key.
 | ||
| We know that most calls have the first argument bound, hence indexing
 | ||
| was not expected to matter much. Instead, the results show \JITI to
 | ||
| improve running time by more than an order of magnitude. This suggests
 | ||
| that even a small percentage of badly indexed calls can end up
 | ||
| dominating runtime.
 | ||
| 
 | ||
| \IEProtein and \Thermolysin are example applications that manipulate
 | ||
| structured data. \IEProtein is the largest dataset we consider, and
 | ||
| indexing is absolutely critical. The speedup is not just impressive;
 | ||
| it is simply not possible to run the application in reasonable time
 | ||
| with only first argument indexing. \Thermolysin is smaller and
 | ||
| performs some computation per query, but even so, \JITI improves its
 | ||
| performance by an order of magnitude. The remaining benchmarks improve
 | ||
| from one to more than two orders of magnitude.
 | ||
| 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{table}[t]
 | ||
|   \centering
 | ||
|   \caption{Time and space performance of JITI
 | ||
|     on Inductive Logic Programming datasets}
 | ||
|   \vspace*{-1em}
 | ||
|   \label{tab:ilp}
 | ||
|   \setlength{\tabcolsep}{2.5pt}
 | ||
|   \subfigure[Time (in seconds)]{\label{tab:ilp:time}
 | ||
|     \begin{tabular}{|l||r|r|r||} \hline
 | ||
|                   & \multicolumn{3}{|c||}{Time} \\
 | ||
|     \cline{2-4}
 | ||
|     Benchmark     &    1st    &   JITI  &{\bf ratio} \\
 | ||
|     \hline
 | ||
|     \BreastCancer &     1,450 &      88 &  $16\times$ \\
 | ||
|     \Carcino      &    17,705 &     192 &  $92\times$ \\
 | ||
|     \Choline      &    14,766 &   1,397 &  $11\times$ \\
 | ||
|     \GeneExpr     &   193,283 &   7,483 &  $26\times$ \\
 | ||
|     \IEProtein    & 1,677,146 &   2,909 & $577\times$ \\
 | ||
| %%  \Krki         &       0.3 &     0.3 &   $1$ \\
 | ||
| %%  \KrkiII       &       1.3 &     1.3 &   $1$ \\
 | ||
|     \Mesh         &         4 &       3 & $1.3\times$ \\
 | ||
|     \Pyrimidines  &   487,545 & 253,235 & $1.9\times$ \\
 | ||
|     \Susi         &   105,091 &     307 & $342\times$ \\
 | ||
|     \Thermolysin  &    50,279 &   5,213 &  $10\times$ \\
 | ||
|     \hline
 | ||
|     \end{tabular}
 | ||
|   }%
 | ||
|   \subfigure[Memory usage (in KB)]{\label{tab:ilp:memory}
 | ||
|     \begin{tabular}{||r|r|r|r||} \hline
 | ||
|                 \multicolumn{2}{||c|}{Static code}
 | ||
|               & \multicolumn{2}{|c||}{Dynamic code} \\
 | ||
|     \hline
 | ||
|                 \multicolumn{1}{||c|}{Clauses} & \multicolumn{1}{c}{Index}
 | ||
|               & \multicolumn{1}{|c|}{Clauses} & \multicolumn{1}{c||}{Index}\\
 | ||
|     \hline
 | ||
| 	        60,940 &  46,887 &     630 &     14 \\
 | ||
| 	         1,801 &   2,678 &  13,512 &    942 \\
 | ||
| 	           666 &     174 &   3,172 &    174 \\
 | ||
| 	        46,726 &  22,629 & 116,463 &  9,015 \\
 | ||
| 	       146,033 & 129,333 &  53,423 &  1,531 \\
 | ||
| %%	           678 &     117 &   2,047 &     24 \\
 | ||
| %%	         1,866 &     715 &   2,055 &     26 \\
 | ||
| 	           802 &     161 &   2,149 &    109 \\
 | ||
| 	           774 &     218 &  25,840 & 12,291 \\
 | ||
|  	         5,007 &   2,509 &   4,497 &    759 \\
 | ||
| 	         2,317 &     929 & 116,129 &  7,064 \\
 | ||
|     \hline
 | ||
|     \end{tabular}
 | ||
|   }%
 | ||
| \end{table}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| %------------------------------------------------------------------------------
 | ||
| \begin{figure}
 | ||
|   \hrule \ \\[-2em]
 | ||
|   \begin{description}
 | ||
| %%  \item[\Krki] tries to learn rules from a small database of chess end-games;
 | ||
|   \item[\GeneExpr] learns rules for yeast gene activity given a
 | ||
|     database of genes, their interactions, and micro-array gene
 | ||
|     expression data; %~\cite{Regulatory@ILP-06};
 | ||
|   \item[\BreastCancer] processes real-life patient reports towards
 | ||
|     predicting whether an abnormality may be
 | ||
|     malignant; %~\cite{DavisBDPRCS@IJCAI-05-short};
 | ||
|   \item[\IEProtein] processes information extraction from paper
 | ||
|     abstracts to search proteins;
 | ||
|   \item[\Susi] learns from shopping patterns;
 | ||
|   \item[\Mesh] learns rules for finite-element mesh design;
 | ||
|   \item[\Carcino, \Choline, \Pyrimidines] try to predict chemical
 | ||
|     properties of compounds and store them as tables, given their
 | ||
|     chemical composition and major properties;
 | ||
|   \item[\Thermolysin] also manipulates chemical compounds but learns
 | ||
|     from the 3D-structure of a molecule's conformations.
 | ||
|   \end{description}
 | ||
|   \hrule
 | ||
|   \caption{Description of the ILP datasets used in the performance
 | ||
|     comparison of Table~\ref{tab:ilp}}
 | ||
|   \label{fig:ilp:datasets}
 | ||
| \end{figure}
 | ||
| %------------------------------------------------------------------------------
 | ||
| 
 | ||
| \Paragraph{Space performance.}
 | ||
| Table~\ref{tab:ilp:memory} shows memory usage when using \JITI. The
 | ||
| table presents data obtained at a point near the end of execution;
 | ||
| memory usage should be at the maximum. These applications use a
 | ||
| mixture of static and dynamic predicates and we show their memory
 | ||
| usage separately. On static predicates, memory usage varies widely,
 | ||
| from only 10\% to the worst case, \Carcino, where the index tables
 | ||
| take more space than the original program. Hash tables dominate usage
 | ||
| in \IEProtein and \Susi, whereas \TryRetryTrust chains dominate in
 | ||
| \BreastCancer. In most other cases no single component dominates
 | ||
| memory usage. Memory usage for dynamic predicates is shown in the last
 | ||
| two columns; this data is mostly used to store the search space.
 | ||
| Observe that there is a much lower overhead in this case. A more
 | ||
| detailed analysis shows that most space is occupied by the hash tables
 | ||
| and by internal nodes of the tree, and that relatively little space is
 | ||
| occupied by \TryRetryTrust chains, suggesting that \JITI is behaving
 | ||
| well in practice.
 | ||
| 
 | ||
| 
 | ||
| \section{Concluding Remarks}
 | ||
| %===========================
 | ||
| Motivated by the needs of applications in the areas of inductive
 | ||
| logic programming, program analysis, deductive databases, etc.\ to
 | ||
| access large datasets efficiently, we have described a novel but also
 | ||
| simple idea: \emph{indexing Prolog clauses on demand during program
 | ||
| execution}.
 | ||
| %
 | ||
| Given the impressive speedups this idea can provide for many LP
 | ||
| applications, we are a bit surprised similar techniques have not been
 | ||
| explored before. In general, Prolog systems have been reluctant to
 | ||
| perform code optimizations during runtime and our feeling is that LP
 | ||
| implementation has been left a bit behind. We hold that this
 | ||
| should change.
 | ||
| %
 | ||
| Indeed, we see \JITI as only a first, very successful, step towards
 | ||
| effective runtime optimization of logic programs.
 | ||
| 
 | ||
| As presented, \JITI is a hybrid technique: index generation occurs
 | ||
| during runtime but is partly guided by the compiler, because we want
 | ||
| to combine it with compile-time WAM-style indexing. More flexible
 | ||
| schemes are of course possible. For example, index generation can be
 | ||
| fully dynamic (as in YAP), combined with user declarations, or driven
 | ||
| by static analysis to be even more selective or go beyond fixed-order
 | ||
| indexing.
 | ||
| %
 | ||
| Last, observe that \JITI fully respects Prolog semantics. Better
 | ||
| performance can be achieved in the context of one-solution
 | ||
| computations, or in the context of tabling where order of clauses and
 | ||
| solutions does not matter and repeated solutions are discarded.
 | ||
| 
 | ||
| \Paragraph{Acknowledgments}
 | ||
| 
 | ||
| This work is dedicated to the memory of our friend and colleague
 | ||
| Ricardo Lopes. We miss you! V\'{\i}tor Santos Costa was partially
 | ||
| supported by CNPq and would like to acknowledge support received while
 | ||
| visiting at UW-Madison and the support of the YAP user community.
 | ||
| This work has been partially supported by MYDDAS (POSC/EIA/59154/2004)
 | ||
| and by funds granted to LIACC through the Programa de Financiamento
 | ||
| Plurianual, Funda\c{c}\~{a}o para a Ci\^{e}ncia e Tecnologia and Programa POSC.
 | ||
| 
 | ||
| % (duplicate Acknowledgments paragraph removed; its content is already
| % contained in the Acknowledgments paragraph above)
 | ||
| 
 | ||
| %==============================================================================
 | ||
| \begin{thebibliography}{10}
 | ||
| 
 | ||
| \bibitem{Warren83}
 | ||
| Warren, D.H.D.:
 | ||
| \newblock An abstract {P}rolog instruction set.
 | ||
| \newblock Tech. Note 309, SRI International (1983)
 | ||
| 
 | ||
| \bibitem{YAP}
 | ||
| Santos~Costa, V., Damas, L., Reis, R., Azevedo, R.:
 | ||
| \newblock {YAP} User's Manual. (2002)
 | ||
| 
 | ||
| \bibitem{ShallowBacktracking@ICLP-89}
 | ||
| Carlsson, M.:
 | ||
| \newblock On the efficiency of optimising shallow backtracking in compiled
 | ||
|   {Prolog}.
 | ||
| \newblock In Levi, G., Martelli, M., eds.: Proceedings of the Sixth
 | ||
|   ICLP, MIT Press (June 1989)  3--15
 | ||
| 
 | ||
| \bibitem{IndexingProlog@NACLP-89}
 | ||
| Demoen, B., Mari{\"e}n, A., Callebaut, A.:
 | ||
| \newblock Indexing in {P}rolog.
 | ||
| \newblock In Lusk, E.L., Overbeek, R.A., eds.: Proceedings of NACLP,
 | ||
|   MIT Press (1989)  1001--1012
 | ||
| 
 | ||
| \bibitem{SWI}
 | ||
| Wielemaker, J.:
 | ||
| \newblock {SWI-Prolog 5.1}: Reference Manual.
 | ||
| \newblock {SWI}, University of Amsterdam, Roetersstraat 15, 1018 WB Amsterdam,
 | ||
|   The Netherlands. (1997--2003)
 | ||
| 
 | ||
| \bibitem{XSB}
 | ||
| Sagonas, K.F., Swift, T., Warren, D.S., Freire, J., Rao, P.:
 | ||
| \newblock The {XSB} Pro\-grammer's Manual.
 | ||
| \newblock State University of New York at Stony Brook. (1997)
 | ||
| 
 | ||
| \bibitem{ilProlog}
 | ||
| Tron\c{c}on, R., Janssens, G., Demoen, B., Vandecasteele, H.:
 | ||
| \newblock Fast frequent querying with lazy control flow compilation.
 | ||
| \newblock Theory and Practice of Logic Programming (2007) To appear.
 | ||
| 
 | ||
| \bibitem{HickeyMudambi@JLP-89}
 | ||
| Hickey, T., Mudambi, S.:
 | ||
| \newblock Global compilation of {P}rolog.
 | ||
| \newblock JLP \textbf{7}(3) (November 1989)  193--230
 | ||
| 
 | ||
| \bibitem{VRDW87}
 | ||
| {Van Roy}, P., Demoen, B., Willems, Y.D.:
 | ||
| \newblock Improving the execution speed of compiled {Prolog} with modes, clause
 | ||
|   selection and determinism.
 | ||
| \newblock In: TAPSOFT'87, Springer (1987)  111--125
 | ||
| 
 | ||
| \bibitem{TOAM@ICLP-90}
 | ||
| Zhou, N.F., Takagi, T., Kazuo, U.:
 | ||
| \newblock A matching tree oriented abstract machine for {P}rolog.
 | ||
| \newblock In Warren, D.H.D., Szeredi, P., eds.: ICLP90, MIT Press (1990)
 | ||
|   158--173
 | ||
| 
 | ||
| \bibitem{UnifFact@POPL-95}
 | ||
| Dawson, S., Ramakrishnan, C.R., Ramakrishnan, I.V., Sagonas, K., Skiena, S.,
 | ||
|   Swift, T., Warren, D.S.:
 | ||
| \newblock Unification factoring for the efficient execution of logic programs.
 | ||
| \newblock In: Conference Record of POPL'95, ACM Press (January 1995)  247--258
 | ||
| 
 | ||
| \bibitem{Tries@JLP-99}
 | ||
| Ramakrishnan, I.V., Rao, P., Sagonas, K., Swift, T., Warren, D.S.:
 | ||
| \newblock Efficient access mechanisms for tabled logic programs.
 | ||
| \newblock Journal of Logic Programming \textbf{38}(1) (January 1999)  31--54
 | ||
| 
 | ||
| \bibitem{KligerShapiro@ICLP-88}
 | ||
| Kliger, S., Shapiro, E.:
 | ||
| \newblock A decision tree compilation algorithm for {FCP($|$,:,?)}.
 | ||
| \newblock In: Proceedings of the Fifth ICSLP, MIT Press (August 1988) 1315--1336
 | ||
| 
 | ||
| \bibitem{Mercury@JLP-96}
 | ||
| Somogyi, Z., Henderson, F., Conway, T.:
 | ||
| \newblock The execution algorithm of {Mercury}, an efficient purely declarative
 | ||
|   logic programming language.
 | ||
| \newblock JLP \textbf{26}(1--3) (December 1996)  17--64
 | ||
| 
 | ||
| \bibitem{Ciao@SCP-05}
 | ||
| Hermenegildo, M.V., Puebla, G., Bueno, F., L{\'o}pez-Garc\'{\i}a, P.:
 | ||
| \newblock Integrated program debugging, verification, and optimization using
 | ||
|   abstract interpretation (and the {Ciao} system preprocessor).
 | ||
| \newblock Science of Computer Programming \textbf{58}(1--2) (2005)  115--140
 | ||
| 
 | ||
| \bibitem{FreezeIndexing@ICLP-87}
 | ||
| Carlsson, M.:
 | ||
| \newblock Freeze, indexing, and other implementation issues in the {WAM}.
 | ||
| \newblock In Lassez, J.L., ed.: Proceedings of the Fourth ICLP,
 | ||
|   MIT Press (May 1987)  40--58
 | ||
| 
 | ||
| \bibitem{ALEPH}
 | ||
| Srinivasan, A.:
 | ||
| \newblock The Aleph Manual. (2001)
 | ||
| 
 | ||
| \end{thebibliography}
 | ||
| %==============================================================================
 | ||
| 
 | ||
| \end{document}
 |