%==============================================================================
\documentclass{llncs} 
%------------------------------------------------------------------------------
\usepackage{a4wide}
\usepackage{float}
\usepackage{xspace}
\usepackage{epsfig}
\usepackage{wrapfig}
\usepackage{subfigure}

\renewcommand{\rmdefault}{ptm}
%------------------------------------------------------------------------------
% Declare the custom float type used for the algorithm listing
% (\begin{Algorithm} ... \end{Algorithm}), typeset in the `ruled' float style.
% Arguments of \newfloat (float package): float type name, default placement
% specifier, and file extension of the auxiliary "list of" file.
\floatstyle{ruled}
\newfloat{Algorithm}{ht}{lop}
%------------------------------------------------------------------------------
% \wamcodesize expands to the *name* of a font-size environment; the figures
% use it as \begin{\wamcodesize} ... \end{\wamcodesize} around WAM code.
\newcommand{\wamcodesize}{scriptsize}
% \code{...}: Prolog source text, set in typewriter.
\newcommand{\code}[1]{\texttt{#1}}
% \instr{...}: abstract machine instruction names, set in sans serif.
\newcommand{\instr}[1]{\textsf{#1}}
% Shorthands for individual instruction names. Each ends in \xspace so that
% it can be followed directly by text; the \mbox variants additionally keep
% the name from being broken across lines.
\newcommand{\try}{\instr{try}\xspace}
\newcommand{\retry}{\mbox{\instr{retry}}\xspace}
\newcommand{\trust}{\instr{trust}\xspace}
\newcommand{\TryRetryTrust}{\mbox{\instr{try-retry-trust}}\xspace}
\newcommand{\fail}{\instr{fail}\xspace}
\newcommand{\jump}{\instr{jump}\xspace}
% Generic (``*'') names standing for a whole family of instructions.
\newcommand{\jitiSTAR}{\mbox{\instr{dindex\_on\_*}}\xspace}
\newcommand{\switchSTAR}{\mbox{\instr{switch\_on\_*}}\xspace}
% Demand-driven indexing instructions (dindex_on_*) ...
\newcommand{\jitiONterm}{\mbox{\instr{dindex\_on\_term}}\xspace}
\newcommand{\jitiONconstant}{\mbox{\instr{dindex\_on\_constant}}\xspace}
\newcommand{\jitiONstructure}{\mbox{\instr{dindex\_on\_structure}}\xspace}
% ... and the switch_on_* instructions they are paired with in the text.
\newcommand{\switchONterm}{\mbox{\instr{switch\_on\_term}}\xspace}
\newcommand{\switchONconstant}{\mbox{\instr{switch\_on\_constant}}\xspace}
\newcommand{\switchONstructure}{\mbox{\instr{switch\_on\_structure}}\xspace}
\newcommand{\getcon}{\mbox{\instr{get\_constant}}\xspace}
\newcommand{\proceed}{\instr{proceed}\xspace}
% Horizontal rule spanning only columns 2-3 of the three-column hash-table
% tabulars (column 1 holds the table label and stays unruled).
\newcommand{\Cline}{\cline{2-3}}
% Name of the technique as used in running text (lowercase; not suitable
% at the start of a sentence).
\newcommand{\JITI}{demand-driven indexing\xspace}
%------------------------------------------------------------------------------
% Environments for program listings: a one-column, left-aligned tabular set in
% typewriter at \small, \scriptsize and \footnotesize respectively (rows are
% separated with \\). SmallProg and ScriptProg align the box on its bottom row
% ([b]); FootProg centres it vertically ([c]).
\newenvironment{SmallProg}{\begin{tt}\begin{small}\begin{tabular}[b]{l}}{\end{tabular}\end{small}\end{tt}}
\newenvironment{ScriptProg}{\begin{tt}\begin{scriptsize}\begin{tabular}[b]{l}}{\end{tabular}\end{scriptsize}\end{tt}}
\newenvironment{FootProg}{\begin{tt}\begin{footnotesize}\begin{tabular}[c]{l}}{\end{tabular}\end{footnotesize}\end{tt}}

% \TODOcomment{<suffix>}{<text>}: numbered margin note for draft comments.
%   #1  suffix selecting the counter `TODOcounter<suffix>'; it is also printed
%       after the number inside the margin box.
%   #2  text of the note.
% Steps the counter, prints its value as a bold superscript at the call site,
% and puts a framed 2cm-wide box with the same number and the note text in the
% margin. The optional \marginpar argument (ragged-left box) is the variant
% LaTeX uses for a left-hand margin; the mandatory one (ragged-right box) is
% used for a right-hand margin.
% NOTE(review): the unprotected line ends inside the definition contribute
% spaces to the output; the body is deliberately left untouched here.
\newcommand{\TODOcomment}[2]{%
  \stepcounter{TODOcounter#1}%
  {\scriptsize\bf$^{(\arabic{TODOcounter#1})}$}%
  \marginpar[\fbox{
    \parbox{2cm}{\raggedleft
      \scriptsize$^{({\bf{\arabic{TODOcounter#1}{#1}}})}$%
      \scriptsize #2}}]%
  {\fbox{\parbox{2cm}{\raggedright
      \scriptsize$^{({\bf{\arabic{TODOcounter#1}{#1}}})}$%
      \scriptsize #2}}}
}%
% Counter for the empty suffix. Declaring it after \TODOcomment is fine: the
% counter is only looked up when the macro is expanded in the document body.
\newcounter{TODOcounter}
% \TODO{<text>}: the common case, a note numbered with the plain TODOcounter.
\newcommand{\TODO}[1]{\TODOcomment{}{#1}}
%------------------------------------------------------------------------------

\title{Demand-Driven Indexing of Prolog Clauses}
\titlerunning{Demand-Driven Indexing of Prolog Clauses}

\author{V\'{\i}tor Santos Costa\inst{1} \and Konstantinos
  Sagonas\inst{2} \and Ricardo Lopes\inst{1}}
\authorrunning{V. Santos Costa, K. Sagonas and R. Lopes}

\institute{
  University of Porto, Portugal
  \and
  National Technical University of Athens, Greece
}

\begin{document}
\maketitle

\begin{abstract}
  As logic programming applications grow in size, Prolog systems need
  to efficiently access larger and larger data sets and the need for
  any- and multi-argument indexing becomes more and more profound.
  Static generation of multi-argument indexing is one alternative, but
  applications often rely on features that are inherently dynamic
  (e.g., generating hypotheses for ILP data sets during runtime) which
  makes static techniques inapplicable or inaccurate. Another
  alternative, which has not been investigated so far, is to employ
  dynamic schemes for flexible demand-driven indexing of Prolog
  clauses. We propose such schemes and discuss issues that need to be
  addressed for their efficient implementation in the context of
  WAM-based Prolog systems. We have implemented demand-driven indexing
  in two different Prolog systems and have been able to obtain
  non-negligible performance speedups: from a few percent up to orders
  of magnitude. Given these results, we see very little reason for
  Prolog systems not to incorporate some form of dynamic indexing
  based on actual demand. In fact, we see demand-driven indexing as
  the first step towards effective runtime optimization of Prolog
  programs.
\end{abstract}


\section{Introduction}
%=====================
The WAM~\cite{Warren83}


\section{State of the Art and Related Work} \label{sec:related}
%==============================================================
% Indexing in Prolog systems:
Even nowadays, some Prolog systems are still influenced by the WAM
design and only support indexing on the main functor symbol of the
first argument. Some others, like YAP~\cite{YAP}, can look inside
compound terms. SICStus Prolog supports \emph{shallow
backtracking}~\cite{ShallowBacktracking@ICLP-89}; choice points are
fully populated only when it is certain execution will enter the
clause body. While shallow backtracking avoids some of the performance
problems of unnecessary choice point creation, it does not offer the
full benefits that indexing can provide. Other systems like
BIM-Prolog~\cite{IndexingProlog@NACLP-89}, ilProlog,
SWI-Prolog~\cite{SWI}, and XSB~\cite{XSB} allow for user-controlled
multi-argument indexing (via an \code{:-~index} directive). Typically,
this support comes with various implementation restrictions. For
example, in SWI-Prolog at most four arguments can be indexed; in XSB
the compiler does not offer multi-argument indexing support and the
predicates need to be asserted instead; we know of no system where
multi-argument indexing looks inside compound terms. More importantly,
requiring users to specify arguments to index on is neither
user-friendly nor guarantees good performance results. Our thesis is
that it is much better if the abstract machine is able to
automatically adapt to the runtime indexing requirements of Prolog
applications.

% Trees, tries and unification factoring:
Recognizing the need for better indexing, researchers have proposed
more flexible index mechanisms for Prolog. For example, Hickey and
Mudambi proposed \emph{switching trees}~\cite{HickeyMudambi@JLP-89},
which rely on the presence of mode information. Similar proposals were
made by Van Roy, Demoen and Willems, who perform indexing on
several arguments to form a \emph{selection tree}~\cite{VRDW87}, and
by Zhou et al.\ who implemented a \emph{matching tree} oriented
abstract machine for Prolog~\cite{TOAM@ICLP-90}. For static
predicates, the XSB compiler offers support for \emph{unification
factoring}~\cite{UnifFact@POPL-95}; for asserted code, XSB can
represent databases of facts using \emph{tries}~\cite{Tries@JLP-99}
which provide left-to-right multi-argument indexing. However, none of
these mechanisms is used automatically; instead the user has to
specify appropriate directives.

% Comparison with static analysis techniques and Mercury:
Long ago, Kliger and Shapiro argued that such tree-based indexing
schemes are not cost effective for the compilation of Prolog
programs~\cite{KligerShapiro@ICLP-88}. We disagree with their
conclusion. On the other hand it is true that unless the modes of
predicates are known there is a risk of doing indexing on output
arguments, whose only effect will be an unnecessary increase in
compilation times and, more importantly, code size. In a programming
language like Mercury~\cite{Mercury@JLP-96} where modes are known the
compiler can of course avoid this risk; in Mercury modes are in fact
used to guide the compiler in generating indexing tables. However, the
situation is different for a language like Prolog. Getting accurate
information about the set of all possible modes of predicates requires
a global static analyzer in the compiler --- and most Prolog systems
do not come with one --- but more importantly, it requires a lot of
discipline from the programmer (e.g., that applications use the module
system religiously and never bypass it). As a result, most Prolog
systems currently do not provide the type of indexing that
applications require. Even in systems like Ciao~\cite{Ciao@SCP-05},
which do come with built-in static analysis and more or less force
such a discipline on the programmer, mode information is not used for
multi-argument index construction.

\begin{itemize}
% \item Alternative: interface with a DB system?
\item Just-In-Time and dynamic compilation techniques (VITOR, IS THERE
  ANYTHING FOR PROLOG?)
\end{itemize}


\section{Demand-Driven Indexing of Static Predicates} \label{sec:static}
%=======================================================================
For static predicates the compiler has complete information about all
clauses and shapes of their arguments. It is both desirable and
possible to take advantage of this information at compile time and so
we treat the case of static predicates separately.
%
We will do so with schemes of increasing effectiveness and
implementation complexity.

\subsection{A simple WAM extension for any argument indexing}
%------------------------------------------------------------
Let us initially consider the case where the predicates to index
consist only of Datalog facts. This is commonly the case for all
extensional database predicates where indexing is most effective and
called for. One such code example is shown in
Fig.~\ref{fig:carc:facts}. It is a fragment of the well-known machine
learning dataset \textit{Carcinogenesis}~\cite{Carcinogenesis@ILP-97}.
These clauses get compiled to the WAM code shown in
Fig.~\ref{fig:carc:clauses}. Assuming WAM-style, first argument
indexing, the indexing code that a Prolog compiler generates is shown
in Fig.~\ref{fig:carc:index}. This code is typically placed before the
code for the clauses and the \switchONconstant instruction is the
entry point of the predicate. Note that compared to vanilla WAM this
instruction has an extra argument: the register on the value of which
we will hash ($r_1$). Another difference is that if this argument
register contains an unbound variable instead of a constant then
execution will continue with the next instruction. The reason for the
extra argument and this small change in the behavior of
\switchONconstant will become apparent soon.

%------------------------------------------------------------------------------
\begin{figure}[t]
\centering
\subfigure[Some Prolog clauses\label{fig:carc:facts}]{%
  \begin{ScriptProg}
    has\_property(d1,salmonella,p).\\
    has\_property(d1,salmonella\_n,p).\\
    has\_property(d2,salmonella,p). \\
    has\_property(d2,cytogen\_ca,n).\\
    has\_property(d3,cytogen\_ca,p).
  \end{ScriptProg}
}%
\subfigure[WAM indexing\label{fig:carc:index}]{%
  \begin{sf}
    \begin{\wamcodesize}
      \begin{tabular}[b]{l}
        \switchONconstant $r_1$ 5 $T_1$  \\
        \try   $L_1$ \\
        \retry $L_2$ \\
        \retry $L_3$ \\
        \retry $L_4$ \\
        \trust $L_5$ \\
	\\
	\begin{tabular}[b]{r|c@{\ }|l|}
	  \Cline
	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
	  \      & d1 & \try   $L_1$ \\
	  \      &    & \trust $L_2$ \\ \Cline
          \      & d2 & \try   $L_3$ \\
	  \      &    & \trust $L_4$ \\ \Cline
	  \      & d3 & \jump  $L_5$ \\
	  \Cline
	\end{tabular}
      \end{tabular}
    \end{\wamcodesize}
  \end{sf}
}%
\subfigure[Code for the clauses\label{fig:carc:clauses}]{%
  \begin{sf}
    \begin{\wamcodesize}
      \begin{tabular}[b]{rl}
	$L_1$: & \getcon $r_1$ d1            \\
	\      & \getcon $r_2$ salmonella    \\
	\      & \getcon $r_3$ p             \\
        \      & \proceed                    \\
	$L_2$: & \getcon $r_1$ d1            \\
        \      & \getcon $r_2$ salmonella\_n \\
        \      & \getcon $r_3$ p             \\
        \      & \proceed                    \\
	$L_3$: & \getcon $r_1$ d2            \\
        \      & \getcon $r_2$ salmonella    \\
        \      & \getcon $r_3$ p             \\
        \      & \proceed                    \\
	$L_4$: & \getcon $r_1$ d2            \\
	\      & \getcon $r_2$ cytogen\_ca   \\
	\      & \getcon $r_3$ n             \\
	\      & \proceed                    \\
	$L_5$: & \getcon $r_1$ d3            \\
	\      & \getcon $r_2$ cytogen\_ca   \\
	\      & \getcon $r_3$ p             \\
	\      & \proceed
      \end{tabular}
    \end{\wamcodesize}
  \end{sf}
}%
\subfigure[Any arg indexing\label{fig:carc:jiti_single:before}]{%
  \begin{sf}
    \begin{\wamcodesize}
      \begin{tabular}[b]{l}
        \switchONconstant $r_1$ 5 $T_1$  \\
        \jitiONconstant $r_2$   5 3    \\
        \jitiONconstant $r_3$   5 3    \\
        \try   $L_1$ \\
        \retry $L_2$ \\
        \retry $L_3$ \\
        \retry $L_4$ \\
        \trust $L_5$ \\
	\\
	\begin{tabular}[b]{r|c@{\ }|l|}
	  \Cline
	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{d1} & \try   $L_1$ \\
	  \      &           & \trust $L_2$ \\ \Cline
          \      & \code{d2} & \try   $L_3$ \\
	  \      &           & \trust $L_4$ \\ \Cline
	  \      & \code{d3} & \jump  $L_5$ \\
	  \Cline
	\end{tabular}
      \end{tabular}
    \end{\wamcodesize}
  \end{sf}
}%
\caption{Part of the Carcinogenesis dataset and WAM code that a byte
  code compiler generates}
\label{fig:carc}
\end{figure}
%------------------------------------------------------------------------------

The indexing code of Fig.~\ref{fig:carc:index} incurs a small cost for
the open call (executing the \switchONconstant instruction) but this
cost pays off for calls where the first argument is bound. On the
other hand, for calls where the first argument is a free variable and
some other argument is bound, a choice point will be created, the
\TryRetryTrust chain will be used, and execution will go through the
code of all clauses. This is clearly inefficient, more so for larger
data sets.
%
We can do much better with the relatively simple scheme shown in
Fig.~\ref{fig:carc:jiti_single:before}. Immediately after the
\switchONconstant instruction, we can generate \jitiONconstant (demand
indexing) instructions, one for each remaining argument. Recall that
the entry point of the predicate is the \switchONconstant instruction.
The \jitiONconstant $r_i$ \instr{N A} instruction works as follows:
\begin{itemize}
\item if the argument register $r_i$ is a free variable, then
  execution continues with the next instruction;
\item otherwise, \JITI kicks in as follows. The abstract machine will
  scan the WAM code of the clauses and create an index table for the
  values of the corresponding argument. It can do so, because the
  instruction takes as arguments the number of clauses \instr{N} to
  index and the arity \instr{A} of the predicate. (In our example, the
  numbers 5 and 3.) For Datalog facts, this information is sufficient.
  Also, because the WAM byte code for the clauses has a very regular
  structure, the index table can be created very quickly. Upon its
  creation, the \jitiONconstant instruction will get transformed to a
  \switchONconstant. Again this is straightforward because the two
  instructions have similar layouts in memory. Execution will continue
  with the \switchONconstant instruction.
\end{itemize}
Figure~\ref{fig:carg:jiti_single:after} shows the index table $T_2$
which is created for our example and how the indexing code looks after
the execution of a call with mode \code{(out,in,?)}. Note that the
\jitiONconstant instruction for argument register $r_2$ has been
appropriately patched. The call that triggered \JITI and subsequent
calls of the same mode will use table $T_2$. The index for the second
argument has been created.
%------------------------------------------------------------------------------
\begin{figure}
  \centering
  \begin{sf}
    \begin{\wamcodesize}
      \begin{tabular}{c@{\hspace*{2em}}c@{\hspace*{2em}}c}
	\begin{tabular}{l}
          \switchONconstant $r_1$ 5 $T_1$ \\
          \switchONconstant $r_2$ 5 $T_2$ \\
          \jitiONconstant $r_3$   5 3     \\
          \try $L_1$   \\
          \retry $L_2$ \\
          \retry $L_3$ \\
          \retry $L_4$ \\
          \trust $L_5$ \\
	\end{tabular}
	&
	\begin{tabular}{r|c@{\ }|l|}
	  \Cline
	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{d1} & \try   $L_1$ \\
	  \      &           & \trust $L_2$ \\ \Cline
          \      & \code{d2} & \try   $L_3$ \\
	  \      &           & \trust $L_4$ \\ \Cline
	  \      & \code{d3} & \jump  $L_5$ \\
	  \Cline
	\end{tabular}
	&
	\begin{tabular}{r|c@{\ }|l|}
	  \Cline
	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{salmonella}    & \try $L_1$   \\
	  \      &                      & \trust $L_3$ \\ \Cline
	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline
	  \      & \code{cytogen\_ca}   & \try $L_4$   \\
	  \      &                      & \trust $L_5$ \\
	  \Cline
	\end{tabular}
      \end{tabular}
    \end{\wamcodesize}
  \end{sf}
  \caption{WAM code after demand-driven indexing for argument 2;
    table $T_2$ is generated dynamically}
  \label{fig:carg:jiti_single:after}
\end{figure}
%------------------------------------------------------------------------------

The main advantage of this scheme is its simplicity. The compiled code
(Fig.~\ref{fig:carc:jiti_single:before}) is not significantly bigger
than the code which a WAM-based compiler would generate
(Fig.~\ref{fig:carc:index}) and, even if \JITI turns out unnecessary
during runtime (e.g., execution encounters only open calls or calls with
only the first argument bound), the extra overhead is minimal: the
execution of some \jitiONconstant instructions for the open call only.
%
In short, this is a simple scheme that allows for \JITI on \emph{any
single} argument. At least for big sets of Datalog facts, we see
little reason not to use this indexing scheme.

\paragraph*{Optimizations.}
Because we are dealing with static code, there are opportunities for
some easy optimizations. Suppose we statically determine that there
will never be any calls with \code{in} mode for some arguments or that
these arguments are not discriminating enough.\footnote{In our example,
suppose the third argument of \code{has\_property/3} had the atom
\code{p} as value throughout.} Then we can avoid generating
\jitiONconstant instructions for them. Also, suppose we detect or
heuristically decide that some arguments are more likely than others
to be used in the \code{in} mode. Then we can simply place the
\jitiONconstant instructions for these arguments \emph{before} the
instructions for other arguments. This is possible since all indexing
instructions take the argument register number as an argument.

\subsection{From any argument indexing to multi-argument indexing}
%-----------------------------------------------------------------
The scheme of the previous section gives us only single argument
indexing. However, all the infrastructure we need is already in place.
We can use it to support (fixed-order) multi-argument \JITI in a
straightforward way.

Note that the compiler knows exactly the set of clauses that need to
be tried for each query with a specific symbol in the first argument.
This information is needed in order to construct, at compile time, the
hash table $T_1$ of Fig.~\ref{fig:carc:index}. For multi-argument
\JITI, instead of generating for each hash bucket only \TryRetryTrust
instructions, the compiler can prepend appropriate \JITI instructions.
We illustrate this on our running example. The table $T_1$ contains
four \jitiONconstant instructions: two for each of the remaining two
arguments of hash buckets with more than one alternative. For hash
buckets with none or only one alternative (e.g., \code{d3}'s bucket)
there is obviously no need to resort to \JITI for the remaining
arguments. Figure~\ref{fig:carc:jiti_multi} shows the state of the
hash tables after the execution of queries
\code{has\_property(C,salmonella,T)}, which creates table $T_2$, and
\code{has\_property(d2,P,n)} which creates the $T_3$ table and
transforms the \jitiONconstant instruction for \code{d2} and register
$r_3$ to the appropriate \switchONconstant instruction.

%------------------------------------------------------------------------------
\begin{figure}[t]
  \centering
  \begin{sf}
    \begin{\wamcodesize}
      \begin{tabular}{@{}cccc@{}}
	\begin{tabular}{l}
          \switchONconstant $r_1$ 5 $T_1$ \\
          \switchONconstant $r_2$ 5 $T_2$ \\
          \jitiONconstant $r_3$   5 3     \\
          \try $L_1$   \\
          \retry $L_2$ \\
          \retry $L_3$ \\
          \retry $L_4$ \\
          \trust $L_5$ \\
	\end{tabular}
	&
	\begin{tabular}{r|c@{\ }|l|}
	  \Cline
	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{d1} & \jitiONconstant $r_2$ 2 3 \\
	  \      &           & \jitiONconstant $r_3$ 2 3 \\
	  \      &           & \try   $L_1$ \\
	  \      &           & \trust $L_2$ \\ \Cline
          \      & \code{d2} & \jitiONconstant $r_2$ 2 3 \\
	  \      &           & \switchONconstant $r_3$ 2 $T_3$ \\
	  \      &           & \try   $L_3$ \\
	  \      &           & \trust $L_4$ \\ \Cline
	  \      & \code{d3} & \jump  $L_5$ \\
	  \Cline
	\end{tabular}
	&
	\begin{tabular}{r|c@{\ }|l|}
	  \Cline
	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{salmonella}    & \jitiONconstant $r_3$ 2 3 \\
	  \      &                      & \try $L_1$   \\
	  \      &                      & \trust $L_3$ \\ \Cline
	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline
	  \      & \code{cytogen\_ca}   & \jitiONconstant $r_3$ 2 3 \\
	  \      &                      & \try $L_4$   \\
	  \      &                      & \trust $L_5$ \\
	  \Cline
	\end{tabular}
	&
	\begin{tabular}{r|c@{\ }|l|}
	  \Cline
	  $T_3$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline
	  \      & \code{p} & \jump $L_3$ \\ \Cline
	  \      & \code{n} & \jump $L_4$ \\
	  \Cline
	\end{tabular}
      \end{tabular}
    \end{\wamcodesize}
  \end{sf}
  \caption{\JITI for all argument combinations;
    table $T_1$ is static; $T_2$ and $T_3$ are generated dynamically}
  \label{fig:carc:jiti_multi}
\end{figure}
%------------------------------------------------------------------------------

\paragraph{Implementation issues.}
In the \jitiONconstant instructions of Fig.~\ref{fig:carc:jiti_multi}
notice the integer 2 which denotes the number of clauses that the
instruction will index. Using this number an index table of
appropriate size will be created, such as $T_3$. To fill this table we
need information about the clauses to index and the symbols to hash
on. The clauses can be obtained by scanning the labels of the
\TryRetryTrust instructions following \jitiONconstant; the symbols by
appropriate byte code offsets (based on the argument register number)
from these labels. Thus, multi-argument \JITI is easy to get and the
creation of index tables can be extremely fast when indexing Datalog
facts.

\subsection{Beyond Datalog and other implementation issues}
%----------------------------------------------------------
Demand-driven indexing of clauses with function symbols is not significantly
more difficult. The scheme we have described is applicable but
requires the following extensions:
\begin{enumerate}
\item Besides \jitiONconstant we also need \jitiONterm and
  \jitiONstructure instructions, the \JITI counterparts of the WAM's
  \switchONterm and \switchONstructure.
\item Because the byte code for the clause heads does not necessarily
  have a regular structure, the abstract machine needs to be able to
  ``walk'' the byte code instructions and recover the symbols on which
  indexing will be based. Writing such a code walking procedure is not
  hard.\footnote{In many Prolog systems, a procedure with similar
  functionality often exists for the disassembler, the debugger, etc.}
\item Indexing on an argument that contains unconstrained variables
  for some clauses can be tricky. The WAM needs to group clauses in
  this case and without special treatment creates two choice points
  for this argument (one for the variables and one for each group of
  clauses). However, this issue and how to deal with it is well-known
  by now. Possible solutions to it are described in a 1987 paper by
  Carlsson~\cite{FreezeIndexing@ICLP-87} and can be readily adapted to
  \JITI. Alternatively, in a simple implementation, we can skip \JITI
  for arguments with variables in some clauses.
\end{enumerate}
Before describing \JITI more formally, we remark on the following
design decisions whose rationale may not be immediately obvious:
\begin{itemize}
\item By default, only $T_1$ is generated at compile time (as in the
  WAM) and the additional index tables $T_2, T_3, \ldots$ are
  generated dynamically. This is because we do not want to increase
  compiled code size unnecessarily (i.e., when there is no demand for
  these indices).
\item On the other hand, we generate \jitiSTAR instructions at compile
  time for the head arguments.\footnote{The \jitiSTAR instructions for
  the $T_1$ table can be generated either by the compiler or by the
  loader.} This does not noticeably increase the generated byte code
  but it greatly simplifies code loading. Notice that a nice property
  of the scheme we have described is that the loaded byte code can be
  patched \emph{without} the need to move any instructions.
% The indexing tables are typically not intersperced with the byte code.
\item Finally, one may wonder why the \jitiSTAR instructions create
  the dynamic index tables with an additional code walking pass
  instead of piggy-backing on the pass which examines all clauses via
  the main \TryRetryTrust chain. The main reasons are: 1) in many cases
  the code walking can be selective and guided by offsets and 2) by
  first creating the hash table and then using it we speed up the
  execution of the queries encountered during runtime and often avoid
  unnecessary choice point creations.
\end{itemize}
This is \JITI as we have implemented it.
% in one of our Prolog systems.
However, we note that these decisions are orthogonal to the main idea
and under compiler control. If, for example, analysis determines that
some argument sequences will never demand indexing we can simply avoid
generation of \jitiSTAR instructions for them. Similarly, if we
determine that some argument sequences will definitely demand indexing
we can speed up execution by generating the appropriate index tables
at compile time instead of dynamically.

\subsection{Demand-driven index construction and its properties}
%---------------------------------------------------------------
The idea behind \JITI can be captured in a single sentence: \emph{we
can generate every index we need during program execution when this
index is demanded}. Subsequent uses of these indices can speed up
execution considerably more than the time it takes to construct them
(more on this below) so this runtime action makes sense.\footnote{In
fact, because choice points are expensive in the WAM, \JITI can speed
up even the execution of the query that triggers the process, not only
subsequent queries.}
%
We describe the process of demand-driven index construction.

% \subsubsection{Demand-driven index construction}
%-------------------------------------------------
Let $p/k$ be a predicate with $n$ clauses.
%
At a high level, its indices form a tree whose root is the entry point
of the predicate. For simplicity, we assume that the root node of the
tree and the interior nodes corresponding to the index table for the
first argument have been constructed at compile time. Leaves of this
tree are the nodes containing the code for the clauses of the
predicate and each clause is identified by a unique label \mbox{$L_i,
1 \leq i \leq n$}. Execution always starts at the first instruction of
the root node and follows Algorithm~\ref{alg:construction}. The
algorithm might look complicated but is actually quite simple.
%
Each non-leaf node contains a sequence of byte code instructions with
groups of the form \mbox{$\langle I_1, \ldots, I_m, T_1, \ldots, T_l
\rangle, 0 \leq m \leq k, 1 \leq l \leq n$} where each of the $I$
instructions, if any, is either a \switchSTAR or a \jitiSTAR
instruction and the $T$ instructions are either a sequence of
\TryRetryTrust instructions (if $l > 1$) or a \jump instruction (if
\mbox{$l = 1$}). Step~2.2 dynamically constructs an index table $\cal
T$ whose buckets are the newly created interior nodes in the tree.
Each bucket associated with a single clause contains a \jump
instruction to the label of that clause. Each bucket associated with
many clauses starts with the $I$ instructions which are yet to be
visited and continues with a \TryRetryTrust chain pointing to the
clauses. When the index construction is done, the instruction mutates
to a \switchSTAR WAM instruction.
%-------------------------------------------------------------------------
\begin{Algorithm}
  \caption{Actions of the abstract machine with \JITI}
  \label{alg:construction}
  \begin{enumerate}
  \item if the current instruction $I$ is a \switchSTAR, \try, \retry,
    \trust or \jump, the action is as in the WAM;
  \item if the current instruction $I$ is a \jitiSTAR with arguments $r,
    l$, and $k$ where $r$ is a register then
    \begin{enumerate}
    \item[2.1] if register $r$ contains a variable, the action is simply to
      \instr{goto} the next instruction in the node;
    \item[2.2] if register $r$ contains a value $v$, the action is to
      dynamically construct the index as follows:
      \begin{itemize}
      \item[2.2.1] collect the subsequent instructions in a list $\cal I$
	until the next instruction is a \try;\footnote{Note that there
	will always be a \try following a \jitiSTAR instruction.}
      \item[2.2.2] for each label $L$ in the \TryRetryTrust chain
	inspect the code of the clause with label $L$ to find the
	symbol~$c$ associated with register $r$ in the clause; (This
	step creates a list of $\langle c, L \rangle$ pairs.)
      \item[2.2.3] create an index table $\cal T$ out of these pairs as
	follows:
	\begin{itemize}
	\item if $I$ is a \jitiONconstant or a \jitiONstructure then
	  create an index table for the symbols in the list of pairs;
	  each entry of the table is identified by a symbol $c$ and
	  contains:
	  \begin{itemize}
	  \item the instruction \jump $L_c$ if $L_c$ is the only label
	    associated with $c$;
	  \item the sequence of instructions obtained by appending to
	    $\cal I$ a \TryRetryTrust chain for the sequence of labels
	    $L'_1, \ldots, L'_l$ that are associated with $c$
	  \end{itemize}
	\item if $I$ is a \jitiONterm then
	  \begin{itemize}
	  \item partition the sequence of labels $\cal L$ in the list
	    of pairs into sequences of labels ${\cal L}_c, {\cal L}_l$
	    and ${\cal L}_s$ for constants, lists and structures,
	    respectively;
	  \item for each of the four sequences ${\cal L}, {\cal L}_c,
	    {\cal L}_l, {\cal L}_s$ of labels create code as follows:
	    \begin{itemize}
	    \item the instruction \fail if the sequence is empty;
	    \item the instruction \jump $L$ if $L$ is the only label in
	      the sequence;
	    \item the sequence of instructions obtained by appending to
	      $\cal I$ a \TryRetryTrust chain for the current sequence
	      of labels;
	    \end{itemize}
	  \end{itemize}
	\end{itemize}
      \item[2.2.4] transform the \jitiSTAR $r, l, k$ instruction to
	a \switchSTAR $r, l, \&{\cal T}$ instruction; and
      \item[2.2.5] continue execution with this instruction.
      \end{itemize}
    \end{enumerate}
  \end{enumerate}
\end{Algorithm}
%-------------------------------------------------------------------------

Complexity-wise, dynamic index construction does not add any overhead
to program execution. First, note that each demanded index table will
be constructed at most once. Also, a \jitiSTAR instruction will be
encountered only in cases where execution would examine all clauses in
the \TryRetryTrust chain.\footnote{This statement is possibly not
valid in the presence of Prolog cuts.} The construction visits these
clauses \emph{once} and then creates the index table in time linear in
the number of clauses as one pass over the list of $\langle c, L
\rangle$ pairs suffices. After index construction, execution will
visit only a subset of these clauses as the index table will be
consulted.
%% Finally, note that the maximum number of \jitiSTAR instructions
%% that will be visited for each query is bounded by the maximum
%% number of index positions (symbols) in the clause heads of the
%% predicate.
Thus, in cases where \JITI is not effective, the execution time of a query
will at most double due to dynamic index construction. In fact, this worst
case is extremely unlikely in practice. On the other hand, \JITI can
change the complexity of evaluating a predicate call from $O(n)$ to
$O(1)$ where $n$ is the number of clauses.

\subsection{More implementation choices}
%---------------------------------------
The observant reader has no doubt noticed that
Algorithm~\ref{alg:construction} provides multi-argument indexing but
only for the outermost symbols of arguments. For clauses with
structured terms that require indexing in their subterms we can either
employ a compile-time program transformation like \emph{unification
factoring}~\cite{UnifFact@POPL-95} or modify the algorithm to consider
index positions inside structure symbols. This is relatively easy to
do but requires support from the register allocator (passing the
subterms of structures in appropriate argument registers) and/or a new
set of instructions. Due to space limitations we omit further details.

Algorithm~\ref{alg:construction} relies on a procedure that inspects
the code of a clause and collects the symbols associated with some
particular index position (step~2.2.2). At the cost of increased
implementation complexity, this step can of course take into account
other information that may exist in the body of the clause (e.g., type
tests such as \code{var(X)}, \code{atom(X)}, aliasing constraints such
as \code{X = Y}, numeric constraints \code{X > 0}, etc).

A reasonable concern for \JITI is increased memory consumption due to
the index tables. In our experience, this does not seem to be a
problem in practice since most applications do not have demand for
indexing on all argument combinations. In applications where it
becomes a problem or when running in an environment where memory is
limited, we can easily put a bound on the size of index tables, either
globally or for each predicate. The \jitiSTAR instructions can either
become inactive when this limit is reached, or better yet we can
recover the space of some tables. We can employ any standard recycling
algorithm (e.g., least recently used) and reclaim the space for some
tables that are no longer in use. This is easy to do by reverting the
corresponding \switchSTAR instructions back to \jitiSTAR instructions.
If the indices are needed again, they can simply be regenerated.


\section{Demand-Driven Indexing of Dynamic Predicates} \label{sec:dynamic}
%=========================================================================


\section{Performance Evaluation} \label{sec:perf}
%================================================


\section{Concluding Remarks}
%===========================
\begin{itemize}
\item Mention the non-trivial speedups in actual applications; also
  that it is important to realize that certain applications have ad
  hoc query patterns (e.g., ILP) that are not amenable to static analyses
\end{itemize}

%==============================================================================
\bibliographystyle{splncs}
\bibliography{lp}
%==============================================================================

\end{document}