| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %==============================================================================
 | 
					
						
							|  |  |  |  | \documentclass{llncs}  | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \usepackage[latin1]{inputenc} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \usepackage{float} | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | \usepackage{alltt} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \usepackage{xspace} | 
					
						
							|  |  |  |  | \usepackage{epsfig} | 
					
						
							|  |  |  |  | \usepackage{wrapfig} | 
					
						
							|  |  |  |  | \usepackage{subfigure} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \renewcommand{\rmdefault}{ptm} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \newcommand{\Paragraph}[1]{\vspace*{-.5em}\paragraph{#1}} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \floatstyle{ruled} | 
					
						
							|  |  |  |  | \newfloat{Algorithm}{ht}{lop} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \newcommand{\wamcodesize}{scriptsize} | 
					
						
							|  |  |  |  | \newcommand{\code}[1]{\texttt{#1}} | 
					
						
							|  |  |  |  | \newcommand{\instr}[1]{\textsf{#1}} | 
					
						
							|  |  |  |  | \newcommand{\try}{\instr{try}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\retry}{\mbox{\instr{retry}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\trust}{\instr{trust}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\TryRetryTrust}{\mbox{\instr{try-retry-trust}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\fail}{\instr{fail}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\jump}{\instr{jump}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\jitiSTAR}{\mbox{\instr{dindex\_on\_*}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\switchSTAR}{\mbox{\instr{switch\_on\_*}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\jitiONterm}{\mbox{\instr{dindex\_on\_term}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\jitiONconstant}{\mbox{\instr{dindex\_on\_constant}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\jitiONstructure}{\mbox{\instr{dindex\_on\_structure}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\switchONterm}{\mbox{\instr{switch\_on\_term}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\switchONconstant}{\mbox{\instr{switch\_on\_constant}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\switchONstructure}{\mbox{\instr{switch\_on\_structure}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\getcon}{\mbox{\instr{get\_constant}}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\proceed}{\instr{proceed}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\Cline}{\cline{2-3}} | 
					
						
							|  |  |  |  | \newcommand{\JITI}{demand-driven indexing\xspace} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | \newcommand{\bench}[1]{\textbf{\textsf{#1}}} | 
					
						
							|  |  |  |  | \newcommand{\tcLio}{\bench{tc\_l\_io}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tcRio}{\bench{tc\_r\_io}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tcDio}{\bench{tc\_d\_io}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tcLoo}{\bench{tc\_l\_oo}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tcRoo}{\bench{tc\_r\_oo}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tcDoo}{\bench{tc\_d\_oo}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\compress}{\bench{compress}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\sgCyl}{\bench{sg\_cyl}\xspace} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \newcommand{\muta}{\bench{muta}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | \newcommand{\pta}{\bench{pta}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\tea}{\bench{tea}\xspace} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \newcommand{\BreastCancer}{\bench{BreastCancer}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  | \newcommand{\Carcino}{\bench{Carcinogenesis}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \newcommand{\Choline}{\bench{Choline}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  | \newcommand{\GeneExpr}{\bench{GeneExpression}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \newcommand{\IEProtein}{\bench{IE-Protein\_Extraction}\xspace} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:30:00 +00:00
										 |  |  |  | %\newcommand{\Krki}{\bench{Krki}\xspace}
 | 
					
						
							|  |  |  |  | %\newcommand{\KrkiII}{\bench{Krki~II}\xspace}
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \newcommand{\Mesh}{\bench{Mesh}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\Pyrimidines}{\bench{Pyrimidines}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\Susi}{\bench{Susi}\xspace} | 
					
						
							|  |  |  |  | \newcommand{\Thermolysin}{\bench{Thermolysin}\xspace} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \newenvironment{SmallProg}{\normalfont\ttfamily\small\begin{tabular}[b]{l}}{\end{tabular}} % typewriter program text at \small size in a bottom-aligned one-column tabular; font declarations are scoped by the environment's own group | 
					
						
							|  |  |  |  | \newenvironment{ScriptProg}{\normalfont\ttfamily\scriptsize\begin{tabular}[b]{l}}{\end{tabular}} % typewriter program text at \scriptsize in a bottom-aligned one-column tabular; font declarations are scoped by the environment's own group | 
					
						
							|  |  |  |  | \newenvironment{FootProg}{\normalfont\ttfamily\footnotesize\begin{tabular}[c]{l}}{\end{tabular}} % typewriter program text at \footnotesize in a vertically centred one-column tabular; font declarations are scoped by the environment's own group | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | % \TODOcomment{<suffix>}{<text>}: steps the counter TODOcounter<suffix>, prints its
 | 
					
						
							|  |  |  |  | % value as a superscript mark in the text, and puts <text> in a framed 2cm margin note.
 | 
					
						
							|  |  |  |  | % Every line end inside the body is protected with % so no stray space reaches the output.
 | 
					
						
							|  |  |  |  | \newcommand{\TODOcomment}[2]{%
 | 
					
						
							|  |  |  |  |   \stepcounter{TODOcounter#1}%
 | 
					
						
							|  |  |  |  |   {\scriptsize\bfseries$^{(\arabic{TODOcounter#1})}$}%
 | 
					
						
							|  |  |  |  |   \marginpar[\fbox{%
 | 
					
						
							|  |  |  |  |     \parbox{2cm}{\raggedleft | 
					
						
							|  |  |  |  |       \scriptsize$^{(\mathbf{\arabic{TODOcounter#1}{#1}})}$%
 | 
					
						
							|  |  |  |  |       \scriptsize #2}}]%
 | 
					
						
							|  |  |  |  |   {\fbox{\parbox{2cm}{\raggedright | 
					
						
							|  |  |  |  |       \scriptsize$^{(\mathbf{\arabic{TODOcounter#1}{#1}})}$%
 | 
					
						
							|  |  |  |  |       \scriptsize #2}}}%
 | 
					
						
							|  |  |  |  | }%
 | 
					
						
							|  |  |  |  | \newcounter{TODOcounter} | 
					
						
							|  |  |  |  | \newcommand{\TODO}[1]{\TODOcomment{}{#1}} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \title{Demand-Driven Indexing of Prolog Clauses\thanks{Dedicated to | 
					
						
							|  |  |  |  |     the memory of our friend, colleague and co-author Ricardo Lopes. | 
					
						
							|  |  |  |  |     We miss you!}} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \titlerunning{Demand-Driven Indexing of Prolog Clauses} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \author{V\'{\i}tor Santos Costa\inst{1} \and Konstantinos | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   Sagonas\inst{2} \and Ricardo Lopes} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \authorrunning{V. Santos Costa, K. Sagonas and R. Lopes} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \institute{ | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  |   LIACC- DCC/FCUP, University of Porto, Portugal | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \and | 
					
						
							|  |  |  |  |   National Technical University of Athens, Greece | 
					
						
							|  |  |  |  | } | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \begin{document} | 
					
						
							|  |  |  |  | \maketitle | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \begin{abstract} | 
					
						
							|  |  |  |  |   As logic programming applications grow in size, Prolog systems need | 
					
						
							|  |  |  |  |   to efficiently access larger and larger data sets and the need for | 
					
						
							|  |  |  |  |   any- and multi-argument indexing becomes more and more profound. | 
					
						
							|  |  |  |  |   Static generation of multi-argument indexing is one alternative, but | 
					
						
							|  |  |  |  |   applications often rely on features that are inherently dynamic | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   which makes static techniques inapplicable or inaccurate. Another | 
					
						
							|  |  |  |  |   alternative is to employ dynamic schemes for flexible demand-driven | 
					
						
							|  |  |  |  |   indexing of Prolog clauses. We propose such schemes and discuss | 
					
						
							|  |  |  |  |   issues that need to be addressed for their efficient implementation | 
					
						
							|  |  |  |  |   in the context of WAM-based Prolog systems. We have implemented | 
					
						
							|  |  |  |  |   demand-driven indexing in two different Prolog systems and have been | 
					
						
							|  |  |  |  |   able to obtain non-negligible performance speedups: from a few | 
					
						
							|  |  |  |  |   percent up to orders of magnitude. Given these results, we see very | 
					
						
							|  |  |  |  |   little reason for Prolog systems not to incorporate some form of | 
					
						
							|  |  |  |  |   dynamic indexing based on actual demand. In fact, we see | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  |   demand-driven indexing as only the first step towards effective | 
					
						
							|  |  |  |  |   runtime optimization of Prolog programs. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{abstract} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \section{Introduction} | 
					
						
							|  |  |  |  | %=====================
 | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | The WAM~\cite{Warren83} has mostly been a blessing but occasionally | 
					
						
							|  |  |  |  | also a curse for Prolog systems. Its ingenious design has allowed | 
					
						
							|  |  |  |  | implementors to get byte code compilers with decent performance --- it | 
					
						
							|  |  |  |  | is not a fluke that most Prolog systems are still based on the WAM. On | 
					
						
							|  |  |  |  | the other hand, \emph{because} the WAM gives good performance in many | 
					
						
							|  |  |  |  | cases, implementors have not incorporated in their systems many | 
					
						
							|  |  |  |  | features that drastically depart from WAM's basic characteristics. | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | For example, first-argument indexing is sufficient for many Prolog | 
					
						
							|  |  |  |  | applications. However, it is clearly sub-optimal for applications | 
					
						
							| 
									
										
										
										
											2007-06-06 21:01:46 +00:00
										 |  |  |  | accessing large data sets; for a long time now, the database community | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | has recognized that good indexing is the basis for fast query | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | processing. | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | As logic programming applications grow in size, Prolog systems need to | 
					
						
							|  |  |  |  | efficiently access larger and larger data sets and the need for any- | 
					
						
							|  |  |  |  | and multi-argument indexing becomes more and more profound. Static | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | generation of multi-argument indexing is one alternative. The problem | 
					
						
							|  |  |  |  | is that this alternative is often unattractive because it may | 
					
						
							|  |  |  |  | drastically increase the size of the generated byte code and do so | 
					
						
							|  |  |  |  | unnecessarily. Static analysis can partly address this concern, but in | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | applications that rely on features which are inherently dynamic (e.g., | 
					
						
							|  |  |  |  | generating hypotheses for inductive logic programming data sets during | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | runtime) static analysis is inapplicable or grossly inaccurate. | 
					
						
							|  |  |  |  | Another alternative, which has not been investigated so far, is to do | 
					
						
							|  |  |  |  | flexible indexing on demand during program execution. | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | This is precisely what we advocate with this paper. More specifically, | 
					
						
							|  |  |  |  | we present a small extension to the WAM that allows for flexible | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | indexing of Prolog clauses during runtime based on actual demand. For | 
					
						
							|  |  |  |  | static predicates, the scheme we propose is partly guided by the | 
					
						
							|  |  |  |  | compiler; for dynamic code, besides being demand-driven by queries, | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | the method needs to cater for code updates during runtime. Where our | 
					
						
							|  |  |  |  | schemes radically depart from current practice is that they generate | 
					
						
							|  |  |  |  | new byte code during runtime, in effect doing a form of just-in-time | 
					
						
							|  |  |  |  | compilation. In our experience these schemes pay off. We have | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | implemented \JITI in two different Prolog systems (YAP and XXX) and | 
					
						
							| 
									
										
										
										
											2007-03-08 23:31:52 +00:00
										 |  |  |  | have obtained non-trivial speedups, ranging from a few percent to | 
					
						
							|  |  |  |  | orders of magnitude, across a wide range of applications. Given these | 
					
						
							|  |  |  |  | results, we see very little reason for Prolog systems not to | 
					
						
							|  |  |  |  | incorporate some form of indexing based on actual demand from queries. | 
					
						
							|  |  |  |  | In fact, we see \JITI as only the first step towards effective runtime | 
					
						
							|  |  |  |  | optimization of Prolog programs. | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | \Paragraph{Organization.} | 
					
						
							|  |  |  |  | %------------------------
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | After commenting on the state of the art and related work concerning | 
					
						
							|  |  |  |  | indexing in Prolog systems (Sect.~\ref{sec:related}), we briefly review | 
					
						
							|  |  |  |  | indexing in the WAM (Sect.~\ref{sec:prelims}). We then present \JITI | 
					
						
							|  |  |  |  | schemes for static (Sect.~\ref{sec:static}) and dynamic | 
					
						
							|  |  |  |  | (Sect.~\ref{sec:dynamic}) predicates, their implementation in two | 
					
						
							|  |  |  |  | Prolog systems (Sect.~\ref{sec:impl}) and the performance benefits | 
					
						
							|  |  |  |  | they bring (Sect.~\ref{sec:perf}). The paper ends with some concluding | 
					
						
							|  |  |  |  | remarks. | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | \section{State of the Art and Related Work} \label{sec:related} | 
					
						
							|  |  |  |  | %==============================================================
 | 
					
						
							|  |  |  |  | % Indexing in Prolog systems:
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | Many Prolog systems still only support | 
					
						
							| 
									
										
										
										
											2007-03-10 15:05:05 +00:00
										 |  |  |  | indexing on the main functor symbol of the first argument. Some | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  | others, such as YAP version 4, can look inside some compound | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | terms~\cite{YAP}. SICStus Prolog supports \emph{shallow | 
					
						
							| 
									
										
										
										
											2007-03-10 15:05:05 +00:00
										 |  |  |  |   backtracking}~\cite{ShallowBacktracking@ICLP-89}; choice points are | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | fully populated only when it is certain that execution will enter the | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | clause body. While shallow backtracking avoids some of the performance | 
					
						
							|  |  |  |  | problems of unnecessary choice point creation, it does not offer the | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  | full benefits that indexing can provide. Other systems such as | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | BIM-Prolog~\cite{IndexingProlog@NACLP-89}, SWI-Prolog~\cite{SWI} and | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | XSB~\cite{XSB} allow for user-controlled multi-argument indexing. | 
					
						
							|  |  |  |  | Notably, ilProlog~\cite{ilProlog} uses compile-time heuristics and | 
					
						
							|  |  |  |  | generates code for multi-argument indexing automatically. In all these | 
					
						
							|  |  |  |  | systems, this support comes with various implementation restrictions. | 
					
						
							|  |  |  |  | For example, in SWI-Prolog at most four arguments can be indexed; in | 
					
						
							|  |  |  |  | XSB the compiler does not offer multi-argument indexing and the | 
					
						
							|  |  |  |  | predicates need to be asserted instead; we know of no system where | 
					
						
							|  |  |  |  | multi-argument indexing looks inside compound terms. More importantly, | 
					
						
							|  |  |  |  | requiring users to specify arguments to index on is neither | 
					
						
							|  |  |  |  | user-friendly nor a guarantee of good performance. | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | % Trees, tries and unification factoring:
 | 
					
						
							|  |  |  |  | Recognizing the need for better indexing, researchers have proposed | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | more flexible indexing mechanisms for Prolog. For example, Hickey and | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | Mudambi proposed \emph{switching trees}~\cite{HickeyMudambi@JLP-89}, | 
					
						
							|  |  |  |  | which rely on the presence of mode information. Similar proposals were | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | put forward by Van Roy, Demoen and Willems who investigated indexing | 
					
						
							|  |  |  |  | on several arguments in the form of a \emph{selection tree}~\cite{VRDW87} | 
					
						
							|  |  |  |  | and by Zhou et al.\ who implemented a \emph{matching tree} oriented | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | abstract machine for Prolog~\cite{TOAM@ICLP-90}. For static | 
					
						
							|  |  |  |  | predicates, the XSB compiler offers support for \emph{unification | 
					
						
							|  |  |  |  | factoring}~\cite{UnifFact@POPL-95}; for asserted code, XSB can | 
					
						
							|  |  |  |  | represent databases of facts using \emph{tries}~\cite{Tries@JLP-99} | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | which provide left-to-right multi-argument indexing. However, in XSB | 
					
						
							|  |  |  |  | none of these mechanisms is used automatically; instead the user has | 
					
						
							|  |  |  |  | to specify appropriate directives. | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | % Comparison with static analysis techniques and Mercury:
 | 
					
						
							|  |  |  |  | Long ago, Kliger and Shapiro argued that such tree-based indexing | 
					
						
							|  |  |  |  | schemes are not cost effective for the compilation of Prolog | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | programs~\cite{KligerShapiro@ICLP-88}. Some of their arguments make | 
					
						
							| 
									
										
										
										
											2007-03-10 15:05:05 +00:00
										 |  |  |  | sense for certain applications, but, as we shall show, in general  | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | they underestimate the benefits of indexing on EDB predicates. | 
					
						
							|  |  |  |  | Nevertheless, it is true that unless the modes of | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | predicates are known we run the risk of doing indexing on output | 
					
						
							|  |  |  |  | arguments, whose only effect is an unnecessary increase in compilation | 
					
						
							|  |  |  |  | times and, more importantly, in code size. In a programming language | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  | such as Mercury~\cite{Mercury@JLP-96}, where modes are known, the compiler | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | can of course avoid this risk; indeed, in Mercury, modes (and types) are | 
					
						
							|  |  |  |  | used to guide the compiler in generating good indexing tables. However, the | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | situation is different for a language like Prolog. Getting accurate | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | information about the set of all possible modes of predicates requires | 
					
						
							|  |  |  |  | a global static analyzer in the compiler --- and most Prolog systems | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | do not come with one. More importantly, it requires a lot of | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | discipline from the programmer (e.g., that applications use the module | 
					
						
							|  |  |  |  | system religiously and never bypass it). As a result, most Prolog | 
					
						
							|  |  |  |  | systems currently do not provide the type of indexing that | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  | applications require. Even in systems such as Ciao~\cite{Ciao@SCP-05}, | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | which do come with a built-in static analyzer and more or less force | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | such a discipline on the programmer, mode information is not used for | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | multi-argument indexing. | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | % The grand finale:
 | 
					
						
							|  |  |  |  | The situation is actually worse for certain types of Prolog | 
					
						
							|  |  |  |  | applications. For example, consider applications in the area of | 
					
						
							|  |  |  |  | inductive logic programming. These applications on the one hand have | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | high demands for effective indexing since they need to efficiently | 
					
						
							|  |  |  |  | access big data sets and on the other hand they are unfit for static | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | analysis since queries are often ad hoc and generated only during | 
					
						
							|  |  |  |  | runtime as new hypotheses are formed or refined. | 
					
						
							|  |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | Our thesis is that the abstract machine should be able to adapt | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | automatically to the runtime requirements of such or, even better, of | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | all applications by employing increasingly aggressive forms of dynamic | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | compilation. As a concrete example of what this means in practice, in | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | this paper we will attack the problem of satisfying the indexing needs | 
					
						
							|  |  |  |  | of applications during runtime. Naturally, we will base our technique | 
					
						
							|  |  |  |  | on the existing support for indexing that the WAM provides, but we | 
					
						
							|  |  |  |  | will extend this support with the technique of \JITI that we describe | 
					
						
							|  |  |  |  | in the next sections. | 
					
						
							| 
									
										
										
										
											2007-03-07 22:00:18 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | \section{Indexing in the WAM} \label{sec:prelims} | 
					
						
							|  |  |  |  | %================================================
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | To make the paper relatively self-contained we review the indexing | 
					
						
							|  |  |  |  | instructions of the WAM and their use. In the WAM, the first level of | 
					
						
							|  |  |  |  | dispatching involves a test on the type of the argument. The | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | \switchONterm instruction checks the tag of the dereferenced value in | 
					
						
							|  |  |  |  | the first argument register and implements a four-way branch where one | 
					
						
							|  |  |  |  | branch is for the dereferenced register being an unbound variable, one | 
					
						
							|  |  |  |  | for being atomic, one for (non-empty) list, and one for structure. In | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | any case, control goes to a bucket of clauses. In the buckets for | 
					
						
							|  |  |  |  | constants and structures the second level of dispatching involves the | 
					
						
							|  |  |  |  | value of the register. The \switchONconstant and \switchONstructure | 
					
						
							|  |  |  |  | instructions implement this dispatching: typically with a \fail | 
					
						
							|  |  |  |  | instruction when the bucket is empty, with a \jump instruction for | 
					
						
							|  |  |  |  | only one clause, with a sequential scan when the number of clauses is | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | small, and with a hash table lookup when the number of clauses exceeds | 
					
						
							|  |  |  |  | a threshold. For this reason the \switchONconstant and | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | \switchONstructure instructions take as arguments the hash table | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \instr{T} and the number of clauses \instr{N} the table contains. In | 
					
						
							|  |  |  |  | each bucket of this hash table and also in the bucket for the variable | 
					
						
							|  |  |  |  | case of \switchONterm the code sequentially backtracks through the | 
					
						
							|  |  |  |  | clauses using a \TryRetryTrust chain of instructions. The \try | 
					
						
							|  |  |  |  | instruction sets up a choice point, the \retry instructions (if~any) | 
					
						
							|  |  |  |  | update certain fields of this choice point, and the \trust instruction | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | removes it. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | The WAM has additional indexing instructions (\instr{try\_me\_else} | 
					
						
							| 
									
										
										
										
											2007-03-08 15:19:16 +00:00
										 |  |  |  | and friends) that allow indexing to be interspersed with the code of | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | clauses. We will not consider them here. This is not a problem since | 
					
						
							|  |  |  |  | the above scheme handles all programs. Also, we will feel free to make | 
					
						
							|  |  |  |  | some minor modifications and optimizations when this simplifies | 
					
						
							|  |  |  |  | things. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Let's see an example. Consider the Prolog code shown in | 
					
						
							|  |  |  |  | Fig.~\ref{fig:carc:facts}, a fragment of the machine learning dataset | 
					
						
							|  |  |  |  | \textit{Carcinogenesis}. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | These clauses get compiled to the WAM code shown in | 
					
						
							|  |  |  |  | Fig.~\ref{fig:carc:clauses}. The first argument indexing code that a | 
					
						
							|  |  |  |  | Prolog compiler generates is shown in Fig.~\ref{fig:carc:index}. This | 
					
						
							|  |  |  |  | code is typically placed before the code for the clauses and the | 
					
						
							|  |  |  |  | \switchONconstant is the entry point of the predicate. Note that compared | 
					
						
							|  |  |  |  | with vanilla WAM this instruction has an extra argument: the register | 
					
						
							|  |  |  |  | on the value of which we index ($r_1$). This extra argument will allow | 
					
						
							|  |  |  |  | us to go beyond first argument indexing. Another departure from the | 
					
						
							|  |  |  |  | WAM is that if this argument register contains an unbound variable | 
					
						
							|  |  |  |  | instead of a constant then execution will continue with the next | 
					
						
							|  |  |  |  | instruction; in effect we have merged part of the functionality of | 
					
						
							|  |  |  |  | \switchONterm into the \switchONconstant instruction. This small | 
					
						
							|  |  |  |  | change in the behavior of \switchONconstant will allow us to get | 
					
						
							|  |  |  |  | \JITI. Let's see how. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \begin{figure}[t] | 
					
						
							|  |  |  |  | \centering | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \begin{tabular}[b]{c} | 
					
						
							|  |  |  |  |   \subfigure[Some Prolog clauses\label{fig:carc:facts}]{%
 | 
					
						
							|  |  |  |  |     \begin{ScriptProg} | 
					
						
							|  |  |  |  |       has\_property(d1,salmonella,p).\\ | 
					
						
							|  |  |  |  |       has\_property(d1,salmonella\_n,p).\\ | 
					
						
							|  |  |  |  |       has\_property(d2,salmonella,p). \\ | 
					
						
							|  |  |  |  |       has\_property(d2,cytogen\_ca,n).\\ | 
					
						
							|  |  |  |  |       has\_property(d3,cytogen\_ca,p).\\[5pt] | 
					
						
							|  |  |  |  |     \end{ScriptProg} | 
					
						
							|  |  |  |  |   }\\ \hline\hline%
 | 
					
						
							|  |  |  |  |   \subfigure[WAM indexing\label{fig:carc:index}]{%
 | 
					
						
							|  |  |  |  |     \begin{sf} | 
					
						
							|  |  |  |  |       \begin{\wamcodesize} | 
					
						
							|  |  |  |  | 	\begin{tabular}[b]{l} | 
					
						
							|  |  |  |  |           \switchONconstant $r_1$ 5 $T_1$  \\ | 
					
						
							|  |  |  |  |           \try   $L_1$ \\ | 
					
						
							|  |  |  |  |           \retry $L_2$ \\ | 
					
						
							|  |  |  |  |           \retry $L_3$ \\ | 
					
						
							|  |  |  |  |           \retry $L_4$ \\ | 
					
						
							|  |  |  |  |           \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \\ | 
					
						
							|  |  |  |  | 	  \begin{tabular}[b]{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	    \Cline | 
					
						
							|  |  |  |  | 	    $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	    \      & d1 & \try   $L_1$ \\ | 
					
						
							|  |  |  |  | 	    \      &    & \trust $L_2$ \\ \Cline | 
					
						
							|  |  |  |  |             \      & d2 & \try   $L_3$ \\ | 
					
						
							|  |  |  |  | 	    \      &    & \trust $L_4$ \\ \Cline | 
					
						
							|  |  |  |  | 	    \      & d3 & \jump  $L_5$ \\ | 
					
						
							|  |  |  |  | 	    \Cline | 
					
						
							|  |  |  |  | 	  \end{tabular}\\[3pt] | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  |     \end{\wamcodesize} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |     \end{sf} | 
					
						
							|  |  |  |  |   }%
 | 
					
						
							|  |  |  |  | \end{tabular}%
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \subfigure[Code for the clauses\label{fig:carc:clauses}]{%
 | 
					
						
							|  |  |  |  |   \begin{sf} | 
					
						
							|  |  |  |  |     \begin{\wamcodesize} | 
					
						
							|  |  |  |  |       \begin{tabular}[b]{rl} | 
					
						
							|  |  |  |  | 	$L_1$: & \getcon $r_1$ d1            \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_2$ salmonella    \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_3$ p             \\ | 
					
						
							|  |  |  |  |         \      & \proceed                    \\ | 
					
						
							|  |  |  |  | 	$L_2$: & \getcon $r_1$ d1            \\ | 
					
						
							|  |  |  |  |         \      & \getcon $r_2$ salmonella\_n \\ | 
					
						
							|  |  |  |  |         \      & \getcon $r_3$ p             \\ | 
					
						
							|  |  |  |  |         \      & \proceed                    \\ | 
					
						
							|  |  |  |  | 	$L_3$: & \getcon $r_1$ d2            \\ | 
					
						
							|  |  |  |  |         \      & \getcon $r_2$ salmonella    \\ | 
					
						
							|  |  |  |  |         \      & \getcon $r_3$ p             \\ | 
					
						
							|  |  |  |  |         \      & \proceed                    \\ | 
					
						
							|  |  |  |  | 	$L_4$: & \getcon $r_1$ d2            \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_2$ cytogen\_ca   \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_3$ n             \\ | 
					
						
							|  |  |  |  | 	\      & \proceed                    \\ | 
					
						
							|  |  |  |  | 	$L_5$: & \getcon $r_1$ d3            \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_2$ cytogen\_ca   \\ | 
					
						
							|  |  |  |  | 	\      & \getcon $r_3$ p             \\ | 
					
						
							|  |  |  |  | 	\      & \proceed | 
					
						
							|  |  |  |  |       \end{tabular} | 
					
						
							|  |  |  |  |     \end{\wamcodesize} | 
					
						
							|  |  |  |  |   \end{sf} | 
					
						
							|  |  |  |  | }%
 | 
					
						
							|  |  |  |  | \subfigure[Any arg indexing\label{fig:carc:jiti_single:before}]{%
 | 
					
						
							|  |  |  |  |   \begin{sf} | 
					
						
							|  |  |  |  |     \begin{\wamcodesize} | 
					
						
							|  |  |  |  |       \begin{tabular}[b]{l} | 
					
						
							|  |  |  |  |         \switchONconstant $r_1$ 5 $T_1$  \\ | 
					
						
							|  |  |  |  |         \jitiONconstant $r_2$   5 3    \\ | 
					
						
							|  |  |  |  |         \jitiONconstant $r_3$   5 3    \\ | 
					
						
							|  |  |  |  |         \try   $L_1$ \\ | 
					
						
							|  |  |  |  |         \retry $L_2$ \\ | 
					
						
							|  |  |  |  |         \retry $L_3$ \\ | 
					
						
							|  |  |  |  |         \retry $L_4$ \\ | 
					
						
							|  |  |  |  |         \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	\\ | 
					
						
							|  |  |  |  | 	\begin{tabular}[b]{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d1} & \try   $L_1$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_2$ \\ \Cline | 
					
						
							|  |  |  |  |           \      & \code{d2} & \try   $L_3$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_4$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d3} & \jump  $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  |       \end{tabular} | 
					
						
							|  |  |  |  |     \end{\wamcodesize} | 
					
						
							|  |  |  |  |   \end{sf} | 
					
						
							|  |  |  |  | }%
 | 
					
						
							|  |  |  |  | \caption{Part of the Carcinogenesis dataset and WAM code that a byte | 
					
						
							|  |  |  |  |   code compiler generates} | 
					
						
							|  |  |  |  | \label{fig:carc} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \vspace*{-1em} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{figure} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \section{Demand-Driven Indexing of Static Predicates} \label{sec:static} | 
					
						
							|  |  |  |  | %=======================================================================
 | 
					
						
							|  |  |  |  | For static predicates the compiler has complete information about all | 
					
						
							|  |  |  |  | clauses and shapes of their head arguments. It is both desirable and | 
					
						
							|  |  |  |  | possible to take advantage of this information at compile time and so | 
					
						
							|  |  |  |  | we treat the case of static predicates separately. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | We will do so with schemes of increasing effectiveness and | 
					
						
							|  |  |  |  | implementation complexity. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{A simple WAM extension for any argument indexing} | 
					
						
							|  |  |  |  | %------------------------------------------------------------
 | 
					
						
							|  |  |  |  | Let us initially consider the case where the predicates to index | 
					
						
							|  |  |  |  | consist only of Datalog facts. This is commonly the case for all | 
					
						
							|  |  |  |  | extensional database predicates where indexing is most effective and | 
					
						
							|  |  |  |  | called for. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Refer to the example in Fig.~\ref{fig:carc}. | 
					
						
							|  |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | The indexing code of Fig.~\ref{fig:carc:index} incurs a small cost for | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | a call where the first argument is a variable (namely, executing the | 
					
						
							|  |  |  |  | \switchONconstant instruction) but the instruction pays off for calls | 
					
						
							|  |  |  |  | where the first argument is bound. On the other hand, for calls where | 
					
						
							|  |  |  |  | the first argument is a free variable and some other argument is | 
					
						
							|  |  |  |  | bound, a choice point will be created, the \TryRetryTrust chain will | 
					
						
							|  |  |  |  | be used, and execution will go through the code of all clauses. This | 
					
						
							|  |  |  |  | is clearly inefficient, more so for larger data sets. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %
 | 
					
						
							|  |  |  |  | We can do much better with the relatively simple scheme shown in | 
					
						
							|  |  |  |  | Fig.~\ref{fig:carc:jiti_single:before}. Immediately after the | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | \switchONconstant instruction, we can statically generate | 
					
						
							|  |  |  |  | \jitiONconstant (demand indexing) instructions, one for each remaining | 
					
						
							|  |  |  |  | argument. Recall that the entry point of the predicate is the | 
					
						
							|  |  |  |  | \switchONconstant instruction. The \jitiONconstant $r_i$ \instr{N A} | 
					
						
							|  |  |  |  | instruction works as follows: | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \begin{itemize} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \item if the argument $r_i$ is a free variable, | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   execution continues with the next instruction; | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | \item otherwise, \JITI kicks in as follows. The abstract machine | 
					
						
							|  |  |  |  |   scans the WAM code of the clauses and creates an index table for the | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   values of the corresponding argument. It can do so because the | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   instruction takes as arguments the number of clauses \instr{N} to | 
					
						
							|  |  |  |  |   index and the arity \instr{A} of the predicate. (In our example, the | 
					
						
							|  |  |  |  |   numbers 5 and 3.) For Datalog facts, this information is sufficient. | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  |   Because the WAM byte code for the clauses has a very regular | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   structure, the index table can be created very quickly. Upon its | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  |   creation, the \jitiONconstant instruction gets transformed to a | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \switchONconstant. Again this is straightforward because the two | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   instructions have similar layouts in memory. Execution of the | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  |   abstract machine then continues with the \switchONconstant | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   instruction. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{itemize} | 
					
						
							|  |  |  |  | Figure~\ref{fig:carg:jiti_single:after} shows the index table $T_2$ | 
					
						
							|  |  |  |  | which is created for our example and how the indexing code looks after | 
					
						
							|  |  |  |  | the execution of a call with mode \code{(out,in,?)}. Note that the | 
					
						
							|  |  |  |  | \jitiONconstant instruction for argument register $r_2$ has been | 
					
						
							|  |  |  |  | appropriately patched. The call that triggered \JITI and subsequent | 
					
						
							|  |  |  |  | calls of the same mode will use table $T_2$. The index for the second | 
					
						
							|  |  |  |  | argument has been created. | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \begin{figure}[t] | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \centering | 
					
						
							|  |  |  |  |   \begin{sf} | 
					
						
							|  |  |  |  |     \begin{\wamcodesize} | 
					
						
							|  |  |  |  |       \begin{tabular}{c@{\hspace*{2em}}c@{\hspace*{2em}}c} | 
					
						
							|  |  |  |  | 	\begin{tabular}{l} | 
					
						
							|  |  |  |  |           \switchONconstant $r_1$ 5 $T_1$ \\ | 
					
						
							|  |  |  |  |           \switchONconstant $r_2$ 5 $T_2$ \\ | 
					
						
							|  |  |  |  |           \jitiONconstant $r_3$   5 3     \\ | 
					
						
							|  |  |  |  |           \try $L_1$   \\ | 
					
						
							|  |  |  |  |           \retry $L_2$ \\ | 
					
						
							|  |  |  |  |           \retry $L_3$ \\ | 
					
						
							|  |  |  |  |           \retry $L_4$ \\ | 
					
						
							|  |  |  |  |           \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  | 	& | 
					
						
							|  |  |  |  | 	\begin{tabular}{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d1} & \try   $L_1$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_2$ \\ \Cline | 
					
						
							|  |  |  |  |           \      & \code{d2} & \try   $L_3$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_4$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d3} & \jump  $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  | 	& | 
					
						
							|  |  |  |  | 	\begin{tabular}{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{salmonella}    & \try $L_1$   \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \trust $L_3$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{cytogen\_ca}   & \try $L_4$   \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  |       \end{tabular} | 
					
						
							|  |  |  |  |     \end{\wamcodesize} | 
					
						
							|  |  |  |  |   \end{sf} | 
					
						
							|  |  |  |  |   \caption{WAM code after demand-driven indexing for argument 2; | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |     $T_2$ is generated dynamically} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \label{fig:carg:jiti_single:after} | 
					
						
							|  |  |  |  | \end{figure} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | The main advantage of this scheme is its simplicity. The compiled code | 
					
						
							|  |  |  |  | (Fig.~\ref{fig:carc:jiti_single:before}) is not significantly bigger | 
					
						
							|  |  |  |  | than the code which a WAM-based compiler would generate | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | (Fig.~\ref{fig:carc:index}) and, if \JITI turns out unnecessary | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | during runtime (e.g., execution encounters only open calls or calls with only | 
					
						
							|  |  |  |  | the first argument bound), the extra overhead is minimal: the | 
					
						
							|  |  |  |  | execution of some \jitiONconstant instructions for the open call only. | 
					
						
							|  |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | In short, this is a simple scheme that allows for indexing on \emph{any | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | single} argument. At least for big sets of Datalog facts, we see | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | little reason not to use it. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Optimizations.} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | Because we are dealing with static code, there are opportunities for | 
					
						
							|  |  |  |  | some easy optimizations. Suppose we statically determine that there | 
					
						
							|  |  |  |  | will never be any calls with \code{in} mode for some arguments or that | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | these arguments are not discriminating enough.\footnote{In our | 
					
						
							|  |  |  |  | example, suppose the third argument of \code{has\_property/3} was the | 
					
						
							|  |  |  |  | atom \code{p} throughout.} Then we can avoid generating | 
					
						
							|  |  |  |  | \jitiONconstant instructions for them. Also, suppose we know that some | 
					
						
							|  |  |  |  | arguments are more likely than others to be used in the \code{in} | 
					
						
							|  |  |  |  | mode. Then we can simply place the \jitiONconstant instructions for | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | them before the instructions for other arguments. This is possible | 
					
						
							|  |  |  |  | since all indexing instructions take the argument register number as | 
					
						
							|  |  |  |  | an argument; their order does not matter. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{From any argument indexing to multi-argument indexing} | 
					
						
							|  |  |  |  | %-----------------------------------------------------------------
 | 
					
						
							|  |  |  |  | The scheme of the previous section gives us only single argument | 
					
						
							|  |  |  |  | indexing. However, all the infrastructure we need is already in place. | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | We can use it to obtain any fixed-order multi-argument \JITI in a | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | straightforward way. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Note that the compiler knows exactly the set of clauses that need to | 
					
						
							|  |  |  |  | be tried for each query with a specific symbol in the first argument. | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | % This information is needed in order to construct, at compile time, the
 | 
					
						
							|  |  |  |  | % hash table $T_1$ of Fig.~\ref{fig:carc:index}.
 | 
					
						
							|  |  |  |  | For multi-argument \JITI, instead of generating for each hash bucket | 
					
						
							|  |  |  |  | only \TryRetryTrust instructions, the compiler can prepend appropriate | 
					
						
							|  |  |  |  | demand indexing instructions. We illustrate this on our running | 
					
						
							|  |  |  |  | example. The table $T_1$ contains four \jitiONconstant instructions: | 
					
						
							|  |  |  |  | two for each of the remaining two arguments of hash buckets with more | 
					
						
							|  |  |  |  | than one alternative. For hash buckets with none or only one | 
					
						
							|  |  |  |  | alternative (e.g., for \code{d3}'s bucket) there is obviously no need | 
					
						
							|  |  |  |  | to resort to \JITI for the remaining arguments. | 
					
						
							|  |  |  |  | Figure~\ref{fig:carc:jiti_multi} shows the state of the hash tables | 
					
						
							|  |  |  |  | after the execution of queries \code{has\_property(C,salmonella,T)}, | 
					
						
							|  |  |  |  | which creates $T_2$, and \code{has\_property(d2,P,n)} which creates | 
					
						
							|  |  |  |  | the $T_3$ table and transforms the \jitiONconstant instruction for | 
					
						
							|  |  |  |  | \code{d2} and register $r_3$ to the appropriate \switchONconstant | 
					
						
							|  |  |  |  | instruction. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \begin{figure}[t] | 
					
						
							|  |  |  |  |   \centering | 
					
						
							|  |  |  |  |   \begin{sf} | 
					
						
							|  |  |  |  |     \begin{\wamcodesize} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |       \begin{tabular}{@{}c@{}c@{}c@{}} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 	\begin{tabular}{l} | 
					
						
							|  |  |  |  |           \switchONconstant $r_1$ 5 $T_1$ \\ | 
					
						
							|  |  |  |  |           \switchONconstant $r_2$ 5 $T_2$ \\ | 
					
						
							|  |  |  |  |           \jitiONconstant $r_3$   5 3     \\ | 
					
						
							|  |  |  |  |           \try $L_1$   \\ | 
					
						
							|  |  |  |  |           \retry $L_2$ \\ | 
					
						
							|  |  |  |  |           \retry $L_3$ \\ | 
					
						
							|  |  |  |  |           \retry $L_4$ \\ | 
					
						
							|  |  |  |  |           \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  | 	& | 
					
						
							|  |  |  |  | 	\begin{tabular}{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_1$: & \multicolumn{2}{c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d1} & \jitiONconstant $r_2$ 2 3 \\ | 
					
						
							|  |  |  |  | 	  \      &           & \jitiONconstant $r_3$ 2 3 \\ | 
					
						
							|  |  |  |  | 	  \      &           & \try   $L_1$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_2$ \\ \Cline | 
					
						
							|  |  |  |  |           \      & \code{d2} & \jitiONconstant $r_2$ 2 3 \\ | 
					
						
							|  |  |  |  | 	  \      &           & \switchONconstant $r_3$ 2 $T_3$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \try   $L_3$ \\ | 
					
						
							|  |  |  |  | 	  \      &           & \trust $L_4$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{d3} & \jump  $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							|  |  |  |  | 	& | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	\begin{tabular}{c} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 	\begin{tabular}{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_2$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{salmonella}    & \jitiONconstant $r_3$ 2 3 \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \try $L_1$   \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \trust $L_3$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{salmonella\_n} & \jump $L_2$  \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{cytrogen\_ca}  & \jitiONconstant $r_3$ 2 3 \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \try $L_4$   \\ | 
					
						
							|  |  |  |  | 	  \      &                      & \trust $L_5$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	\\ | 
					
						
							|  |  |  |  | 	\ \\ | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 	\begin{tabular}{r|c@{\ }|l|} | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	  $T_3$: & \multicolumn{2}{|c|}{Hash Table Info}\\ \Cline\Cline | 
					
						
							|  |  |  |  | 	  \      & \code{p} & \jump $L_3$ \\ \Cline | 
					
						
							|  |  |  |  | 	  \      & \code{n} & \jump $L_4$ \\ | 
					
						
							|  |  |  |  | 	  \Cline | 
					
						
							|  |  |  |  | 	\end{tabular} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	\end{tabular} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |       \end{tabular} | 
					
						
							|  |  |  |  |     \end{\wamcodesize} | 
					
						
							|  |  |  |  |   \end{sf} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   \caption{\JITI for all arguments; | 
					
						
							|  |  |  |  |     $T_1$ is static; $T_2$ and $T_3$ are created dynamically} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \label{fig:carc:jiti_multi} | 
					
						
							|  |  |  |  | \end{figure} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Implementation issues.} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | In the \jitiONconstant instructions of Fig.~\ref{fig:carc:jiti_multi} | 
					
						
							|  |  |  |  | notice the integer 2 which denotes the number of clauses that the | 
					
						
							|  |  |  |  | instruction will index. Using this number an index table of | 
					
						
							|  |  |  |  | appropriate size will be created, such as $T_3$. To fill this table we | 
					
						
							|  |  |  |  | need information about the clauses to index and the symbols to hash | 
					
						
							|  |  |  |  | on. The clauses can be obtained by scanning the labels of the | 
					
						
							|  |  |  |  | \TryRetryTrust instructions following \jitiONconstant; the symbols by | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | looking at appropriate byte code offsets (based on the argument | 
					
						
							|  |  |  |  | register number) from these labels. In our running example, the | 
					
						
							|  |  |  |  | symbols can be obtained by looking at the second argument of the | 
					
						
							|  |  |  |  | \getcon instruction whose argument register is $r_2$. In the loaded | 
					
						
							|  |  |  |  | bytecode, assuming the argument register is represented in one byte, | 
					
						
							|  |  |  |  | these symbols are found $\mathit{sizeof}(\getcon) + \mathit{sizeof}(\mathit{opcode}) + 1$ bytes | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | away from the clause label; see Fig.~\ref{fig:carc:clauses}. Thus, | 
					
						
							|  |  |  |  | multi-argument \JITI is easy to get and the creation of index tables | 
					
						
							|  |  |  |  | can be extremely fast when indexing Datalog facts. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{Beyond Datalog and other implementation issues} | 
					
						
							|  |  |  |  | %----------------------------------------------------------
 | 
					
						
							|  |  |  |  | Demand-driven indexing of clauses with function symbols is not significantly | 
					
						
							|  |  |  |  | more difficult. The scheme we have described is applicable but | 
					
						
							|  |  |  |  | requires the following extensions: | 
					
						
							|  |  |  |  | \begin{enumerate} | 
					
						
							|  |  |  |  | \item Besides \jitiONconstant we also need \jitiONterm and | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   \jitiONstructure instructions. These are the \JITI counterparts of | 
					
						
							|  |  |  |  |   the WAM's \switchONterm and \switchONstructure. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \item Because the byte code for the clause heads does not necessarily | 
					
						
							|  |  |  |  |   have a regular structure, the abstract machine needs to be able to | 
					
						
							|  |  |  |  |   ``walk'' the byte code instructions and recover the symbols on which | 
					
						
							| 
									
										
										
										
											2007-03-06 23:46:17 +00:00
										 |  |  |  |   indexing will be based. Writing such a code walking procedure is not | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   hard. | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \item Indexing on a position that contains unconstrained variables | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   for some clauses is tricky. The WAM needs to group clauses in this | 
					
						
							|  |  |  |  |   case and without special treatment creates two choice points for | 
					
						
							|  |  |  |  |   this argument (one for the variables and one for each group of | 
					
						
							| 
									
										
										
										
											2007-03-06 23:46:17 +00:00
										 |  |  |  |   clauses). However, this issue and how to deal with it are well known | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   by now. Possible solutions to it are described in a paper by | 
					
						
							| 
									
										
										
										
											2007-03-07 15:46:03 +00:00
										 |  |  |  |   Carlsson~\cite{FreezeIndexing@ICLP-87} and can be readily adapted to | 
					
						
							|  |  |  |  |   \JITI. Alternatively, in a simple implementation, we can skip \JITI | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  |   for positions with variables in some clauses. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{enumerate} | 
					
						
							|  |  |  |  | Before describing \JITI more formally, we remark on the following | 
					
						
							|  |  |  |  | design decisions whose rationale may not be immediately obvious: | 
					
						
							|  |  |  |  | \begin{itemize} | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | \item By default, only table $T_1$ is generated at compile time (as in | 
					
						
							|  |  |  |  |   the WAM) and the additional index tables $T_2, T_3, \ldots$ are | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   generated dynamically. This is because we do not want to increase | 
					
						
							|  |  |  |  |   compiled code size unnecessarily (i.e., when there is no demand for | 
					
						
							|  |  |  |  |   these indices). | 
					
						
							|  |  |  |  | \item On the other hand, we generate \jitiSTAR instructions at compile | 
					
						
							|  |  |  |  |   time for the head arguments.\footnote{The \jitiSTAR instructions for | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   $T_1$ can be generated either by the compiler or the loader.} This | 
					
						
							|  |  |  |  |   does not noticeably increase the generated byte code but it greatly | 
					
						
							|  |  |  |  |   simplifies code loading. Notice that a nice property of the scheme | 
					
						
							|  |  |  |  |   we have described is that the loaded byte code can be patched | 
					
						
							|  |  |  |  |   \emph{without} the need to move any instructions. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | % The indexing tables are typically not interspersed with the byte code.
 | 
					
						
							|  |  |  |  | \item Finally, one may wonder why the \jitiSTAR instructions create | 
					
						
							|  |  |  |  |   the dynamic index tables with an additional code walking pass | 
					
						
							|  |  |  |  |   instead of piggy-backing on the pass which examines all clauses via | 
					
						
							|  |  |  |  |   the main \TryRetryTrust chain. The main reasons are: 1) in many cases | 
					
						
							|  |  |  |  |   the code walking can be selective and guided by offsets and 2) by | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  |   first creating the index table and then using it we speed up the | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   execution of the queries and often avoid unnecessary choice point | 
					
						
							|  |  |  |  |   creations. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{itemize} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | Note that all these decisions are orthogonal to the main idea and are | 
					
						
							|  |  |  |  | under compiler control. For example, if analysis determines that some | 
					
						
							|  |  |  |  | argument sequences will never demand indexing we can simply avoid | 
					
						
							|  |  |  |  | generation of \jitiSTAR instructions for them. Similarly, if some | 
					
						
							|  |  |  |  | argument sequences will definitely demand indexing we can speed up | 
					
						
							|  |  |  |  | execution by generating the appropriate tables at compile time instead | 
					
						
							|  |  |  |  | of dynamically. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{Demand-driven index construction and its properties} | 
					
						
							|  |  |  |  | %---------------------------------------------------------------
 | 
					
						
							|  |  |  |  | The idea behind \JITI can be captured in a single sentence: \emph{we | 
					
						
							|  |  |  |  | can generate every index we need during program execution when this | 
					
						
							|  |  |  |  | index is demanded}. Subsequent uses of these indices can speed up | 
					
						
							|  |  |  |  | execution considerably more than the time it takes to construct them | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | (more on this below) so this runtime action makes sense.%\footnote{In
 | 
					
						
							|  |  |  |  | %fact, because choice points are expensive in the WAM, \JITI can speed
 | 
					
						
							|  |  |  |  | %up even the execution of the query that triggers the process, not only
 | 
					
						
							|  |  |  |  | %subsequent queries.}
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | %We describe the process of demand-driven index construction.
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | % \subsubsection{Demand-driven index construction}
 | 
					
						
							|  |  |  |  | %-------------------------------------------------
 | 
					
						
							|  |  |  |  | Let $p/k$ be a predicate with $n$ clauses. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | At a high level, its indices form a tree whose root is the entry point | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | of the predicate. For simplicity, assume that the root node of the | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | tree and the interior nodes corresponding to the index table for the | 
					
						
							|  |  |  |  | first argument have been constructed at compile time. Leaves of this | 
					
						
							|  |  |  |  | tree are the nodes containing the code for the clauses of the | 
					
						
							|  |  |  |  | predicate and each clause is identified by a unique label \mbox{$L_i,
 | 
					
						
							|  |  |  |  | 1 \leq i \leq n$}. Execution always starts at the first instruction of
 | 
					
						
							|  |  |  |  | the root node and follows Algorithm~\ref{alg:construction}. The | 
					
						
							|  |  |  |  | algorithm might look complicated but is actually quite simple. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | Each non-leaf node contains a sequence of byte code instructions with | 
					
						
							|  |  |  |  | groups of the form \mbox{$\langle I_1, \ldots, I_m, T_1, \ldots, T_l
 | 
					
						
							|  |  |  |  | \rangle, 0 \leq m \leq k, 1 \leq l \leq n$} where each of the $I$
 | 
					
						
							|  |  |  |  | instructions, if any, is either a \switchSTAR or a \jitiSTAR | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | instruction and each of the $T$ instructions either forms a sequence | 
					
						
							|  |  |  |  | of \TryRetryTrust instructions (if $l > 1$) or is a \jump instruction | 
					
						
							|  |  |  |  | (if \mbox{$l = 1$}). Step~2.2 dynamically constructs an index table | 
					
						
							|  |  |  |  | $\cal T$ whose buckets are the newly created interior nodes in the | 
					
						
							|  |  |  |  | tree. Each bucket associated with a single clause contains a \jump to | 
					
						
							|  |  |  |  | the label of that clause. Each bucket associated with many clauses | 
					
						
							|  |  |  |  | starts with the $I$ instructions which are yet to be visited and | 
					
						
							|  |  |  |  | continues with a \TryRetryTrust chain pointing to the clauses. When | 
					
						
							|  |  |  |  | the index construction is done, the \jitiSTAR instruction mutates to a | 
					
						
							|  |  |  |  | \switchSTAR WAM instruction. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %-------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | \begin{Algorithm}[t] | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \caption{Actions of the abstract machine with \JITI} | 
					
						
							|  |  |  |  |   \label{alg:construction} | 
					
						
							|  |  |  |  |   \begin{enumerate} | 
					
						
							|  |  |  |  |   \item if the current instruction $I$ is a \switchSTAR, \try, \retry, | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |     \trust or \jump, act as in the WAM; | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |   \item if the current instruction $I$ is a \jitiSTAR with arguments $r,
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |     l$, and $k$ ($r$ is a register) then
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |     \begin{enumerate} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |     \item[2.1] if register $r$ contains a variable, the action is a | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |       \instr{goto} the next instruction in the node; | 
					
						
							|  |  |  |  |     \item[2.2] if register $r$ contains a value $v$, the action is to | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |       dynamically construct the index: | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |       \begin{itemize} | 
					
						
							|  |  |  |  |       \item[2.2.1] collect the subsequent instructions in a list $\cal I$ | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	until the next instruction is a \try; | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |       \item[2.2.2] for each label $L$ in the \TryRetryTrust chain | 
					
						
							|  |  |  |  | 	inspect the code of the clause with label $L$ to find the | 
					
						
							|  |  |  |  | 	symbol~$c$ associated with register $r$ in the clause; (This | 
					
						
							|  |  |  |  | 	step creates a list of $\langle c, L \rangle$ pairs.) | 
					
						
							|  |  |  |  |       \item[2.2.3] create an index table $\cal T$ out of these pairs as | 
					
						
							|  |  |  |  | 	follows: | 
					
						
							|  |  |  |  | 	\begin{itemize} | 
					
						
							|  |  |  |  | 	\item if $I$ is a \jitiONconstant or a \jitiONstructure then | 
					
						
							|  |  |  |  | 	  create an index table for the symbols in the list of pairs; | 
					
						
							|  |  |  |  | 	  each entry of the table is identified by a symbol $c$ and | 
					
						
							|  |  |  |  | 	  contains: | 
					
						
							|  |  |  |  | 	  \begin{itemize} | 
					
						
							|  |  |  |  | 	  \item the instruction \jump $L_c$ if $L_c$ is the only label | 
					
						
							|  |  |  |  | 	    associated with $c$; | 
					
						
							|  |  |  |  | 	  \item the sequence of instructions obtained by appending to | 
					
						
							|  |  |  |  | 	    $\cal I$ a \TryRetryTrust chain for the sequence of labels | 
					
						
							|  |  |  |  | 	    $L'_1, \ldots, L'_l$ that are associated with $c$ | 
					
						
							|  |  |  |  | 	  \end{itemize} | 
					
						
							|  |  |  |  | 	\item if $I$ is a \jitiONterm then | 
					
						
							|  |  |  |  | 	  \begin{itemize} | 
					
						
							|  |  |  |  | 	  \item partition the sequence of labels $\cal L$ in the list | 
					
						
							|  |  |  |  | 	    of pairs into sequences of labels ${\cal L}_c, {\cal L}_l$ | 
					
						
							|  |  |  |  | 	    and ${\cal L}_s$ for constants, lists and structures, | 
					
						
							|  |  |  |  | 	    respectively; | 
					
						
							|  |  |  |  | 	  \item for each of the four sequences ${\cal L}, {\cal L}_c,
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	    {\cal L}_l, {\cal L}_s$ of labels create code:
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 	    \begin{itemize} | 
					
						
							|  |  |  |  | 	    \item the instruction \fail if the sequence is empty; | 
					
						
							|  |  |  |  | 	    \item the instruction \jump $L$ if $L$ is the only label in | 
					
						
							|  |  |  |  | 	      the sequence; | 
					
						
							|  |  |  |  | 	    \item the sequence of instructions obtained by appending to | 
					
						
							|  |  |  |  | 	      $\cal I$ a \TryRetryTrust chain for the current sequence | 
					
						
							|  |  |  |  | 	      of labels; | 
					
						
							|  |  |  |  | 	    \end{itemize} | 
					
						
							|  |  |  |  | 	  \end{itemize} | 
					
						
							|  |  |  |  | 	\end{itemize} | 
					
						
							|  |  |  |  |       \item[2.2.4] transform the \jitiSTAR $r, l, k$ instruction to | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 	a \switchSTAR $r, l, {\cal T}$ instruction; | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  |       \item[2.2.5] continue execution with this instruction. | 
					
						
							|  |  |  |  |       \end{itemize} | 
					
						
							|  |  |  |  |     \end{enumerate} | 
					
						
							|  |  |  |  |   \end{enumerate} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \vspace*{-.7em} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \end{Algorithm} | 
					
						
							|  |  |  |  | %-------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Complexity properties.} | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | Index construction during runtime does not change the complexity of | 
					
						
							|  |  |  |  | query execution. First, note that each demanded index table will be | 
					
						
							|  |  |  |  | constructed at most once. Also, a \jitiSTAR instruction will be | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | encountered only in cases where execution would examine all clauses in | 
					
						
							|  |  |  |  | the \TryRetryTrust chain.\footnote{This statement is possibly not | 
					
						
							| 
									
										
										
										
											2007-06-06 21:01:46 +00:00
										 |  |  |  | valid in the presence of Prolog cuts.} The construction visits these | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | clauses \emph{once} and then creates the index table in time linear in | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | the number of clauses as one pass over the list of $\langle c, L
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \rangle$ pairs suffices. After index construction, execution will
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | visit a subset of these clauses as the index table will be consulted. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %% Finally, note that the maximum number of \jitiSTAR instructions
 | 
					
						
							|  |  |  |  | %% that will be visited for each query is bounded by the maximum
 | 
					
						
							|  |  |  |  | %% number of index positions (symbols) in the clause heads of the
 | 
					
						
							|  |  |  |  | %% predicate.
 | 
					
						
							|  |  |  |  | Thus, in cases where \JITI is not effective, the execution time of a query will | 
					
						
							|  |  |  |  | at most double due to dynamic index construction. In fact, this worst | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | case is pessimistic and unlikely in practice. On the other hand, \JITI | 
					
						
							|  |  |  |  | can change the complexity of query evaluation from $O(n)$ to $O(1)$ | 
					
						
							|  |  |  |  | where $n$ is the number of clauses. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{More implementation choices} | 
					
						
							|  |  |  |  | %---------------------------------------
 | 
					
						
							|  |  |  |  | The observant reader has no doubt noticed that | 
					
						
							|  |  |  |  | Algorithm~\ref{alg:construction} provides multi-argument indexing but | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | only for the main functor symbol. For clauses with compound terms that | 
					
						
							|  |  |  |  | require indexing in their sub-terms we can either employ a program | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  | transformation such as \emph{unification | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | factoring}~\cite{UnifFact@POPL-95} at compile time or modify the | 
					
						
							| 
									
										
										
										
											2007-03-08 12:07:35 +00:00
										 |  |  |  | algorithm to consider index positions inside compound terms. This is | 
					
						
							|  |  |  |  | relatively easy to do but requires support from the register allocator | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | (passing the sub-terms of compound terms in appropriate registers) | 
					
						
							|  |  |  |  | and/or a new set of instructions. Due to space limitations we omit | 
					
						
							|  |  |  |  | further details. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | Algorithm~\ref{alg:construction} relies on a procedure that inspects | 
					
						
							|  |  |  |  | the code of a clause and collects the symbols associated with some | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | particular index position (step~2.2.2). If we are satisfied with | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | looking only at clause heads, this procedure needs to understand only | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | the structure of \instr{get} and \instr{unify} instructions. Thus, it | 
					
						
							|  |  |  |  | is easy to write. At the cost of increased implementation complexity, | 
					
						
							|  |  |  |  | this step can of course take into account other information that may | 
					
						
							|  |  |  |  | exist in the body of the clause (e.g., type tests such as | 
					
						
							|  |  |  |  | \code{var(X)}, \code{atom(X)}, aliasing constraints such as \code{X = | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | Y}, numeric constraints such as \code{X > 0}, etc.). | 
					
						
							| 
									
										
										
										
											2007-03-08 00:16:59 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | A reasonable concern for \JITI is increased memory consumption. In our | 
					
						
							|  |  |  |  | experience, this does not seem to be a problem in practice since most | 
					
						
							|  |  |  |  | applications do not have demand for indexing on many argument | 
					
						
							|  |  |  |  | combinations. In applications where it does become a problem or when | 
					
						
							|  |  |  |  | running in an environment with limited memory, we can easily put a | 
					
						
							|  |  |  |  | bound on the size of index tables, either globally or for each | 
					
						
							|  |  |  |  | predicate separately. For example, the \jitiSTAR instructions can | 
					
						
							|  |  |  |  | either become inactive when this limit is reached, or better yet we | 
					
						
							|  |  |  |  | can recover the space of some tables. To do so, we can employ any | 
					
						
							|  |  |  |  | standard recycling algorithm (e.g., LRU) and reclaim the memory of | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | index tables that are no longer in use. This is easy to do by | 
					
						
							|  |  |  |  | reverting the corresponding \switchSTAR instructions back to \jitiSTAR | 
					
						
							|  |  |  |  | instructions. If the indices are demanded again at a time when memory | 
					
						
							|  |  |  |  | is available, they can simply be regenerated. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | \section{Demand-Driven Indexing of Dynamic Predicates} \label{sec:dynamic} | 
					
						
							|  |  |  |  | %=========================================================================
 | 
					
						
							|  |  |  |  | We have so far lived in the comfortable world of static predicates, | 
					
						
							|  |  |  |  | where the set of clauses to index is fixed and the compiler can take | 
					
						
							|  |  |  |  | advantage of this knowledge. Dynamic code introduces several | 
					
						
							|  |  |  |  | complications: | 
					
						
							|  |  |  |  | \begin{itemize} | 
					
						
							|  |  |  |  | \item We need mechanisms to update multiple indices when new clauses | 
					
						
							|  |  |  |  |   are asserted or retracted. In particular, we need the ability to | 
					
						
							|  |  |  |  |   expand and possibly shrink multiple code chunks after code updates. | 
					
						
							|  |  |  |  | \item We do not know a priori which are the best index positions and | 
					
						
							|  |  |  |  |   cannot determine whether indexing on some arguments is avoidable. | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \item Supporting the logical update (LU) semantics of ISO Prolog | 
					
						
							|  |  |  |  |   becomes harder. | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | \end{itemize} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | We briefly discuss possible ways of addressing these issues. | 
					
						
							|  |  |  |  | However, note that Prolog systems typically provide indexing for | 
					
						
							| 
									
										
										
										
											2007-03-10 18:59:32 +00:00
										 |  |  |  | dynamic predicates and thus already deal in some way or another with | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | these issues; \JITI makes the problems more involved but not | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | fundamentally different than with only first argument indexing. | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | The first complication suggests that we should allocate memory for | 
					
						
							|  |  |  |  | dynamic indices in separate chunks, so that these can be expanded and | 
					
						
							|  |  |  |  | deallocated independently. Indeed, this is what we do. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | Regarding the second complication, in the absence of any other | 
					
						
							|  |  |  |  | information, the only alternative is to generate indices for all | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | arguments. As optimizations, we can avoid indexing predicates with | 
					
						
							|  |  |  |  | only one clause and exclude arguments where some clause has a | 
					
						
							|  |  |  |  | variable. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | Under LU semantics, calls to dynamic predicates execute in a | 
					
						
							|  |  |  |  | ``snapshot'' of the corresponding predicate. Each call sees the | 
					
						
							|  |  |  |  | clauses that existed at the time when the call was made, even if some | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  | of the clauses were later retracted or new clauses were asserted. If | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | several calls are alive in the stack, several snapshots will be alive | 
					
						
							|  |  |  |  | at the same time. The standard solution to this problem is to use time | 
					
						
							|  |  |  |  | stamps to tell which clauses are \emph{live} for which calls. | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | This solution complicates freeing index tables because: (1) an index | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | table holds references to clauses, and (2) the table may be in use | 
					
						
							|  |  |  |  | (i.e., may be accessible from the execution stacks). An index | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | table thus is killed in several steps: | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | \begin{enumerate} | 
					
						
							|  |  |  |  | \item Detach the index table from the indexing tree. | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \item Recursively \emph{kill} every child of the current table; if a | 
					
						
							|  |  |  |  |   table is killed so are its children. | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | \item Wait until the table is not in use, that is, it is not pointed | 
					
						
							| 
									
										
										
										
											2007-06-08 22:18:12 +00:00
										 |  |  |  |   to from anywhere. | 
					
						
							| 
									
										
										
										
											2007-03-11 13:22:43 +00:00
										 |  |  |  | \item Walk the table and release any references it may hold. | 
					
						
							|  |  |  |  | \item Physically recover space. | 
					
						
							|  |  |  |  | \end{enumerate} | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | \section{Implementation in XXX and in YAP} \label{sec:impl} | 
					
						
							|  |  |  |  | %==========================================================
 | 
					
						
							|  |  |  |  | The implementation of \JITI in XXX follows a variant of the scheme | 
					
						
							|  |  |  |  | presented in Sect.~\ref{sec:static}. The compiler uses heuristics to | 
					
						
							|  |  |  |  | determine the best argument to index on (i.e., this argument is not | 
					
						
							|  |  |  |  | necessarily the first) and employs \switchSTAR instructions for this | 
					
						
							|  |  |  |  | task. It also statically generates \jitiONconstant instructions for | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | other arguments that are good candidates for \JITI. Currently, an | 
					
						
							|  |  |  |  | argument is considered a good candidate if it has only constants or | 
					
						
							|  |  |  |  | only structure symbols in all clauses. Thus, XXX uses only | 
					
						
							|  |  |  |  | \jitiONconstant and \jitiONstructure instructions, never a | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | \jitiONterm. Also, XXX does not perform \JITI inside structure | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | symbols. For dynamic predicates, \JITI is employed only if they | 
					
						
							|  |  |  |  | consist of Datalog facts; if a clause which is not a Datalog fact is | 
					
						
							|  |  |  |  | asserted, all dynamically created index tables for the predicate are | 
					
						
							|  |  |  |  | simply removed and the \jitiONconstant instruction becomes a | 
					
						
							|  |  |  |  | \instr{noop}. All this is done automatically, but the user can disable | 
					
						
							|  |  |  |  | \JITI in compiled code using an option. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | YAP has implemented \JITI since version 5. The current implementation | 
					
						
							|  |  |  |  | supports static code, dynamic code, and the internal database. It | 
					
						
							|  |  |  |  | differs from the algorithm presented in Sect.~\ref{sec:static} in that | 
					
						
							|  |  |  |  | \emph{all indexing code is generated on demand}. Thus, YAP cannot | 
					
						
							|  |  |  |  | assume that a \jitiSTAR instruction is followed by a \TryRetryTrust | 
					
						
							|  |  |  |  | chain. Instead, by default YAP has to search the whole predicate for | 
					
						
							|  |  |  |  | clauses that match the current position in the indexing code. Doing so | 
					
						
							|  |  |  |  | for every index expansion was found to be very inefficient for larger | 
					
						
							|  |  |  |  | relations: in such cases YAP will maintain a list of matching clauses | 
					
						
							|  |  |  |  | at each \jitiSTAR node. Indexing dynamic predicates in YAP follows | 
					
						
							|  |  |  |  | very much the same algorithm as static indexing: the key idea is that | 
					
						
							|  |  |  |  | most nodes in the index tree must be allocated separately so that they | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | can grow or shrink independently. YAP can index arguments where some | 
					
						
							| 
									
										
										
										
											2007-03-10 18:59:32 +00:00
										 |  |  |  | clauses have unconstrained variables, but only for static predicates, | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | as in dynamic code this would complicate support for LU semantics. | 
					
						
							| 
									
										
										
										
											2007-03-10 18:59:32 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | YAP uses the term JITI (Just-In-Time Indexing) to refer to \JITI. In | 
					
						
							|  |  |  |  | the next section we will take the liberty to use this term as a | 
					
						
							|  |  |  |  | convenient abbreviation. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-10 17:36:25 +00:00
										 |  |  |  | \section{Performance Evaluation} \label{sec:perf} | 
					
						
							|  |  |  |  | %================================================
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | We evaluate JITI on a set of benchmarks and applications. | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | Throughout, we compare performance of JITI with first argument | 
					
						
							|  |  |  |  | indexing. For the benchmarks of Sect.~\ref{sec:perf:ineffective} | 
					
						
							|  |  |  |  | and~\ref{sec:perf:effective} which involve both systems, we used a | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | 2.4~GHz P4-based laptop with 512~MB of memory. | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | % and report times in milliseconds.
 | 
					
						
							|  |  |  |  | For the benchmarks of Sect.~\ref{sec:perf:ILP} which involve | 
					
						
							|  |  |  |  | YAP~5.1.2 only, we used an 8-node cluster, where each node is a | 
					
						
							|  |  |  |  | dual-core AMD~2600+ machine with 2~GB of memory. | 
					
						
							|  |  |  |  | % and report times in seconds.
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \begin{table}[t] | 
					
						
							|  |  |  |  |   \centering | 
					
						
							|  |  |  |  |   \caption{Performance of some benchmarks with 1st vs.\ \JITI (times in msecs)} | 
					
						
							|  |  |  |  |   \vspace*{-1em} | 
					
						
							|  |  |  |  |   \subfigure[When JITI is ineffective]{%
 | 
					
						
							|  |  |  |  |     \label{tab:ineffective} | 
					
						
							|  |  |  |  |     \begin{tabular}[b]{|l||r|r||r|r|} \hline | 
					
						
							|  |  |  |  |       & \multicolumn{2}{|c||}{\bf YAP} & \multicolumn{2}{|c|}{\bf XXX} \\ | 
					
						
							|  |  |  |  |       \cline{2-5} | 
					
						
							|  |  |  |  |       Benchmark     &   1st  &  JITI         &   1st  &  JITI          \\ | 
					
						
							|  |  |  |  |       \hline | 
					
						
							|  |  |  |  |       \tcLio (8000) &     13 &    14         &      4 &     4          \\ | 
					
						
							|  |  |  |  |       \tcRio (2000) &   1445 &  1469         &    614 &   615          \\ | 
					
						
							|  |  |  |  |       \tcDio ( 400) &   3208 &  3260         &   2338 &  2300          \\ | 
					
						
							|  |  |  |  |       \tcLoo (2000) &   3935 &  3987         &   2026 &  2105          \\ | 
					
						
							|  |  |  |  |       \tcRoo (2000) &   2841 &  2952         &   1502 &  1512          \\ | 
					
						
							|  |  |  |  |       \tcDoo ( 400) &   3735 &  3805         &   4976 &  4978          \\ | 
					
						
							|  |  |  |  |       \compress     &   3614 &  3595         &   2875 &  2848          \\ | 
					
						
							|  |  |  |  |       \hline | 
					
						
							|  |  |  |  |     \end{tabular} | 
					
						
							|  |  |  |  |   }%
 | 
					
						
							|  |  |  |  |   \subfigure[When JITI is effective]{ | 
					
						
							|  |  |  |  |     \label{tab:effective} | 
					
						
							|  |  |  |  |     \begin{tabular}[b]{|l||r|r|r||r|r|r|} \hline | 
					
						
							|  |  |  |  |       & \multicolumn{3}{|c||}{\bf YAP} & \multicolumn{3}{|c|}{\bf XXX} \\ | 
					
						
							|  |  |  |  |       \cline{2-7} | 
					
						
							|  |  |  |  |                 &   1st  &  JITI &{\bf ratio}&  1st  &  JITI &{\bf ratio}\\ | 
					
						
							|  |  |  |  |       \hline | 
					
						
							|  |  |  |  |       \sgCyl    &    2,864 &    24 & $119\times$& 2,390 &    28 &  $85\times$\\ | 
					
						
							|  |  |  |  |       \muta     &   30,057 &16,782 &$1.79\times$&26,314 &21,574 &$1.22\times$\\ | 
					
						
							|  |  |  |  |       \pta      &    5,131 &   188 &  $27\times$& 4,442 &   279 &  $16\times$\\ | 
					
						
							|  |  |  |  |       \tea      &1,478,813 &54,616 &  $27\times$&   --- &   --- &      ---   \\ | 
					
						
							|  |  |  |  |       \hline | 
					
						
							|  |  |  |  |     \end{tabular} | 
					
						
							|  |  |  |  |   }%
 | 
					
						
							|  |  |  |  |   \vspace*{-1em} | 
					
						
							|  |  |  |  | \end{table} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | \subsection{Performance of \JITI when ineffective} \label{sec:perf:ineffective} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | In some programs, \JITI does not trigger\footnote{In XXX only; even | 
					
						
							|  |  |  |  | 1st argument indexing is generated on demand when JITI is used in | 
					
						
							|  |  |  |  | YAP.} or might trigger but have no effect other than an overhead due | 
					
						
							|  |  |  |  | to runtime index construction. We therefore wanted to measure this | 
					
						
							|  |  |  |  | overhead. | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | %
 | 
					
						
							|  |  |  |  | As both systems support tabling, we decided to use tabling benchmarks | 
					
						
							| 
									
										
										
										
											2007-03-11 20:57:22 +00:00
										 |  |  |  | because they are small and easy to understand, and because they are a | 
					
						
							| 
									
										
										
										
											2007-06-06 21:01:46 +00:00
										 |  |  |  | bad case for JITI in the following sense: tabling avoids generating | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | repetitive queries and the benchmarks operate over extensional | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | database (EDB) predicates of size approximately equal to the size of | 
					
						
							|  |  |  |  | the program. We used \compress, a tabled program that solves a puzzle | 
					
						
							|  |  |  |  | from an ICLP Prolog programming competition. The other benchmarks are | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | different variants of tabled left, right and doubly recursive | 
					
						
							|  |  |  |  | transitive closure over an EDB predicate forming a chain of size shown | 
					
						
							|  |  |  |  | in Table~\ref{tab:ineffective} in parentheses. For each variant of | 
					
						
							|  |  |  |  | transitive closure, we issue two queries: one with mode | 
					
						
							|  |  |  |  | \code{(in,out)} and one with mode \code{(out,out)}. | 
					
						
							| 
									
										
										
										
											2007-03-10 18:59:32 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  | For YAP, indices on the first argument and \TryRetryTrust chains are | 
					
						
							|  |  |  |  | built on all benchmarks under \JITI. | 
					
						
							| 
									
										
										
										
											2007-03-10 18:59:32 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | For XXX, \JITI triggers on no benchmark but the \jitiONconstant | 
					
						
							|  |  |  |  | instructions are executed for the three \bench{tc\_?\_oo} benchmarks. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | As can be seen in Table~\ref{tab:ineffective}, \JITI, even when | 
					
						
							|  |  |  |  | ineffective, incurs a runtime overhead that is at the level of noise | 
					
						
							|  |  |  |  | and goes mostly unnoticed. | 
					
						
							|  |  |  |  | %
 | 
					
						
							|  |  |  |  | We also note that our aim here is \emph{not} to compare the two | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | systems, so the \textbf{YAP} and \textbf{XXX} columns should be read | 
					
						
							|  |  |  |  | separately. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \vspace*{-0.5em} | 
					
						
							|  |  |  |  | \subsection{Performance of \JITI when effective} \label{sec:perf:effective} | 
					
						
							|  |  |  |  | %--------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | On the other hand, when \JITI is effective, it can significantly | 
					
						
							|  |  |  |  | improve runtime performance. We use the following programs and | 
					
						
							|  |  |  |  | applications: | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \begin{small} | 
					
						
							|  |  |  |  | \begin{description} | 
					
						
							|  |  |  |  | \item[\sgCyl] The same generation DB benchmark on a $24 \times 24
 | 
					
						
							|  |  |  |  |   \times 2$ cylinder. We issue the open query.
 | 
					
						
							|  |  |  |  | \item[\muta] A computationally intensive application where most | 
					
						
							|  |  |  |  |   predicates are defined intensionally. | 
					
						
							|  |  |  |  | \item[\pta] A tabled logic program implementing Andersen's points-to | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   analysis. A medium-sized imperative program is encoded as a set of | 
					
						
							|  |  |  |  |   facts (about 16,000) and properties of interest are encoded using | 
					
						
							|  |  |  |  |   rules. Program properties are then determined by the closure of | 
					
						
							|  |  |  |  |   these rules. | 
					
						
							|  |  |  |  | \item[\tea] Another implementation of Andersen's points-to analysis. | 
					
						
							|  |  |  |  |   The analyzed program, the \texttt{javac} benchmark, is encoded in a | 
					
						
							|  |  |  |  |   file of 411,696 facts (62,759,581 bytes in total). Its compilation | 
					
						
							|  |  |  |  |   exceeds the limits of the XXX compiler (w/o JITI). So we run this | 
					
						
							|  |  |  |  |   benchmark only in YAP. | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | \end{description} | 
					
						
							|  |  |  |  | \end{small} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | As can be seen in Table~\ref{tab:effective}, \JITI significantly | 
					
						
							|  |  |  |  | improves the performance of these applications. In \muta, which spends | 
					
						
							| 
									
										
										
										
											2007-03-11 20:57:22 +00:00
										 |  |  |  | most of its time in recursive predicates, the speedup is only $79\%$ | 
					
						
							|  |  |  |  | in YAP and~$22\%$ in XXX. The remaining benchmarks execute several | 
					
						
							|  |  |  |  | times (from~$16$ up to~$119$) faster. It is important to realize that | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | \emph{these speedups are obtained automatically}, i.e., without any | 
					
						
							|  |  |  |  | programmer intervention or use of compiler directives, in all | 
					
						
							|  |  |  |  | these applications. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \subsection{Performance of \JITI on ILP applications} \label{sec:perf:ILP} | 
					
						
							|  |  |  |  | %-------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-11 20:57:22 +00:00
										 |  |  |  | The need for \JITI was originally noticed in inductive logic | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | programming applications. These applications tend to issue ad hoc | 
					
						
							|  |  |  |  | queries during execution and thus their indexing requirements cannot | 
					
						
							|  |  |  |  | be determined at compile time. On the other hand, they operate on lots | 
					
						
							|  |  |  |  | of data, so memory consumption is a reasonable concern. We evaluate | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  | JITI's time and space performance on some learning tasks using the | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | Aleph system~\cite{ALEPH} and the datasets of | 
					
						
							|  |  |  |  | Fig.~\ref{fig:ilp:datasets} which issue simple queries in an | 
					
						
							| 
									
										
										
										
											2007-03-12 13:14:17 +00:00
										 |  |  |  | extensional database. Several of these datasets are standard in the | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | ILP literature. | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Time performance.} | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | We compare times for 10 runs of the saturation/refinement cycle of the | 
					
						
							|  |  |  |  | ILP system; see Table~\ref{tab:ilp:time}. | 
					
						
							|  |  |  |  | %% The \Krki datasets have small search spaces and small databases, so
 | 
					
						
							|  |  |  |  | %% they achieve the same performance under both versions: there is no
 | 
					
						
							|  |  |  |  | %% slowdown. 
 | 
					
						
							|  |  |  |  | The \Mesh and \Pyrimidines applications are the only ones that do not | 
					
						
							|  |  |  |  | benefit much from indexing in the database; they do benefit, though, | 
					
						
							|  |  |  |  | from indexing in the dynamic representation of the search space, as | 
					
						
							|  |  |  |  | their running times improve somewhat with \JITI. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | The \BreastCancer and \GeneExpr applications use unstructured data. | 
					
						
							|  |  |  |  | The speedup here is mostly from multiple argument indexing. | 
					
						
							|  |  |  |  | \BreastCancer is particularly interesting. It consists of 40 binary | 
					
						
							|  |  |  |  | relations with 65k elements each, where the first argument is the key. | 
					
						
							|  |  |  |  | We know that most calls have the first argument bound, hence indexing | 
					
						
							|  |  |  |  | was not expected to matter much. Instead, the results show \JITI to | 
					
						
							|  |  |  |  | improve running time by more than an order of magnitude. This suggests | 
					
						
							|  |  |  |  | that even a small percentage of badly indexed calls can end up | 
					
						
							|  |  |  |  | dominating runtime. | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | \IEProtein and \Thermolysin are example applications that manipulate | 
					
						
							|  |  |  |  | structured data. \IEProtein is the largest dataset we consider, and | 
					
						
							|  |  |  |  | indexing is absolutely critical. The speedup is not just impressive; | 
					
						
							|  |  |  |  | it is simply not possible to run the application in reasonable time | 
					
						
							|  |  |  |  | with only first argument indexing. \Thermolysin is smaller and | 
					
						
							|  |  |  |  | performs some computation per query, but even so, \JITI improves its | 
					
						
							|  |  |  |  | performance by an order of magnitude. The remaining benchmarks improve | 
					
						
							|  |  |  |  | from one to more than two orders of magnitude. | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | \begin{table}[t] | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  |   \centering | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  |   \caption{Time and space performance of JITI | 
					
						
							|  |  |  |  |     on Inductive Logic Programming datasets} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   \vspace*{-1em} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |   \label{tab:ilp} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   \setlength{\tabcolsep}{2.5pt} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |   \subfigure[Time (in seconds)]{\label{tab:ilp:time} | 
					
						
							|  |  |  |  |     \begin{tabular}{|l||r|r|r||} \hline | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  |                   & \multicolumn{3}{|c||}{Time} \\ | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  |     \cline{2-4} | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |     Benchmark     &    1st    &   JITI  &{\bf ratio} \\ | 
					
						
							|  |  |  |  |     \hline | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  |     \BreastCancer &     1,450 &      88 &  $16\times$ \\ | 
					
						
							|  |  |  |  |     \Carcino      &    17,705 &     192 &  $92\times$ \\ | 
					
						
							|  |  |  |  |     \Choline      &    14,766 &   1,397 &  $11\times$ \\ | 
					
						
							|  |  |  |  |     \GeneExpr     &   193,283 &   7,483 &  $26\times$ \\ | 
					
						
							|  |  |  |  |     \IEProtein    & 1,677,146 &   2,909 & $577\times$ \\ | 
					
						
							| 
									
										
										
										
											2007-03-11 23:30:00 +00:00
										 |  |  |  | %%  \Krki         &       0.3 &     0.3 &   $1$ \\
 | 
					
						
							|  |  |  |  | %%  \KrkiII       &       1.3 &     1.3 &   $1$ \\
 | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  |     \Mesh         &         4 &       3 & $1.3\times$ \\ | 
					
						
							|  |  |  |  |     \Pyrimidines  &   487,545 & 253,235 & $1.9\times$ \\ | 
					
						
							|  |  |  |  |     \Susi         &   105,091 &     307 & $342\times$ \\ | 
					
						
							|  |  |  |  |     \Thermolysin  &    50,279 &   5,213 &  $10\times$ \\ | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  |     \hline | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |     \end{tabular} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   }%
 | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |   \subfigure[Memory usage (in KB)]{\label{tab:ilp:memory} | 
					
						
							|  |  |  |  |     \begin{tabular}{||r|r|r|r||} \hline | 
					
						
							|  |  |  |  |                 \multicolumn{2}{||c|}{Static code} | 
					
						
							|  |  |  |  |               & \multicolumn{2}{|c||}{Dynamic code} \\ | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  |     \hline | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  |                 \multicolumn{1}{||c|}{Clauses} & \multicolumn{1}{c}{Index} | 
					
						
							|  |  |  |  |               & \multicolumn{1}{|c|}{Clauses} & \multicolumn{1}{c||}{Index}\\ | 
					
						
							|  |  |  |  |     \hline | 
					
						
							|  |  |  |  | 	        60,940 &  46,887 &     630 &     14 \\ | 
					
						
							|  |  |  |  | 	         1,801 &   2,678 &  13,512 &    942 \\ | 
					
						
							|  |  |  |  | 	           666 &     174 &   3,172 &    174 \\ | 
					
						
							|  |  |  |  | 	        46,726 &  22,629 & 116,463 &  9,015 \\ | 
					
						
							|  |  |  |  | 	       146,033 & 129,333 &  53,423 &  1,531 \\ | 
					
						
							| 
									
										
										
										
											2007-03-11 23:30:00 +00:00
										 |  |  |  | %%	           678 &     117 &   2,047 &     24 \\
 | 
					
						
							|  |  |  |  | %%	         1,866 &     715 &   2,055 &     26 \\
 | 
					
						
							| 
									
										
										
										
											2007-03-11 23:19:47 +00:00
										 |  |  |  | 	           802 &     161 &   2,149 &    109 \\ | 
					
						
							|  |  |  |  | 	           774 &     218 &  25,840 & 12,291 \\ | 
					
						
							|  |  |  |  |  	         5,007 &   2,509 &   4,497 &    759 \\ | 
					
						
							|  |  |  |  | 	         2,317 &     929 & 116,129 &  7,064 \\ | 
					
						
							|  |  |  |  |     \hline | 
					
						
							|  |  |  |  |     \end{tabular} | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  |   }%
 | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  | \end{table} | 
					
						
							| 
									
										
										
										
											2007-03-11 12:15:17 +00:00
										 |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							|  |  |  |  | \begin{figure} | 
					
						
							|  |  |  |  |   \hrule \ \\[-2em] | 
					
						
							|  |  |  |  |   \begin{description} | 
					
						
							|  |  |  |  | %%  \item[\Krki] tries to learn rules from a small database of chess end-games;
 | 
					
						
							|  |  |  |  |   \item[\GeneExpr] learns rules for yeast gene activity given a | 
					
						
							|  |  |  |  |     database of genes, their interactions, and micro-array gene | 
					
						
							| 
									
										
										
										
											2007-03-12 14:44:18 +00:00
										 |  |  |  |     expression data; %~\cite{Regulatory@ILP-06};
 | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  |   \item[\BreastCancer] processes real-life patient reports towards | 
					
						
							|  |  |  |  |     predicting whether an abnormality may be | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  |     malignant; %~\cite{DavisBDPRCS@IJCAI-05-short};
 | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  |   \item[\IEProtein] processes information extraction from paper | 
					
						
							|  |  |  |  |     abstracts to search proteins; | 
					
						
							|  |  |  |  |   \item[\Susi] learns from shopping patterns; | 
					
						
							|  |  |  |  |   \item[\Mesh] learns rules for finite element method mesh design; | 
					
						
							|  |  |  |  |   \item[\Carcino, \Choline, \Pyrimidines] try to predict chemical | 
					
						
							| 
									
										
										
										
											2007-06-09 15:47:30 +00:00
										 |  |  |  |     properties of compounds and store them as tables, given their | 
					
						
							|  |  |  |  |     chemical composition and major properties; | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  |   \item[\Thermolysin] also manipulates chemical compounds but learns | 
					
						
							|  |  |  |  |     from the 3D-structure of a molecule's conformations. | 
					
						
							|  |  |  |  |   \end{description} | 
					
						
							|  |  |  |  |   \hrule | 
					
						
							|  |  |  |  |   \caption{Description of the ILP datasets used in the performance | 
					
						
							|  |  |  |  |     comparison of Table~\ref{tab:ilp}} | 
					
						
							|  |  |  |  |   \label{fig:ilp:datasets} | 
					
						
							|  |  |  |  | \end{figure} | 
					
						
							|  |  |  |  | %------------------------------------------------------------------------------
 | 
					
						
							| 
									
										
										
										
											2007-03-11 19:28:35 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Space performance.} | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | Table~\ref{tab:ilp:memory} shows memory usage when using \JITI. The | 
					
						
							| 
									
										
										
										
											2007-03-12 14:44:18 +00:00
										 |  |  |  | table presents data obtained at a point near the end of execution; | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | memory usage should be at the maximum. These applications use a | 
					
						
							|  |  |  |  | mixture of static and dynamic predicates and we show their memory | 
					
						
							|  |  |  |  | usage separately. On static predicates, memory usage varies widely, | 
					
						
							|  |  |  |  | from only 10\% to the worst case, \Carcino, where the index tables | 
					
						
							|  |  |  |  | take more space than the original program. Hash tables dominate usage | 
					
						
							|  |  |  |  | in \IEProtein and \Susi, whereas \TryRetryTrust chains dominate in | 
					
						
							|  |  |  |  | \BreastCancer. In most other cases no single component dominates | 
					
						
							|  |  |  |  | memory usage. Memory usage for dynamic predicates is shown in the last | 
					
						
							|  |  |  |  | two columns; this data is mostly used to store the search space. | 
					
						
							|  |  |  |  | Observe that there is a much lower overhead in this case. A more | 
					
						
							|  |  |  |  | detailed analysis shows that most space is occupied by the hash tables | 
					
						
							|  |  |  |  | and by internal nodes of the tree, and that relatively little space is | 
					
						
							|  |  |  |  | occupied by \TryRetryTrust chains, suggesting that \JITI is behaving | 
					
						
							|  |  |  |  | well in practice. | 
					
						
							| 
									
										
										
										
											2007-03-10 19:05:26 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | \section{Concluding Remarks} | 
					
						
							|  |  |  |  | %===========================
 | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  | Motivated by the needs of applications in the areas of inductive | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | logic programming, program analysis, deductive databases, etc.\ to | 
					
						
							|  |  |  |  | access large datasets efficiently, we have described a novel but also | 
					
						
							|  |  |  |  | simple idea: \emph{indexing Prolog clauses on demand during program | 
					
						
							|  |  |  |  | execution}. | 
					
						
							|  |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  | Given the impressive speedups this idea can provide for many LP | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | applications, we are a bit surprised similar techniques have not been | 
					
						
							|  |  |  |  | explored before. In general, Prolog systems have been reluctant to | 
					
						
							|  |  |  |  | perform code optimizations during runtime and our feeling is that LP | 
					
						
							| 
									
										
										
										
											2007-03-12 14:44:18 +00:00
										 |  |  |  | implementation has been left a bit behind. We hold that this | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | should change. | 
					
						
							|  |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  | Indeed, we see \JITI as only a first, very successful, step towards | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | effective runtime optimization of logic programs. | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | As presented, \JITI is a hybrid technique: index generation occurs | 
					
						
							|  |  |  |  | during runtime but is partly guided by the compiler, because we want | 
					
						
							| 
									
										
										
										
											2007-03-12 11:16:57 +00:00
										 |  |  |  | to combine it with compile-time WAM-style indexing. More flexible | 
					
						
							|  |  |  |  | schemes are of course possible. For example, index generation can be | 
					
						
							| 
									
										
										
										
											2007-06-08 09:11:10 +00:00
										 |  |  |  | fully dynamic (as in YAP), combined with user declarations, or driven | 
					
						
							|  |  |  |  | by static analysis to be even more selective or go beyond fixed-order | 
					
						
							| 
									
										
										
										
											2007-03-12 11:16:57 +00:00
										 |  |  |  | indexing. | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | %
 | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  | Last, observe that \JITI fully respects Prolog semantics. Better | 
					
						
							| 
									
										
										
										
											2007-03-12 11:10:24 +00:00
										 |  |  |  | performance can be achieved in the context of one solution | 
					
						
							|  |  |  |  | computations, or in the context of tabling where order of clauses and | 
					
						
							| 
									
										
										
										
											2007-03-12 15:24:34 +00:00
										 |  |  |  | solutions does not matter and repeated solutions are discarded. | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-20 13:48:45 +00:00
										 |  |  |  | \paragraph{Acknowledgments} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | This work is dedicated to the memory of our friend and colleague | 
					
						
							|  |  |  |  | Ricardo Lopes. We miss you! V\'{\i}tor Santos Costa was partially | 
					
						
							|  |  |  |  | supported by CNPq and would like to acknowledge support received while | 
					
						
							|  |  |  |  | visiting at UW-Madison and the support of the YAP user community. | 
					
						
							|  |  |  |  | This work has been partially supported by MYDDAS (POSC/EIA/59154/2004) | 
					
						
							|  |  |  |  | and by funds granted to LIACC through the Programa de Financiamento | 
					
						
							|  |  |  |  | Plurianual, Funda\c{c}\~ao para a Ci\^encia e Tecnologia and Programa POSC. | 
					
						
							| 
									
										
										
										
											2007-06-06 21:01:46 +00:00
										 |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \Paragraph{Acknowledgments} | 
					
						
							|  |  |  |  | %--------------------------
 | 
					
						
							|  |  |  |  | V\'{\i}tor Santos Costa was partially supported by CNPq and would like | 
					
						
							|  |  |  |  | to acknowledge support received while visiting at UW-Madison and the | 
					
						
							|  |  |  |  | support of the YAP user community. This work has been partially | 
					
						
							|  |  |  |  | supported by MYDDAS (POSC/EIA/59154/2004) and by funds granted to | 
					
						
							|  |  |  |  | LIACC through the Programa de Financiamento Plurianual, Funda\c{c}\~ao para | 
					
						
							|  |  |  |  | a Ci\^encia e Tecnologia and Programa POSC. | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  | %==============================================================================
 | 
					
						
							| 
									
										
										
										
											2007-06-08 15:34:49 +00:00
										 |  |  |  | \begin{thebibliography}{10} | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{Warren83} | 
					
						
							|  |  |  |  | Warren, D.H.D.: | 
					
						
							|  |  |  |  | \newblock An abstract {P}rolog instruction set. | 
					
						
							|  |  |  |  | \newblock Tech. Note 309, SRI International (1983) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{YAP} | 
					
						
							|  |  |  |  | Santos~Costa, V., Damas, L., Reis, R., Azevedo, R.: | 
					
						
							|  |  |  |  | \newblock {YAP} User's Manual. (2002) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{ShallowBacktracking@ICLP-89} | 
					
						
							|  |  |  |  | Carlsson, M.: | 
					
						
							|  |  |  |  | \newblock On the efficiency of optimising shallow backtracking in compiled | 
					
						
							|  |  |  |  |   {Prolog}. | 
					
						
							|  |  |  |  | \newblock In Levi, G., Martelli, M., eds.: Proceedings of the Sixth | 
					
						
							|  |  |  |  |   ICLP, MIT Press (June 1989)  3--15 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{IndexingProlog@NACLP-89} | 
					
						
							|  |  |  |  | Demoen, B., Mari{\"e}n, A., Callebaut, A.: | 
					
						
							|  |  |  |  | \newblock Indexing in {P}rolog. | 
					
						
							|  |  |  |  | \newblock In Lusk, E.L., Overbeek, R.A., eds.: Proceedings of NACLP, | 
					
						
							|  |  |  |  |   MIT Press (1989)  1001--1012 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{SWI} | 
					
						
							|  |  |  |  | Wielemaker, J.: | 
					
						
							|  |  |  |  | \newblock {SWI-Prolog 5.1}: Reference Manual. | 
					
						
							|  |  |  |  | \newblock {SWI}, University of Amsterdam, Roetersstraat 15, 1018 WB Amsterdam, | 
					
						
							|  |  |  |  |   The Netherlands. (1997--2003) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{XSB} | 
					
						
							|  |  |  |  | Sagonas, K.F., Swift, T., Warren, D.S., Freire, J., Rao, P.: | 
					
						
							|  |  |  |  | \newblock The {XSB} Pro\-grammer's Manual. | 
					
						
							|  |  |  |  | \newblock State University of New York at Stony Brook. (1997) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{ilProlog} | 
					
						
							|  |  |  |  | Tron\c{c}on, R., Janssens, G., Demoen, B., Vandecasteele, H.: | 
					
						
							|  |  |  |  | \newblock Fast frequent querying with lazy control flow compilation. | 
					
						
							|  |  |  |  | \newblock Theory and Practice of Logic Programming (2007) To appear. | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{HickeyMudambi@JLP-89} | 
					
						
							|  |  |  |  | Hickey, T., Mudambi, S.: | 
					
						
							|  |  |  |  | \newblock Global compilation of {P}rolog. | 
					
						
							|  |  |  |  | \newblock JLP \textbf{7}(3) (November 1989)  193--230 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{VRDW87} | 
					
						
							|  |  |  |  | {Van Roy}, P., Demoen, B., Willems, Y.D.: | 
					
						
							|  |  |  |  | \newblock Improving the execution speed of compiled {Prolog} with modes, clause | 
					
						
							|  |  |  |  |   selection and determinism. | 
					
						
							|  |  |  |  | \newblock In: TAPSOFT'87, Springer (1987)  111--125 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{TOAM@ICLP-90} | 
					
						
							|  |  |  |  | Zhou, N.F., Takagi, T., Ushijima, K.: | 
					
						
							|  |  |  |  | \newblock A matching tree oriented abstract machine for {P}rolog. | 
					
						
							|  |  |  |  | \newblock In Warren, D.H.D., Szeredi, P., eds.: ICLP90, MIT Press (1990) | 
					
						
							|  |  |  |  |   158--173 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{UnifFact@POPL-95} | 
					
						
							|  |  |  |  | Dawson, S., Ramakrishnan, C.R., Ramakrishnan, I.V., Sagonas, K., Skiena, S., | 
					
						
							|  |  |  |  |   Swift, T., Warren, D.S.: | 
					
						
							|  |  |  |  | \newblock Unification factoring for the efficient execution of logic programs. | 
					
						
							|  |  |  |  | \newblock In: Conference Record of POPL'95, ACM Press (January 1995)  247--258 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{Tries@JLP-99} | 
					
						
							|  |  |  |  | Ramakrishnan, I.V., Rao, P., Sagonas, K., Swift, T., Warren, D.S.: | 
					
						
							|  |  |  |  | \newblock Efficient access mechanisms for tabled logic programs. | 
					
						
							|  |  |  |  | \newblock Journal of Logic Programming \textbf{38}(1) (January 1999)  31--54 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{KligerShapiro@ICLP-88} | 
					
						
							|  |  |  |  | Kliger, S., Shapiro, E.: | 
					
						
							|  |  |  |  | \newblock A decision tree compilation algorithm for {FCP($|$,:,?)}. | 
					
						
							|  |  |  |  | \newblock In: Proceedings of the Fifth ICSLP, MIT Press (August 1988) 1315--1336 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{Mercury@JLP-96} | 
					
						
							|  |  |  |  | Somogyi, Z., Henderson, F., Conway, T.: | 
					
						
							|  |  |  |  | \newblock The execution algorithm of {Mercury}, an efficient purely declarative | 
					
						
							|  |  |  |  |   logic programming language. | 
					
						
							|  |  |  |  | \newblock JLP \textbf{26}(1--3) (December 1996)  17--64 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{Ciao@SCP-05} | 
					
						
							|  |  |  |  | Hermenegildo, M.V., Puebla, G., Bueno, F., L{\'o}pez-Garc\'{\i}a, P.: | 
					
						
							|  |  |  |  | \newblock Integrated program debugging, verification, and optimization using | 
					
						
							|  |  |  |  |   abstract interpretation (and the {Ciao} system preprocessor). | 
					
						
							|  |  |  |  | \newblock Science of Computer Programming \textbf{58}(1--2) (2005)  115--140 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{FreezeIndexing@ICLP-87} | 
					
						
							|  |  |  |  | Carlsson, M.: | 
					
						
							|  |  |  |  | \newblock Freeze, indexing, and other implementation issues in the {WAM}. | 
					
						
							|  |  |  |  | \newblock In Lassez, J.L., ed.: Proceedings of the Fourth ICLP, | 
					
						
							|  |  |  |  |   MIT Press (May 1987)  40--58 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \bibitem{ALEPH} | 
					
						
							|  |  |  |  | Srinivasan, A.: | 
					
						
							|  |  |  |  | \newblock The Aleph Manual. (2001) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \end{thebibliography} | 
					
						
							| 
									
										
										
										
											2007-03-06 20:45:15 +00:00
										 |  |  |  | %==============================================================================
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | \end{document} |