/*  $Id$

    Part of the SWI-Prolog Semweb package

    Author:        Jan Wielemaker
    E-mail:        wielemak@science.uva.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 2006, University of Amsterdam

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
 | 
						|
 | 
						|
#ifdef HAVE_CONFIG_H
 | 
						|
#include <config.h>
 | 
						|
#endif
 | 
						|
 | 
						|
#include <SWI-Stream.h>
 | 
						|
#include <SWI-Prolog.h>
 | 
						|
#include "atom.h"
 | 
						|
#include "murmur.h"
 | 
						|
#include <wchar.h>
 | 
						|
#include <wctype.h>
 | 
						|
#include <assert.h>
 | 
						|
 | 
						|
#ifdef __WINDOWS__
 | 
						|
#define inline __inline
 | 
						|
#endif
 | 
						|
 | 
						|
#include "unicode_map.c"
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	   TEXT HANDLING	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static inline int
 | 
						|
get_atom_text(atom_t atom, text *txt)
 | 
						|
{ if ( (txt->a = (const charA*)PL_atom_nchars(atom, &txt->length)) )
 | 
						|
  { txt->w = NULL;
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
  if ( (txt->w = (const charW*)PL_atom_wchars(atom, &txt->length)) )
 | 
						|
  { txt->a = NULL;
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
inline wint_t
 | 
						|
fetch(const text *txt, int i)
 | 
						|
{ return txt->a ? (wint_t)txt->a[i] : (wint_t)txt->w[i];
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
fill_atom_info(atom_info *info)
 | 
						|
{ if ( !info->resolved )
 | 
						|
  { info->resolved = TRUE;
 | 
						|
 | 
						|
    if ( !(info->rc=get_atom_text(info->handle, &info->text)) )
 | 
						|
    { info->text.a = NULL;
 | 
						|
      info->text.w = NULL;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return info->rc;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	      COMPARE		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* Compare two narrow characters by their sort_pointA() keys.

   The return value is the difference of the keys shifted right by 8
   bits; it is 0 when the characters are identical.  When the shifted
   keys are equal but the characters differ, the difference of the low
   8 bits of the keys is stored in *dl2, but only if *dl2 is still 0,
   so the first such difference seen by the caller is the one retained.
*/

static inline int
cmpA(int c1, int c2, int *dl2)
{ int key1, key2, primary;

  if ( c1 == c2 )
    return 0;

  key1 = sort_pointA(c1);
  key2 = sort_pointA(c2);
  primary = (key1>>8) - (key2>>8);

  if ( primary == 0 && *dl2 == 0 )
    *dl2 = (key1&0xff) - (key2&0xff);

  return primary;
}
 | 
						|
 | 
						|
 | 
						|
/* Compare two characters by their sort_point() keys.

   The return value is the difference of the keys shifted right by 8
   bits; it is 0 when the characters are identical.  When the shifted
   keys are equal but the characters differ, the difference of the low
   8 bits of the keys is stored in *dl2, but only if *dl2 is still 0,
   so the first such difference seen by the caller is the one retained.
*/

static inline int
cmpW(int c1, int c2, int *dl2)
{ int key1, key2, primary;

  if ( c1 == c2 )
    return 0;

  key1 = sort_point(c1);
  key2 = sort_point(c2);
  primary = (key1>>8) - (key2>>8);

  if ( primary == 0 && *dl2 == 0 )
    *dl2 = (key1&0xff) - (key2&0xff);

  return primary;
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
cmp_atom_info(atom_info *info, atom_t a2)
 | 
						|
{ text t2;
 | 
						|
  int i;
 | 
						|
  int dl2 = 0;
 | 
						|
  size_t n;
 | 
						|
 | 
						|
  if ( info->handle == a2 )
 | 
						|
    return 0;
 | 
						|
 | 
						|
  if ( !fill_atom_info(info) ||
 | 
						|
       !get_atom_text(a2, &t2) )
 | 
						|
  { goto cmphandles;			/* non-text atoms? */
 | 
						|
  }
 | 
						|
 | 
						|
  if ( info->text.a && t2.a )
 | 
						|
  { const charA *s1 = info->text.a;
 | 
						|
    const charA *s2 = t2.a;
 | 
						|
    int d;
 | 
						|
 | 
						|
    while((d=cmpA(*s1, *s2, &dl2)) == 0)
 | 
						|
    { if ( *s1 == 0 )
 | 
						|
	goto eq;
 | 
						|
      s1++, s2++;
 | 
						|
    }
 | 
						|
    return d;
 | 
						|
  }
 | 
						|
 | 
						|
  n = (info->text.length < t2.length ? info->text.length : t2.length);
 | 
						|
 | 
						|
  if ( info->text.w && t2.w )
 | 
						|
  { const charW *s1 = info->text.w;
 | 
						|
    const charW *s2 = t2.w;
 | 
						|
 | 
						|
    for(;;s1++, s2++)
 | 
						|
    { if ( n-- == 0 )
 | 
						|
      { if ( info->text.length == t2.length )
 | 
						|
	  goto eq;
 | 
						|
 | 
						|
	return info->text.length < t2.length ? -1 : 1;
 | 
						|
      } else
 | 
						|
      { int d;
 | 
						|
 | 
						|
	if ( (d=cmpW(*s1, *s2, &dl2)) != 0 )
 | 
						|
	  return d;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  for(i=0; ; i++)
 | 
						|
  { if ( n-- == 0 )
 | 
						|
    { if ( info->text.length == t2.length )
 | 
						|
	  goto eq;
 | 
						|
 | 
						|
      return info->text.length < t2.length ? -1 : 1;
 | 
						|
    } else
 | 
						|
    { wint_t c1 = fetch(&info->text, i);
 | 
						|
      wint_t c2 = fetch(&t2, i);
 | 
						|
      int d;
 | 
						|
 | 
						|
      if ( (d=cmpW(c1, c2, &dl2)) != 0 )
 | 
						|
	return d;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
eq:
 | 
						|
  if ( dl2 )
 | 
						|
    return dl2;
 | 
						|
 | 
						|
cmphandles:
 | 
						|
  return info->handle < a2 ? -1 : 1;		/* == already covered */
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
cmp_atoms(atom_t a1, atom_t a2)
 | 
						|
{ atom_info info = {0};
 | 
						|
 | 
						|
  if ( a1 == a2 )
 | 
						|
    return 0;
 | 
						|
 | 
						|
  info.handle = a1;
 | 
						|
 | 
						|
  return cmp_atom_info(&info, a2);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       HASH		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
static unsigned int
 | 
						|
string_hashA(const char *s, size_t len)
 | 
						|
{ const unsigned char *t = (const unsigned char *)s;
 | 
						|
  unsigned int hash = 0;
 | 
						|
 | 
						|
  while( len>0 )
 | 
						|
  { unsigned char buf[256];
 | 
						|
    unsigned char *o = buf-1;
 | 
						|
    int cp = len > 256 ? 256 : (int)len;
 | 
						|
    const unsigned char *e = t+cp;
 | 
						|
 | 
						|
    t--;
 | 
						|
    while(++t<e)
 | 
						|
      *++o = sort_pointA(*t)>>8;
 | 
						|
    hash ^= rdf_murmer_hash(buf, cp, MURMUR_SEED);
 | 
						|
 | 
						|
    len -= cp;
 | 
						|
  }
 | 
						|
 | 
						|
  return hash;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static unsigned int
 | 
						|
string_hashW(const wchar_t *t, size_t len)
 | 
						|
{ unsigned int hash = 0;
 | 
						|
 | 
						|
  while( len>0 )
 | 
						|
  { unsigned short buf[256];
 | 
						|
    unsigned short *o = buf;
 | 
						|
    int cp = len > 256 ? 256 : (int)len;
 | 
						|
    const wchar_t *e = t+cp;
 | 
						|
 | 
						|
    while(t<e)
 | 
						|
      *o++ = (short)(sort_point(*t++)>>8);
 | 
						|
    hash ^= rdf_murmer_hash(buf, cp*sizeof(short), MURMUR_SEED);
 | 
						|
 | 
						|
    len -= cp;
 | 
						|
  }
 | 
						|
 | 
						|
  return hash;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
unsigned int
 | 
						|
atom_hash_case(atom_t a)
 | 
						|
{ const char *s;
 | 
						|
  const wchar_t *w;
 | 
						|
  size_t len;
 | 
						|
 | 
						|
  if ( (s = PL_atom_nchars(a, &len)) )
 | 
						|
    return string_hashA(s, len);
 | 
						|
  else if ( (w = PL_atom_wchars(a, &len)) )
 | 
						|
    return string_hashW(w, len);
 | 
						|
  else
 | 
						|
  { assert(0);
 | 
						|
    return 0;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	    FIND FIRST		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 | 
						|
Given an atom, return a new  one   that  has all its characters modified
 | 
						|
such that it appears first in the   set  of atoms considered equal after
 | 
						|
case canonisation and diacritics removal. This   is  required for prefix
 | 
						|
search to find the first atom of the set.
 | 
						|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
atom_t
 | 
						|
first_atom(atom_t a, int match)
 | 
						|
{ text t;
 | 
						|
 | 
						|
  if ( !get_atom_text(a, &t) )
 | 
						|
  { return (atom_t)0;			/* not a textual atom */
 | 
						|
  } else
 | 
						|
  { size_t len = t.length;
 | 
						|
    wchar_t buf[256];
 | 
						|
    wchar_t *out, *s;
 | 
						|
    int i;
 | 
						|
    wint_t c;
 | 
						|
    atom_t rc;
 | 
						|
 | 
						|
    if ( len <= 256 )
 | 
						|
      out = buf;
 | 
						|
    else
 | 
						|
      out = PL_malloc(len*sizeof(wchar_t));
 | 
						|
 | 
						|
    for(s=out,i=0; (c=fetch(&t,i)); s++,i++)
 | 
						|
    { if ( c == '*' && match == STR_MATCH_LIKE )
 | 
						|
      { if ( i == 0 )			/* like '*...' */
 | 
						|
	  return (atom_t)0;
 | 
						|
	len = i;			/* only up to the first * */
 | 
						|
      }
 | 
						|
      *s = sort_point(c)>>8;
 | 
						|
    }
 | 
						|
 | 
						|
    rc = PL_new_atom_wchars(len, out);
 | 
						|
 | 
						|
    if ( out != buf )
 | 
						|
      PL_free(out);
 | 
						|
 | 
						|
    return rc;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	       MATCH		*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
With the introduction of wide characters there are two versions of the
match() function, one using char* and one using a structure and index to
fetch characters. Overall performance of the first function is about
twice as good as the general one and, as most data will be handled by
this function in practice, I think it is worthwhile to have two
implementations. Both implementations are very similar in design and
likely to have the same bugs. If you find one, please fix it in both
branches!
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 | 
						|
 | 
						|
/* Advance `s` to the start of the next word: skip the characters for
   which iswalnum() is true, then skip the characters for which it is
   false.  Scanning stops at the terminating 0 character, so the result
   may point at the end of the string.
*/

static const charA *
nextwordA(const charA *s)
{ for( ; *s && iswalnum(*s); s++ )
    ;
  for( ; *s && !iswalnum(*s); s++ )
    ;

  return s;
}
 | 
						|
 | 
						|
 | 
						|
#define cmp_pointA(i) (sort_pointA(i)>>8)
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
matchA(int how, const charA *f, const charA *l)
 | 
						|
{ switch(how)
 | 
						|
  { case STR_MATCH_EXACT:
 | 
						|
    { for( ; *l && *f; l++, f++ )
 | 
						|
      { if ( cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	  return FALSE;
 | 
						|
      }
 | 
						|
      if ( *l == '\0' && *f == '\0' )
 | 
						|
	return TRUE;
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_PREFIX:
 | 
						|
    { for( ; *l && *f; l++, f++ )
 | 
						|
      { if ( cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	  return FALSE;
 | 
						|
      }
 | 
						|
      if ( *f == '\0' )
 | 
						|
	return TRUE;
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_SUBSTRING:		/* use Boyle-More! */
 | 
						|
    { const charA *h;
 | 
						|
      const charA *f0 = f;
 | 
						|
 | 
						|
      for(h=l; *h; h++)
 | 
						|
      { for( l=h,f=f0; *l && *f; l++, f++ )
 | 
						|
	{ if ( cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	    break;
 | 
						|
	}
 | 
						|
	if ( *f == '\0' )
 | 
						|
	  return TRUE;
 | 
						|
	if ( *h == '\0' )
 | 
						|
	  return FALSE;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_WORD:
 | 
						|
    { const charA *h;
 | 
						|
      const charA *f0 = f;
 | 
						|
 | 
						|
      for(h=l; *h; h = nextwordA(h))
 | 
						|
      { for( l=h,f=f0; *l && *f; l++, f++ )
 | 
						|
	{ if ( cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	    break;
 | 
						|
	}
 | 
						|
	if ( *f == '\0' )
 | 
						|
	{ if ( *l == '\0' || !iswalnum(*l) )
 | 
						|
	    return TRUE;
 | 
						|
	}
 | 
						|
	if ( *l == '\0' )
 | 
						|
	  return FALSE;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_LIKE:		/* SeRQL like: * --> wildcart */
 | 
						|
    { typedef struct chp { const charA *pattern;
 | 
						|
			   const charA *label; } chp;
 | 
						|
      chp chps[MAX_LIKE_CHOICES];
 | 
						|
      int chn=0;
 | 
						|
 | 
						|
      for( ; *l && *f; l++, f++ )
 | 
						|
      { if ( *f == '*' )
 | 
						|
	{ f++;
 | 
						|
 | 
						|
	  if ( *f == '\0' )		/* foo* */
 | 
						|
	    return TRUE;
 | 
						|
 | 
						|
	search_like:
 | 
						|
	  while ( *l && cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	    l++;
 | 
						|
 | 
						|
	  if ( *l )
 | 
						|
	  { if ( chn >= MAX_LIKE_CHOICES )
 | 
						|
	    { Sdprintf("rdf_db: too many * in `like' expression (>%d)",
 | 
						|
		       MAX_LIKE_CHOICES);
 | 
						|
	      return FALSE;
 | 
						|
	    }
 | 
						|
	    chps[chn].pattern = f;
 | 
						|
	    chps[chn].label   = l+1;
 | 
						|
	    chn++;
 | 
						|
 | 
						|
	    continue;
 | 
						|
	  } else
 | 
						|
	    goto retry_like;
 | 
						|
	}
 | 
						|
 | 
						|
	if ( cmp_pointA(*l) != cmp_pointA(*f) )
 | 
						|
	  goto retry_like;
 | 
						|
      }
 | 
						|
      if ( *l == '\0' && (*f == '\0' ||
 | 
						|
			 (*f == '*' && f[1] == '\0')) )
 | 
						|
	return TRUE;
 | 
						|
 | 
						|
retry_like:
 | 
						|
      if ( chn > 0 )
 | 
						|
      { chn--;
 | 
						|
	f = chps[chn].pattern;
 | 
						|
	l = chps[chn].label;
 | 
						|
	goto search_like;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      return FALSE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static unsigned int
 | 
						|
nextword(text *txt, unsigned int i)
 | 
						|
{ while(i<txt->length && iswalnum(fetch(txt, i)))
 | 
						|
    i++;
 | 
						|
  while(i<txt->length && !iswalnum(fetch(txt, i)))
 | 
						|
    i++;
 | 
						|
 | 
						|
  return i;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#define cmp_point(i) (sort_point(i)>>8)
 | 
						|
 | 
						|
 | 
						|
int
 | 
						|
match_atoms(int how, atom_t search, atom_t label)
 | 
						|
{ text l, f;
 | 
						|
 | 
						|
  if ( !get_atom_text(label, &l) ||
 | 
						|
       !get_atom_text(search, &f) )
 | 
						|
    return FALSE;			/* error? */
 | 
						|
 | 
						|
  if ( f.length == 0 )
 | 
						|
    return TRUE;
 | 
						|
 | 
						|
  if ( f.a && l.a )
 | 
						|
    return matchA(how, f.a, l.a);
 | 
						|
 | 
						|
  switch(how)
 | 
						|
  { case STR_MATCH_EXACT:
 | 
						|
    { if ( l.length == f.length )
 | 
						|
      { unsigned int i;
 | 
						|
 | 
						|
	for(i=0; i<l.length; i++ )
 | 
						|
	{ if ( cmp_point(fetch(&l, i)) != cmp_point(fetch(&f, i)) )
 | 
						|
	    return FALSE;
 | 
						|
	}
 | 
						|
 | 
						|
        return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_PREFIX:
 | 
						|
    { if ( f.length <= l.length )
 | 
						|
      { unsigned int i;
 | 
						|
 | 
						|
	for(i=0; i<f.length; i++ )
 | 
						|
	{ if ( cmp_point(fetch(&l, i)) != cmp_point(fetch(&f, i)) )
 | 
						|
	    return FALSE;
 | 
						|
	}
 | 
						|
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_SUBSTRING:		/* use Boyle-More! */
 | 
						|
    { if ( f.length <= l.length )
 | 
						|
      { unsigned int i, s;
 | 
						|
 | 
						|
	for(s=0; s+f.length <= l.length; s++)
 | 
						|
	{ for(i=0; i<f.length; i++)
 | 
						|
	  { if ( cmp_point(fetch(&l, i+s)) != cmp_point(fetch(&f, i)) )
 | 
						|
	      goto snext;
 | 
						|
	  }
 | 
						|
	  return TRUE;
 | 
						|
 | 
						|
	snext:;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_WORD:
 | 
						|
    { if ( f.length <= l.length )
 | 
						|
      { unsigned int i, s;
 | 
						|
 | 
						|
	for(s=0; s+f.length <= l.length; s = nextword(&l, s))
 | 
						|
	{ for(i=0; i<f.length; i++)
 | 
						|
	  { if ( cmp_point(fetch(&l, i+s)) != cmp_point(fetch(&f, i)) )
 | 
						|
	      goto wnext;
 | 
						|
	  }
 | 
						|
	  if ( i+s == l.length || !iswalnum(fetch(&l,i+s)) )
 | 
						|
	    return TRUE;
 | 
						|
 | 
						|
	wnext:;
 | 
						|
	}
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    case STR_MATCH_LIKE:		/* SeRQL like: * --> wildcart */
 | 
						|
    { unsigned int ip, il;
 | 
						|
      typedef struct chp { unsigned int ip;
 | 
						|
			   unsigned int il;
 | 
						|
			 } chp;
 | 
						|
      chp chps[MAX_LIKE_CHOICES];
 | 
						|
      int chn=0;
 | 
						|
 | 
						|
      for(ip=il=0; il < l.length && ip < f.length; ip++, il++ )
 | 
						|
      { if ( fetch(&f, ip) == '*' )
 | 
						|
	{ ip++;
 | 
						|
 | 
						|
	  if ( ip == f.length )		/* foo* */
 | 
						|
	    return TRUE;
 | 
						|
 | 
						|
	search_like:
 | 
						|
	  while ( il < l.length &&
 | 
						|
		  cmp_point(fetch(&l, il)) != cmp_point(fetch(&f, ip)) )
 | 
						|
	    il++;
 | 
						|
 | 
						|
	  if ( il < l.length )
 | 
						|
	  { if ( chn >= MAX_LIKE_CHOICES )
 | 
						|
	    { Sdprintf("rdf_db: too many * in `like' expression (>%d)",
 | 
						|
		       MAX_LIKE_CHOICES);
 | 
						|
	      return FALSE;
 | 
						|
	    }
 | 
						|
	    chps[chn].ip = ip;
 | 
						|
	    chps[chn].il = il+1;
 | 
						|
	    chn++;
 | 
						|
 | 
						|
	    continue;
 | 
						|
	  } else
 | 
						|
	    goto retry_like;
 | 
						|
	}
 | 
						|
 | 
						|
	if ( cmp_point(fetch(&l, il)) != cmp_point(fetch(&f, ip)) )
 | 
						|
	  goto retry_like;
 | 
						|
      }
 | 
						|
      if ( il == l.length && (ip == f.length ||
 | 
						|
			      (fetch(&f,ip) == '*' && ip+1 == f.length)) )
 | 
						|
	return TRUE;
 | 
						|
 | 
						|
retry_like:
 | 
						|
      if ( chn > 0 )
 | 
						|
      { chn--;
 | 
						|
	ip = chps[chn].ip;
 | 
						|
	il = chps[chn].il;
 | 
						|
	goto search_like;
 | 
						|
      }
 | 
						|
 | 
						|
      return FALSE;
 | 
						|
    }
 | 
						|
    default:
 | 
						|
      assert(0);
 | 
						|
      return FALSE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
		 /*******************************
 | 
						|
		 *	  LANGUAGE MATCH	*
 | 
						|
		 *******************************/
 | 
						|
 | 
						|
typedef struct lang_choice
 | 
						|
{ int langp;				/* points after - */
 | 
						|
  int patp;				/* points after *- */
 | 
						|
} lang_choice;
 | 
						|
 | 
						|
#define MAX_CHOICES 10			/* Max number of stars */
 | 
						|
 | 
						|
typedef struct
 | 
						|
{ int il, ip;
 | 
						|
  text l, p;
 | 
						|
  lang_choice choicepoints[MAX_CHOICES];
 | 
						|
  int choice_count;
 | 
						|
} lang_state;
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
create_chp(lang_state *s)
 | 
						|
{ if ( s->choice_count < MAX_CHOICES )
 | 
						|
  { lang_choice *cp = &s->choicepoints[s->choice_count];
 | 
						|
 | 
						|
    cp->langp = s->il;
 | 
						|
    cp->patp = s->ip+2;
 | 
						|
    s->choice_count++;
 | 
						|
 | 
						|
    return TRUE;
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
next_choice(lang_state *s)
 | 
						|
{ for ( ; s->choice_count > 0; s->choice_count-- )
 | 
						|
  { lang_choice *cp = &s->choicepoints[s->choice_count-1];
 | 
						|
    int il = cp->langp;
 | 
						|
 | 
						|
    for(; il<s->l.length; il++)
 | 
						|
    { if ( fetch(&s->l, il) == '-' )
 | 
						|
      { cp->langp = s->il = il+1;
 | 
						|
	s->ip = cp->patp;
 | 
						|
	return TRUE;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return FALSE;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static atom_t ATOM_;			/* '' (created on first use) */
static atom_t ATOM_star;		/* '*' (created on first use) */

/* Test whether language atom `lang` matches `pattern`.

   Returns TRUE if the two atoms are identical, FALSE if `lang` is the
   empty atom and TRUE if `pattern` is the atom '*'.  Otherwise the texts
   are compared character by character, treating characters with the
   same sort_point() key (shifted right by 8 bits) as equal.  The match
   succeeds when the pattern is exhausted, or when a '*' is reached that
   is the last pattern character or is reached at the end of `lang`.  A
   '*' that forms a complete pattern component (at the start or after
   '-', and followed by '-') creates a backtrack point; next_choice()
   then resumes the language text after each subsequent '-' in turn.
   Returns FALSE if either atom has no text, if no alternative is left
   or if more than MAX_CHOICES backtrack points would be needed.

   The indices are advanced only after a successful character match.
   next_choice() already positions both indices on the next characters
   to compare; advancing them again after a restore would skip one
   character in both texts (making e.g. "*-XB" match "en-GB").  After
   every restore the end-of-text tests are repeated before any character
   is read.
*/

int
atom_lang_matches(atom_t lang, atom_t pattern)
{ lang_state s = {0};
  int cl, cp;

  if ( lang == pattern )		/* exact match */
    return TRUE;

  if ( !ATOM_ )
  { ATOM_ = PL_new_atom("");
    ATOM_star = PL_new_atom("*");
  }

  if ( lang == ATOM_ )			/* no language */
    return FALSE;
  if ( pattern == ATOM_star )		/* Everything matches "*" */
    return TRUE;

  if ( !get_atom_text(lang, &s.l) ||
       !get_atom_text(pattern, &s.p) )
    return FALSE;			/* exception? */

  s.il = 0;
  s.ip = 0;

  for(;;)
  { if ( (size_t)s.ip == s.p.length )	/* pattern exhausted */
      return TRUE;

    if ( (size_t)s.il == s.l.length )	/* language exhausted */
    { if ( fetch(&s.p, s.ip) == '*' )
	return TRUE;
      if ( !next_choice(&s) )
	return FALSE;
      continue;				/* re-check the restored indices */
    }

    cl = fetch(&s.l, s.il);
    cp = fetch(&s.p, s.ip);
    if ( cl == cp || sort_point(cl)>>8 == sort_point(cp)>>8 )
    { s.ip++;
      s.il++;
      continue;
    }

    if ( cp == '*' )
    { if ( (size_t)s.ip+1 == s.p.length )
	return TRUE;
      if ( (s.ip == 0 || fetch(&s.p, s.ip-1) == '-') &&
	   fetch(&s.p, s.ip+1) == '-' )
      { if ( !create_chp(&s) )
	  return FALSE;
      }
    }

    if ( !next_choice(&s) )
      return FALSE;
  }
}
 |