700 lines
16 KiB
C
700 lines
16 KiB
C
/* $Id$
|
|
|
|
Part of SWI-Prolog
|
|
|
|
Author: Jan Wielemaker
|
|
E-mail: J.Wielemaker@cs.vu.nl
|
|
WWW: http://www.swi-prolog.org
|
|
Copyright (C): 1985-2011, University of Amsterdam
|
|
VU University Amsterdam
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "pl-incl.h"
|
|
#include "pl-ctype.h"
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#ifdef O_XOS
|
|
# include "windows/dirent.h"
|
|
#else
|
|
#if HAVE_DIRENT_H
|
|
# include <dirent.h>
|
|
#else
|
|
# define dirent direct
|
|
# if HAVE_SYS_NDIR_H
|
|
# include <sys/ndir.h>
|
|
# endif
|
|
# if HAVE_SYS_DIR_H
|
|
# include <sys/dir.h>
|
|
# endif
|
|
# if HAVE_NDIR_H
|
|
# include <ndir.h>
|
|
# endif
|
|
#endif
|
|
#endif /*O_XOS*/
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
#include <sys/stat.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_PARAM_H
|
|
#include <sys/param.h>
|
|
#endif
|
|
|
|
#define O_EXPANDS_TESTS_EXISTS 1
|
|
|
|
#ifndef IS_DIR_SEPARATOR
|
|
#define IS_DIR_SEPARATOR(c) ((c) == '/')
|
|
#endif
|
|
|
|
#define char_to_int(c) (0xff & (int)(c))
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Unix Wildcard Matching. Recognised:
|
|
|
|
? matches one arbitrary character
|
|
* matches any number of any character
|
|
[xa-z] matches x and a-z
|
|
{p1,p2} matches pattern p1 or p2
|
|
|
|
backslash (\) escapes a character.
|
|
|
|
First the pattern is compiled into an intermediate representation. Next
|
|
this intermediate representation is matched against the target. The
|
|
non-ascii characters are used to store control sequences in the
|
|
intermediate representation:
|
|
|
|
ANY Match any character
|
|
STAR Match (possibly empty) sequence
|
|
ALT <offset> Match, if fails, continue at <pc> + offset
|
|
JMP <offset> Jump <offset> instructions
|
|
ANYOF Next 16 bytes are bitmap
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
#define MAXCODE 1024
|
|
|
|
#define ANY 128
|
|
#define STAR 129
|
|
#define ALT 130
|
|
#define JMP 131
|
|
#define ANYOF 132
|
|
#define EXIT 133
|
|
|
|
#define NOCURL 0
|
|
#define CURL 1
|
|
|
|
typedef unsigned char matchcode;
|
|
|
|
typedef struct
|
|
{ int size;
|
|
matchcode code[MAXCODE];
|
|
} compiled_pattern;
|
|
|
|
static char *compile_pattern(compiled_pattern *, char *, int);
|
|
static bool match_pattern(matchcode *, char *);
|
|
|
|
#define Output(c) { if ( Out->size > MAXCODE-1 ) \
|
|
{ warning("pattern too large"); \
|
|
return (char *) NULL; \
|
|
} \
|
|
Out->code[Out->size++] = c; \
|
|
}
|
|
|
|
static inline void
|
|
setMap(matchcode *map, int c)
|
|
{ GET_LD
|
|
|
|
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
|
|
c = makeLower(c);
|
|
|
|
map[(c)/8] |= 1 << ((c) % 8);
|
|
}
|
|
|
|
|
|
static bool
|
|
compilePattern(char *p, compiled_pattern *cbuf)
|
|
{ cbuf->size = 0;
|
|
if ( compile_pattern(cbuf, p, NOCURL) == (char *) NULL )
|
|
fail;
|
|
|
|
succeed;
|
|
}
|
|
|
|
|
|
static char *
|
|
compile_pattern(compiled_pattern *Out, char *p, int curl)
|
|
{ int c;
|
|
|
|
for(;;)
|
|
{ switch(c = char_to_int(*p++))
|
|
{ case EOS:
|
|
break;
|
|
case '\\':
|
|
Output(*p == EOS ? '\\' : (*p & 0x7f));
|
|
if (*p == EOS )
|
|
break;
|
|
p++;
|
|
continue;
|
|
case '?':
|
|
Output(ANY);
|
|
continue;
|
|
case '*':
|
|
Output(STAR);
|
|
continue;
|
|
case '[':
|
|
{ matchcode *map;
|
|
int n;
|
|
|
|
Output(ANYOF);
|
|
map = &Out->code[Out->size];
|
|
Out->size += 16;
|
|
if ( Out->size >= MAXCODE )
|
|
{ warning("Pattern too intptr_t");
|
|
return (char *) NULL;
|
|
}
|
|
|
|
for( n=0; n < 16; n++)
|
|
map[n] = 0;
|
|
|
|
for(;;)
|
|
{ switch( c = *p++ )
|
|
{ case '\\':
|
|
if ( *p == EOS )
|
|
{ warning("Unmatched '['");
|
|
return (char *)NULL;
|
|
}
|
|
setMap(map, *p);
|
|
p++;
|
|
continue;
|
|
case ']':
|
|
break;
|
|
default:
|
|
if ( p[-1] != ']' && p[0] == '-' && p[1] != ']' )
|
|
{ int chr;
|
|
|
|
for ( chr=p[-1]; chr <= p[1]; chr++ )
|
|
setMap(map, chr);
|
|
p += 2;
|
|
} else
|
|
setMap(map, c);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
case '{':
|
|
{ int ai, aj = -1;
|
|
|
|
for(;;)
|
|
{ Output(ALT); ai = Out->size; Output(0);
|
|
if ( (p = compile_pattern(Out, p, CURL)) == (char *) NULL )
|
|
return (char *) NULL;
|
|
if ( aj > 0 )
|
|
Out->code[aj] = Out->size - aj;
|
|
if ( *p == ',' )
|
|
{ Output(JMP); aj = Out->size; Output(0);
|
|
Out->code[ai] = Out->size - ai;
|
|
Output(ALT); ai = Out->size; Output(0);
|
|
p++;
|
|
} else if ( *p == '}' )
|
|
{ p++;
|
|
break;
|
|
} else
|
|
{ warning("Unmatched '{'");
|
|
return (char *) NULL;
|
|
}
|
|
}
|
|
|
|
continue;
|
|
}
|
|
case ANY:
|
|
case STAR:
|
|
case ALT:
|
|
case JMP:
|
|
case ANYOF:
|
|
case EXIT:
|
|
PL_error(NULL, 0, "Reserved character",
|
|
ERR_REPRESENTATION, ATOM_pattern);
|
|
return NULL;
|
|
case '}':
|
|
case ',':
|
|
if ( curl == CURL )
|
|
{ p--;
|
|
return p;
|
|
}
|
|
/*FALLTHROUGH*/
|
|
default:
|
|
{ GET_LD
|
|
|
|
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
|
|
c = makeLower(c);
|
|
Output(c);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
Output(EXIT);
|
|
return p;
|
|
}
|
|
}
|
|
|
|
|
|
static inline bool
|
|
matchPattern(char *s, compiled_pattern *cbuf)
|
|
{ return match_pattern(cbuf->code, s);
|
|
}
|
|
|
|
|
|
static bool
|
|
match_pattern(matchcode *p, char *str)
|
|
{ matchcode c;
|
|
matchcode *s = (matchcode *) str;
|
|
|
|
for(;;)
|
|
{ switch( c = *p++ )
|
|
{ case EXIT:
|
|
return (*s == EOS ? TRUE : FALSE);
|
|
case ANY: /* ? */
|
|
if ( *s == EOS )
|
|
fail;
|
|
s++;
|
|
continue;
|
|
case ANYOF: /* [...] */
|
|
{ GET_LD
|
|
matchcode c2 = *s;
|
|
|
|
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
|
|
c2 = makeLower(c2);
|
|
|
|
if ( p[c2 / 8] & (1 << (c2 % 8)) )
|
|
{ p += 16;
|
|
s++;
|
|
continue;
|
|
}
|
|
fail;
|
|
}
|
|
case STAR: /* * */
|
|
do
|
|
{ if ( match_pattern(p, (char *)s) )
|
|
succeed;
|
|
} while( *s++ );
|
|
fail;
|
|
case JMP: /* { ... } */
|
|
p += *p;
|
|
continue;
|
|
case ALT:
|
|
if ( match_pattern(p+1, (char *)s) )
|
|
succeed;
|
|
p += *p;
|
|
continue;
|
|
default: /* character */
|
|
{ GET_LD
|
|
|
|
if ( c == *s ||
|
|
(!truePrologFlag(PLFLAG_FILE_CASE) && c == makeLower(*s)) )
|
|
{ s++;
|
|
continue;
|
|
}
|
|
fail;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/** wildcard_match(+Pattern, +Name) is semidet.
|
|
*/
|
|
|
|
static
|
|
PRED_IMPL("wildcard_match", 2, wildcard_match, 0)
|
|
{ char *p, *s;
|
|
compiled_pattern buf;
|
|
|
|
if ( !PL_get_chars(A1, &p, CVT_ALL|CVT_EXCEPTION) ||
|
|
!PL_get_chars(A2, &s, CVT_ALL|CVT_EXCEPTION) )
|
|
fail;
|
|
|
|
if ( compilePattern(p, &buf) )
|
|
{ return matchPattern(s, &buf);
|
|
}
|
|
|
|
return PL_error(NULL, 0, NULL, ERR_DOMAIN, ATOM_pattern, A1);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* EXPAND_FILE_NAME/2 *
|
|
*******************************/
|
|
|
|
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
Wildcart expansion of a pattern to a list of files. This code uses two
|
|
`buffers' for storing the intermediate results while limiting
|
|
fragmentation. The `strings' buffer contains all strings generated. The
|
|
files contains indices in the `strings' buffer pointing to the start of
|
|
strings. The indices in the range [start,end) are valid.
|
|
|
|
First this set is filled with the empty string. Next the
|
|
directory-segment with the first wildcart is located. If found, we go
|
|
through the current set, adding the segments without wildcarts, applying
|
|
the wildcart on the directory and adding everything found to the set.
|
|
The old set is deleted by incrementing info.start.
|
|
|
|
If we are at the end, we add the remaining non-wildcart segments to each
|
|
element of the set, deleting it if the result does not exits.
|
|
|
|
Finally we sort the result.
|
|
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
typedef struct
|
|
{ tmp_buffer files; /* our files */
|
|
tmp_buffer strings; /* our strings */
|
|
int start; /* 1-st valid entry of files */
|
|
int end; /* last valid entry of files */
|
|
} glob_info, *GlobInfo;
|
|
|
|
#undef isspecial /* play safe */
|
|
#define isspecial(c) \
|
|
((c) == '[' || (c) == '{' || (c) == '?' || (c) == '*')
|
|
|
|
static void
|
|
free_expand_info(GlobInfo info)
|
|
{ discardBuffer(&info->files);
|
|
discardBuffer(&info->strings);
|
|
}
|
|
|
|
|
|
static void
|
|
add_path(const char *path, GlobInfo info)
|
|
{ int idx = (int)entriesBuffer(&info->strings, char);
|
|
size_t n = strlen(path)+1;
|
|
|
|
addMultipleBuffer(&info->strings, path, n, char);
|
|
addBuffer(&info->files, idx, int);
|
|
info->end++;
|
|
}
|
|
|
|
|
|
static const char *
|
|
expand_str(GlobInfo info, int at)
|
|
{ char *s = &fetchBuffer(&info->strings, at, char);
|
|
|
|
return (const char *)s;
|
|
}
|
|
|
|
|
|
static const char *
|
|
expand_entry(GlobInfo info, int idx)
|
|
{ int at = fetchBuffer(&info->files, idx, int);
|
|
|
|
return expand_str(info, at);
|
|
}
|
|
|
|
|
|
static void
|
|
un_escape(char *to, const char *from, const char *end)
|
|
{ while( from < end )
|
|
{ if ( *from == '\\' && (isspecial(from[1]) || from[1] == '\\') )
|
|
from++;
|
|
*to++ = *from++;
|
|
}
|
|
*to = EOS;
|
|
}
|
|
|
|
|
|
static int
|
|
expand(const char *pattern, GlobInfo info)
|
|
{ const char *pat = pattern;
|
|
compiled_pattern cbuf;
|
|
char prefix[MAXPATHLEN]; /* before first pattern */
|
|
char patbuf[MAXPATHLEN]; /* pattern buffer */
|
|
size_t prefix_len;
|
|
int end, dot;
|
|
|
|
initBuffer(&info->files);
|
|
initBuffer(&info->strings);
|
|
info->start = 0;
|
|
info->end = 0;
|
|
|
|
add_path("", info);
|
|
|
|
for(;;)
|
|
{ const char *s = pat, *head = pat, *tail;
|
|
|
|
for(;;)
|
|
{ int c;
|
|
|
|
switch( (c=*s++) )
|
|
{ case EOS:
|
|
if ( s > pat ) /* something left and expanded */
|
|
{ size_t prefix_len;
|
|
|
|
un_escape(prefix, pat, s);
|
|
prefix_len = strlen(prefix);
|
|
|
|
end = info->end;
|
|
for( ; info->start < end; info->start++ )
|
|
{ char path[MAXPATHLEN];
|
|
const char *entry = expand_entry(info, info->start);
|
|
size_t plen = strlen(entry);
|
|
|
|
if ( plen+prefix_len+2 <= MAXPATHLEN )
|
|
{ strcpy(path, entry);
|
|
if ( prefix[0] && plen > 0 && path[plen-1] != '/' )
|
|
path[plen++] = '/';
|
|
strcpy(&path[plen], prefix);
|
|
if ( end == 1 || AccessFile(path, ACCESS_EXIST) )
|
|
add_path(path, info);
|
|
}
|
|
}
|
|
}
|
|
succeed;
|
|
case '[': /* meta characters: expand */
|
|
case '{':
|
|
case '?':
|
|
case '*':
|
|
break;
|
|
case '\\':
|
|
if ( isspecial(*s) )
|
|
{ s++;
|
|
continue;
|
|
}
|
|
/*FALLTHROUGH*/
|
|
default:
|
|
if ( IS_DIR_SEPARATOR(c) )
|
|
head = s;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
for( tail=s; *tail && !IS_DIR_SEPARATOR(*tail); tail++ )
|
|
;
|
|
|
|
/* By now, head points to the start of the path holding meta characters,
|
|
while tail points to the tail:
|
|
|
|
..../meta*path/....
|
|
^ ^
|
|
head tail
|
|
*/
|
|
un_escape(prefix, pat, head);
|
|
un_escape(patbuf, head, tail);
|
|
prefix_len = strlen(prefix);
|
|
|
|
if ( !compilePattern(patbuf, &cbuf) ) /* syntax error */
|
|
fail;
|
|
dot = (patbuf[0] == '.'); /* do dots as well */
|
|
|
|
end = info->end;
|
|
|
|
for(; info->start < end; info->start++)
|
|
{ DIR *d;
|
|
struct dirent *e;
|
|
char path[MAXPATHLEN];
|
|
char tmp[MAXPATHLEN];
|
|
const char *current = expand_entry(info, info->start);
|
|
size_t clen = strlen(current);
|
|
|
|
if ( clen+prefix_len+1 > sizeof(path) )
|
|
continue;
|
|
|
|
strcpy(path, current);
|
|
strcpy(&path[clen], prefix);
|
|
|
|
if ( (d=opendir(path[0] ? OsPath(path, tmp) : ".")) )
|
|
{ size_t plen = clen+prefix_len;
|
|
|
|
if ( plen > 0 && path[plen-1] != '/' )
|
|
path[plen++] = '/';
|
|
|
|
for(e=readdir(d); e; e = readdir(d))
|
|
{
|
|
#ifdef __MSDOS__
|
|
strlwr(e->d_name);
|
|
#endif
|
|
if ( (dot || e->d_name[0] != '.') &&
|
|
matchPattern(e->d_name, &cbuf) )
|
|
{ char newp[MAXPATHLEN];
|
|
|
|
if ( plen+strlen(e->d_name)+1 < sizeof(newp) )
|
|
{ strcpy(newp, path);
|
|
strcpy(&newp[plen], e->d_name);
|
|
add_path(newp, info);
|
|
}
|
|
}
|
|
}
|
|
closedir(d);
|
|
}
|
|
}
|
|
|
|
pat = tail;
|
|
if ( IS_DIR_SEPARATOR(*pat) )
|
|
pat++;
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
compareBagEntries(const void *a1, const void *a2)
|
|
{ GET_LD
|
|
GlobInfo info = LD->glob_info;
|
|
int i1 = *(int *)a1;
|
|
int i2 = *(int *)a2;
|
|
const char *s1, *s2;
|
|
|
|
s1 = expand_str(info, i1);
|
|
s2 = expand_str(info, i2);
|
|
|
|
if ( truePrologFlag(PLFLAG_FILE_CASE) )
|
|
return mbscoll(s1, s2);
|
|
else
|
|
return mbscasecoll(s1, s2);
|
|
}
|
|
|
|
|
|
static void
|
|
sort_expand(GlobInfo info)
|
|
{ GET_LD
|
|
int *ip = &fetchBuffer(&info->files, info->start, int);
|
|
int is = info->end - info->start;
|
|
|
|
LD->glob_info = info;
|
|
qsort(ip, is, sizeof(int), compareBagEntries);
|
|
}
|
|
|
|
/**
|
|
|
|
@addgroup
|
|
|
|
\pred expand_file_name(+ _WildCard_,- _List_)
|
|
|
|
|
|
This is an SWI-Prolog built-in that unifies _List_ with a sorted list of
|
|
files or directories matching _WildCard_. The normal Unix wildcard
|
|
constructs <tt>?</tt>, <tt>\\\*</tt>, <tt>[ ... ]</tt> and <tt>{...}</tt> are recognised. The
|
|
interpretation of <tt>{...}</tt> is interpreted slightly different from the
|
|
C shell (csh(1)). The comma separated argument can be arbitrary
|
|
patterns, including <tt>{...}</tt> patterns. The empty pattern is legal as
|
|
well: <tt>{.pl,}</tt> matches either <tt>.pl</tt> or the empty string.
|
|
|
|
If the pattern contains wildcard characters, only existing files and
|
|
directories are returned. Expanding a <em>pattern'</em> without wildcard
|
|
characters returns the argument, regardless on whether or not it exists.
|
|
|
|
Before expanding wildcards, the construct $var is expanded to the value
|
|
of the environment variable var and a possible leading ~ character is
|
|
expanded to the user's home directory. In Windows, the home directory is
|
|
determined as follows: if the environment variable `HOME` exists,
|
|
this is used. If the variables `HOMEDRIVE` and `HOMEPATH`
|
|
exist (Windows-NT), these are used. At initialisation, the system will
|
|
set the environment variable `HOME` to point to the YAP home
|
|
directory if neither `HOME` nor `HOMEPATH` and
|
|
`HOMEDRIVE` are defined.
|
|
|
|
*/
|
|
static
|
|
PRED_IMPL("expand_file_name", 2, expand_file_name, 0)
|
|
{ PRED_LD
|
|
char spec[MAXPATHLEN];
|
|
char *s;
|
|
glob_info info;
|
|
term_t l = PL_copy_term_ref(A2);
|
|
term_t head = PL_new_term_ref();
|
|
int i;
|
|
|
|
if ( !PL_get_chars(A1, &s, CVT_ALL|REP_FN|CVT_EXCEPTION) )
|
|
fail;
|
|
if ( strlen(s) > sizeof(spec)-1 )
|
|
return PL_error(NULL, 0, NULL, ERR_REPRESENTATION,
|
|
ATOM_max_path_length);
|
|
|
|
if ( !expandVars(s, spec, sizeof(spec)) )
|
|
fail;
|
|
if ( !expand(spec, &info) )
|
|
goto failout;
|
|
sort_expand(&info);
|
|
|
|
for( i = info.start; i< info.end; i++ )
|
|
{ const char *e = expand_entry(&info, i);
|
|
|
|
if ( !PL_unify_list(l, head, l) ||
|
|
!PL_unify_chars(head, PL_ATOM|REP_FN, -1, e) )
|
|
goto failout;
|
|
}
|
|
|
|
if ( !PL_unify_nil(l) )
|
|
{ failout:
|
|
free_expand_info(&info);
|
|
fail;
|
|
}
|
|
|
|
free_expand_info(&info);
|
|
succeed;
|
|
}
|
|
|
|
|
|
/** directory_files(+Dir, -Files) is det.
|
|
|
|
Files is a list of atoms that describe the entries in Dir.
|
|
*/
|
|
|
|
static
|
|
PRED_IMPL("directory_files", 2, directory_files, 0)
|
|
{ PRED_LD
|
|
char *dname;
|
|
DIR *dir;
|
|
|
|
if ( !PL_get_file_name(A1, &dname, PL_FILE_READ|PL_FILE_OSPATH) )
|
|
return FALSE;
|
|
|
|
if ( (dir=opendir(dname)) )
|
|
{ struct dirent *e;
|
|
term_t tail = PL_copy_term_ref(A2);
|
|
term_t head = PL_new_term_ref();
|
|
|
|
for(e=readdir(dir); e; e = readdir(dir))
|
|
{ PL_put_variable(head);
|
|
if ( PL_handle_signals() < 0 ||
|
|
!PL_unify_list(tail, head, tail) ||
|
|
!PL_unify_chars(head, PL_ATOM|REP_FN, (size_t)-1, e->d_name) )
|
|
{ closedir(dir);
|
|
return FALSE;
|
|
}
|
|
}
|
|
closedir(dir);
|
|
|
|
return PL_unify_nil(tail);
|
|
}
|
|
|
|
return PL_error(NULL, 0, OsError(), ERR_FILE_OPERATION,
|
|
ATOM_open, ATOM_directory, A1);
|
|
}
|
|
|
|
|
|
/*******************************
|
|
* PUBLISH PREDICATES *
|
|
*******************************/
|
|
|
|
BeginPredDefs(glob)
|
|
PRED_DEF("expand_file_name", 2, expand_file_name, 0)
|
|
PRED_DEF("wildcard_match", 2, wildcard_match, 0)
|
|
PRED_DEF("directory_files", 2, directory_files, 0)
|
|
EndPredDefs
|