This repository has been archived on 2023-08-20. You can view files and clone it, but cannot push or open issues or pull requests.
yap-6.3/library/dialect/swi/os/pl-glob.c

700 lines
16 KiB
C
Raw Normal View History

/* $Id$
Part of SWI-Prolog
Author: Jan Wielemaker
2013-01-16 11:28:58 +00:00
E-mail: J.Wielemaker@cs.vu.nl
WWW: http://www.swi-prolog.org
2013-01-16 11:28:58 +00:00
Copyright (C): 1985-2011, University of Amsterdam
VU University Amsterdam
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
2013-01-16 11:28:58 +00:00
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "pl-incl.h"
#include "pl-ctype.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
2013-01-16 11:28:58 +00:00
#ifdef O_XOS
# include "windows/dirent.h"
#else
#if HAVE_DIRENT_H
# include <dirent.h>
#else
# define dirent direct
# if HAVE_SYS_NDIR_H
# include <sys/ndir.h>
# endif
# if HAVE_SYS_DIR_H
# include <sys/dir.h>
# endif
# if HAVE_NDIR_H
# include <ndir.h>
# endif
#endif
2013-01-16 11:28:58 +00:00
#endif /*O_XOS*/
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#define O_EXPANDS_TESTS_EXISTS 1
#ifndef IS_DIR_SEPARATOR
#define IS_DIR_SEPARATOR(c) ((c) == '/')
#endif
#define char_to_int(c) (0xff & (int)(c))
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Unix Wildcard Matching. Recognised:
? matches one arbitrary character
* matches any number of any character
[xa-z] matches x and a-z
{p1,p2} matches pattern p1 or p2
backslash (\) escapes a character.
First the pattern is compiled into an intermediate representation. Next
this intermediate representation is matched against the target. The
non-ascii characters are used to store control sequences in the
intermediate representation:
ANY Match any character
STAR Match (possibly empty) sequence
ALT <offset> Match, if fails, continue at <pc> + offset
JMP <offset> Jump <offset> instructions
ANYOF Next 16 bytes are bitmap
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#define MAXCODE 1024
#define ANY 128
#define STAR 129
#define ALT 130
#define JMP 131
#define ANYOF 132
#define EXIT 133
#define NOCURL 0
#define CURL 1
typedef unsigned char matchcode;
typedef struct
{ int size;
matchcode code[MAXCODE];
} compiled_pattern;
static char *compile_pattern(compiled_pattern *, char *, int);
static bool match_pattern(matchcode *, char *);
#define Output(c) { if ( Out->size > MAXCODE-1 ) \
{ warning("pattern too large"); \
return (char *) NULL; \
} \
Out->code[Out->size++] = c; \
}
static inline void
setMap(matchcode *map, int c)
{ GET_LD
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
c = makeLower(c);
map[(c)/8] |= 1 << ((c) % 8);
}
static bool
compilePattern(char *p, compiled_pattern *cbuf)
{ cbuf->size = 0;
if ( compile_pattern(cbuf, p, NOCURL) == (char *) NULL )
fail;
succeed;
}
static char *
compile_pattern(compiled_pattern *Out, char *p, int curl)
{ int c;
for(;;)
{ switch(c = char_to_int(*p++))
{ case EOS:
break;
case '\\':
Output(*p == EOS ? '\\' : (*p & 0x7f));
if (*p == EOS )
break;
p++;
continue;
case '?':
Output(ANY);
continue;
case '*':
Output(STAR);
continue;
case '[':
{ matchcode *map;
int n;
Output(ANYOF);
map = &Out->code[Out->size];
Out->size += 16;
if ( Out->size >= MAXCODE )
{ warning("Pattern too intptr_t");
return (char *) NULL;
}
for( n=0; n < 16; n++)
map[n] = 0;
for(;;)
{ switch( c = *p++ )
{ case '\\':
if ( *p == EOS )
{ warning("Unmatched '['");
return (char *)NULL;
}
setMap(map, *p);
p++;
continue;
case ']':
break;
default:
if ( p[-1] != ']' && p[0] == '-' && p[1] != ']' )
{ int chr;
for ( chr=p[-1]; chr <= p[1]; chr++ )
setMap(map, chr);
p += 2;
} else
setMap(map, c);
continue;
}
break;
}
continue;
}
case '{':
{ int ai, aj = -1;
for(;;)
{ Output(ALT); ai = Out->size; Output(0);
if ( (p = compile_pattern(Out, p, CURL)) == (char *) NULL )
return (char *) NULL;
if ( aj > 0 )
Out->code[aj] = Out->size - aj;
if ( *p == ',' )
{ Output(JMP); aj = Out->size; Output(0);
Out->code[ai] = Out->size - ai;
Output(ALT); ai = Out->size; Output(0);
p++;
} else if ( *p == '}' )
{ p++;
break;
} else
{ warning("Unmatched '{'");
return (char *) NULL;
}
}
continue;
}
case ANY:
case STAR:
case ALT:
case JMP:
case ANYOF:
case EXIT:
PL_error(NULL, 0, "Reserved character",
ERR_REPRESENTATION, ATOM_pattern);
return NULL;
case '}':
case ',':
if ( curl == CURL )
{ p--;
return p;
}
/*FALLTHROUGH*/
default:
{ GET_LD
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
c = makeLower(c);
Output(c);
continue;
}
}
Output(EXIT);
return p;
}
}
static inline bool
matchPattern(char *s, compiled_pattern *cbuf)
{ return match_pattern(cbuf->code, s);
}
static bool
match_pattern(matchcode *p, char *str)
{ matchcode c;
matchcode *s = (matchcode *) str;
for(;;)
{ switch( c = *p++ )
{ case EXIT:
return (*s == EOS ? TRUE : FALSE);
case ANY: /* ? */
if ( *s == EOS )
fail;
s++;
continue;
case ANYOF: /* [...] */
{ GET_LD
matchcode c2 = *s;
if ( !truePrologFlag(PLFLAG_FILE_CASE) )
c2 = makeLower(c2);
if ( p[c2 / 8] & (1 << (c2 % 8)) )
{ p += 16;
s++;
continue;
}
fail;
}
case STAR: /* * */
do
{ if ( match_pattern(p, (char *)s) )
succeed;
} while( *s++ );
fail;
case JMP: /* { ... } */
p += *p;
continue;
case ALT:
if ( match_pattern(p+1, (char *)s) )
succeed;
p += *p;
continue;
default: /* character */
{ GET_LD
if ( c == *s ||
(!truePrologFlag(PLFLAG_FILE_CASE) && c == makeLower(*s)) )
{ s++;
continue;
}
fail;
}
}
}
}
/** wildcard_match(+Pattern, +Name) is semidet.
*/
static
PRED_IMPL("wildcard_match", 2, wildcard_match, 0)
{ char *p, *s;
compiled_pattern buf;
2013-01-16 11:28:58 +00:00
if ( !PL_get_chars(A1, &p, CVT_ALL|CVT_EXCEPTION) ||
!PL_get_chars(A2, &s, CVT_ALL|CVT_EXCEPTION) )
fail;
if ( compilePattern(p, &buf) )
{ return matchPattern(s, &buf);
}
return PL_error(NULL, 0, NULL, ERR_DOMAIN, ATOM_pattern, A1);
}
/*******************************
* EXPAND_FILE_NAME/2 *
*******************************/
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Wildcart expansion of a pattern to a list of files. This code uses two
`buffers' for storing the intermediate results while limiting
fragmentation. The `strings' buffer contains all strings generated. The
files contains indices in the `strings' buffer pointing to the start of
strings. The indices in the range [start,end) are valid.
First this set is filled with the empty string. Next the
directory-segment with the first wildcart is located. If found, we go
through the current set, adding the segments without wildcarts, applying
the wildcart on the directory and adding everything found to the set.
The old set is deleted by incrementing info.start.
If we are at the end, we add the remaining non-wildcart segments to each
element of the set, deleting it if the result does not exits.
Finally we sort the result.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
typedef struct
{ tmp_buffer files; /* our files */
tmp_buffer strings; /* our strings */
int start; /* 1-st valid entry of files */
int end; /* last valid entry of files */
} glob_info, *GlobInfo;
#undef isspecial /* play safe */
#define isspecial(c) \
((c) == '[' || (c) == '{' || (c) == '?' || (c) == '*')
static void
free_expand_info(GlobInfo info)
{ discardBuffer(&info->files);
discardBuffer(&info->strings);
}
static void
add_path(const char *path, GlobInfo info)
{ int idx = (int)entriesBuffer(&info->strings, char);
size_t n = strlen(path)+1;
addMultipleBuffer(&info->strings, path, n, char);
addBuffer(&info->files, idx, int);
info->end++;
}
2011-02-10 00:02:05 +00:00
static const char *
expand_str(GlobInfo info, int at)
{ char *s = &fetchBuffer(&info->strings, at, char);
return (const char *)s;
}
2011-02-10 00:02:05 +00:00
static const char *
expand_entry(GlobInfo info, int idx)
{ int at = fetchBuffer(&info->files, idx, int);
return expand_str(info, at);
}
static void
un_escape(char *to, const char *from, const char *end)
{ while( from < end )
{ if ( *from == '\\' && (isspecial(from[1]) || from[1] == '\\') )
from++;
*to++ = *from++;
}
*to = EOS;
}
static int
expand(const char *pattern, GlobInfo info)
{ const char *pat = pattern;
compiled_pattern cbuf;
char prefix[MAXPATHLEN]; /* before first pattern */
char patbuf[MAXPATHLEN]; /* pattern buffer */
2013-01-16 11:28:58 +00:00
size_t prefix_len;
int end, dot;
initBuffer(&info->files);
initBuffer(&info->strings);
info->start = 0;
info->end = 0;
add_path("", info);
for(;;)
{ const char *s = pat, *head = pat, *tail;
for(;;)
{ int c;
switch( (c=*s++) )
{ case EOS:
if ( s > pat ) /* something left and expanded */
2013-01-16 11:28:58 +00:00
{ size_t prefix_len;
un_escape(prefix, pat, s);
prefix_len = strlen(prefix);
end = info->end;
for( ; info->start < end; info->start++ )
{ char path[MAXPATHLEN];
2013-01-16 11:28:58 +00:00
const char *entry = expand_entry(info, info->start);
size_t plen = strlen(entry);
if ( plen+prefix_len+2 <= MAXPATHLEN )
{ strcpy(path, entry);
if ( prefix[0] && plen > 0 && path[plen-1] != '/' )
path[plen++] = '/';
strcpy(&path[plen], prefix);
if ( end == 1 || AccessFile(path, ACCESS_EXIST) )
add_path(path, info);
}
}
}
succeed;
case '[': /* meta characters: expand */
case '{':
case '?':
case '*':
break;
case '\\':
if ( isspecial(*s) )
{ s++;
continue;
}
/*FALLTHROUGH*/
default:
if ( IS_DIR_SEPARATOR(c) )
head = s;
continue;
}
break;
}
for( tail=s; *tail && !IS_DIR_SEPARATOR(*tail); tail++ )
;
/* By now, head points to the start of the path holding meta characters,
while tail points to the tail:
..../meta*path/....
^ ^
head tail
*/
un_escape(prefix, pat, head);
un_escape(patbuf, head, tail);
2013-01-16 11:28:58 +00:00
prefix_len = strlen(prefix);
2013-01-16 11:28:58 +00:00
if ( !compilePattern(patbuf, &cbuf) ) /* syntax error */
fail;
dot = (patbuf[0] == '.'); /* do dots as well */
end = info->end;
for(; info->start < end; info->start++)
{ DIR *d;
struct dirent *e;
char path[MAXPATHLEN];
char tmp[MAXPATHLEN];
const char *current = expand_entry(info, info->start);
2013-01-16 11:28:58 +00:00
size_t clen = strlen(current);
if ( clen+prefix_len+1 > sizeof(path) )
continue;
strcpy(path, current);
2013-11-15 01:10:25 +00:00
strcpy(&path[clen], prefix);
if ( (d=opendir(path[0] ? OsPath(path, tmp) : ".")) )
2013-11-15 01:10:25 +00:00
{ size_t plen = clen+prefix_len;
if ( plen > 0 && path[plen-1] != '/' )
path[plen++] = '/';
for(e=readdir(d); e; e = readdir(d))
{
#ifdef __MSDOS__
strlwr(e->d_name);
#endif
if ( (dot || e->d_name[0] != '.') &&
matchPattern(e->d_name, &cbuf) )
{ char newp[MAXPATHLEN];
2013-01-16 11:28:58 +00:00
if ( plen+strlen(e->d_name)+1 < sizeof(newp) )
{ strcpy(newp, path);
strcpy(&newp[plen], e->d_name);
add_path(newp, info);
2013-01-16 11:28:58 +00:00
}
}
}
closedir(d);
}
}
pat = tail;
if ( IS_DIR_SEPARATOR(*pat) )
pat++;
}
}
static int
compareBagEntries(const void *a1, const void *a2)
{ GET_LD
GlobInfo info = LD->glob_info;
int i1 = *(int *)a1;
int i2 = *(int *)a2;
const char *s1, *s2;
s1 = expand_str(info, i1);
s2 = expand_str(info, i2);
if ( truePrologFlag(PLFLAG_FILE_CASE) )
return mbscoll(s1, s2);
else
return mbscasecoll(s1, s2);
}
static void
sort_expand(GlobInfo info)
{ GET_LD
int *ip = &fetchBuffer(&info->files, info->start, int);
int is = info->end - info->start;
LD->glob_info = info;
qsort(ip, is, sizeof(int), compareBagEntries);
}
/**
@addgroup
\pred expand_file_name(+ _WildCard_,- _List_)
This is an SWI-Prolog built-in that unifies _List_ with a sorted list of
files or directories matching _WildCard_. The normal Unix wildcard
constructs <tt>?</tt>, <tt>\\\*</tt>, <tt>[ ... ]</tt> and <tt>{...}</tt> are recognised. The
interpretation of <tt>{...}</tt> is interpreted slightly different from the
C shell (csh(1)). The comma separated argument can be arbitrary
patterns, including <tt>{...}</tt> patterns. The empty pattern is legal as
well: <tt>{.pl,}</tt> matches either <tt>.pl</tt> or the empty string.
If the pattern contains wildcard characters, only existing files and
directories are returned. Expanding a <em>pattern'</em> without wildcard
characters returns the argument, regardless on whether or not it exists.
Before expanding wildcards, the construct $var is expanded to the value
of the environment variable var and a possible leading ~ character is
expanded to the user's home directory. In Windows, the home directory is
determined as follows: if the environment variable `HOME` exists,
this is used. If the variables `HOMEDRIVE` and `HOMEPATH`
exist (Windows-NT), these are used. At initialisation, the system will
set the environment variable `HOME` to point to the YAP home
directory if neither `HOME` nor `HOMEPATH` and
`HOMEDRIVE` are defined.
*/
static
PRED_IMPL("expand_file_name", 2, expand_file_name, 0)
{ PRED_LD
char spec[MAXPATHLEN];
char *s;
glob_info info;
term_t l = PL_copy_term_ref(A2);
term_t head = PL_new_term_ref();
int i;
2013-01-16 11:28:58 +00:00
if ( !PL_get_chars(A1, &s, CVT_ALL|REP_FN|CVT_EXCEPTION) )
fail;
if ( strlen(s) > sizeof(spec)-1 )
2013-01-16 11:28:58 +00:00
return PL_error(NULL, 0, NULL, ERR_REPRESENTATION,
ATOM_max_path_length);
if ( !expandVars(s, spec, sizeof(spec)) )
fail;
if ( !expand(spec, &info) )
goto failout;
sort_expand(&info);
for( i = info.start; i< info.end; i++ )
{ const char *e = expand_entry(&info, i);
if ( !PL_unify_list(l, head, l) ||
!PL_unify_chars(head, PL_ATOM|REP_FN, -1, e) )
goto failout;
}
if ( !PL_unify_nil(l) )
{ failout:
free_expand_info(&info);
fail;
}
free_expand_info(&info);
succeed;
}
/** directory_files(+Dir, -Files) is det.
Files is a list of atoms that describe the entries in Dir.
*/
static
PRED_IMPL("directory_files", 2, directory_files, 0)
{ PRED_LD
char *dname;
DIR *dir;
if ( !PL_get_file_name(A1, &dname, PL_FILE_READ|PL_FILE_OSPATH) )
return FALSE;
if ( (dir=opendir(dname)) )
{ struct dirent *e;
term_t tail = PL_copy_term_ref(A2);
term_t head = PL_new_term_ref();
for(e=readdir(dir); e; e = readdir(dir))
{ PL_put_variable(head);
if ( PL_handle_signals() < 0 ||
!PL_unify_list(tail, head, tail) ||
!PL_unify_chars(head, PL_ATOM|REP_FN, (size_t)-1, e->d_name) )
{ closedir(dir);
return FALSE;
}
}
closedir(dir);
return PL_unify_nil(tail);
}
return PL_error(NULL, 0, OsError(), ERR_FILE_OPERATION,
ATOM_open, ATOM_directory, A1);
}
/*******************************
* PUBLISH PREDICATES *
*******************************/
BeginPredDefs(glob)
2011-02-10 00:01:19 +00:00
PRED_DEF("expand_file_name", 2, expand_file_name, 0)
PRED_DEF("wildcard_match", 2, wildcard_match, 0)
PRED_DEF("directory_files", 2, directory_files, 0)
EndPredDefs