/* $Id$ Part of SWI-Prolog Author: Jan Wielemaker E-mail: J.Wielemaker@cs.vu.nl WWW: http://www.swi-prolog.org Copyright (C): 1985-2011, University of Amsterdam VU University Amsterdam This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "pl-incl.h" #include "pl-ctype.h" #ifdef HAVE_UNISTD_H #include #endif #ifdef O_XOS # include "windows/dirent.h" #else #if HAVE_DIRENT_H # include #else # define dirent direct # if HAVE_SYS_NDIR_H # include # endif # if HAVE_SYS_DIR_H # include # endif # if HAVE_NDIR_H # include # endif #endif #endif /*O_XOS*/ #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_SYS_PARAM_H #include #endif #define O_EXPANDS_TESTS_EXISTS 1 #ifndef IS_DIR_SEPARATOR #define IS_DIR_SEPARATOR(c) ((c) == '/') #endif #define char_to_int(c) (0xff & (int)(c)) /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Unix Wildcard Matching. Recognised: ? matches one arbitrary character * matches any number of any character [xa-z] matches x and a-z {p1,p2} matches pattern p1 or p2 backslash (\) escapes a character. First the pattern is compiled into an intermediate representation. Next this intermediate representation is matched against the target. The non-ascii characters are used to store control sequences in the intermediate representation: ANY Match any character STAR Match (possibly empty) sequence ALT Match, if fails, continue at + offset JMP Jump instructions ANYOF Next 16 bytes are bitmap - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #define MAXCODE 1024 #define ANY 128 #define STAR 129 #define ALT 130 #define JMP 131 #define ANYOF 132 #define EXIT 133 #define NOCURL 0 #define CURL 1 typedef unsigned char matchcode; typedef struct { int size; matchcode code[MAXCODE]; } compiled_pattern; static char *compile_pattern(compiled_pattern *, char *, int); static bool match_pattern(matchcode *, char *); #define Output(c) { if ( Out->size > MAXCODE-1 ) \ { warning("pattern too large"); \ return (char *) NULL; \ } \ Out->code[Out->size++] = c; \ } static inline void setMap(matchcode *map, int c) { GET_LD if ( !truePrologFlag(PLFLAG_FILE_CASE) ) c = makeLower(c); map[(c)/8] |= 1 << ((c) % 8); } static bool compilePattern(char *p, compiled_pattern *cbuf) { cbuf->size = 0; if ( compile_pattern(cbuf, p, NOCURL) == (char *) NULL ) fail; succeed; } static char * compile_pattern(compiled_pattern *Out, char *p, int curl) { int c; for(;;) { switch(c = char_to_int(*p++)) { case EOS: break; case '\\': Output(*p == EOS ? '\\' : (*p & 0x7f)); if (*p == EOS ) break; p++; continue; case '?': Output(ANY); continue; case '*': Output(STAR); continue; case '[': { matchcode *map; int n; Output(ANYOF); map = &Out->code[Out->size]; Out->size += 16; if ( Out->size >= MAXCODE ) { warning("Pattern too intptr_t"); return (char *) NULL; } for( n=0; n < 16; n++) map[n] = 0; for(;;) { switch( c = *p++ ) { case '\\': if ( *p == EOS ) { warning("Unmatched '['"); return (char *)NULL; } setMap(map, *p); p++; continue; case ']': break; default: if ( p[-1] != ']' && p[0] == '-' && p[1] != ']' ) { int chr; for ( chr=p[-1]; chr <= p[1]; chr++ ) setMap(map, chr); p += 2; } else setMap(map, c); continue; } break; } continue; } case '{': { int ai, aj = -1; for(;;) { Output(ALT); ai = Out->size; Output(0); if ( (p = compile_pattern(Out, p, CURL)) == (char *) NULL ) return (char *) NULL; if ( aj > 0 ) Out->code[aj] = Out->size - aj; if ( *p == ',' ) { Output(JMP); aj = Out->size; Output(0); Out->code[ai] = Out->size - ai; Output(ALT); ai = Out->size; Output(0); p++; } else if ( *p == '}' ) { p++; break; } else { warning("Unmatched '{'"); return (char *) NULL; } } continue; } case ANY: case STAR: case ALT: case JMP: case ANYOF: case EXIT: PL_error(NULL, 0, "Reserved character", ERR_REPRESENTATION, ATOM_pattern); return NULL; case '}': case ',': if ( curl == CURL ) { p--; return p; } /*FALLTHROUGH*/ default: { GET_LD if ( !truePrologFlag(PLFLAG_FILE_CASE) ) c = makeLower(c); Output(c); continue; } } Output(EXIT); return p; } } static inline bool matchPattern(char *s, compiled_pattern *cbuf) { return match_pattern(cbuf->code, s); } static bool match_pattern(matchcode *p, char *str) { matchcode c; matchcode *s = (matchcode *) str; for(;;) { switch( c = *p++ ) { case EXIT: return (*s == EOS ? TRUE : FALSE); case ANY: /* ? */ if ( *s == EOS ) fail; s++; continue; case ANYOF: /* [...] */ { GET_LD matchcode c2 = *s; if ( !truePrologFlag(PLFLAG_FILE_CASE) ) c2 = makeLower(c2); if ( p[c2 / 8] & (1 << (c2 % 8)) ) { p += 16; s++; continue; } fail; } case STAR: /* * */ do { if ( match_pattern(p, (char *)s) ) succeed; } while( *s++ ); fail; case JMP: /* { ... } */ p += *p; continue; case ALT: if ( match_pattern(p+1, (char *)s) ) succeed; p += *p; continue; default: /* character */ { GET_LD if ( c == *s || (!truePrologFlag(PLFLAG_FILE_CASE) && c == makeLower(*s)) ) { s++; continue; } fail; } } } } /** wildcard_match(+Pattern, +Name) is semidet. */ static PRED_IMPL("wildcard_match", 2, wildcard_match, 0) { char *p, *s; compiled_pattern buf; if ( !PL_get_chars(A1, &p, CVT_ALL|CVT_EXCEPTION) || !PL_get_chars(A2, &s, CVT_ALL|CVT_EXCEPTION) ) fail; if ( compilePattern(p, &buf) ) { return matchPattern(s, &buf); } return PL_error(NULL, 0, NULL, ERR_DOMAIN, ATOM_pattern, A1); } /******************************* * EXPAND_FILE_NAME/2 * *******************************/ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Wildcart expansion of a pattern to a list of files. This code uses two `buffers' for storing the intermediate results while limiting fragmentation. The `strings' buffer contains all strings generated. The files contains indices in the `strings' buffer pointing to the start of strings. The indices in the range [start,end) are valid. First this set is filled with the empty string. Next the directory-segment with the first wildcart is located. If found, we go through the current set, adding the segments without wildcarts, applying the wildcart on the directory and adding everything found to the set. The old set is deleted by incrementing info.start. If we are at the end, we add the remaining non-wildcart segments to each element of the set, deleting it if the result does not exits. Finally we sort the result. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ typedef struct { tmp_buffer files; /* our files */ tmp_buffer strings; /* our strings */ int start; /* 1-st valid entry of files */ int end; /* last valid entry of files */ } glob_info, *GlobInfo; #undef isspecial /* play safe */ #define isspecial(c) \ ((c) == '[' || (c) == '{' || (c) == '?' || (c) == '*') static void free_expand_info(GlobInfo info) { discardBuffer(&info->files); discardBuffer(&info->strings); } static void add_path(const char *path, GlobInfo info) { int idx = (int)entriesBuffer(&info->strings, char); size_t n = strlen(path)+1; addMultipleBuffer(&info->strings, path, n, char); addBuffer(&info->files, idx, int); info->end++; } static const char * expand_str(GlobInfo info, int at) { char *s = &fetchBuffer(&info->strings, at, char); return (const char *)s; } static const char * expand_entry(GlobInfo info, int idx) { int at = fetchBuffer(&info->files, idx, int); return expand_str(info, at); } static void un_escape(char *to, const char *from, const char *end) { while( from < end ) { if ( *from == '\\' && (isspecial(from[1]) || from[1] == '\\') ) from++; *to++ = *from++; } *to = EOS; } static int expand(const char *pattern, GlobInfo info) { const char *pat = pattern; compiled_pattern cbuf; char prefix[MAXPATHLEN]; /* before first pattern */ char patbuf[MAXPATHLEN]; /* pattern buffer */ size_t prefix_len; int end, dot; initBuffer(&info->files); initBuffer(&info->strings); info->start = 0; info->end = 0; add_path("", info); for(;;) { const char *s = pat, *head = pat, *tail; for(;;) { int c; switch( (c=*s++) ) { case EOS: if ( s > pat ) /* something left and expanded */ { size_t prefix_len; un_escape(prefix, pat, s); prefix_len = strlen(prefix); end = info->end; for( ; info->start < end; info->start++ ) { char path[MAXPATHLEN]; const char *entry = expand_entry(info, info->start); size_t plen = strlen(entry); if ( plen+prefix_len+2 <= MAXPATHLEN ) { strcpy(path, entry); if ( prefix[0] && plen > 0 && path[plen-1] != '/' ) path[plen++] = '/'; strcpy(&path[plen], prefix); if ( end == 1 || AccessFile(path, ACCESS_EXIST) ) add_path(path, info); } } } succeed; case '[': /* meta characters: expand */ case '{': case '?': case '*': break; case '\\': if ( isspecial(*s) ) { s++; continue; } /*FALLTHROUGH*/ default: if ( IS_DIR_SEPARATOR(c) ) head = s; continue; } break; } for( tail=s; *tail && !IS_DIR_SEPARATOR(*tail); tail++ ) ; /* By now, head points to the start of the path holding meta characters, while tail points to the tail: ..../meta*path/.... ^ ^ head tail */ un_escape(prefix, pat, head); un_escape(patbuf, head, tail); prefix_len = strlen(prefix); if ( !compilePattern(patbuf, &cbuf) ) /* syntax error */ fail; dot = (patbuf[0] == '.'); /* do dots as well */ end = info->end; for(; info->start < end; info->start++) { DIR *d; struct dirent *e; char path[MAXPATHLEN]; char tmp[MAXPATHLEN]; const char *current = expand_entry(info, info->start); size_t clen = strlen(current); if ( clen+prefix_len+1 > sizeof(path) ) continue; strcpy(path, current); strcat(path, prefix); if ( (d=opendir(path[0] ? OsPath(path, tmp) : ".")) ) { size_t plen = strlen(path); if ( plen > 0 && path[plen-1] != '/' ) path[plen++] = '/'; for(e=readdir(d); e; e = readdir(d)) { #ifdef __MSDOS__ strlwr(e->d_name); #endif if ( (dot || e->d_name[0] != '.') && matchPattern(e->d_name, &cbuf) ) { char newp[MAXPATHLEN]; if ( plen+strlen(e->d_name)+1 < sizeof(newp) ) { strcpy(newp, path); strcpy(&newp[plen], e->d_name); add_path(newp, info); } } } closedir(d); } } pat = tail; if ( IS_DIR_SEPARATOR(*pat) ) pat++; } } static int compareBagEntries(const void *a1, const void *a2) { GET_LD GlobInfo info = LD->glob_info; int i1 = *(int *)a1; int i2 = *(int *)a2; const char *s1, *s2; s1 = expand_str(info, i1); s2 = expand_str(info, i2); if ( truePrologFlag(PLFLAG_FILE_CASE) ) return mbscoll(s1, s2); else return mbscasecoll(s1, s2); } static void sort_expand(GlobInfo info) { GET_LD int *ip = &fetchBuffer(&info->files, info->start, int); int is = info->end - info->start; LD->glob_info = info; qsort(ip, is, sizeof(int), compareBagEntries); } static PRED_IMPL("expand_file_name", 2, expand_file_name, 0) { PRED_LD char spec[MAXPATHLEN]; char *s; glob_info info; term_t l = PL_copy_term_ref(A2); term_t head = PL_new_term_ref(); int i; if ( !PL_get_chars(A1, &s, CVT_ALL|REP_FN|CVT_EXCEPTION) ) fail; if ( strlen(s) > sizeof(spec)-1 ) return PL_error(NULL, 0, NULL, ERR_REPRESENTATION, ATOM_max_path_length); if ( !expandVars(s, spec, sizeof(spec)) ) fail; if ( !expand(spec, &info) ) goto failout; sort_expand(&info); for( i = info.start; i< info.end; i++ ) { const char *e = expand_entry(&info, i); if ( !PL_unify_list(l, head, l) || !PL_unify_chars(head, PL_ATOM|REP_FN, -1, e) ) goto failout; } if ( !PL_unify_nil(l) ) { failout: free_expand_info(&info); fail; } free_expand_info(&info); succeed; } /** directory_files(+Dir, -Files) is det. Files is a list of atoms that describe the entries in Dir. */ static PRED_IMPL("directory_files", 2, directory_files, 0) { PRED_LD char *dname; DIR *dir; if ( !PL_get_file_name(A1, &dname, PL_FILE_READ|PL_FILE_OSPATH) ) return FALSE; if ( (dir=opendir(dname)) ) { struct dirent *e; term_t tail = PL_copy_term_ref(A2); term_t head = PL_new_term_ref(); for(e=readdir(dir); e; e = readdir(dir)) { PL_put_variable(head); if ( PL_handle_signals() < 0 || !PL_unify_list(tail, head, tail) || !PL_unify_chars(head, PL_ATOM|REP_FN, (size_t)-1, e->d_name) ) { closedir(dir); return FALSE; } } closedir(dir); return PL_unify_nil(tail); } return PL_error(NULL, 0, OsError(), ERR_FILE_OPERATION, ATOM_open, ATOM_directory, A1); } /******************************* * PUBLISH PREDICATES * *******************************/ BeginPredDefs(glob) PRED_DEF("expand_file_name", 2, expand_file_name, 0) PRED_DEF("wildcard_match", 2, wildcard_match, 0) PRED_DEF("directory_files", 2, directory_files, 0) EndPredDefs