1 /* $Revision: 4.0.1.1 $
3 ** Do shell-style pattern matching for ?, \, [], and * characters.
4 ** Might not be robust in the face of malformed patterns; e.g., "foo[a-"
5 ** could cause a segmentation violation. It is 8bit clean.
7 ** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
8 ** Rich $alz is now <rsalz@bbn.com>.
9 ** April, 1991: Replaced mutually-recursive calls with in-line code
10 ** for the star character.
12 ** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
13 ** This can greatly speed up failing wildcard patterns. For example:
14 ** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
15 ** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
16 ** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
17 ** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
18 ** the ABORT, then it takes 22310 calls to fail. Ugh. The following
19 ** explanation is from Lars:
20 ** The precondition that must be fulfilled is that DoMatch will consume
21 ** at least one character in text. This is true if *p is neither '*' nor
22 ** '\0'. The last return has ABORT instead of FALSE to avoid quadratic
23 ** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With
24 ** FALSE, each star-loop has to run to the end of the text; with ABORT
25 ** only the last one does.
27 ** Once the control of one instance of DoMatch enters the star-loop, that
28 ** instance will return either TRUE or ABORT, and any calling instance
29 ** will therefore return immediately after (without calling recursively
30 ** again). In effect, only one star-loop is ever active. It would be
31 ** possible to modify the code to maintain this context explicitly,
32 ** eliminating all recursive calls at the cost of some complication and
33 ** loss of clarity (and the ABORT stuff seems to be unclear enough by
34 ** itself). I think it would be unwise to try to get this into a
35 ** released version unless you have a good test data base to try it out
44 /* What character marks an inverted character class? */
45 #define NEGATE_CLASS '^'
46 /* Is "*" a common pattern? */
47 #define OPTIMIZE_JUST_STAR
48 /* Do tar(1) matching rules, which ignore a trailing slash? */
49 #undef MATCH_TAR_PATTERN
53 ** Match text and p, return TRUE, FALSE, or ABORT.
64 for ( ; *p; text++, p++) {
65 if (*text == '\0' && *p != '*')
69 /* Literal match with following character. */
81 /* Consecutive stars act just like one. */
84 /* Trailing star matches everything. */
87 if ((matched = DoMatch(text++, p)) != FALSE)
91 reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
93 /* Inverted character class. */
95 for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
96 /* This next line requires a good C compiler. */
97 if (*p == '-' ? *text <= *++p && *text >= last : *text == *p)
99 if (matched == reverse)
105 #ifdef MATCH_TAR_PATTERN
108 #endif /* MATCH_TAR_PATTERN */
109 return *text == '\0';
114 ** User-level routine. Returns TRUE or FALSE.
121 #ifdef OPTIMIZE_JUST_STAR
122 if (p[0] == '*' && p[1] == '\0')
124 #endif /* OPTIMIZE_JUST_STAR */
125 return DoMatch(text, p) == TRUE;
129 #include <sys/types.h>
131 #include <sys/stat.h>
134 #define _SIZE_T /* unix defines size_t in sys/types.h */
137 # include <compiler.h>
142 extern char *malloc(), *realloc();
143 extern char *rindex(), *strdup();
144 #define __PROTO(x) ()
148 #define MAX_DIR 32 /* max depth of dir recursion */
151 } dir_stack[MAX_DIR];
153 static char **matches;
156 static void *ck_memalloc __PROTO((void *));
157 #define ck_strdup(p) ck_memalloc(strdup(p))
158 #define ck_malloc(s) ck_memalloc(malloc(s))
159 #define ck_realloc(p, s) ck_memalloc(realloc(p, s))
165 * return true if patt contains a wildcard char
167 int contains_wild(patt)
173 /* check for wilds only in the basename part of the pathname */
174 if((p = rindex(patt, '/')) == NULL)
175 p = rindex(patt, '\\');
180 if((c == '*') || (c == '?') || (c == '['))
193 for(p = matches; *p; p++)
200 static void push(dir, patt)
204 if(stack_p < (MAX_DIR - 2))
208 fprintf(stderr,"directory stack overflow\n");
211 dir_stack[stack_p].dir = dir;
212 dir_stack[stack_p].patt = patt;
217 * if decend_dir is true, recursively descend any directories encountered.
218 * returns pointer to all matches encountered.
219 * if the initial patt is a directory, and decend_dir is true, it is
220 * equivalent to specifying the pattern "patt\*"
223 * - handles wildcards only in the base part of a pathname
224 * ie: will not handle \foo\*\bar\ (wildcard in the middle of pathname)
226 * - max dir recursion is MAX_DIR
228 * - on certain failures it will just skip potential matches as if they
231 * ++jrb bammi@cadence.com
233 static char **do_match __PROTO((int decend_dir));
235 char **glob(patt, decend_dir)
239 char *dir, *basepatt, *p;
242 DEBUGX((fprintf(stderr,"glob(%s, %d)\n", patt, decend_dir)));
247 /* first check for wildcards */
248 if(contains_wild(patt))
250 /* break it up into dir and base patt, do_matches and return */
252 if((basepatt = rindex(p, '/')) == NULL)
253 basepatt = rindex(p, '\\');
258 basepatt = ck_strdup(basepatt);
262 dir = ck_strdup(".");
266 if(strcmp(basepatt, "*.*") == 0)
268 /* the desktop, and other braindead shells strike again */
272 DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
273 return do_match(decend_dir);
276 /* if no wilds, check for dir */
277 if(decend_dir && (!stat(patt, &s)))
279 if((s.st_mode & S_IFMT) == S_IFDIR)
281 size_t len = strlen(patt);
283 dir = ck_strdup(patt);
285 if(len && ((dir[len] == '/')
287 || (dir[len] == '\\')
291 basepatt = ck_strdup("*");
293 DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
294 return do_match(decend_dir);
300 static char **do_match(decend_dir)
306 char *dir, *basepatt;
310 dir = ck_strdup(dir_stack[stack_p].dir);
311 free(dir_stack[stack_p].dir);
312 basepatt = ck_strdup(dir_stack[stack_p].patt);
313 free(dir_stack[stack_p--].patt);
315 DEBUGX((fprintf(stderr,"dir %s patt %s stack %d\n", dir, basepatt, stack_p)));
321 DEBUGX((fprintf(stderr,"no dir\n")));
325 while((d = readdir(dirp)))
327 char *p = ck_malloc(strlen(dir) + strlen(d->d_name) + 2L);
329 /* If we have a full pathname then */
330 { /* let's append the directory info */
337 strcat(p, d->d_name);
339 else /* Otherwise, the name is just fine, */
340 strcpy(p, d->d_name); /* there's no need for './' -- bjsjr */
342 DEBUGX((fprintf(stderr, "Testing %s\n", p)));
343 if(!stat(p, &s)) /* if stat fails, ignore it */
345 if( ((s.st_mode & S_IFMT) == S_IFREG) ||
346 ((s.st_mode & S_IFMT) == S_IFLNK) )
347 { /* it is a file/symbolic link */
348 if(wildmat(d->d_name, basepatt))
349 { /* it matches pattern */
350 DEBUGX((fprintf(stderr,"File Matched\n")));
352 matches = (char **)ck_malloc(sizeof(char *));
355 ck_realloc(matches, (nmatches+1)*sizeof(char *));
356 matches[nmatches++] = p;
360 DEBUGX((fprintf(stderr,"No File Match\n")));
363 } else if(decend_dir && ((s.st_mode & S_IFMT) == S_IFDIR))
365 if(!((!strcmp(d->d_name,".")) || (!strcmp(d->d_name, "..")
367 || (!strcmp(d->d_name, ".dir"))
371 char *push_p = ck_strdup("*");
373 DEBUGX((fprintf(stderr,"Dir pushed\n")));
377 DEBUGX((fprintf(stderr, "DIR skipped\n")));
383 DEBUGX((fprintf(stderr, "Not a dir/no decend\n")));
389 DEBUGX((fprintf(stderr, "Stat failed\n")));
392 } /* while readdir */
396 DEBUGX((fprintf(stderr, "Dir done\n\n")));
397 } /* while dirs in stack */
401 DEBUGX((fprintf(stderr, "No matches\n")));
405 matches = (char **)realloc(matches, (nmatches+1)*sizeof(char *));
408 matches[nmatches] = NULL;
409 DEBUGX((fprintf(stderr, "%d matches\n", nmatches)));
417 static void *ck_memalloc(p)
423 fprintf(stderr, "Out of memory\n");
426 prterror('f', no_memory);
440 printf("Testing %s %d\n", path, dec);
441 matches = glob(path, dec);
444 printf("No matches\n");
448 for(m = matches; *m; m++)
458 test("e:\\lib\\*.olb", 0);
460 test("e:\\lib\\", 1);
462 test("/net/acae127/home/bammi/News/comp.sources.misc/*.c", 0);
463 test("/net/acae127/home/bammi/News/comp.sources.misc", 0);
464 test("/net/acae127/home/bammi/News/comp.sources.misc", 1);
465 test("/net/acae127/home/bammi/atari/cross-gcc", 1);
476 /* Yes, we use gets not fgets. Sue me. */
485 printf("Wildmat tester. Enter pattern, then strings to test.\n");
486 printf("A blank line gets prompts for a new pattern; a blank pattern\n");
487 printf("exits the program.\n\n");
490 printf("Enter pattern: ");
491 if (gets(pattern) == NULL)
494 printf("Enter text: ");
495 if (gets(text) == NULL)
498 /* Blank line; go back and get a new pattern. */
500 printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO");
507 #endif /* TEST_WILDMAT */