Commit | Line | Data |
79072805 |
1 | /* $Revision: 4.1 $ |
2f3197b3 |
2 | ** |
3 | ** Do shell-style pattern matching for ?, \, [], and * characters. |
4 | ** Might not be robust in face of malformed patterns; e.g., "foo[a-" |
5 | ** could cause a segmentation violation. It is 8bit clean. |
6 | ** |
7 | ** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. |
8 | ** Rich $alz is now <rsalz@bbn.com>. |
9 | ** April, 1991: Replaced mutually-recursive calls with in-line code |
10 | ** for the star character. |
11 | ** |
12 | ** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code. |
13 | ** This can greatly speed up failing wildcard patterns. For example: |
14 | ** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-* |
15 | ** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1 |
16 | ** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1 |
17 | ** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without |
18 | ** the ABORT, then it takes 22310 calls to fail. Ugh. The following |
19 | ** explanation is from Lars: |
20 | ** The precondition that must be fulfilled is that DoMatch will consume |
21 | ** at least one character in text. This is true if *p is neither '*' nor |
22 | ** '\0'.) The last return has ABORT instead of FALSE to avoid quadratic |
23 | ** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With |
24 | ** FALSE, each star-loop has to run to the end of the text; with ABORT |
25 | ** only the last one does. |
26 | ** |
27 | ** Once the control of one instance of DoMatch enters the star-loop, that |
28 | ** instance will return either TRUE or ABORT, and any calling instance |
29 | ** will therefore return immediately after (without calling recursively |
30 | ** again). In effect, only one star-loop is ever active. It would be |
31 | ** possible to modify the code to maintain this context explicitly, |
32 | ** eliminating all recursive calls at the cost of some complication and |
33 | ** loss of clarity (and the ABORT stuff seems to be unclear enough by |
34 | ** itself). I think it would be unwise to try to get this into a |
35 | ** released version unless you have a good test data base to try it out |
36 | ** on. |
37 | */ |
38 | |
39 | #define TRUE 1 |
40 | #define FALSE 0 |
41 | #define ABORT -1 |
42 | |
43 | |
44 | /* What character marks an inverted character class? */ |
45 | #define NEGATE_CLASS '^' |
46 | /* Is "*" a common pattern? */ |
47 | #define OPTIMIZE_JUST_STAR |
48 | /* Do tar(1) matching rules, which ignore a trailing slash? */ |
49 | #undef MATCH_TAR_PATTERN |
50 | |
51 | |
52 | /* |
53 | ** Match text and p, return TRUE, FALSE, or ABORT. |
54 | */ |
55 | static int |
56 | DoMatch(text, p) |
57 | char *text; |
58 | char *p; |
59 | { |
60 | int last; |
61 | int matched; |
62 | int reverse; |
63 | |
64 | for ( ; *p; text++, p++) { |
65 | if (*text == '\0' && *p != '*') |
66 | return ABORT; |
67 | switch (*p) { |
68 | case '\\': |
69 | /* Literal match with following character. */ |
70 | p++; |
71 | /* FALLTHROUGH */ |
72 | default: |
73 | if (*text != *p) |
74 | return FALSE; |
75 | continue; |
76 | case '?': |
77 | /* Match anything. */ |
78 | continue; |
79 | case '*': |
80 | while (*++p == '*') |
81 | /* Consecutive stars act just like one. */ |
82 | continue; |
83 | if (*p == '\0') |
84 | /* Trailing star matches everything. */ |
85 | return TRUE; |
86 | while (*text) |
87 | if ((matched = DoMatch(text++, p)) != FALSE) |
88 | return matched; |
89 | return ABORT; |
90 | case '[': |
91 | reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE; |
92 | if (reverse) |
93 | /* Inverted character class. */ |
94 | p++; |
95 | for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p) |
96 | /* This next line requires a good C compiler. */ |
97 | if (*p == '-' ? *text <= *++p && *text >= last : *text == *p) |
98 | matched = TRUE; |
99 | if (matched == reverse) |
100 | return FALSE; |
101 | continue; |
102 | } |
103 | } |
104 | |
105 | #ifdef MATCH_TAR_PATTERN |
106 | if (*text == '/') |
107 | return TRUE; |
108 | #endif /* MATCH_TAR_ATTERN */ |
109 | return *text == '\0'; |
110 | } |
111 | |
112 | |
113 | /* |
114 | ** User-level routine. Returns TRUE or FALSE. |
115 | */ |
116 | int |
117 | wildmat(text, p) |
118 | char *text; |
119 | char *p; |
120 | { |
121 | #ifdef OPTIMIZE_JUST_STAR |
122 | if (p[0] == '*' && p[1] == '\0') |
123 | return TRUE; |
124 | #endif /* OPTIMIZE_JUST_STAR */ |
125 | return DoMatch(text, p) == TRUE; |
126 | } |
127 | |
128 | #include <stdio.h> |
129 | #include <sys/types.h> |
130 | #include <dirent.h> |
131 | #include <sys/stat.h> |
132 | #if __STDC__ |
133 | #ifdef unix |
134 | #define _SIZE_T /* unix defines size_t in sys/types.h */ |
135 | #endif |
136 | #ifndef _COMPILER_H |
137 | # include <compiler.h> |
138 | #endif |
139 | #include <stddef.h> |
140 | #include <stdlib.h> |
141 | #else |
142 | extern char *malloc(), *realloc(); |
143 | extern char *rindex(), *strdup(); |
144 | #define __PROTO(x) () |
145 | #endif |
146 | #include <string.h> |
147 | |
148 | #define MAX_DIR 32 /* max depth of dir recursion */ |
149 | static struct { |
150 | char *dir, *patt; |
151 | } dir_stack[MAX_DIR]; |
152 | static int stack_p; |
153 | static char **matches; |
154 | static int nmatches; |
155 | |
156 | static void *ck_memalloc __PROTO((void *)); |
157 | #define ck_strdup(p) ck_memalloc(strdup(p)) |
158 | #define ck_malloc(s) ck_memalloc(malloc(s)) |
159 | #define ck_realloc(p, s) ck_memalloc(realloc(p, s)) |
160 | |
161 | |
162 | #define DEBUGX(x) |
163 | |
164 | /* |
165 | * return true if patt contains a wildcard char |
166 | */ |
167 | int contains_wild(patt) |
168 | char *patt; |
169 | { |
170 | char c; |
171 | char *p; |
172 | |
173 | /* only check for wilds in the basename part of the pathname only */ |
174 | if((p = rindex(patt, '/')) == NULL) |
175 | p = rindex(patt, '\\'); |
176 | if(!p) |
177 | p = patt; |
178 | |
179 | while((c = *p++)) |
180 | if((c == '*') || (c == '?') || (c == '[')) |
181 | return 1; |
182 | return 0; |
183 | } |
184 | |
185 | #ifndef ZOO |
186 | void free_all() |
187 | { |
188 | char **p; |
189 | |
190 | if(!matches) |
191 | return; |
192 | |
193 | for(p = matches; *p; p++) |
194 | free(*p); |
195 | free(matches); |
196 | matches = NULL; |
197 | } |
198 | #endif |
199 | |
200 | static void push(dir, patt) |
201 | char *dir; |
202 | char *patt; |
203 | { |
204 | if(stack_p < (MAX_DIR - 2)) |
205 | stack_p++; |
206 | else |
207 | { |
208 | fprintf(stderr,"directory stack overflow\n"); |
209 | exit(99); |
210 | } |
211 | dir_stack[stack_p].dir = dir; |
212 | dir_stack[stack_p].patt = patt; |
213 | } |
214 | |
215 | /* |
216 | * glob patt |
217 | * if decend_dir is true, recursively decend any directories encountered. |
218 | * returns pointer to all matches encountered. |
219 | * if the initial patt is a directory, and decend_dir is true, it is |
220 | * equivalent to specifying the pattern "patt\*" |
221 | * |
222 | * Restrictions: |
223 | * - handles wildcards only in the base part of a pathname |
224 | * ie: will not handle \foo\*\bar\ (wildcard in the middle of pathname) |
225 | * |
226 | * - max dir recursion is MAX_DIR |
227 | * |
228 | * - on certain failures it will just skip potential matches as if they |
229 | * were not present. |
230 | * |
231 | * ++jrb bammi@cadence.com |
232 | */ |
233 | static char **do_match __PROTO((int decend_dir)); |
234 | |
235 | char **glob(patt, decend_dir) |
236 | char *patt; |
237 | int decend_dir; |
238 | { |
239 | char *dir, *basepatt, *p; |
240 | struct stat s; |
241 | |
242 | DEBUGX((fprintf(stderr,"glob(%s, %d)\n", patt, decend_dir))); |
243 | matches = NULL; |
244 | nmatches = 0; |
245 | stack_p = -1; |
246 | |
247 | /* first check for wildcards */ |
248 | if(contains_wild(patt)) |
249 | { |
250 | /* break it up into dir and base patt, do_matches and return */ |
251 | p = ck_strdup(patt); |
252 | if((basepatt = rindex(p, '/')) == NULL) |
253 | basepatt = rindex(p, '\\'); |
254 | if(basepatt) |
255 | { |
256 | dir = p; |
257 | *basepatt++ = '\0'; |
258 | basepatt = ck_strdup(basepatt); |
259 | } |
260 | else |
261 | { |
262 | dir = ck_strdup("."); |
263 | basepatt = p; |
264 | } |
265 | |
266 | if(strcmp(basepatt, "*.*") == 0) |
267 | { |
268 | /* the desktop, and other braindead shells strike again */ |
269 | basepatt[1] = '\0'; |
270 | } |
271 | push(dir, basepatt); |
272 | DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt))); |
273 | return do_match(decend_dir); |
274 | } |
275 | |
276 | /* if no wilds, check for dir */ |
277 | if(decend_dir && (!stat(patt, &s))) |
278 | { |
279 | if((s.st_mode & S_IFMT) == S_IFDIR) |
280 | { /* is a dir */ |
281 | size_t len = strlen(patt); |
282 | |
283 | dir = ck_strdup(patt); |
284 | --len; |
285 | if(len && ((dir[len] == '/') |
286 | #ifdef atarist |
287 | || (dir[len] == '\\') |
288 | #endif |
289 | )) |
290 | dir[len] = '\0'; |
291 | basepatt = ck_strdup("*"); |
292 | push(dir, basepatt); |
293 | DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt))); |
294 | return do_match(decend_dir); |
295 | } |
296 | } |
297 | return NULL; |
298 | } |
299 | |
300 | static char **do_match(decend_dir) |
301 | int decend_dir; |
302 | { |
303 | DIR *dirp; |
304 | struct dirent *d; |
305 | struct stat s; |
306 | char *dir, *basepatt; |
307 | |
308 | while(stack_p >= 0) |
309 | { |
310 | dir = ck_strdup(dir_stack[stack_p].dir); |
311 | free(dir_stack[stack_p].dir); |
312 | basepatt = ck_strdup(dir_stack[stack_p].patt); |
313 | free(dir_stack[stack_p--].patt); |
314 | |
315 | DEBUGX((fprintf(stderr,"dir %s patt %s stack %d\n", dir, basepatt, stack_p))); |
316 | |
317 | dirp = opendir(dir); |
318 | if(!dirp) |
319 | { |
320 | free(dir); |
321 | DEBUGX((fprintf(stderr,"no dir\n"))); |
322 | continue; |
323 | } |
324 | |
325 | while((d = readdir(dirp))) |
326 | { |
327 | char *p = ck_malloc(strlen(dir) + strlen(d->d_name) + 2L); |
328 | if(strcmp(dir, ".")) |
329 | /* If we have a full pathname then */ |
330 | { /* let's append the directory info */ |
331 | strcpy(p, dir); |
332 | #ifndef unix |
333 | strcat(p, "\\"); |
334 | #else |
335 | strcat(p, "/"); |
336 | #endif |
337 | strcat(p, d->d_name); |
338 | } |
339 | else /* Otherwise, the name is just fine, */ |
340 | strcpy(p, d->d_name); /* there's no need for './' -- bjsjr */ |
341 | |
342 | DEBUGX((fprintf(stderr, "Testing %s\n", p))); |
343 | if(!stat(p, &s)) /* if stat fails, ignore it */ |
344 | { |
345 | if( ((s.st_mode & S_IFMT) == S_IFREG) || |
346 | ((s.st_mode & S_IFMT) == S_IFLNK) ) |
347 | { /* it is a file/symbolic link */ |
348 | if(wildmat(d->d_name, basepatt)) |
349 | { /* it matches pattern */ |
350 | DEBUGX((fprintf(stderr,"File Matched\n"))); |
351 | if(matches == NULL) |
352 | matches = (char **)ck_malloc(sizeof(char *)); |
353 | else |
354 | matches = (char **) |
355 | ck_realloc(matches, (nmatches+1)*sizeof(char *)); |
356 | matches[nmatches++] = p; |
357 | } /* no match */ |
358 | else |
359 | { |
360 | DEBUGX((fprintf(stderr,"No File Match\n"))); |
361 | free(p); |
362 | } |
363 | } else if(decend_dir && ((s.st_mode & S_IFMT) == S_IFDIR)) |
364 | { |
365 | if(!((!strcmp(d->d_name,".")) || (!strcmp(d->d_name, "..") |
366 | #ifdef atarist |
367 | || (!strcmp(d->d_name, ".dir")) |
368 | #endif |
369 | ))) |
370 | { |
371 | char *push_p = ck_strdup("*"); |
372 | push(p, push_p); |
373 | DEBUGX((fprintf(stderr,"Dir pushed\n"))); |
374 | } |
375 | else |
376 | { |
377 | DEBUGX((fprintf(stderr, "DIR skipped\n"))); |
378 | free(p); |
379 | } |
380 | } |
381 | else |
382 | { |
383 | DEBUGX((fprintf(stderr, "Not a dir/no decend\n"))); |
384 | free(p); |
385 | } |
386 | } /* stat */ |
387 | else |
388 | { |
389 | DEBUGX((fprintf(stderr, "Stat failed\n"))); |
390 | free(p); |
391 | } |
392 | } /* while readdir */ |
393 | closedir(dirp); |
394 | free(basepatt); |
395 | free(dir); |
396 | DEBUGX((fprintf(stderr, "Dir done\n\n"))); |
397 | } /* while dirs in stack */ |
398 | |
399 | if(!nmatches) |
400 | { |
401 | DEBUGX((fprintf(stderr, "No matches\n"))); |
402 | return NULL; |
403 | } |
404 | |
405 | matches = (char **)realloc(matches, (nmatches+1)*sizeof(char *)); |
406 | if(!matches) |
407 | { return NULL; } |
408 | matches[nmatches] = NULL; |
409 | DEBUGX((fprintf(stderr, "%d matches\n", nmatches))); |
410 | return matches; |
411 | } |
412 | |
413 | #ifdef ZOO |
414 | #include "errors.i" |
415 | #endif |
416 | |
417 | static void *ck_memalloc(p) |
418 | void *p; |
419 | { |
420 | if(!p) |
421 | { |
422 | #ifndef ZOO |
423 | fprintf(stderr, "Out of memory\n"); |
424 | exit(98); |
425 | #else |
426 | prterror('f', no_memory); |
427 | #endif |
428 | } |
429 | return p; |
430 | } |
431 | |
432 | #ifdef TEST_GLOB |
433 | void test(path, dec) |
434 | char *path; |
435 | int dec; |
436 | { |
437 | char **m; |
438 | char **matches; |
439 | |
440 | printf("Testing %s %d\n", path, dec); |
441 | matches = glob(path, dec); |
442 | if(!matches) |
443 | { |
444 | printf("No matches\n"); |
445 | } |
446 | else |
447 | { |
448 | for(m = matches; *m; m++) |
449 | printf("%s\n", *m); |
450 | putchar('\n'); |
451 | free_all(); |
452 | } |
453 | } |
454 | |
455 | int main() |
456 | { |
457 | #ifndef unix |
458 | test("e:\\lib\\*.olb", 0); |
459 | test("e:\\lib", 0); |
460 | test("e:\\lib\\", 1); |
461 | #else |
462 | test("/net/acae127/home/bammi/News/comp.sources.misc/*.c", 0); |
463 | test("/net/acae127/home/bammi/News/comp.sources.misc", 0); |
464 | test("/net/acae127/home/bammi/News/comp.sources.misc", 1); |
465 | test("/net/acae127/home/bammi/atari/cross-gcc", 1); |
466 | #endif |
467 | |
468 | return 0; |
469 | } |
470 | |
471 | #endif |
472 | |
473 | #ifdef TEST_WILDMAT |
474 | #include <stdio.h> |
475 | |
476 | /* Yes, we use gets not fgets. Sue me. */ |
477 | extern char *gets(); |
478 | |
479 | |
480 | main() |
481 | { |
482 | char pattern[80]; |
483 | char text[80]; |
484 | |
485 | printf("Wildmat tester. Enter pattern, then strings to test.\n"); |
486 | printf("A blank line gets prompts for a new pattern; a blank pattern\n"); |
487 | printf("exits the program.\n\n"); |
488 | |
489 | for ( ; ; ) { |
490 | printf("Enter pattern: "); |
491 | if (gets(pattern) == NULL) |
492 | break; |
493 | for ( ; ; ) { |
494 | printf("Enter text: "); |
495 | if (gets(text) == NULL) |
496 | exit(0); |
497 | if (text[0] == '\0') |
498 | /* Blank line; go back and get a new pattern. */ |
499 | break; |
500 | printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO"); |
501 | } |
502 | } |
503 | |
504 | exit(0); |
505 | /* NOTREACHED */ |
506 | } |
507 | #endif /* TEST_WILDMAT */ |