perl 5.0 alpha 9
[p5sagit/p5-mst-13.2.git] / atarist / wildmat.c
CommitLineData
79072805 1/* $Revision: 4.1 $
2f3197b3 2**
3** Do shell-style pattern matching for ?, \, [], and * characters.
4** Might not be robust in face of malformed patterns; e.g., "foo[a-"
5** could cause a segmentation violation. It is 8bit clean.
6**
7** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
8** Rich $alz is now <rsalz@bbn.com>.
9** April, 1991: Replaced mutually-recursive calls with in-line code
10** for the star character.
11**
12** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
13** This can greatly speed up failing wildcard patterns. For example:
14** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
15** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
16** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
17** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
18** the ABORT, then it takes 22310 calls to fail. Ugh. The following
19** explanation is from Lars:
20** The precondition that must be fulfilled is that DoMatch will consume
21** at least one character in text. This is true if *p is neither '*' nor
22** '\0'.) The last return has ABORT instead of FALSE to avoid quadratic
23** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With
24** FALSE, each star-loop has to run to the end of the text; with ABORT
25** only the last one does.
26**
27** Once the control of one instance of DoMatch enters the star-loop, that
28** instance will return either TRUE or ABORT, and any calling instance
29** will therefore return immediately after (without calling recursively
30** again). In effect, only one star-loop is ever active. It would be
31** possible to modify the code to maintain this context explicitly,
32** eliminating all recursive calls at the cost of some complication and
33** loss of clarity (and the ABORT stuff seems to be unclear enough by
34** itself). I think it would be unwise to try to get this into a
35** released version unless you have a good test data base to try it out
36** on.
37*/
38
39#define TRUE 1
40#define FALSE 0
41#define ABORT -1
42
43
44 /* What character marks an inverted character class? */
45#define NEGATE_CLASS '^'
46 /* Is "*" a common pattern? */
47#define OPTIMIZE_JUST_STAR
48 /* Do tar(1) matching rules, which ignore a trailing slash? */
49#undef MATCH_TAR_PATTERN
50
51
52/*
53** Match text and p, return TRUE, FALSE, or ABORT.
54*/
55static int
56DoMatch(text, p)
57 char *text;
58 char *p;
59{
60 int last;
61 int matched;
62 int reverse;
63
64 for ( ; *p; text++, p++) {
65 if (*text == '\0' && *p != '*')
66 return ABORT;
67 switch (*p) {
68 case '\\':
69 /* Literal match with following character. */
70 p++;
71 /* FALLTHROUGH */
72 default:
73 if (*text != *p)
74 return FALSE;
75 continue;
76 case '?':
77 /* Match anything. */
78 continue;
79 case '*':
80 while (*++p == '*')
81 /* Consecutive stars act just like one. */
82 continue;
83 if (*p == '\0')
84 /* Trailing star matches everything. */
85 return TRUE;
86 while (*text)
87 if ((matched = DoMatch(text++, p)) != FALSE)
88 return matched;
89 return ABORT;
90 case '[':
91 reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
92 if (reverse)
93 /* Inverted character class. */
94 p++;
95 for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
96 /* This next line requires a good C compiler. */
97 if (*p == '-' ? *text <= *++p && *text >= last : *text == *p)
98 matched = TRUE;
99 if (matched == reverse)
100 return FALSE;
101 continue;
102 }
103 }
104
105#ifdef MATCH_TAR_PATTERN
106 if (*text == '/')
107 return TRUE;
108#endif /* MATCH_TAR_ATTERN */
109 return *text == '\0';
110}
111
112
113/*
114** User-level routine. Returns TRUE or FALSE.
115*/
116int
117wildmat(text, p)
118 char *text;
119 char *p;
120{
121#ifdef OPTIMIZE_JUST_STAR
122 if (p[0] == '*' && p[1] == '\0')
123 return TRUE;
124#endif /* OPTIMIZE_JUST_STAR */
125 return DoMatch(text, p) == TRUE;
126}
127
128#include <stdio.h>
129#include <sys/types.h>
130#include <dirent.h>
131#include <sys/stat.h>
132#if __STDC__
133#ifdef unix
134#define _SIZE_T /* unix defines size_t in sys/types.h */
135#endif
136#ifndef _COMPILER_H
137# include <compiler.h>
138#endif
139#include <stddef.h>
140#include <stdlib.h>
141#else
142extern char *malloc(), *realloc();
143extern char *rindex(), *strdup();
144#define __PROTO(x) ()
145#endif
146#include <string.h>
147
148#define MAX_DIR 32 /* max depth of dir recursion */
149static struct {
150 char *dir, *patt;
151} dir_stack[MAX_DIR];
152static int stack_p;
153static char **matches;
154static int nmatches;
155
156static void *ck_memalloc __PROTO((void *));
157#define ck_strdup(p) ck_memalloc(strdup(p))
158#define ck_malloc(s) ck_memalloc(malloc(s))
159#define ck_realloc(p, s) ck_memalloc(realloc(p, s))
160
161
162#define DEBUGX(x)
163
164/*
165 * return true if patt contains a wildcard char
166 */
167int contains_wild(patt)
168char *patt;
169{
170 char c;
171 char *p;
172
173 /* only check for wilds in the basename part of the pathname only */
174 if((p = rindex(patt, '/')) == NULL)
175 p = rindex(patt, '\\');
176 if(!p)
177 p = patt;
178
179 while((c = *p++))
180 if((c == '*') || (c == '?') || (c == '['))
181 return 1;
182 return 0;
183}
184
185#ifndef ZOO
186void free_all()
187{
188 char **p;
189
190 if(!matches)
191 return;
192
193 for(p = matches; *p; p++)
194 free(*p);
195 free(matches);
196 matches = NULL;
197}
198#endif
199
200static void push(dir, patt)
201char *dir;
202char *patt;
203{
204 if(stack_p < (MAX_DIR - 2))
205 stack_p++;
206 else
207 {
208 fprintf(stderr,"directory stack overflow\n");
209 exit(99);
210 }
211 dir_stack[stack_p].dir = dir;
212 dir_stack[stack_p].patt = patt;
213}
214
215/*
216 * glob patt
217 * if decend_dir is true, recursively decend any directories encountered.
218 * returns pointer to all matches encountered.
219 * if the initial patt is a directory, and decend_dir is true, it is
220 * equivalent to specifying the pattern "patt\*"
221 *
222 * Restrictions:
223 * - handles wildcards only in the base part of a pathname
224 * ie: will not handle \foo\*\bar\ (wildcard in the middle of pathname)
225 *
226 * - max dir recursion is MAX_DIR
227 *
228 * - on certain failures it will just skip potential matches as if they
229 * were not present.
230 *
231 * ++jrb bammi@cadence.com
232 */
233static char **do_match __PROTO((int decend_dir));
234
235char **glob(patt, decend_dir)
236char *patt;
237int decend_dir;
238{
239 char *dir, *basepatt, *p;
240 struct stat s;
241
242 DEBUGX((fprintf(stderr,"glob(%s, %d)\n", patt, decend_dir)));
243 matches = NULL;
244 nmatches = 0;
245 stack_p = -1;
246
247 /* first check for wildcards */
248 if(contains_wild(patt))
249 {
250 /* break it up into dir and base patt, do_matches and return */
251 p = ck_strdup(patt);
252 if((basepatt = rindex(p, '/')) == NULL)
253 basepatt = rindex(p, '\\');
254 if(basepatt)
255 {
256 dir = p;
257 *basepatt++ = '\0';
258 basepatt = ck_strdup(basepatt);
259 }
260 else
261 {
262 dir = ck_strdup(".");
263 basepatt = p;
264 }
265
266 if(strcmp(basepatt, "*.*") == 0)
267 {
268 /* the desktop, and other braindead shells strike again */
269 basepatt[1] = '\0';
270 }
271 push(dir, basepatt);
272 DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
273 return do_match(decend_dir);
274 }
275
276 /* if no wilds, check for dir */
277 if(decend_dir && (!stat(patt, &s)))
278 {
279 if((s.st_mode & S_IFMT) == S_IFDIR)
280 { /* is a dir */
281 size_t len = strlen(patt);
282
283 dir = ck_strdup(patt);
284 --len;
285 if(len && ((dir[len] == '/')
286#ifdef atarist
287 || (dir[len] == '\\')
288#endif
289 ))
290 dir[len] = '\0';
291 basepatt = ck_strdup("*");
292 push(dir, basepatt);
293 DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
294 return do_match(decend_dir);
295 }
296 }
297 return NULL;
298}
299
300static char **do_match(decend_dir)
301int decend_dir;
302{
303 DIR *dirp;
304 struct dirent *d;
305 struct stat s;
306 char *dir, *basepatt;
307
308 while(stack_p >= 0)
309 {
310 dir = ck_strdup(dir_stack[stack_p].dir);
311 free(dir_stack[stack_p].dir);
312 basepatt = ck_strdup(dir_stack[stack_p].patt);
313 free(dir_stack[stack_p--].patt);
314
315 DEBUGX((fprintf(stderr,"dir %s patt %s stack %d\n", dir, basepatt, stack_p)));
316
317 dirp = opendir(dir);
318 if(!dirp)
319 {
320 free(dir);
321 DEBUGX((fprintf(stderr,"no dir\n")));
322 continue;
323 }
324
325 while((d = readdir(dirp)))
326 {
327 char *p = ck_malloc(strlen(dir) + strlen(d->d_name) + 2L);
328 if(strcmp(dir, "."))
329 /* If we have a full pathname then */
330 { /* let's append the directory info */
331 strcpy(p, dir);
332#ifndef unix
333 strcat(p, "\\");
334#else
335 strcat(p, "/");
336#endif
337 strcat(p, d->d_name);
338 }
339 else /* Otherwise, the name is just fine, */
340 strcpy(p, d->d_name); /* there's no need for './' -- bjsjr */
341
342 DEBUGX((fprintf(stderr, "Testing %s\n", p)));
343 if(!stat(p, &s)) /* if stat fails, ignore it */
344 {
345 if( ((s.st_mode & S_IFMT) == S_IFREG) ||
346 ((s.st_mode & S_IFMT) == S_IFLNK) )
347 { /* it is a file/symbolic link */
348 if(wildmat(d->d_name, basepatt))
349 { /* it matches pattern */
350 DEBUGX((fprintf(stderr,"File Matched\n")));
351 if(matches == NULL)
352 matches = (char **)ck_malloc(sizeof(char *));
353 else
354 matches = (char **)
355 ck_realloc(matches, (nmatches+1)*sizeof(char *));
356 matches[nmatches++] = p;
357 } /* no match */
358 else
359 {
360 DEBUGX((fprintf(stderr,"No File Match\n")));
361 free(p);
362 }
363 } else if(decend_dir && ((s.st_mode & S_IFMT) == S_IFDIR))
364 {
365 if(!((!strcmp(d->d_name,".")) || (!strcmp(d->d_name, "..")
366#ifdef atarist
367 || (!strcmp(d->d_name, ".dir"))
368#endif
369 )))
370 {
371 char *push_p = ck_strdup("*");
372 push(p, push_p);
373 DEBUGX((fprintf(stderr,"Dir pushed\n")));
374 }
375 else
376 {
377 DEBUGX((fprintf(stderr, "DIR skipped\n")));
378 free(p);
379 }
380 }
381 else
382 {
383 DEBUGX((fprintf(stderr, "Not a dir/no decend\n")));
384 free(p);
385 }
386 } /* stat */
387 else
388 {
389 DEBUGX((fprintf(stderr, "Stat failed\n")));
390 free(p);
391 }
392 } /* while readdir */
393 closedir(dirp);
394 free(basepatt);
395 free(dir);
396 DEBUGX((fprintf(stderr, "Dir done\n\n")));
397 } /* while dirs in stack */
398
399 if(!nmatches)
400 {
401 DEBUGX((fprintf(stderr, "No matches\n")));
402 return NULL;
403 }
404
405 matches = (char **)realloc(matches, (nmatches+1)*sizeof(char *));
406 if(!matches)
407 { return NULL; }
408 matches[nmatches] = NULL;
409 DEBUGX((fprintf(stderr, "%d matches\n", nmatches)));
410 return matches;
411}
412
413#ifdef ZOO
414#include "errors.i"
415#endif
416
417static void *ck_memalloc(p)
418void *p;
419{
420 if(!p)
421 {
422#ifndef ZOO
423 fprintf(stderr, "Out of memory\n");
424 exit(98);
425#else
426 prterror('f', no_memory);
427#endif
428 }
429 return p;
430}
431
432#ifdef TEST_GLOB
433void test(path, dec)
434char *path;
435int dec;
436{
437 char **m;
438 char **matches;
439
440 printf("Testing %s %d\n", path, dec);
441 matches = glob(path, dec);
442 if(!matches)
443 {
444 printf("No matches\n");
445 }
446 else
447 {
448 for(m = matches; *m; m++)
449 printf("%s\n", *m);
450 putchar('\n');
451 free_all();
452 }
453}
454
455int main()
456{
457#ifndef unix
458 test("e:\\lib\\*.olb", 0);
459 test("e:\\lib", 0);
460 test("e:\\lib\\", 1);
461#else
462 test("/net/acae127/home/bammi/News/comp.sources.misc/*.c", 0);
463 test("/net/acae127/home/bammi/News/comp.sources.misc", 0);
464 test("/net/acae127/home/bammi/News/comp.sources.misc", 1);
465 test("/net/acae127/home/bammi/atari/cross-gcc", 1);
466#endif
467
468 return 0;
469}
470
471#endif
472
473#ifdef TEST_WILDMAT
474#include <stdio.h>
475
476/* Yes, we use gets not fgets. Sue me. */
477extern char *gets();
478
479
480main()
481{
482 char pattern[80];
483 char text[80];
484
485 printf("Wildmat tester. Enter pattern, then strings to test.\n");
486 printf("A blank line gets prompts for a new pattern; a blank pattern\n");
487 printf("exits the program.\n\n");
488
489 for ( ; ; ) {
490 printf("Enter pattern: ");
491 if (gets(pattern) == NULL)
492 break;
493 for ( ; ; ) {
494 printf("Enter text: ");
495 if (gets(text) == NULL)
496 exit(0);
497 if (text[0] == '\0')
498 /* Blank line; go back and get a new pattern. */
499 break;
500 printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO");
501 }
502 }
503
504 exit(0);
505 /* NOTREACHED */
506}
507#endif /* TEST_WILDMAT */