1 /* $Header: search.c,v 1.0 87/12/18 13:05:59 root Exp $
4 * Revision 1.0 87/12/18 13:05:59 root
9 /* string search routines */
28 #define BMAPSIZ (127 / BITSPERBYTE + 1)
30 #define CHAR 0 /* a normal character */
31 #define ANY 1 /* . matches anything except newline */
32 #define CCL 2 /* [..] character class */
33 #define NCCL 3 /* [^..]negated character class */
34 #define BEG 4 /* ^ beginning of a line */
35 #define END 5 /* $ end of a line */
36 #define LPAR 6 /* ( begin sub-match */
37 #define RPAR 7 /* ) end sub-match */
38 #define REF 8 /* \N backreference to the Nth submatch */
39 #define WORD 9 /* \w matches alphanumeric character */
40 #define NWORD 10 /* \W matches non-alphanumeric character */
41 #define WBOUND 11 /* \b matches word boundary */
42 #define NWBOUND 12 /* \B matches non-boundary */
43 #define FINIS 13 /* the end of the pattern */
49 #define MINZERO 16 /* minimum is 0, not 1 */
50 #define MAXINF 32 /* maximum is infinity, not 1 */
53 typedef char TRANSTABLE[ASCSIZ];
55 static TRANSTABLE trans = {
56 0000,0001,0002,0003,0004,0005,0006,0007,
57 0010,0011,0012,0013,0014,0015,0016,0017,
58 0020,0021,0022,0023,0024,0025,0026,0027,
59 0030,0031,0032,0033,0034,0035,0036,0037,
60 0040,0041,0042,0043,0044,0045,0046,0047,
61 0050,0051,0052,0053,0054,0055,0056,0057,
62 0060,0061,0062,0063,0064,0065,0066,0067,
63 0070,0071,0072,0073,0074,0075,0076,0077,
64 0100,0101,0102,0103,0104,0105,0106,0107,
65 0110,0111,0112,0113,0114,0115,0116,0117,
66 0120,0121,0122,0123,0124,0125,0126,0127,
67 0130,0131,0132,0133,0134,0135,0136,0137,
68 0140,0141,0142,0143,0144,0145,0146,0147,
69 0150,0151,0152,0153,0154,0155,0156,0157,
70 0160,0161,0162,0163,0164,0165,0166,0167,
71 0170,0171,0172,0173,0174,0175,0176,0177,
73 static bool folding = FALSE;
80 static char *FirstCharacter;
81 static char *matchend;
82 static char *matchtill;
90 for (i = 0; i < ASCSIZ; i++)
99 register COMPEX *compex;
101 /* the following must start off zeroed */
103 compex->precomp = Nullch;
105 compex->subbase = Nullch;
111 register COMPEX *compex;
113 if (compex->complen) {
114 safefree(compex->compbuf);
117 if (compex->subbase) {
118 safefree(compex->subbase);
119 compex->subbase = Nullch;
124 static char *gbr_str = Nullch;
125 static int gbr_siz = 0;
129 register COMPEX *compex;
132 int length = compex->subend[n] - compex->subbeg[n];
135 (!compex->numsubs || n > compex->numsubs || !compex->subend[n] || length<0))
137 growstr(&gbr_str, &gbr_siz, length+1);
138 safecpy(gbr_str, compex->subbeg[n], length+1);
148 if (which != folding) {
150 for (i = 'A'; i <= 'Z'; i++)
151 trans[i] = tolower(i);
154 for (i = 'A'; i <= 'Z'; i++)
161 /* Compile the regular expression into internal form */
164 compile(compex, sp, regex, fold)
165 register COMPEX *compex;
175 char **alt = compex->alternatives;
176 char *retmes = "Badly formed search string";
178 case_fold(compex->do_folding = fold);
180 safefree(compex->precomp);
181 compex->precomp = savestr(sp);
182 if (!compex->complen) {
183 compex->compbuf = safemalloc(84);
184 compex->complen = 80;
186 cp = compex->compbuf; /* point at compiled buffer */
187 *alt++ = cp; /* first alternative starts here */
188 parenp = paren; /* first paren goes here */
189 if (*sp == 0) { /* nothing to compile? */
191 if (*cp == 0) /* nothing there yet? */
192 return "Null search string";
195 return Nullch; /* just keep old expression */
197 compex->numsubs = 0; /* no parens yet */
200 if (cp - compex->compbuf >= compex->complen) {
201 char *ocompbuf = compex->compbuf;
204 if (ocompbuf != compex->compbuf) { /* adjust pointers? */
207 cp = compex->compbuf + (cp - ocompbuf);
209 lastcp = compex->compbuf + (lastcp - ocompbuf);
210 for (tmpalt = compex->alternatives; tmpalt < alt; tmpalt++)
212 *tmpalt = compex->compbuf + (*tmpalt - ocompbuf);
215 c = *sp++; /* get next char of pattern */
216 if (c == 0) { /* end of pattern? */
217 if (parenp != paren) { /* balanced parentheses? */
219 retmes = "Missing right parenthesis";
223 *cp++ = FINIS; /* append a stopper */
224 *alt++ = 0; /* terminate alternative list */
226 compex->complen = cp - compex->compbuf + 1;
227 compex->compbuf = saferealloc(compex->compbuf,compex->complen+4); */
228 return Nullch; /* return success */
230 if (c != '*' && c != '?' && c != '+')
232 if (!regex) { /* just a normal search string? */
233 *cp++ = CHAR; /* everything is a normal char */
236 else /* it is a regular expression */
249 case '[': { /* character class */
252 if (cp - compex->compbuf >= compex->complen - BMAPSIZ) {
253 char *ocompbuf = compex->compbuf;
255 grow_comp(compex); /* reserve bitmap */
256 if (ocompbuf != compex->compbuf) {/* adjust pointers? */
259 cp = compex->compbuf + (cp - ocompbuf);
261 lastcp = compex->compbuf + (lastcp - ocompbuf);
262 for (tmpalt = compex->alternatives; tmpalt < alt;
266 compex->compbuf + (*tmpalt - ocompbuf);
269 for (i = BMAPSIZ; i; --i)
272 if ((c = *sp++) == '^') {
274 *cp++ = NCCL; /* negated */
277 *cp++ = CCL; /* normal */
279 i = 0; /* remember oldchar */
283 retmes = "Missing ]";
287 if (c == '\\' && *sp) {
292 case '0': case '1': case '2': case '3':
293 case '4': case '5': case '6': case '7':
295 if (index("01234567",*sp)) {
299 if (index("01234567",*sp)) {
326 if (*sp == '-' && *(++sp))
331 cp[c / BITSPERBYTE] |= 1 << (c % BITSPERBYTE);
332 if (fold && isalpha(c))
333 cp[(c ^ 32) / BITSPERBYTE] |=
334 1 << ((c ^ 32) % BITSPERBYTE);
335 /* set the other bit too */
338 } while ((c = *sp++) != ']');
346 if (cp != compex->compbuf && cp[-1] != FINIS)
357 if (*sp && *sp != '|')
362 case '*': case '?': case '+':
364 (*lastcp & (MINZERO|MAXINF)) ||
379 if (compex->numsubs >= MAXSUB) {
381 retmes = "Too many parens";
385 *parenp++ = ++compex->numsubs;
387 *cp++ = compex->numsubs;
390 if (parenp <= paren) {
392 retmes = "Unmatched right paren";
402 retmes = "No | in subpattern"; /* Sigh! */
407 if (alt - compex->alternatives >= MAXALT) {
409 retmes = "Too many alternatives";
415 case '\\': /* backslashed thingie */
417 case '0': case '1': case '2': case '3': case '4':
418 case '5': case '6': case '7': case '8': case '9':
459 compex->compbuf[0] = 0;
466 register COMPEX *compex;
468 compex->complen += 80;
469 compex->compbuf = saferealloc(compex->compbuf, (MEM_SIZE)compex->complen + 4);
473 execute(compex, addr, beginning, minend)
474 register COMPEX *compex;
479 register char *p1 = addr;
480 register char *trt = trans;
487 if (compex->numsubs) { /* any submatches? */
488 for (c = 0; c <= compex->numsubs; c++)
489 compex->subbeg[c] = compex->subend[c] = Nullch;
491 case_fold(compex->do_folding); /* make sure table is correct */
493 FirstCharacter = p1; /* for ^ tests */
495 if (multiline || compex->alternatives[1] || compex->compbuf[0] != BEG)
496 FirstCharacter = Nullch;
498 return Nullch; /* can't match */
501 matchtill = addr + minend;
503 if (compex->compbuf[0] == CHAR && !compex->alternatives[1]) {
504 if (compex->do_folding) {
505 c = compex->compbuf[1]; /* fast check for first character */
507 if (trt[*p1] == c && try(compex, p1, compex->compbuf))
509 } while (*p1++ && !err);
512 c = compex->compbuf[1]; /* faster check for first character */
513 if (compex->compbuf[2] == CHAR)
514 c2 = compex->compbuf[3];
519 while (scr = *p1++, scr && scr != c) ;
522 if (c2 && *p1 != c2) /* and maybe even second character */
524 if (try(compex, p1, compex->compbuf+2)) {
532 else { /* normal algorithm */
534 register char **alt = compex->alternatives;
536 if (try(compex, p1, *alt++))
539 } while (*p1++ && err < FATAL);
544 if (compex->numsubs) { /* any parens? */
545 trt = savestr(addr); /* in case addr is not static */
547 safefree(compex->subbase); /* (may be freeing addr!) */
548 compex->subbase = trt;
549 scr = compex->subbase - addr;
552 for (c = 0; c <= compex->numsubs; c++) {
553 if (compex->subend[c]) {
554 compex->subbeg[c] += scr;
555 compex->subend[c] += scr;
559 compex->subend[0] = matchend;
560 compex->subbeg[0] = p1;
570 register char *basesp;
571 register char *trt = trans;
573 register int backlen;
576 while (*sp || (*cp & MAXINF) || *cp == BEG || *cp == RPAR ||
577 *cp == WBOUND || *cp == NWBOUND) {
578 switch ((code = *cp++) & CODEMASK) {
584 while (*sp && trt[*sp] == i) sp++;
586 if (*sp && trt[*sp] == i) sp++;
591 while (sp > basesp) {
592 if (try(compex, sp, cp))
603 while (*sp && *sp != '\n') sp++;
605 if (*sp && *sp != '\n') sp++;
612 while (*sp && cclass(cp, *sp, 1)) sp++;
614 if (*sp && cclass(cp, *sp, 1)) sp++;
622 while (*sp && cclass(cp, *sp, 0)) sp++;
624 if (*sp && cclass(cp, *sp, 0)) sp++;
630 if (!*sp || *sp == '\n') {
637 if (sp == FirstCharacter || (
638 *sp && sp[-1] == '\n') ) {
642 if (!multiline) /* no point in advancing more */
649 while (*sp && isalnum(*sp)) sp++;
651 if (*sp && isalnum(*sp)) sp++;
658 while (*sp && !isalnum(*sp)) sp++;
660 if (*sp && !isalnum(*sp)) sp++;
665 if ((sp == FirstCharacter || !isalnum(sp[-1])) !=
666 (!*sp || !isalnum(*sp)) )
671 if ((sp == FirstCharacter || !isalnum(sp[-1])) ==
672 (!*sp || !isalnum(*sp)))
680 compex->subbeg[*cp++] = sp;
685 compex->subend[i] = sp;
686 compex->lastparen = i;
690 if (compex->subend[i = *cp++] == 0) {
691 fputs("Bad subpattern reference\n",stdout) FLUSH;
696 backlen = compex->subend[i] - compex->subbeg[i];
698 while (*sp && subpat(compex, i, sp)) sp += backlen;
700 if (*sp && subpat(compex, i, sp)) sp += backlen;
704 fputs("Botched pattern compilation\n",stdout) FLUSH;
709 if (*cp == FINIS || *cp == END) {
711 if (matchend == Nullch || sp > matchend)
713 return matchend >= matchtill;
721 subpat(compex, i, sp)
722 register COMPEX *compex;
728 bp = compex->subbeg[i];
729 while (*sp && *bp == *sp) {
732 if (bp >= compex->subend[i])
745 if (set[c >> 3] & 1 << (c & 7))
747 if (set[c / BITSPERBYTE] & 1 << (c % BITSPERBYTE))