perl 4.0 patch 33: patch #20, continued
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a687059c 1/* NOTE: this is derived from Henry Spencer's regexp code, and should not
2 * be confused with the original package (see point 3 below). Thanks, Henry!
3 */
4
5/* Additional note: this code is very heavily munged from Henry's version
6 * in places. In some spots I've traded clarity for efficiency, so don't
7 * blame Henry for some of the lack of readability.
8 */
9
2b69d0c2 10/* $RCSfile: regexec.c,v $$Revision: 4.0.1.4 $$Date: 92/06/08 15:25:50 $
a687059c 11 *
12 * $Log: regexec.c,v $
2b69d0c2 13 * Revision 4.0.1.4 92/06/08 15:25:50 lwall
14 * patch20: pattern modifiers i and g didn't interact right
15 * patch20: in some cases $` and $' didn't get set by match
16 * patch20: /x{0}/ was wrongly interpreted as /x{0,}/
17 *
f0fcb552 18 * Revision 4.0.1.3 91/11/05 18:23:55 lwall
19 * patch11: prepared for ctype implementations that don't define isascii()
20 * patch11: initial .* in pattern had dependency on value of $*
21 *
9ef589d8 22 * Revision 4.0.1.2 91/06/07 11:50:33 lwall
23 * patch4: new copyright notice
24 * patch4: // wouldn't use previous pattern if it started with a null character
25 *
35c8bce7 26 * Revision 4.0.1.1 91/04/12 09:07:39 lwall
27 * patch1: regexec only allocated space for 9 subexpressions
28 *
fe14fcc3 29 * Revision 4.0 91/03/20 01:39:16 lwall
30 * 4.0 baseline.
a687059c 31 *
32 */
f0fcb552 33/*SUPPRESS 112*/
a687059c 34/*
35 * regcomp and regexec -- regsub and regerror are not used in perl
36 *
37 * Copyright (c) 1986 by University of Toronto.
38 * Written by Henry Spencer. Not derived from licensed software.
39 *
40 * Permission is granted to anyone to use this software for any
41 * purpose on any computer system, and to redistribute it freely,
42 * subject to the following restrictions:
43 *
44 * 1. The author is not responsible for the consequences of use of
45 * this software, no matter how awful, even if they arise
46 * from defects in it.
47 *
48 * 2. The origin of this software must not be misrepresented, either
49 * by explicit claim or by omission.
50 *
51 * 3. Altered versions must be plainly marked as such, and must not
52 * be misrepresented as being the original software.
53 *
54 **** Alterations to Henry's code are...
55 ****
9ef589d8 56 **** Copyright (c) 1991, Larry Wall
a687059c 57 ****
9ef589d8 58 **** You may distribute under the terms of either the GNU General Public
59 **** License or the Artistic License, as specified in the README file.
a687059c 60 *
61 * Beware that some of this code is subtly aware of the way operator
62 * precedence is structured in regular expressions. Serious changes in
63 * regular-expression syntax might require a total rethink.
64 */
65#include "EXTERN.h"
66#include "perl.h"
67#include "regcomp.h"
68
69#ifndef STATIC
70#define STATIC static
71#endif
72
73#ifdef DEBUGGING
74int regnarrate = 0;
75#endif
76
77/*
78 * regexec and friends
79 */
80
81/*
82 * Global work variables for regexec().
83 */
84static char *regprecomp;
85static char *reginput; /* String-input pointer. */
ac58e20f 86static char regprev; /* char before regbol, \n if none */
a687059c 87static char *regbol; /* Beginning of input, for ^ check. */
88static char *regeol; /* End of input, for $ check. */
89static char **regstartp; /* Pointer to startp array. */
90static char **regendp; /* Ditto for endp. */
91static char *reglastparen; /* Similarly for lastparen. */
92static char *regtill;
93
35c8bce7 94static int regmyp_size = 0;
95static char **regmystartp = Null(char**);
96static char **regmyendp = Null(char**);
a687059c 97
98/*
99 * Forwards.
100 */
101STATIC int regtry();
102STATIC int regmatch();
103STATIC int regrepeat();
104
105extern int multiline;
106
107/*
108 - regexec - match a regexp against a string
109 */
110int
111regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
112register regexp *prog;
113char *stringarg;
114register char *strend; /* pointer to null at end of string */
115char *strbeg; /* real beginning of string */
116int minend; /* end of match must be at least minend after stringarg */
117STR *screamer;
118int safebase; /* no need to remember string in subbase */
119{
120 register char *s;
121 register int i;
122 register char *c;
123 register char *string = stringarg;
124 register int tmp;
125 int minlen = 0; /* must match at least this many chars */
126 int dontbother = 0; /* how many characters not to try at end */
a687059c 127
128 /* Be paranoid... */
129 if (prog == NULL || string == NULL) {
130 fatal("NULL regexp parameter");
131 return(0);
132 }
133
ac58e20f 134 if (string == strbeg) /* is ^ valid at stringarg? */
135 regprev = '\n';
0a12ae7d 136 else {
ac58e20f 137 regprev = stringarg[-1];
0a12ae7d 138 if (!multiline && regprev == '\n')
139 regprev = '\0'; /* force ^ to NOT match */
140 }
a687059c 141 regprecomp = prog->precomp;
142 /* Check validity of program. */
143 if (UCHARAT(prog->program) != MAGIC) {
144 FAIL("corrupted regexp program");
145 }
146
147 if (prog->do_folding) {
a687059c 148 i = strend - string;
149 New(1101,c,i+1,char);
2b69d0c2 150 Copy(string, c, i+1, char);
a687059c 151 string = c;
152 strend = string + i;
153 for (s = string; s < strend; s++)
ac58e20f 154 if (isUPPER(*s))
a687059c 155 *s = tolower(*s);
156 }
157
158 /* If there is a "must appear" string, look for it. */
159 s = string;
34de22dd 160 if (prog->regmust != Nullstr &&
9ef589d8 161 (!(prog->reganch & ROPT_ANCH)
162 || (multiline && prog->regback >= 0)) ) {
ac58e20f 163 if (stringarg == strbeg && screamer) {
a687059c 164 if (screamfirst[prog->regmust->str_rare] >= 0)
165 s = screaminstr(screamer,prog->regmust);
166 else
167 s = Nullch;
168 }
169#ifndef lint
170 else
171 s = fbminstr((unsigned char*)s, (unsigned char*)strend,
172 prog->regmust);
173#endif
174 if (!s) {
175 ++prog->regmust->str_u.str_useful; /* hooray */
176 goto phooey; /* not present */
177 }
178 else if (prog->regback >= 0) {
179 s -= prog->regback;
180 if (s < string)
181 s = string;
182 minlen = prog->regback + prog->regmust->str_cur;
183 }
184 else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
185 str_free(prog->regmust);
186 prog->regmust = Nullstr; /* disable regmust */
187 s = string;
188 }
189 else {
190 s = string;
191 minlen = prog->regmust->str_cur;
192 }
193 }
194
195 /* Mark beginning of line for ^ . */
ac58e20f 196 regbol = string;
a687059c 197
198 /* Mark end of line for $ (and such) */
199 regeol = strend;
200
201 /* see how far we have to get to not match where we matched before */
202 regtill = string+minend;
203
35c8bce7 204 /* Allocate our backreference arrays */
205 if ( regmyp_size < prog->nparens + 1 ) {
206 /* Allocate or enlarge the arrays */
207 regmyp_size = prog->nparens + 1;
208 if ( regmyp_size < 10 ) regmyp_size = 10; /* minimum */
209 if ( regmystartp ) {
210 /* reallocate larger */
211 Renew(regmystartp,regmyp_size,char*);
212 Renew(regmyendp, regmyp_size,char*);
213 }
214 else {
215 /* Initial allocation */
216 New(1102,regmystartp,regmyp_size,char*);
217 New(1102,regmyendp, regmyp_size,char*);
218 }
219
220 }
221
a687059c 222 /* Simplest case: anchored match need be tried only once. */
223 /* [unless multiline is set] */
9ef589d8 224 if (prog->reganch & ROPT_ANCH) {
a687059c 225 if (regtry(prog, string))
226 goto got_it;
f0fcb552 227 else if (multiline || (prog->reganch & ROPT_IMPLICIT)) {
a687059c 228 if (minlen)
229 dontbother = minlen - 1;
230 strend -= dontbother;
231 /* for multiline we only have to try after newlines */
232 if (s > string)
233 s--;
00bf170e 234 while (s < strend) {
235 if (*s++ == '\n') {
236 if (s < strend && regtry(prog, s))
a687059c 237 goto got_it;
238 }
239 }
240 }
241 goto phooey;
242 }
243
244 /* Messy cases: unanchored match. */
245 if (prog->regstart) {
9ef589d8 246 if (prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */
00bf170e 247 /* it must be a one character string */
248 i = prog->regstart->str_ptr[0];
249 while (s < strend) {
250 if (*s == i) {
251 if (regtry(prog, s))
252 goto got_it;
253 s++;
254 while (s < strend && *s == i)
255 s++;
256 }
257 s++;
258 }
259 }
260 else if (prog->regstart->str_pok == 3) {
261 /* We know what string it must start with. */
a687059c 262#ifndef lint
263 while ((s = fbminstr((unsigned char*)s,
264 (unsigned char*)strend, prog->regstart)) != NULL)
265#else
266 while (s = Nullch)
267#endif
268 {
269 if (regtry(prog, s))
270 goto got_it;
271 s++;
272 }
273 }
274 else {
275 c = prog->regstart->str_ptr;
276 while ((s = ninstr(s, strend,
277 c, c + prog->regstart->str_cur )) != NULL) {
278 if (regtry(prog, s))
279 goto got_it;
280 s++;
281 }
282 }
283 goto phooey;
284 }
f0fcb552 285 /*SUPPRESS 560*/
a687059c 286 if (c = prog->regstclass) {
9ef589d8 287 int doevery = (prog->reganch & ROPT_SKIP) == 0;
00bf170e 288
a687059c 289 if (minlen)
290 dontbother = minlen - 1;
291 strend -= dontbother; /* don't bother with what can't match */
00bf170e 292 tmp = 1;
a687059c 293 /* We know what class it must start with. */
294 switch (OP(c)) {
00bf170e 295 case ANYOF:
a687059c 296 c = OPERAND(c);
297 while (s < strend) {
ac58e20f 298 i = UCHARAT(s);
00bf170e 299 if (!(c[i >> 3] & (1 << (i&7)))) {
300 if (tmp && regtry(prog, s))
a687059c 301 goto got_it;
00bf170e 302 else
303 tmp = doevery;
304 }
305 else
306 tmp = 1;
a687059c 307 s++;
308 }
309 break;
310 case BOUND:
311 if (minlen)
312 dontbother++,strend--;
313 if (s != string) {
314 i = s[-1];
ac58e20f 315 tmp = isALNUM(i);
a687059c 316 }
317 else
ac58e20f 318 tmp = isALNUM(regprev); /* assume not alphanumeric */
a687059c 319 while (s < strend) {
320 i = *s;
ac58e20f 321 if (tmp != isALNUM(i)) {
a687059c 322 tmp = !tmp;
323 if (regtry(prog, s))
324 goto got_it;
325 }
326 s++;
327 }
ae986130 328 if ((minlen || tmp) && regtry(prog,s))
a687059c 329 goto got_it;
330 break;
331 case NBOUND:
332 if (minlen)
333 dontbother++,strend--;
334 if (s != string) {
335 i = s[-1];
ac58e20f 336 tmp = isALNUM(i);
a687059c 337 }
338 else
ac58e20f 339 tmp = isALNUM(regprev); /* assume not alphanumeric */
a687059c 340 while (s < strend) {
341 i = *s;
ac58e20f 342 if (tmp != isALNUM(i))
a687059c 343 tmp = !tmp;
344 else if (regtry(prog, s))
345 goto got_it;
346 s++;
347 }
ae986130 348 if ((minlen || !tmp) && regtry(prog,s))
a687059c 349 goto got_it;
350 break;
351 case ALNUM:
352 while (s < strend) {
353 i = *s;
00bf170e 354 if (isALNUM(i)) {
355 if (tmp && regtry(prog, s))
a687059c 356 goto got_it;
00bf170e 357 else
358 tmp = doevery;
359 }
360 else
361 tmp = 1;
a687059c 362 s++;
363 }
364 break;
365 case NALNUM:
366 while (s < strend) {
367 i = *s;
00bf170e 368 if (!isALNUM(i)) {
369 if (tmp && regtry(prog, s))
a687059c 370 goto got_it;
00bf170e 371 else
372 tmp = doevery;
373 }
374 else
375 tmp = 1;
a687059c 376 s++;
377 }
378 break;
379 case SPACE:
380 while (s < strend) {
00bf170e 381 if (isSPACE(*s)) {
382 if (tmp && regtry(prog, s))
a687059c 383 goto got_it;
00bf170e 384 else
385 tmp = doevery;
386 }
387 else
388 tmp = 1;
a687059c 389 s++;
390 }
391 break;
392 case NSPACE:
393 while (s < strend) {
00bf170e 394 if (!isSPACE(*s)) {
395 if (tmp && regtry(prog, s))
a687059c 396 goto got_it;
00bf170e 397 else
398 tmp = doevery;
399 }
400 else
401 tmp = 1;
a687059c 402 s++;
403 }
404 break;
405 case DIGIT:
406 while (s < strend) {
00bf170e 407 if (isDIGIT(*s)) {
408 if (tmp && regtry(prog, s))
a687059c 409 goto got_it;
00bf170e 410 else
411 tmp = doevery;
412 }
413 else
414 tmp = 1;
a687059c 415 s++;
416 }
417 break;
418 case NDIGIT:
419 while (s < strend) {
00bf170e 420 if (!isDIGIT(*s)) {
421 if (tmp && regtry(prog, s))
a687059c 422 goto got_it;
00bf170e 423 else
424 tmp = doevery;
425 }
426 else
427 tmp = 1;
a687059c 428 s++;
429 }
430 break;
431 }
432 }
433 else {
663a0e37 434 if (minlen)
435 dontbother = minlen - 1;
a687059c 436 strend -= dontbother;
437 /* We don't know much -- general case. */
438 do {
439 if (regtry(prog, s))
440 goto got_it;
441 } while (s++ < strend);
442 }
443
444 /* Failure. */
445 goto phooey;
446
447 got_it:
2b69d0c2 448 prog->subbeg = strbeg;
449 prog->subend = strend;
a687059c 450 if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
451 strend += dontbother; /* uncheat */
452 if (safebase) /* no need for $digit later */
453 s = strbeg;
454 else if (strbeg != prog->subbase) {
455 i = strend - string + (stringarg - strbeg);
456 s = nsavestr(strbeg,i); /* so $digit will work later */
457 if (prog->subbase)
458 Safefree(prog->subbase);
9ef589d8 459 prog->subbeg = prog->subbase = s;
00bf170e 460 prog->subend = s+i;
a687059c 461 }
2b69d0c2 462 else {
463 i = strend - string + (stringarg - strbeg);
464 prog->subbeg = s = prog->subbase;
465 prog->subend = s+i;
466 }
a687059c 467 s += (stringarg - strbeg);
468 for (i = 0; i <= prog->nparens; i++) {
469 if (prog->endp[i]) {
470 prog->startp[i] = s + (prog->startp[i] - string);
471 prog->endp[i] = s + (prog->endp[i] - string);
472 }
473 }
474 if (prog->do_folding)
475 Safefree(string);
476 }
477 return(1);
478
479 phooey:
480 if (prog->do_folding)
481 Safefree(string);
482 return(0);
483}
484
485/*
486 - regtry - try match at specific point
487 */
488static int /* 0 failure, 1 success */
489regtry(prog, string)
490regexp *prog;
491char *string;
492{
493 register int i;
494 register char **sp;
495 register char **ep;
496
497 reginput = string;
498 regstartp = prog->startp;
499 regendp = prog->endp;
500 reglastparen = &prog->lastparen;
501 prog->lastparen = 0;
502
503 sp = prog->startp;
504 ep = prog->endp;
505 if (prog->nparens) {
fe14fcc3 506 for (i = prog->nparens; i >= 0; i--) {
a687059c 507 *sp++ = NULL;
508 *ep++ = NULL;
509 }
510 }
511 if (regmatch(prog->program + 1) && reginput >= regtill) {
512 prog->startp[0] = string;
513 prog->endp[0] = reginput;
514 return(1);
515 } else
516 return(0);
517}
518
519/*
520 - regmatch - main matching routine
521 *
522 * Conceptually the strategy is simple: check to see whether the current
523 * node matches, call self recursively to see whether the rest matches,
524 * and then act accordingly. In practice we make some effort to avoid
525 * recursion, in particular by going through "ordinary" nodes (that don't
526 * need to know whether the rest of the match failed) by a loop instead of
527 * by recursion.
528 */
529/* [lwall] I've hoisted the register declarations to the outer block in order to
530 * maybe save a little bit of pushing and popping on the stack. It also takes
531 * advantage of machines that use a register save mask on subroutine entry.
532 */
533static int /* 0 failure, 1 success */
534regmatch(prog)
535char *prog;
536{
537 register char *scan; /* Current node. */
538 char *next; /* Next node. */
539 register int nextchar;
540 register int n; /* no or next */
541 register int ln; /* len or last */
542 register char *s; /* operand or save */
543 register char *locinput = reginput;
544
545 nextchar = *locinput;
546 scan = prog;
547#ifdef DEBUGGING
548 if (scan != NULL && regnarrate)
549 fprintf(stderr, "%s(\n", regprop(scan));
550#endif
551 while (scan != NULL) {
552#ifdef DEBUGGING
553 if (regnarrate)
554 fprintf(stderr, "%s...\n", regprop(scan));
555#endif
556
557#ifdef REGALIGN
558 next = scan + NEXT(scan);
559 if (next == scan)
560 next = NULL;
561#else
562 next = regnext(scan);
563#endif
564
565 switch (OP(scan)) {
566 case BOL:
ac58e20f 567 if (locinput == regbol ? regprev == '\n' :
a687059c 568 ((nextchar || locinput < regeol) &&
569 locinput[-1] == '\n') )
570 {
fe14fcc3 571 /* regtill = regbol; */
a687059c 572 break;
573 }
574 return(0);
575 case EOL:
576 if ((nextchar || locinput < regeol) && nextchar != '\n')
577 return(0);
00bf170e 578 if (!multiline && regeol - locinput > 1)
579 return 0;
fe14fcc3 580 /* regtill = regbol; */
a687059c 581 break;
582 case ANY:
583 if ((nextchar == '\0' && locinput >= regeol) ||
584 nextchar == '\n')
585 return(0);
586 nextchar = *++locinput;
587 break;
588 case EXACTLY:
589 s = OPERAND(scan);
590 ln = *s++;
591 /* Inline the first character, for speed. */
592 if (*s != nextchar)
593 return(0);
00bf170e 594 if (regeol - locinput < ln)
a687059c 595 return 0;
596 if (ln > 1 && bcmp(s, locinput, ln) != 0)
597 return(0);
598 locinput += ln;
599 nextchar = *locinput;
600 break;
601 case ANYOF:
a687059c 602 s = OPERAND(scan);
603 if (nextchar < 0)
604 nextchar = UCHARAT(locinput);
605 if (s[nextchar >> 3] & (1 << (nextchar&7)))
606 return(0);
34de22dd 607 if (!nextchar && locinput >= regeol)
a687059c 608 return 0;
34de22dd 609 nextchar = *++locinput;
a687059c 610 break;
611 case ALNUM:
612 if (!nextchar)
613 return(0);
ac58e20f 614 if (!isALNUM(nextchar))
a687059c 615 return(0);
616 nextchar = *++locinput;
617 break;
618 case NALNUM:
619 if (!nextchar && locinput >= regeol)
620 return(0);
ac58e20f 621 if (isALNUM(nextchar))
a687059c 622 return(0);
623 nextchar = *++locinput;
624 break;
625 case NBOUND:
626 case BOUND:
627 if (locinput == regbol) /* was last char in word? */
ac58e20f 628 ln = isALNUM(regprev);
a687059c 629 else
ac58e20f 630 ln = isALNUM(locinput[-1]);
631 n = isALNUM(nextchar); /* is next char in word? */
a687059c 632 if ((ln == n) == (OP(scan) == BOUND))
633 return(0);
634 break;
635 case SPACE:
636 if (!nextchar && locinput >= regeol)
637 return(0);
ac58e20f 638 if (!isSPACE(nextchar))
a687059c 639 return(0);
640 nextchar = *++locinput;
641 break;
642 case NSPACE:
643 if (!nextchar)
644 return(0);
ac58e20f 645 if (isSPACE(nextchar))
a687059c 646 return(0);
647 nextchar = *++locinput;
648 break;
649 case DIGIT:
ac58e20f 650 if (!isDIGIT(nextchar))
a687059c 651 return(0);
652 nextchar = *++locinput;
653 break;
654 case NDIGIT:
655 if (!nextchar && locinput >= regeol)
656 return(0);
ac58e20f 657 if (isDIGIT(nextchar))
a687059c 658 return(0);
659 nextchar = *++locinput;
660 break;
661 case REF:
fe14fcc3 662 n = ARG1(scan); /* which paren pair */
a687059c 663 s = regmystartp[n];
664 if (!s)
665 return(0);
666 if (!regmyendp[n])
667 return(0);
668 if (s == regmyendp[n])
669 break;
670 /* Inline the first character, for speed. */
671 if (*s != nextchar)
672 return(0);
673 ln = regmyendp[n] - s;
674 if (locinput + ln > regeol)
675 return 0;
676 if (ln > 1 && bcmp(s, locinput, ln) != 0)
677 return(0);
678 locinput += ln;
679 nextchar = *locinput;
680 break;
681
682 case NOTHING:
683 break;
684 case BACK:
685 break;
fe14fcc3 686 case OPEN:
687 n = ARG1(scan); /* which paren pair */
a687059c 688 reginput = locinput;
689
690 regmystartp[n] = locinput; /* for REF */
691 if (regmatch(next)) {
692 /*
693 * Don't set startp if some later
694 * invocation of the same parentheses
695 * already has.
696 */
697 if (regstartp[n] == NULL)
698 regstartp[n] = locinput;
699 return(1);
700 } else
701 return(0);
702 /* NOTREACHED */
fe14fcc3 703 case CLOSE: {
704 n = ARG1(scan); /* which paren pair */
a687059c 705 reginput = locinput;
706
707 regmyendp[n] = locinput; /* for REF */
708 if (regmatch(next)) {
709 /*
710 * Don't set endp if some later
711 * invocation of the same parentheses
712 * already has.
713 */
714 if (regendp[n] == NULL) {
715 regendp[n] = locinput;
716 if (n > *reglastparen)
717 *reglastparen = n;
718 }
719 return(1);
720 } else
721 return(0);
722 }
723 /*NOTREACHED*/
724 case BRANCH: {
725 if (OP(next) != BRANCH) /* No choice. */
726 next = NEXTOPER(scan); /* Avoid recursion. */
727 else {
728 do {
729 reginput = locinput;
730 if (regmatch(NEXTOPER(scan)))
731 return(1);
732#ifdef REGALIGN
f0fcb552 733 /*SUPPRESS 560*/
a687059c 734 if (n = NEXT(scan))
735 scan += n;
736 else
737 scan = NULL;
738#else
739 scan = regnext(scan);
740#endif
741 } while (scan != NULL && OP(scan) == BRANCH);
742 return(0);
743 /* NOTREACHED */
744 }
745 }
746 break;
00bf170e 747 case CURLY:
748 ln = ARG1(scan); /* min to match */
749 n = ARG2(scan); /* max to match */
750 scan = NEXTOPER(scan) + 4;
751 goto repeat;
a687059c 752 case STAR:
00bf170e 753 ln = 0;
2b69d0c2 754 n = 32767;
00bf170e 755 scan = NEXTOPER(scan);
756 goto repeat;
a687059c 757 case PLUS:
758 /*
759 * Lookahead to avoid useless match attempts
760 * when we know what character comes next.
761 */
00bf170e 762 ln = 1;
2b69d0c2 763 n = 32767;
00bf170e 764 scan = NEXTOPER(scan);
765 repeat:
a687059c 766 if (OP(next) == EXACTLY)
767 nextchar = *(OPERAND(next)+1);
768 else
769 nextchar = -1000;
a687059c 770 reginput = locinput;
00bf170e 771 n = regrepeat(scan, n);
0a12ae7d 772 if (!multiline && OP(next) == EOL && ln < n)
00bf170e 773 ln = n; /* why back off? */
a687059c 774 while (n >= ln) {
775 /* If it could work, try it. */
776 if (nextchar == -1000 || *reginput == nextchar)
777 if (regmatch(next))
778 return(1);
779 /* Couldn't or didn't -- back up. */
780 n--;
781 reginput = locinput + n;
782 }
783 return(0);
784 case END:
785 reginput = locinput; /* put where regtry can find it */
786 return(1); /* Success! */
787 default:
788 printf("%x %d\n",scan,scan[1]);
789 FAIL("regexp memory corruption");
790 }
791
792 scan = next;
793 }
794
795 /*
796 * We get here only if there's trouble -- normally "case END" is
797 * the terminating point.
798 */
799 FAIL("corrupted regexp pointers");
800 /*NOTREACHED*/
801#ifdef lint
802 return 0;
803#endif
804}
805
806/*
807 - regrepeat - repeatedly match something simple, report how many
808 */
809/*
810 * [This routine now assumes that it will only match on things of length 1.
811 * That was true before, but now we assume scan - reginput is the count,
812 * rather than incrementing count on every character.]
813 */
814static int
00bf170e 815regrepeat(p, max)
a687059c 816char *p;
00bf170e 817int max;
a687059c 818{
819 register char *scan;
820 register char *opnd;
821 register int c;
822 register char *loceol = regeol;
823
824 scan = reginput;
2b69d0c2 825 if (max != 32767 && max < loceol - scan)
00bf170e 826 loceol = scan + max;
a687059c 827 opnd = OPERAND(p);
828 switch (OP(p)) {
829 case ANY:
830 while (scan < loceol && *scan != '\n')
831 scan++;
832 break;
833 case EXACTLY: /* length of string is 1 */
834 opnd++;
835 while (scan < loceol && *opnd == *scan)
836 scan++;
837 break;
838 case ANYOF:
a687059c 839 c = UCHARAT(scan);
840 while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
841 scan++;
842 c = UCHARAT(scan);
843 }
844 break;
845 case ALNUM:
0a12ae7d 846 while (scan < loceol && isALNUM(*scan))
a687059c 847 scan++;
848 break;
849 case NALNUM:
ac58e20f 850 while (scan < loceol && !isALNUM(*scan))
a687059c 851 scan++;
852 break;
853 case SPACE:
ac58e20f 854 while (scan < loceol && isSPACE(*scan))
a687059c 855 scan++;
856 break;
857 case NSPACE:
ac58e20f 858 while (scan < loceol && !isSPACE(*scan))
a687059c 859 scan++;
860 break;
861 case DIGIT:
0a12ae7d 862 while (scan < loceol && isDIGIT(*scan))
a687059c 863 scan++;
864 break;
865 case NDIGIT:
ac58e20f 866 while (scan < loceol && !isDIGIT(*scan))
a687059c 867 scan++;
868 break;
869 default: /* Oh dear. Called inappropriately. */
870 FAIL("internal regexp foulup");
871 /* NOTREACHED */
872 }
873
874 c = scan - reginput;
875 reginput = scan;
876
877 return(c);
878}
879
880/*
881 - regnext - dig the "next" pointer out of a node
882 *
883 * [Note, when REGALIGN is defined there are two places in regmatch()
884 * that bypass this code for speed.]
885 */
886char *
887regnext(p)
888register char *p;
889{
890 register int offset;
891
892 if (p == &regdummy)
893 return(NULL);
894
895 offset = NEXT(p);
896 if (offset == 0)
897 return(NULL);
898
899#ifdef REGALIGN
900 return(p+offset);
901#else
902 if (OP(p) == BACK)
903 return(p-offset);
904 else
905 return(p+offset);
906#endif
907}