61439ea98066999ce38368213403d735d7d1341d
[p5sagit/p5-mst-13.2.git] / regexec.c
/* NOTE: this is derived from Henry Spencer's regexp code, and should not be
 * confused with the original package (see point 3 below).  Thanks, Henry!
 */
4
5 /* Additional note: this code is very heavily munged from Henry's version
6  * in places.  In some spots I've traded clarity for efficiency, so don't
7  * blame Henry for some of the lack of readability.
8  */
9
10 /* $Header: regexec.c,v 3.0.1.4 90/08/09 05:12:03 lwall Locked $
11  *
12  * $Log:        regexec.c,v $
13  * Revision 3.0.1.4  90/08/09  05:12:03  lwall
14  * patch19: sped up /x+y/ patterns greatly by not retrying on every x
15  * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
16  * patch19: sped up {m,n} on simple items
17  * patch19: $' broke on embedded nulls
18  * patch19: $ will now only match at end of string if $* == 0
19  * 
20  * Revision 3.0.1.3  90/02/28  18:14:39  lwall
21  * patch9: /[\200-\377]/ didn't work on machines with signed chars
22  * patch9: \d, \w, and \s could misfire on characters with high bit set
23  * patch9: /\bfoo/i didn't work
24  * 
25  * Revision 3.0.1.2  89/12/21  20:16:27  lwall
26  * patch7: certain patterns didn't match correctly at end of string
27  * 
28  * Revision 3.0.1.1  89/11/11  04:52:04  lwall
29  * patch2: /\b$foo/ didn't work
30  * 
31  * Revision 3.0  89/10/18  15:22:53  lwall
32  * 3.0 baseline
33  * 
34  */
35
36 /*
37  * regcomp and regexec -- regsub and regerror are not used in perl
38  *
39  *      Copyright (c) 1986 by University of Toronto.
40  *      Written by Henry Spencer.  Not derived from licensed software.
41  *
42  *      Permission is granted to anyone to use this software for any
43  *      purpose on any computer system, and to redistribute it freely,
44  *      subject to the following restrictions:
45  *
46  *      1. The author is not responsible for the consequences of use of
47  *              this software, no matter how awful, even if they arise
48  *              from defects in it.
49  *
50  *      2. The origin of this software must not be misrepresented, either
51  *              by explicit claim or by omission.
52  *
53  *      3. Altered versions must be plainly marked as such, and must not
54  *              be misrepresented as being the original software.
55  *
56  ****    Alterations to Henry's code are...
57  ****
58  ****    Copyright (c) 1989, Larry Wall
59  ****
60  ****    You may distribute under the terms of the GNU General Public License
61  ****    as specified in the README file that comes with the perl 3.0 kit.
62  *
63  * Beware that some of this code is subtly aware of the way operator
64  * precedence is structured in regular expressions.  Serious changes in
65  * regular-expression syntax might require a total rethink.
66  */
67 #include "EXTERN.h"
68 #include "perl.h"
69 #include "regcomp.h"
70
#ifndef STATIC
#define STATIC	static
#endif

#ifdef DEBUGGING
int regnarrate = 0;	/* non-zero makes regmatch() trace each node on stderr */
#endif

/*
 * ASCII-only character classification.  Each macro first checks isascii()
 * so that a character with the high bit set (possibly negative when char
 * is signed) is never handed to the <ctype.h> routines, and each yields
 * exactly 0 or 1, which the word-boundary code relies on when it compares
 * two results with == and !=.
 *
 * The argument is evaluated more than once: do not pass an expression
 * with side effects.  It is fully parenthesized, so an operator
 * expression such as (ch & 0x7f) is safe.
 */
#define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || (c) == '_'))
#define isSPACE(c) (isascii(c) && isspace(c))
#define isDIGIT(c) (isascii(c) && isdigit(c))
#define isUPPER(c) (isascii(c) && isupper(c))
83
84 /*
85  * regexec and friends
86  */
87
88 /*
89  * Global work variables for regexec().
90  */
/* Extent of the subject string, set up once per regexec() call. */
static char *regbol;		/* Beginning of input, for ^ check. */
static char *regeol;		/* End of input, for $ check. */
static char regprev;		/* char before regbol, \n if none */
static char *regtill;		/* a match must end at or beyond this point */

/* State of the match attempt currently in progress. */
static char *reginput;		/* String-input pointer. */
static char *regprecomp;	/* prog->precomp of the pattern being run */

/* Where the current program records its results. */
static char **regstartp;	/* Pointer to startp array. */
static char **regendp;		/* Ditto for endp. */
static char *reglastparen;	/* Similarly for lastparen. */

/* Provisional group bounds, kept for remembering backreferences. */
static char *regmystartp[10];
static char *regmyendp[10];
103
104 /*
105  * Forwards.
106  */
107 STATIC int regtry();
108 STATIC int regmatch();
109 STATIC int regrepeat();
110
111 extern int multiline;
112
113 /*
114  - regexec - match a regexp against a string
115  */
116 int
117 regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
118 register regexp *prog;
119 char *stringarg;
120 register char *strend;  /* pointer to null at end of string */
121 char *strbeg;   /* real beginning of string */
122 int minend;     /* end of match must be at least minend after stringarg */
123 STR *screamer;
124 int safebase;   /* no need to remember string in subbase */
125 {
126         register char *s;
127         register int i;
128         register char *c;
129         register char *string = stringarg;
130         register int tmp;
131         int minlen = 0;         /* must match at least this many chars */
132         int dontbother = 0;     /* how many characters not to try at end */
133
134         /* Be paranoid... */
135         if (prog == NULL || string == NULL) {
136                 fatal("NULL regexp parameter");
137                 return(0);
138         }
139
140         if (string == strbeg)   /* is ^ valid at stringarg? */
141             regprev = '\n';
142         else
143             regprev = stringarg[-1];
144         regprecomp = prog->precomp;
145         /* Check validity of program. */
146         if (UCHARAT(prog->program) != MAGIC) {
147                 FAIL("corrupted regexp program");
148         }
149
150         if (prog->do_folding) {
151                 safebase = FALSE;
152                 i = strend - string;
153                 New(1101,c,i+1,char);
154                 (void)bcopy(string, c, i+1);
155                 string = c;
156                 strend = string + i;
157                 for (s = string; s < strend; s++)
158                         if (isUPPER(*s))
159                                 *s = tolower(*s);
160         }
161
162         /* If there is a "must appear" string, look for it. */
163         s = string;
164         if (prog->regmust != Nullstr) {
165                 if (stringarg == strbeg && screamer) {
166                         if (screamfirst[prog->regmust->str_rare] >= 0)
167                                 s = screaminstr(screamer,prog->regmust);
168                         else
169                                 s = Nullch;
170                 }
171 #ifndef lint
172                 else
173                         s = fbminstr((unsigned char*)s, (unsigned char*)strend,
174                             prog->regmust);
175 #endif
176                 if (!s) {
177                         ++prog->regmust->str_u.str_useful;      /* hooray */
178                         goto phooey;    /* not present */
179                 }
180                 else if (prog->regback >= 0) {
181                         s -= prog->regback;
182                         if (s < string)
183                             s = string;
184                         minlen = prog->regback + prog->regmust->str_cur;
185                 }
186                 else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
187                         str_free(prog->regmust);
188                         prog->regmust = Nullstr;        /* disable regmust */
189                         s = string;
190                 }
191                 else {
192                         s = string;
193                         minlen = prog->regmust->str_cur;
194                 }
195         }
196
197         /* Mark beginning of line for ^ . */
198         regbol = string;
199
200         /* Mark end of line for $ (and such) */
201         regeol = strend;
202
203         /* see how far we have to get to not match where we matched before */
204         regtill = string+minend;
205
206         /* Simplest case:  anchored match need be tried only once. */
207         /*  [unless multiline is set] */
208         if (prog->reganch & 1) {
209                 if (regtry(prog, string))
210                         goto got_it;
211                 else if (multiline) {
212                         if (minlen)
213                             dontbother = minlen - 1;
214                         strend -= dontbother;
215                         /* for multiline we only have to try after newlines */
216                         if (s > string)
217                             s--;
218                         while (s < strend) {
219                             if (*s++ == '\n') {
220                                 if (s < strend && regtry(prog, s))
221                                     goto got_it;
222                             }
223                         }
224                 }
225                 goto phooey;
226         }
227
228         /* Messy cases:  unanchored match. */
229         if (prog->regstart) {
230                 if (prog->reganch & 2) {        /* we have /x+whatever/ */
231                     /* it must be a one character string */
232                     i = prog->regstart->str_ptr[0];
233                     while (s < strend) {
234                             if (*s == i) {
235                                     if (regtry(prog, s))
236                                             goto got_it;
237                                     s++;
238                                     while (s < strend && *s == i)
239                                         s++;
240                             }
241                             s++;
242                     }
243                 }
244                 else if (prog->regstart->str_pok == 3) {
245                     /* We know what string it must start with. */
246 #ifndef lint
247                     while ((s = fbminstr((unsigned char*)s,
248                       (unsigned char*)strend, prog->regstart)) != NULL)
249 #else
250                     while (s = Nullch)
251 #endif
252                     {
253                             if (regtry(prog, s))
254                                     goto got_it;
255                             s++;
256                     }
257                 }
258                 else {
259                     c = prog->regstart->str_ptr;
260                     while ((s = ninstr(s, strend,
261                       c, c + prog->regstart->str_cur )) != NULL) {
262                             if (regtry(prog, s))
263                                     goto got_it;
264                             s++;
265                     }
266                 }
267                 goto phooey;
268         }
269         if (c = prog->regstclass) {
270                 int doevery = (prog->reganch & 2) == 0;
271
272                 if (minlen)
273                     dontbother = minlen - 1;
274                 strend -= dontbother;   /* don't bother with what can't match */
275                 tmp = 1;
276                 /* We know what class it must start with. */
277                 switch (OP(c)) {
278                 case ANYOF:
279                     c = OPERAND(c);
280                     while (s < strend) {
281                             i = UCHARAT(s);
282                             if (!(c[i >> 3] & (1 << (i&7)))) {
283                                     if (tmp && regtry(prog, s))
284                                             goto got_it;
285                                     else
286                                             tmp = doevery;
287                             }
288                             else
289                                     tmp = 1;
290                             s++;
291                     }
292                     break;
293                 case BOUND:
294                     if (minlen)
295                         dontbother++,strend--;
296                     if (s != string) {
297                         i = s[-1];
298                         tmp = isALNUM(i);
299                     }
300                     else
301                         tmp = isALNUM(regprev); /* assume not alphanumeric */
302                     while (s < strend) {
303                             i = *s;
304                             if (tmp != isALNUM(i)) {
305                                     tmp = !tmp;
306                                     if (regtry(prog, s))
307                                             goto got_it;
308                             }
309                             s++;
310                     }
311                     if ((minlen || tmp) && regtry(prog,s))
312                             goto got_it;
313                     break;
314                 case NBOUND:
315                     if (minlen)
316                         dontbother++,strend--;
317                     if (s != string) {
318                         i = s[-1];
319                         tmp = isALNUM(i);
320                     }
321                     else
322                         tmp = isALNUM(regprev); /* assume not alphanumeric */
323                     while (s < strend) {
324                             i = *s;
325                             if (tmp != isALNUM(i))
326                                     tmp = !tmp;
327                             else if (regtry(prog, s))
328                                     goto got_it;
329                             s++;
330                     }
331                     if ((minlen || !tmp) && regtry(prog,s))
332                             goto got_it;
333                     break;
334                 case ALNUM:
335                     while (s < strend) {
336                             i = *s;
337                             if (isALNUM(i)) {
338                                     if (tmp && regtry(prog, s))
339                                             goto got_it;
340                                     else
341                                             tmp = doevery;
342                             }
343                             else
344                                     tmp = 1;
345                             s++;
346                     }
347                     break;
348                 case NALNUM:
349                     while (s < strend) {
350                             i = *s;
351                             if (!isALNUM(i)) {
352                                     if (tmp && regtry(prog, s))
353                                             goto got_it;
354                                     else
355                                             tmp = doevery;
356                             }
357                             else
358                                     tmp = 1;
359                             s++;
360                     }
361                     break;
362                 case SPACE:
363                     while (s < strend) {
364                             if (isSPACE(*s)) {
365                                     if (tmp && regtry(prog, s))
366                                             goto got_it;
367                                     else
368                                             tmp = doevery;
369                             }
370                             else
371                                     tmp = 1;
372                             s++;
373                     }
374                     break;
375                 case NSPACE:
376                     while (s < strend) {
377                             if (!isSPACE(*s)) {
378                                     if (tmp && regtry(prog, s))
379                                             goto got_it;
380                                     else
381                                             tmp = doevery;
382                             }
383                             else
384                                     tmp = 1;
385                             s++;
386                     }
387                     break;
388                 case DIGIT:
389                     while (s < strend) {
390                             if (isDIGIT(*s)) {
391                                     if (tmp && regtry(prog, s))
392                                             goto got_it;
393                                     else
394                                             tmp = doevery;
395                             }
396                             else
397                                     tmp = 1;
398                             s++;
399                     }
400                     break;
401                 case NDIGIT:
402                     while (s < strend) {
403                             if (!isDIGIT(*s)) {
404                                     if (tmp && regtry(prog, s))
405                                             goto got_it;
406                                     else
407                                             tmp = doevery;
408                             }
409                             else
410                                     tmp = 1;
411                             s++;
412                     }
413                     break;
414                 }
415         }
416         else {
417                 if (minlen)
418                     dontbother = minlen - 1;
419                 strend -= dontbother;
420                 /* We don't know much -- general case. */
421                 do {
422                         if (regtry(prog, s))
423                                 goto got_it;
424                 } while (s++ < strend);
425         }
426
427         /* Failure. */
428         goto phooey;
429
430     got_it:
431         if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
432                 strend += dontbother;   /* uncheat */
433                 if (safebase)                   /* no need for $digit later */
434                     s = strbeg;
435                 else if (strbeg != prog->subbase) {
436                     i = strend - string + (stringarg - strbeg);
437                     s = nsavestr(strbeg,i);     /* so $digit will work later */
438                     if (prog->subbase)
439                             Safefree(prog->subbase);
440                     prog->subbase = s;
441                     prog->subend = s+i;
442                 }
443                 else
444                     s = prog->subbase;
445                 s += (stringarg - strbeg);
446                 for (i = 0; i <= prog->nparens; i++) {
447                         if (prog->endp[i]) {
448                             prog->startp[i] = s + (prog->startp[i] - string);
449                             prog->endp[i] = s + (prog->endp[i] - string);
450                         }
451                 }
452                 if (prog->do_folding)
453                         Safefree(string);
454         }
455         return(1);
456
457     phooey:
458         if (prog->do_folding)
459                 Safefree(string);
460         return(0);
461 }
462
463 /*
464  - regtry - try match at specific point
465  */
466 static int                      /* 0 failure, 1 success */
467 regtry(prog, string)
468 regexp *prog;
469 char *string;
470 {
471         register int i;
472         register char **sp;
473         register char **ep;
474
475         reginput = string;
476         regstartp = prog->startp;
477         regendp = prog->endp;
478         reglastparen = &prog->lastparen;
479         prog->lastparen = 0;
480
481         sp = prog->startp;
482         ep = prog->endp;
483         if (prog->nparens) {
484                 for (i = NSUBEXP; i > 0; i--) {
485                         *sp++ = NULL;
486                         *ep++ = NULL;
487                 }
488         }
489         if (regmatch(prog->program + 1) && reginput >= regtill) {
490                 prog->startp[0] = string;
491                 prog->endp[0] = reginput;
492                 return(1);
493         } else
494                 return(0);
495 }
496
497 /*
498  - regmatch - main matching routine
499  *
500  * Conceptually the strategy is simple:  check to see whether the current
501  * node matches, call self recursively to see whether the rest matches,
502  * and then act accordingly.  In practice we make some effort to avoid
503  * recursion, in particular by going through "ordinary" nodes (that don't
504  * need to know whether the rest of the match failed) by a loop instead of
505  * by recursion.
506  */
507 /* [lwall] I've hoisted the register declarations to the outer block in order to
508  * maybe save a little bit of pushing and popping on the stack.  It also takes
509  * advantage of machines that use a register save mask on subroutine entry.
510  */
511 static int                      /* 0 failure, 1 success */
512 regmatch(prog)
513 char *prog;
514 {
515         register char *scan;    /* Current node. */
516         char *next;             /* Next node. */
517         register int nextchar;
518         register int n;         /* no or next */
519         register int ln;        /* len or last */
520         register char *s;       /* operand or save */
521         register char *locinput = reginput;
522
523         nextchar = *locinput;
524         scan = prog;
525 #ifdef DEBUGGING
526         if (scan != NULL && regnarrate)
527                 fprintf(stderr, "%s(\n", regprop(scan));
528 #endif
529         while (scan != NULL) {
530 #ifdef DEBUGGING
531                 if (regnarrate)
532                         fprintf(stderr, "%s...\n", regprop(scan));
533 #endif
534
535 #ifdef REGALIGN
536                 next = scan + NEXT(scan);
537                 if (next == scan)
538                     next = NULL;
539 #else
540                 next = regnext(scan);
541 #endif
542
543                 switch (OP(scan)) {
544                 case BOL:
545                         if (locinput == regbol ? regprev == '\n' :
546                             ((nextchar || locinput < regeol) &&
547                               locinput[-1] == '\n') )
548                         {
549                                 regtill = regbol;
550                                 break;
551                         }
552                         return(0);
553                 case EOL:
554                         if ((nextchar || locinput < regeol) && nextchar != '\n')
555                                 return(0);
556                         if (!multiline && regeol - locinput > 1)
557                                 return 0;
558                         regtill = regbol;
559                         break;
560                 case ANY:
561                         if ((nextchar == '\0' && locinput >= regeol) ||
562                           nextchar == '\n')
563                                 return(0);
564                         nextchar = *++locinput;
565                         break;
566                 case EXACTLY:
567                         s = OPERAND(scan);
568                         ln = *s++;
569                         /* Inline the first character, for speed. */
570                         if (*s != nextchar)
571                                 return(0);
572                         if (regeol - locinput < ln)
573                                 return 0;
574                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
575                                 return(0);
576                         locinput += ln;
577                         nextchar = *locinput;
578                         break;
579                 case ANYOF:
580                         s = OPERAND(scan);
581                         if (nextchar < 0)
582                                 nextchar = UCHARAT(locinput);
583                         if (s[nextchar >> 3] & (1 << (nextchar&7)))
584                                 return(0);
585                         nextchar = *++locinput;
586                         if (!nextchar && locinput > regeol)
587                                 return 0;
588                         break;
589                 case ALNUM:
590                         if (!nextchar)
591                                 return(0);
592                         if (!isALNUM(nextchar))
593                                 return(0);
594                         nextchar = *++locinput;
595                         break;
596                 case NALNUM:
597                         if (!nextchar && locinput >= regeol)
598                                 return(0);
599                         if (isALNUM(nextchar))
600                                 return(0);
601                         nextchar = *++locinput;
602                         break;
603                 case NBOUND:
604                 case BOUND:
605                         if (locinput == regbol) /* was last char in word? */
606                                 ln = isALNUM(regprev);
607                         else 
608                                 ln = isALNUM(locinput[-1]);
609                         n = isALNUM(nextchar); /* is next char in word? */
610                         if ((ln == n) == (OP(scan) == BOUND))
611                                 return(0);
612                         break;
613                 case SPACE:
614                         if (!nextchar && locinput >= regeol)
615                                 return(0);
616                         if (!isSPACE(nextchar))
617                                 return(0);
618                         nextchar = *++locinput;
619                         break;
620                 case NSPACE:
621                         if (!nextchar)
622                                 return(0);
623                         if (isSPACE(nextchar))
624                                 return(0);
625                         nextchar = *++locinput;
626                         break;
627                 case DIGIT:
628                         if (!isDIGIT(nextchar))
629                                 return(0);
630                         nextchar = *++locinput;
631                         break;
632                 case NDIGIT:
633                         if (!nextchar && locinput >= regeol)
634                                 return(0);
635                         if (isDIGIT(nextchar))
636                                 return(0);
637                         nextchar = *++locinput;
638                         break;
639                 case REF:
640                 case REF+1:
641                 case REF+2:
642                 case REF+3:
643                 case REF+4:
644                 case REF+5:
645                 case REF+6:
646                 case REF+7:
647                 case REF+8:
648                 case REF+9:
649                         n = OP(scan) - REF;
650                         s = regmystartp[n];
651                         if (!s)
652                             return(0);
653                         if (!regmyendp[n])
654                             return(0);
655                         if (s == regmyendp[n])
656                             break;
657                         /* Inline the first character, for speed. */
658                         if (*s != nextchar)
659                                 return(0);
660                         ln = regmyendp[n] - s;
661                         if (locinput + ln > regeol)
662                                 return 0;
663                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
664                                 return(0);
665                         locinput += ln;
666                         nextchar = *locinput;
667                         break;
668
669                 case NOTHING:
670                         break;
671                 case BACK:
672                         break;
673                 case OPEN+1:
674                 case OPEN+2:
675                 case OPEN+3:
676                 case OPEN+4:
677                 case OPEN+5:
678                 case OPEN+6:
679                 case OPEN+7:
680                 case OPEN+8:
681                 case OPEN+9:
682                         n = OP(scan) - OPEN;
683                         reginput = locinput;
684
685                         regmystartp[n] = locinput;      /* for REF */
686                         if (regmatch(next)) {
687                                 /*
688                                  * Don't set startp if some later
689                                  * invocation of the same parentheses
690                                  * already has.
691                                  */
692                                 if (regstartp[n] == NULL)
693                                         regstartp[n] = locinput;
694                                 return(1);
695                         } else
696                                 return(0);
697                         /* NOTREACHED */
698                 case CLOSE+1:
699                 case CLOSE+2:
700                 case CLOSE+3:
701                 case CLOSE+4:
702                 case CLOSE+5:
703                 case CLOSE+6:
704                 case CLOSE+7:
705                 case CLOSE+8:
706                 case CLOSE+9: {
707                                 n = OP(scan) - CLOSE;
708                                 reginput = locinput;
709
710                                 regmyendp[n] = locinput;        /* for REF */
711                                 if (regmatch(next)) {
712                                         /*
713                                          * Don't set endp if some later
714                                          * invocation of the same parentheses
715                                          * already has.
716                                          */
717                                         if (regendp[n] == NULL) {
718                                                 regendp[n] = locinput;
719                                                 if (n > *reglastparen)
720                                                     *reglastparen = n;
721                                         }
722                                         return(1);
723                                 } else
724                                         return(0);
725                         }
726                         /*NOTREACHED*/
727                 case BRANCH: {
728                                 if (OP(next) != BRANCH)         /* No choice. */
729                                         next = NEXTOPER(scan);  /* Avoid recursion. */
730                                 else {
731                                         do {
732                                                 reginput = locinput;
733                                                 if (regmatch(NEXTOPER(scan)))
734                                                         return(1);
735 #ifdef REGALIGN
736                                                 if (n = NEXT(scan))
737                                                     scan += n;
738                                                 else
739                                                     scan = NULL;
740 #else
741                                                 scan = regnext(scan);
742 #endif
743                                         } while (scan != NULL && OP(scan) == BRANCH);
744                                         return(0);
745                                         /* NOTREACHED */
746                                 }
747                         }
748                         break;
749                 case CURLY:
750                         ln = ARG1(scan);  /* min to match */
751                         n  = ARG2(scan);  /* max to match */
752                         scan = NEXTOPER(scan) + 4;
753                         goto repeat;
754                 case STAR:
755                         ln = 0;
756                         n = 0;
757                         scan = NEXTOPER(scan);
758                         goto repeat;
759                 case PLUS:
760                         /*
761                          * Lookahead to avoid useless match attempts
762                          * when we know what character comes next.
763                          */
764                         ln = 1;
765                         n = 0;
766                         scan = NEXTOPER(scan);
767                     repeat:
768                         if (OP(next) == EXACTLY)
769                                 nextchar = *(OPERAND(next)+1);
770                         else
771                                 nextchar = -1000;
772                         reginput = locinput;
773                         n = regrepeat(scan, n);
774                         if (!multiline && OP(next) == EOL)
775                             ln = n;                     /* why back off? */
776                         while (n >= ln) {
777                                 /* If it could work, try it. */
778                                 if (nextchar == -1000 || *reginput == nextchar)
779                                         if (regmatch(next))
780                                                 return(1);
781                                 /* Couldn't or didn't -- back up. */
782                                 n--;
783                                 reginput = locinput + n;
784                         }
785                         return(0);
786                 case END:
787                         reginput = locinput; /* put where regtry can find it */
788                         return(1);      /* Success! */
789                 default:
790                         printf("%x %d\n",scan,scan[1]);
791                         FAIL("regexp memory corruption");
792                 }
793
794                 scan = next;
795         }
796
797         /*
798          * We get here only if there's trouble -- normally "case END" is
799          * the terminating point.
800          */
801         FAIL("corrupted regexp pointers");
802         /*NOTREACHED*/
803 #ifdef lint
804         return 0;
805 #endif
806 }
807
808 /*
809  - regrepeat - repeatedly match something simple, report how many
810  */
811 /*
812  * [This routine now assumes that it will only match on things of length 1.
813  * That was true before, but now we assume scan - reginput is the count,
814  * rather than incrementing count on every character.]
815  */
816 static int
817 regrepeat(p, max)
818 char *p;
819 int max;
820 {
821         register char *scan;
822         register char *opnd;
823         register int c;
824         register char *loceol = regeol;
825
826         scan = reginput;
827         if (max && max < loceol - scan)
828             loceol = scan + max;
829         opnd = OPERAND(p);
830         switch (OP(p)) {
831         case ANY:
832                 while (scan < loceol && *scan != '\n')
833                         scan++;
834                 break;
835         case EXACTLY:           /* length of string is 1 */
836                 opnd++;
837                 while (scan < loceol && *opnd == *scan)
838                         scan++;
839                 break;
840         case ANYOF:
841                 c = UCHARAT(scan);
842                 while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
843                         scan++;
844                         c = UCHARAT(scan);
845                 }
846                 break;
847         case ALNUM:
848                 while (isALNUM(*scan))
849                         scan++;
850                 break;
851         case NALNUM:
852                 while (scan < loceol && !isALNUM(*scan))
853                         scan++;
854                 break;
855         case SPACE:
856                 while (scan < loceol && isSPACE(*scan))
857                         scan++;
858                 break;
859         case NSPACE:
860                 while (scan < loceol && !isSPACE(*scan))
861                         scan++;
862                 break;
863         case DIGIT:
864                 while (isDIGIT(*scan))
865                         scan++;
866                 break;
867         case NDIGIT:
868                 while (scan < loceol && !isDIGIT(*scan))
869                         scan++;
870                 break;
871         default:                /* Oh dear.  Called inappropriately. */
872                 FAIL("internal regexp foulup");
873                 /* NOTREACHED */
874         }
875
876         c = scan - reginput;
877         reginput = scan;
878
879         return(c);
880 }
881
882 /*
883  - regnext - dig the "next" pointer out of a node
884  *
885  * [Note, when REGALIGN is defined there are two places in regmatch()
886  * that bypass this code for speed.]
887  */
888 char *
889 regnext(p)
890 register char *p;
891 {
892         register int offset;
893
894         if (p == &regdummy)
895                 return(NULL);
896
897         offset = NEXT(p);
898         if (offset == 0)
899                 return(NULL);
900
901 #ifdef REGALIGN
902         return(p+offset);
903 #else
904         if (OP(p) == BACK)
905                 return(p-offset);
906         else
907                 return(p+offset);
908 #endif
909 }