perl 4.0 patch 2: Patch 1 continued
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
2  * be confused with the original package (see point 3 below).  Thanks, Henry!
3  */
4
5 /* Additional note: this code is very heavily munged from Henry's version
6  * in places.  In some spots I've traded clarity for efficiency, so don't
7  * blame Henry for some of the lack of readability.
8  */
9
10 /* $RCSfile: regexec.c,v $$Revision: 4.0.1.1 $$Date: 91/04/12 09:07:39 $
11  *
12  * $Log:        regexec.c,v $
13  * Revision 4.0.1.1  91/04/12  09:07:39  lwall
14  * patch1: regexec only allocated space for 9 subexpressions
15  * 
16  * Revision 4.0  91/03/20  01:39:16  lwall
17  * 4.0 baseline.
18  * 
19  */
20
21 /*
22  * regcomp and regexec -- regsub and regerror are not used in perl
23  *
24  *      Copyright (c) 1986 by University of Toronto.
25  *      Written by Henry Spencer.  Not derived from licensed software.
26  *
27  *      Permission is granted to anyone to use this software for any
28  *      purpose on any computer system, and to redistribute it freely,
29  *      subject to the following restrictions:
30  *
31  *      1. The author is not responsible for the consequences of use of
32  *              this software, no matter how awful, even if they arise
33  *              from defects in it.
34  *
35  *      2. The origin of this software must not be misrepresented, either
36  *              by explicit claim or by omission.
37  *
38  *      3. Altered versions must be plainly marked as such, and must not
39  *              be misrepresented as being the original software.
40  *
41  ****    Alterations to Henry's code are...
42  ****
43  ****    Copyright (c) 1989, Larry Wall
44  ****
45  ****    You may distribute under the terms of the GNU General Public License
46  ****    as specified in the README file that comes with the perl 3.0 kit.
47  *
48  * Beware that some of this code is subtly aware of the way operator
49  * precedence is structured in regular expressions.  Serious changes in
50  * regular-expression syntax might require a total rethink.
51  */
52 #include "EXTERN.h"
53 #include "perl.h"
54 #include "regcomp.h"
55
   /*
    * STATIC is the storage class used on the forward declarations of the
    * file-local helpers below; it expands to `static` unless something
    * has already defined it.
    */
56 #ifndef STATIC
57 #define STATIC  static
58 #endif
59
   /*
    * Only present in DEBUGGING builds: when regnarrate is nonzero,
    * regmatch() prints the description of every node it visits to stderr.
    */
60 #ifdef DEBUGGING
61 int regnarrate = 0;
62 #endif
63
/*
 * ASCII-only character classification.  Each macro first checks isascii()
 * so that the <ctype.h> predicates are never handed a value outside the
 * 7-bit range; a non-ASCII value therefore belongs to none of the classes.
 *
 * isALNUM() is the "word character" test: letters, digits and underscore.
 *
 * The argument is evaluated more than once, so do not pass an expression
 * with side effects.  It is parenthesized where it meets an operator so
 * that an argument such as `x & 0x7f' is compared as a whole.
 */
#define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || (c) == '_'))
#define isSPACE(c) (isascii(c) && isspace(c))
#define isDIGIT(c) (isascii(c) && isdigit(c))
#define isUPPER(c) (isascii(c) && isupper(c))
68
69 /*
70  * regexec and friends
71  */
72
73 /*
74  * Global work variables for regexec().
    *
    * regexec() and regtry() set these up before each attempt; regmatch()
    * reads them (and updates reginput) while walking the program.
75  */
76 static char *regprecomp;        /* prog->precomp of the regexp being run */
77 static char *reginput;          /* String-input pointer. */
78 static char regprev;            /* char before regbol, \n if none */
79 static char *regbol;            /* Beginning of input, for ^ check. */
80 static char *regeol;            /* End of input, for $ check. */
81 static char **regstartp;        /* Pointer to startp array. */
82 static char **regendp;          /* Ditto for endp. */
83 static char *reglastparen;      /* Similarly for lastparen. */
84 static char *regtill;           /* regtry() rejects a match ending before this */
85
   /*
    * Scratch start/end arrays consulted by the REF (backreference) node
    * and written by OPEN/CLOSE in regmatch().  regexec() grows them on
    * demand to at least prog->nparens + 1 entries (never fewer than 10);
    * regmyp_size is the current capacity of both arrays.
    */
86 static int regmyp_size = 0;
87 static char **regmystartp = Null(char**);
88 static char **regmyendp   = Null(char**);
89
90 /*
91  * Forwards.
    *
    * K&R-style declarations (no parameter lists) for the file-local
    * helpers defined after regexec().
92  */
93 STATIC int regtry();            /* try a match at one starting position */
94 STATIC int regmatch();          /* walk the compiled program from a node */
95 STATIC int regrepeat();         /* count repeats of a simple node */
96
   /* Defined elsewhere; nonzero changes how ^ and $ treat embedded newlines
    * (see the BOL/EOL cases in regmatch and the anchored path in regexec). */
97 extern int multiline;
98
99 /*
100  - regexec - match a regexp against a string
     *
     * Searches for the first position at or after stringarg where prog
     * matches, using whatever the compiled program offers to skip hopeless
     * start positions (a "must appear" string, an anchor, a known leading
     * string, or a known leading character class).  Each candidate position
     * is handed to regtry().
     *
     * Returns 1 on a match and 0 otherwise.  A NULL prog or stringarg is
     * reported through fatal().  On success prog->startp[]/prog->endp[]
     * describe the match; when captures must outlive the caller's buffer
     * (see got_it below) they are re-based onto a private copy kept in
     * prog->subbase.
     *
     * When prog->do_folding is set the search runs on a lowercased copy of
     * the input, which is released before returning on both exits.
101  */
102 int
103 regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
104 register regexp *prog;
105 char *stringarg;        /* where the search starts */
106 register char *strend;  /* pointer to null at end of string */
107 char *strbeg;   /* real beginning of string */
108 int minend;     /* end of match must be at least minend after stringarg */
109 STR *screamer;  /* if set and stringarg == strbeg, use screaminstr() for regmust */
110 int safebase;   /* no need to remember string in subbase */
111 {
112         register char *s;
113         register int i;
114         register char *c;
115         register char *string = stringarg;
116         register int tmp;
117         int minlen = 0;         /* must match at least this many chars */
118         int dontbother = 0;     /* how many characters not to try at end */
119
120         /* Be paranoid... */
121         if (prog == NULL || string == NULL) {
122                 fatal("NULL regexp parameter");
123                 return(0);
124         }
125
            /*
             * regprev stands in for "the character before the start position":
             * '\n' at the real beginning of the string, otherwise the actual
             * preceding byte -- except that without multiline a preceding
             * newline is replaced by '\0' so that ^ cannot match there.
             */
126         if (string == strbeg)   /* is ^ valid at stringarg? */
127             regprev = '\n';
128         else {
129             regprev = stringarg[-1];
130             if (!multiline && regprev == '\n')
131                 regprev = '\0';         /* force ^ to NOT match */
132         }
133         regprecomp = prog->precomp;
134         /* Check validity of program. */
135         if (UCHARAT(prog->program) != MAGIC) {
136                 FAIL("corrupted regexp program");
137         }
138
            /*
             * Case-insensitive match: work on a private copy of the input
             * (including the byte at strend) with ASCII uppercase letters
             * lowered.  From here on string/strend refer to the copy, and
             * safebase is forced off.
             */
139         if (prog->do_folding) {
140                 safebase = FALSE;
141                 i = strend - string;
142                 New(1101,c,i+1,char);
143                 (void)bcopy(string, c, i+1);
144                 string = c;
145                 strend = string + i;
146                 for (s = string; s < strend; s++)
147                         if (isUPPER(*s))
148                                 *s = tolower(*s);
149         }
150
151         /* If there is a "must appear" string, look for it. */
            /*
             * The check is skipped for anchored patterns unless multiline is
             * set and regback is non-negative.  str_useful is a running score:
             * bumped each time the search proves there can be no match, and
             * decremented when the string is found but regback < 0; once it
             * goes negative the regmust string is freed and the check is
             * disabled for this program.
             * NOTE(review): regback appears to be the distance from a match
             * start to the must-appear string -- confirm against regcomp.
             */
152         s = string;
153         if (prog->regmust != Nullstr &&
154             (!(prog->reganch & 1) || (multiline && prog->regback >= 0)) ) {
155                 if (stringarg == strbeg && screamer) {
156                         if (screamfirst[prog->regmust->str_rare] >= 0)
157                                 s = screaminstr(screamer,prog->regmust);
158                         else
159                                 s = Nullch;
160                 }
161 #ifndef lint
162                 else
163                         s = fbminstr((unsigned char*)s, (unsigned char*)strend,
164                             prog->regmust);
165 #endif
166                 if (!s) {
167                         ++prog->regmust->str_u.str_useful;      /* hooray */
168                         goto phooey;    /* not present */
169                 }
170                 else if (prog->regback >= 0) {
                            /* back up from the hit by regback, clamped to the start */
171                         s -= prog->regback;
172                         if (s < string)
173                             s = string;
174                         minlen = prog->regback + prog->regmust->str_cur;
175                 }
176                 else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
177                         str_free(prog->regmust);
178                         prog->regmust = Nullstr;        /* disable regmust */
179                         s = string;
180                 }
181                 else {
182                         s = string;
183                         minlen = prog->regmust->str_cur;
184                 }
185         }
186
187         /* Mark beginning of line for ^ . */
188         regbol = string;
189
190         /* Mark end of line for $ (and such) */
191         regeol = strend;
192
193         /* see how far we have to get to not match where we matched before */
194         regtill = string+minend;
195
196         /* Allocate our backreference arrays */
197         if ( regmyp_size < prog->nparens + 1 ) {
198             /* Allocate or enlarge the arrays */
199             regmyp_size = prog->nparens + 1;
200             if ( regmyp_size < 10 ) regmyp_size = 10;   /* minimum */
201             if ( regmystartp ) {
202                 /* reallocate larger */
203                 Renew(regmystartp,regmyp_size,char*);
204                 Renew(regmyendp,  regmyp_size,char*);
205             }
206             else {
207                 /* Initial allocation */
208                 New(1102,regmystartp,regmyp_size,char*);
209                 New(1102,regmyendp,  regmyp_size,char*);
210             }
211         
212         }
213
214         /* Simplest case:  anchored match need be tried only once. */
215         /*  [unless multiline is set] */
216         if (prog->reganch & 1) {
217                 if (regtry(prog, string))
218                         goto got_it;
219                 else if (multiline) {
                            /* the last minlen-1 positions cannot hold a match */
220                         if (minlen)
221                             dontbother = minlen - 1;
222                         strend -= dontbother;
223                         /* for multiline we only have to try after newlines */
                            /* step back one so a newline just before s is seen */
224                         if (s > string)
225                             s--;
226                         while (s < strend) {
227                             if (*s++ == '\n') {
228                                 if (s < strend && regtry(prog, s))
229                                     goto got_it;
230                             }
231                         }
232                 }
233                 goto phooey;
234         }
235
236         /* Messy cases:  unanchored match. */
237         if (prog->regstart) {
238                 if (prog->reganch & 2) {        /* we have /x+whatever/ */
239                     /* it must be a one character string */
                        /*
                         * Try only at the first character of each run of i:
                         * after a failed attempt the rest of the run is
                         * skipped, and so is the character that ended it
                         * (which by construction is not i).
                         */
240                     i = prog->regstart->str_ptr[0];
241                     while (s < strend) {
242                             if (*s == i) {
243                                     if (regtry(prog, s))
244                                             goto got_it;
245                                     s++;
246                                     while (s < strend && *s == i)
247                                         s++;
248                             }
249                             s++;
250                     }
251                 }
252                 else if (prog->regstart->str_pok == 3) {
253                     /* We know what string it must start with. */
                        /* NOTE(review): str_pok == 3 presumably marks a string
                         * prepared for fbminstr() -- confirm. */
254 #ifndef lint
255                     while ((s = fbminstr((unsigned char*)s,
256                       (unsigned char*)strend, prog->regstart)) != NULL)
257 #else
258                     while (s = Nullch)
259 #endif
260                     {
261                             if (regtry(prog, s))
262                                     goto got_it;
263                             s++;
264                     }
265                 }
266                 else {
                        /* same idea, locating the leading string with ninstr() */
267                     c = prog->regstart->str_ptr;
268                     while ((s = ninstr(s, strend,
269                       c, c + prog->regstart->str_cur )) != NULL) {
270                             if (regtry(prog, s))
271                                     goto got_it;
272                             s++;
273                     }
274                 }
275                 goto phooey;
276         }
            /*
             * A known leading character class: only positions whose first
             * character is in the class are tried.  tmp gates the attempt.
             * After a failed attempt tmp becomes doevery, so when reganch & 2
             * is set (doevery == 0) the remaining members of the same run are
             * skipped; a non-member character re-arms tmp.
             */
277         if (c = prog->regstclass) {
278                 int doevery = (prog->reganch & 2) == 0;
279
280                 if (minlen)
281                     dontbother = minlen - 1;
282                 strend -= dontbother;   /* don't bother with what can't match */
283                 tmp = 1;
284                 /* We know what class it must start with. */
285                 switch (OP(c)) {
286                 case ANYOF:
                        /* operand is a bitmap; a clear bit means "in the class" */
287                     c = OPERAND(c);
288                     while (s < strend) {
289                             i = UCHARAT(s);
290                             if (!(c[i >> 3] & (1 << (i&7)))) {
291                                     if (tmp && regtry(prog, s))
292                                             goto got_it;
293                                     else
294                                             tmp = doevery;
295                             }
296                             else
297                                     tmp = 1;
298                             s++;
299                     }
300                     break;
301                 case BOUND:
                        /*
                         * tmp tracks whether the previous character was a word
                         * character; an attempt is made wherever that changes,
                         * plus one final attempt at the end position.
                         */
302                     if (minlen)
303                         dontbother++,strend--;
304                     if (s != string) {
305                         i = s[-1];
306                         tmp = isALNUM(i);
307                     }
308                     else
309                         tmp = isALNUM(regprev); /* assume not alphanumeric */
310                     while (s < strend) {
311                             i = *s;
312                             if (tmp != isALNUM(i)) {
313                                     tmp = !tmp;
314                                     if (regtry(prog, s))
315                                             goto got_it;
316                             }
317                             s++;
318                     }
319                     if ((minlen || tmp) && regtry(prog,s))
320                             goto got_it;
321                     break;
322                 case NBOUND:
                        /* mirror image of BOUND: try wherever the state does
                         * NOT change */
323                     if (minlen)
324                         dontbother++,strend--;
325                     if (s != string) {
326                         i = s[-1];
327                         tmp = isALNUM(i);
328                     }
329                     else
330                         tmp = isALNUM(regprev); /* assume not alphanumeric */
331                     while (s < strend) {
332                             i = *s;
333                             if (tmp != isALNUM(i))
334                                     tmp = !tmp;
335                             else if (regtry(prog, s))
336                                     goto got_it;
337                             s++;
338                     }
339                     if ((minlen || !tmp) && regtry(prog,s))
340                             goto got_it;
341                     break;
342                 case ALNUM:
343                     while (s < strend) {
344                             i = *s;
345                             if (isALNUM(i)) {
346                                     if (tmp && regtry(prog, s))
347                                             goto got_it;
348                                     else
349                                             tmp = doevery;
350                             }
351                             else
352                                     tmp = 1;
353                             s++;
354                     }
355                     break;
356                 case NALNUM:
357                     while (s < strend) {
358                             i = *s;
359                             if (!isALNUM(i)) {
360                                     if (tmp && regtry(prog, s))
361                                             goto got_it;
362                                     else
363                                             tmp = doevery;
364                             }
365                             else
366                                     tmp = 1;
367                             s++;
368                     }
369                     break;
370                 case SPACE:
371                     while (s < strend) {
372                             if (isSPACE(*s)) {
373                                     if (tmp && regtry(prog, s))
374                                             goto got_it;
375                                     else
376                                             tmp = doevery;
377                             }
378                             else
379                                     tmp = 1;
380                             s++;
381                     }
382                     break;
383                 case NSPACE:
384                     while (s < strend) {
385                             if (!isSPACE(*s)) {
386                                     if (tmp && regtry(prog, s))
387                                             goto got_it;
388                                     else
389                                             tmp = doevery;
390                             }
391                             else
392                                     tmp = 1;
393                             s++;
394                     }
395                     break;
396                 case DIGIT:
397                     while (s < strend) {
398                             if (isDIGIT(*s)) {
399                                     if (tmp && regtry(prog, s))
400                                             goto got_it;
401                                     else
402                                             tmp = doevery;
403                             }
404                             else
405                                     tmp = 1;
406                             s++;
407                     }
408                     break;
409                 case NDIGIT:
410                     while (s < strend) {
411                             if (!isDIGIT(*s)) {
412                                     if (tmp && regtry(prog, s))
413                                             goto got_it;
414                                     else
415                                             tmp = doevery;
416                             }
417                             else
418                                     tmp = 1;
419                             s++;
420                     }
421                     break;
422                 }
423         }
424         else {
425                 if (minlen)
426                     dontbother = minlen - 1;
427                 strend -= dontbother;
428                 /* We don't know much -- general case. */
                    /* every position is tried, the (shortened) end included */
429                 do {
430                         if (regtry(prog, s))
431                                 goto got_it;
432                 } while (s++ < strend);
433         }
434
435         /* Failure. */
436         goto phooey;
437
            /*
             * Success.  When the captures may be needed later (the caller did
             * not vouch for the buffer and the program has parens or
             * sawampersand is set), or when we matched against the folded
             * copy, startp/endp are translated from `string' onto a saved copy
             * of the original text held in prog->subbase.  An existing
             * subbase is reused when strbeg already points at it.
             * NOTE(review): safebase is always false once this block is
             * entered (the outer test requires !safebase unless do_folding,
             * and do_folding clears it above), so the `if (safebase)' arm
             * looks unreachable in this revision.
             */
438     got_it:
439         if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
440                 strend += dontbother;   /* uncheat */
441                 if (safebase)                   /* no need for $digit later */
442                     s = strbeg;
443                 else if (strbeg != prog->subbase) {
444                     i = strend - string + (stringarg - strbeg);
445                     s = nsavestr(strbeg,i);     /* so $digit will work later */
446                     if (prog->subbase)
447                             Safefree(prog->subbase);
448                     prog->subbase = s;
449                     prog->subend = s+i;
450                 }
451                 else
452                     s = prog->subbase;
                    /* s now corresponds to stringarg inside the saved text */
453                 s += (stringarg - strbeg);
454                 for (i = 0; i <= prog->nparens; i++) {
455                         if (prog->endp[i]) {
456                             prog->startp[i] = s + (prog->startp[i] - string);
457                             prog->endp[i] = s + (prog->endp[i] - string);
458                         }
459                 }
460                 if (prog->do_folding)
461                         Safefree(string);
462         }
463         return(1);
464
            /* No match: release the folded copy, if one was made. */
465     phooey:
466         if (prog->do_folding)
467                 Safefree(string);
468         return(0);
469 }
470
471 /*
472  - regtry - try match at specific point
473  */
474 static int                      /* 0 failure, 1 success */
475 regtry(prog, string)
476 regexp *prog;
477 char *string;
478 {
479         register int i;
480         register char **sp;
481         register char **ep;
482
483         reginput = string;
484         regstartp = prog->startp;
485         regendp = prog->endp;
486         reglastparen = &prog->lastparen;
487         prog->lastparen = 0;
488
489         sp = prog->startp;
490         ep = prog->endp;
491         if (prog->nparens) {
492                 for (i = prog->nparens; i >= 0; i--) {
493                         *sp++ = NULL;
494                         *ep++ = NULL;
495                 }
496         }
497         if (regmatch(prog->program + 1) && reginput >= regtill) {
498                 prog->startp[0] = string;
499                 prog->endp[0] = reginput;
500                 return(1);
501         } else
502                 return(0);
503 }
504
505 /*
506  - regmatch - main matching routine
507  *
508  * Conceptually the strategy is simple:  check to see whether the current
509  * node matches, call self recursively to see whether the rest matches,
510  * and then act accordingly.  In practice we make some effort to avoid
511  * recursion, in particular by going through "ordinary" nodes (that don't
512  * need to know whether the rest of the match failed) by a loop instead of
513  * by recursion.
514  */
515 /* [lwall] I've hoisted the register declarations to the outer block in order to
516  * maybe save a little bit of pushing and popping on the stack.  It also takes
517  * advantage of machines that use a register save mask on subroutine entry.
518  */
519 static int                      /* 0 failure, 1 success */
520 regmatch(prog)
521 char *prog;
522 {
523         register char *scan;    /* Current node. */
524         char *next;             /* Next node. */
525         register int nextchar;
526         register int n;         /* no or next */
527         register int ln;        /* len or last */
528         register char *s;       /* operand or save */
529         register char *locinput = reginput;
530
531         nextchar = *locinput;
532         scan = prog;
533 #ifdef DEBUGGING
534         if (scan != NULL && regnarrate)
535                 fprintf(stderr, "%s(\n", regprop(scan));
536 #endif
537         while (scan != NULL) {
538 #ifdef DEBUGGING
539                 if (regnarrate)
540                         fprintf(stderr, "%s...\n", regprop(scan));
541 #endif
542
543 #ifdef REGALIGN
544                 next = scan + NEXT(scan);
545                 if (next == scan)
546                     next = NULL;
547 #else
548                 next = regnext(scan);
549 #endif
550
551                 switch (OP(scan)) {
552                 case BOL:
553                         if (locinput == regbol ? regprev == '\n' :
554                             ((nextchar || locinput < regeol) &&
555                               locinput[-1] == '\n') )
556                         {
557                                 /* regtill = regbol; */
558                                 break;
559                         }
560                         return(0);
561                 case EOL:
562                         if ((nextchar || locinput < regeol) && nextchar != '\n')
563                                 return(0);
564                         if (!multiline && regeol - locinput > 1)
565                                 return 0;
566                         /* regtill = regbol; */
567                         break;
568                 case ANY:
569                         if ((nextchar == '\0' && locinput >= regeol) ||
570                           nextchar == '\n')
571                                 return(0);
572                         nextchar = *++locinput;
573                         break;
574                 case EXACTLY:
575                         s = OPERAND(scan);
576                         ln = *s++;
577                         /* Inline the first character, for speed. */
578                         if (*s != nextchar)
579                                 return(0);
580                         if (regeol - locinput < ln)
581                                 return 0;
582                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
583                                 return(0);
584                         locinput += ln;
585                         nextchar = *locinput;
586                         break;
587                 case ANYOF:
588                         s = OPERAND(scan);
589                         if (nextchar < 0)
590                                 nextchar = UCHARAT(locinput);
591                         if (s[nextchar >> 3] & (1 << (nextchar&7)))
592                                 return(0);
593                         if (!nextchar && locinput >= regeol)
594                                 return 0;
595                         nextchar = *++locinput;
596                         break;
597                 case ALNUM:
598                         if (!nextchar)
599                                 return(0);
600                         if (!isALNUM(nextchar))
601                                 return(0);
602                         nextchar = *++locinput;
603                         break;
604                 case NALNUM:
605                         if (!nextchar && locinput >= regeol)
606                                 return(0);
607                         if (isALNUM(nextchar))
608                                 return(0);
609                         nextchar = *++locinput;
610                         break;
611                 case NBOUND:
612                 case BOUND:
613                         if (locinput == regbol) /* was last char in word? */
614                                 ln = isALNUM(regprev);
615                         else 
616                                 ln = isALNUM(locinput[-1]);
617                         n = isALNUM(nextchar); /* is next char in word? */
618                         if ((ln == n) == (OP(scan) == BOUND))
619                                 return(0);
620                         break;
621                 case SPACE:
622                         if (!nextchar && locinput >= regeol)
623                                 return(0);
624                         if (!isSPACE(nextchar))
625                                 return(0);
626                         nextchar = *++locinput;
627                         break;
628                 case NSPACE:
629                         if (!nextchar)
630                                 return(0);
631                         if (isSPACE(nextchar))
632                                 return(0);
633                         nextchar = *++locinput;
634                         break;
635                 case DIGIT:
636                         if (!isDIGIT(nextchar))
637                                 return(0);
638                         nextchar = *++locinput;
639                         break;
640                 case NDIGIT:
641                         if (!nextchar && locinput >= regeol)
642                                 return(0);
643                         if (isDIGIT(nextchar))
644                                 return(0);
645                         nextchar = *++locinput;
646                         break;
647                 case REF:
648                         n = ARG1(scan);  /* which paren pair */
649                         s = regmystartp[n];
650                         if (!s)
651                             return(0);
652                         if (!regmyendp[n])
653                             return(0);
654                         if (s == regmyendp[n])
655                             break;
656                         /* Inline the first character, for speed. */
657                         if (*s != nextchar)
658                                 return(0);
659                         ln = regmyendp[n] - s;
660                         if (locinput + ln > regeol)
661                                 return 0;
662                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
663                                 return(0);
664                         locinput += ln;
665                         nextchar = *locinput;
666                         break;
667
668                 case NOTHING:
669                         break;
670                 case BACK:
671                         break;
672                 case OPEN:
673                         n = ARG1(scan);  /* which paren pair */
674                         reginput = locinput;
675
676                         regmystartp[n] = locinput;      /* for REF */
677                         if (regmatch(next)) {
678                                 /*
679                                  * Don't set startp if some later
680                                  * invocation of the same parentheses
681                                  * already has.
682                                  */
683                                 if (regstartp[n] == NULL)
684                                         regstartp[n] = locinput;
685                                 return(1);
686                         } else
687                                 return(0);
688                         /* NOTREACHED */
689                 case CLOSE: {
690                                 n = ARG1(scan);  /* which paren pair */
691                                 reginput = locinput;
692
693                                 regmyendp[n] = locinput;        /* for REF */
694                                 if (regmatch(next)) {
695                                         /*
696                                          * Don't set endp if some later
697                                          * invocation of the same parentheses
698                                          * already has.
699                                          */
700                                         if (regendp[n] == NULL) {
701                                                 regendp[n] = locinput;
702                                                 if (n > *reglastparen)
703                                                     *reglastparen = n;
704                                         }
705                                         return(1);
706                                 } else
707                                         return(0);
708                         }
709                         /*NOTREACHED*/
710                 case BRANCH: {
711                                 if (OP(next) != BRANCH)         /* No choice. */
712                                         next = NEXTOPER(scan);  /* Avoid recursion. */
713                                 else {
714                                         do {
715                                                 reginput = locinput;
716                                                 if (regmatch(NEXTOPER(scan)))
717                                                         return(1);
718 #ifdef REGALIGN
719                                                 if (n = NEXT(scan))
720                                                     scan += n;
721                                                 else
722                                                     scan = NULL;
723 #else
724                                                 scan = regnext(scan);
725 #endif
726                                         } while (scan != NULL && OP(scan) == BRANCH);
727                                         return(0);
728                                         /* NOTREACHED */
729                                 }
730                         }
731                         break;
732                 case CURLY:
733                         ln = ARG1(scan);  /* min to match */
734                         n  = ARG2(scan);  /* max to match */
735                         scan = NEXTOPER(scan) + 4;
736                         goto repeat;
737                 case STAR:
738                         ln = 0;
739                         n = 0;
740                         scan = NEXTOPER(scan);
741                         goto repeat;
742                 case PLUS:
743                         /*
744                          * Lookahead to avoid useless match attempts
745                          * when we know what character comes next.
746                          */
747                         ln = 1;
748                         n = 0;
749                         scan = NEXTOPER(scan);
750                     repeat:
751                         if (OP(next) == EXACTLY)
752                                 nextchar = *(OPERAND(next)+1);
753                         else
754                                 nextchar = -1000;
755                         reginput = locinput;
756                         n = regrepeat(scan, n);
757                         if (!multiline && OP(next) == EOL && ln < n)
758                             ln = n;                     /* why back off? */
759                         while (n >= ln) {
760                                 /* If it could work, try it. */
761                                 if (nextchar == -1000 || *reginput == nextchar)
762                                         if (regmatch(next))
763                                                 return(1);
764                                 /* Couldn't or didn't -- back up. */
765                                 n--;
766                                 reginput = locinput + n;
767                         }
768                         return(0);
769                 case END:
770                         reginput = locinput; /* put where regtry can find it */
771                         return(1);      /* Success! */
772                 default:
773                         printf("%x %d\n",scan,scan[1]);
774                         FAIL("regexp memory corruption");
775                 }
776
777                 scan = next;
778         }
779
780         /*
781          * We get here only if there's trouble -- normally "case END" is
782          * the terminating point.
783          */
784         FAIL("corrupted regexp pointers");
785         /*NOTREACHED*/
786 #ifdef lint
787         return 0;
788 #endif
789 }
790
791 /*
792  - regrepeat - repeatedly match something simple, report how many
793  */
794 /*
795  * [This routine now assumes that it will only match on things of length 1.
796  * That was true before, but now we assume scan - reginput is the count,
797  * rather than incrementing count on every character.]
798  */
799 static int
800 regrepeat(p, max)
801 char *p;
802 int max;
803 {
804         register char *scan;
805         register char *opnd;
806         register int c;
807         register char *loceol = regeol;
808
809         scan = reginput;
810         if (max && max < loceol - scan)
811             loceol = scan + max;
812         opnd = OPERAND(p);
813         switch (OP(p)) {
814         case ANY:
815                 while (scan < loceol && *scan != '\n')
816                         scan++;
817                 break;
818         case EXACTLY:           /* length of string is 1 */
819                 opnd++;
820                 while (scan < loceol && *opnd == *scan)
821                         scan++;
822                 break;
823         case ANYOF:
824                 c = UCHARAT(scan);
825                 while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
826                         scan++;
827                         c = UCHARAT(scan);
828                 }
829                 break;
830         case ALNUM:
831                 while (scan < loceol && isALNUM(*scan))
832                         scan++;
833                 break;
834         case NALNUM:
835                 while (scan < loceol && !isALNUM(*scan))
836                         scan++;
837                 break;
838         case SPACE:
839                 while (scan < loceol && isSPACE(*scan))
840                         scan++;
841                 break;
842         case NSPACE:
843                 while (scan < loceol && !isSPACE(*scan))
844                         scan++;
845                 break;
846         case DIGIT:
847                 while (scan < loceol && isDIGIT(*scan))
848                         scan++;
849                 break;
850         case NDIGIT:
851                 while (scan < loceol && !isDIGIT(*scan))
852                         scan++;
853                 break;
854         default:                /* Oh dear.  Called inappropriately. */
855                 FAIL("internal regexp foulup");
856                 /* NOTREACHED */
857         }
858
859         c = scan - reginput;
860         reginput = scan;
861
862         return(c);
863 }
864
865 /*
866  - regnext - dig the "next" pointer out of a node
867  *
868  * [Note, when REGALIGN is defined there are two places in regmatch()
869  * that bypass this code for speed.]
870  */
871 char *
872 regnext(p)
873 register char *p;
874 {
875         register int offset;
876
877         if (p == &regdummy)
878                 return(NULL);
879
880         offset = NEXT(p);
881         if (offset == 0)
882                 return(NULL);
883
884 #ifdef REGALIGN
885         return(p+offset);
886 #else
887         if (OP(p) == BACK)
888                 return(p-offset);
889         else
890                 return(p+offset);
891 #endif
892 }