perl 4.0 patch 1: (combined patch)
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /* NOTE: this is derived from Henry Spencer's regexp code, and should not be
2  * confused with the original package (see point 3 below).  Thanks, Henry!
3  */
4
5 /* Additional note: this code is very heavily munged from Henry's version
6  * in places.  In some spots I've traded clarity for efficiency, so don't
7  * blame Henry for some of the lack of readability.
8  */
9
10 /* $Header: regexec.c,v 4.0 91/03/20 01:39:16 lwall Locked $
11  *
12  * $Log:        regexec.c,v $
13  * Revision 4.0  91/03/20  01:39:16  lwall
14  * 4.0 baseline.
15  * 
16  */
17
18 /*
19  * regcomp and regexec -- regsub and regerror are not used in perl
20  *
21  *      Copyright (c) 1986 by University of Toronto.
22  *      Written by Henry Spencer.  Not derived from licensed software.
23  *
24  *      Permission is granted to anyone to use this software for any
25  *      purpose on any computer system, and to redistribute it freely,
26  *      subject to the following restrictions:
27  *
28  *      1. The author is not responsible for the consequences of use of
29  *              this software, no matter how awful, even if they arise
30  *              from defects in it.
31  *
32  *      2. The origin of this software must not be misrepresented, either
33  *              by explicit claim or by omission.
34  *
35  *      3. Altered versions must be plainly marked as such, and must not
36  *              be misrepresented as being the original software.
37  *
38  ****    Alterations to Henry's code are...
39  ****
40  ****    Copyright (c) 1989, Larry Wall
41  ****
42  ****    You may distribute under the terms of the GNU General Public License
43  ****    as specified in the README file that comes with the perl 3.0 kit.
44  *
45  * Beware that some of this code is subtly aware of the way operator
46  * precedence is structured in regular expressions.  Serious changes in
47  * regular-expression syntax might require a total rethink.
48  */
49 #include "EXTERN.h"
50 #include "perl.h"
51 #include "regcomp.h"
52
#ifndef STATIC
#define STATIC  static
#endif

#ifdef DEBUGGING
int regnarrate = 0;             /* when nonzero, regmatch() traces nodes on stderr */
#endif

/*
 * ASCII-only character classification.
 *
 * Each test is guarded by isascii() so that the <ctype.h> routines are
 * never handed a value outside 0..127 (in particular a negative plain
 * char).  Non-ASCII characters therefore belong to none of the classes.
 *
 * The argument is parenthesized wherever it is used as an operand, so an
 * expression argument is compared as a whole.  It is still evaluated more
 * than once: do not pass an argument with side effects.
 */
#define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || (c) == '_'))
#define isSPACE(c) (isascii(c) && isspace(c))
#define isDIGIT(c) (isascii(c) && isdigit(c))
#define isUPPER(c) (isascii(c) && isupper(c))
65
66 /*
67  * regexec and friends
68  */
69
70 /*
71  * Global work variables for regexec().
 *
 * These are file-scope statics shared by regexec(), regtry(), regmatch()
 * and regrepeat(); they carry the state of the match currently in progress.
72  */
73 static char *regprecomp;        /* prog->precomp, recorded at the start of regexec() */
74 static char *reginput;          /* String-input pointer: current position in the subject. */
75 static char regprev;            /* char before regbol, \n if none */
76 static char *regbol;            /* Beginning of input, for ^ check. */
77 static char *regeol;            /* End of input, for $ check. */
78 static char **regstartp;        /* Pointer to startp array (prog->startp, set in regtry()). */
79 static char **regendp;          /* Ditto for endp. */
80 static char *reglastparen;      /* Similarly for lastparen (points at prog->lastparen). */
81 static char *regtill;           /* string + minend; regtry() rejects a match ending before it */
82
83 static char *regmystartp[10];   /* For remembering backreferences: set by OPEN, read by REF. */
84 static char *regmyendp[10];     /* End of each group: set by CLOSE, read by REF. */
85
86 /*
87  * Forwards.
88  */
89 STATIC int regtry();            /* one match attempt at a given start position */
90 STATIC int regmatch();          /* recursive node interpreter */
91 STATIC int regrepeat();         /* greedy run of a single-character node */
92
93 extern int multiline;           /* defined outside this file; consulted by the ^ and $ handling */
94
95 /*
96  - regexec - match a regexp against a string
97  */
98 int
99 regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
100 register regexp *prog;
101 char *stringarg;
102 register char *strend;  /* pointer to null at end of string */
103 char *strbeg;   /* real beginning of string */
104 int minend;     /* end of match must be at least minend after stringarg */
105 STR *screamer;
106 int safebase;   /* no need to remember string in subbase */
107 {
108         register char *s;
109         register int i;
110         register char *c;
111         register char *string = stringarg;
112         register int tmp;
113         int minlen = 0;         /* must match at least this many chars */
114         int dontbother = 0;     /* how many characters not to try at end */
115
116         /* Be paranoid... */
117         if (prog == NULL || string == NULL) {
118                 fatal("NULL regexp parameter");
119                 return(0);
120         }
121
122         if (string == strbeg)   /* is ^ valid at stringarg? */
123             regprev = '\n';
124         else {
125             regprev = stringarg[-1];
126             if (!multiline && regprev == '\n')
127                 regprev = '\0';         /* force ^ to NOT match */
128         }
129         regprecomp = prog->precomp;
130         /* Check validity of program. */
131         if (UCHARAT(prog->program) != MAGIC) {
132                 FAIL("corrupted regexp program");
133         }
134
135         if (prog->do_folding) {
136                 safebase = FALSE;
137                 i = strend - string;
138                 New(1101,c,i+1,char);
139                 (void)bcopy(string, c, i+1);
140                 string = c;
141                 strend = string + i;
142                 for (s = string; s < strend; s++)
143                         if (isUPPER(*s))
144                                 *s = tolower(*s);
145         }
146
147         /* If there is a "must appear" string, look for it. */
148         s = string;
149         if (prog->regmust != Nullstr &&
150             (!(prog->reganch & 1) || (multiline && prog->regback >= 0)) ) {
151                 if (stringarg == strbeg && screamer) {
152                         if (screamfirst[prog->regmust->str_rare] >= 0)
153                                 s = screaminstr(screamer,prog->regmust);
154                         else
155                                 s = Nullch;
156                 }
157 #ifndef lint
158                 else
159                         s = fbminstr((unsigned char*)s, (unsigned char*)strend,
160                             prog->regmust);
161 #endif
162                 if (!s) {
163                         ++prog->regmust->str_u.str_useful;      /* hooray */
164                         goto phooey;    /* not present */
165                 }
166                 else if (prog->regback >= 0) {
167                         s -= prog->regback;
168                         if (s < string)
169                             s = string;
170                         minlen = prog->regback + prog->regmust->str_cur;
171                 }
172                 else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
173                         str_free(prog->regmust);
174                         prog->regmust = Nullstr;        /* disable regmust */
175                         s = string;
176                 }
177                 else {
178                         s = string;
179                         minlen = prog->regmust->str_cur;
180                 }
181         }
182
183         /* Mark beginning of line for ^ . */
184         regbol = string;
185
186         /* Mark end of line for $ (and such) */
187         regeol = strend;
188
189         /* see how far we have to get to not match where we matched before */
190         regtill = string+minend;
191
192         /* Simplest case:  anchored match need be tried only once. */
193         /*  [unless multiline is set] */
194         if (prog->reganch & 1) {
195                 if (regtry(prog, string))
196                         goto got_it;
197                 else if (multiline) {
198                         if (minlen)
199                             dontbother = minlen - 1;
200                         strend -= dontbother;
201                         /* for multiline we only have to try after newlines */
202                         if (s > string)
203                             s--;
204                         while (s < strend) {
205                             if (*s++ == '\n') {
206                                 if (s < strend && regtry(prog, s))
207                                     goto got_it;
208                             }
209                         }
210                 }
211                 goto phooey;
212         }
213
214         /* Messy cases:  unanchored match. */
215         if (prog->regstart) {
216                 if (prog->reganch & 2) {        /* we have /x+whatever/ */
217                     /* it must be a one character string */
218                     i = prog->regstart->str_ptr[0];
219                     while (s < strend) {
220                             if (*s == i) {
221                                     if (regtry(prog, s))
222                                             goto got_it;
223                                     s++;
224                                     while (s < strend && *s == i)
225                                         s++;
226                             }
227                             s++;
228                     }
229                 }
230                 else if (prog->regstart->str_pok == 3) {
231                     /* We know what string it must start with. */
232 #ifndef lint
233                     while ((s = fbminstr((unsigned char*)s,
234                       (unsigned char*)strend, prog->regstart)) != NULL)
235 #else
236                     while (s = Nullch)
237 #endif
238                     {
239                             if (regtry(prog, s))
240                                     goto got_it;
241                             s++;
242                     }
243                 }
244                 else {
245                     c = prog->regstart->str_ptr;
246                     while ((s = ninstr(s, strend,
247                       c, c + prog->regstart->str_cur )) != NULL) {
248                             if (regtry(prog, s))
249                                     goto got_it;
250                             s++;
251                     }
252                 }
253                 goto phooey;
254         }
255         if (c = prog->regstclass) {
256                 int doevery = (prog->reganch & 2) == 0;
257
258                 if (minlen)
259                     dontbother = minlen - 1;
260                 strend -= dontbother;   /* don't bother with what can't match */
261                 tmp = 1;
262                 /* We know what class it must start with. */
263                 switch (OP(c)) {
264                 case ANYOF:
265                     c = OPERAND(c);
266                     while (s < strend) {
267                             i = UCHARAT(s);
268                             if (!(c[i >> 3] & (1 << (i&7)))) {
269                                     if (tmp && regtry(prog, s))
270                                             goto got_it;
271                                     else
272                                             tmp = doevery;
273                             }
274                             else
275                                     tmp = 1;
276                             s++;
277                     }
278                     break;
279                 case BOUND:
280                     if (minlen)
281                         dontbother++,strend--;
282                     if (s != string) {
283                         i = s[-1];
284                         tmp = isALNUM(i);
285                     }
286                     else
287                         tmp = isALNUM(regprev); /* assume not alphanumeric */
288                     while (s < strend) {
289                             i = *s;
290                             if (tmp != isALNUM(i)) {
291                                     tmp = !tmp;
292                                     if (regtry(prog, s))
293                                             goto got_it;
294                             }
295                             s++;
296                     }
297                     if ((minlen || tmp) && regtry(prog,s))
298                             goto got_it;
299                     break;
300                 case NBOUND:
301                     if (minlen)
302                         dontbother++,strend--;
303                     if (s != string) {
304                         i = s[-1];
305                         tmp = isALNUM(i);
306                     }
307                     else
308                         tmp = isALNUM(regprev); /* assume not alphanumeric */
309                     while (s < strend) {
310                             i = *s;
311                             if (tmp != isALNUM(i))
312                                     tmp = !tmp;
313                             else if (regtry(prog, s))
314                                     goto got_it;
315                             s++;
316                     }
317                     if ((minlen || !tmp) && regtry(prog,s))
318                             goto got_it;
319                     break;
320                 case ALNUM:
321                     while (s < strend) {
322                             i = *s;
323                             if (isALNUM(i)) {
324                                     if (tmp && regtry(prog, s))
325                                             goto got_it;
326                                     else
327                                             tmp = doevery;
328                             }
329                             else
330                                     tmp = 1;
331                             s++;
332                     }
333                     break;
334                 case NALNUM:
335                     while (s < strend) {
336                             i = *s;
337                             if (!isALNUM(i)) {
338                                     if (tmp && regtry(prog, s))
339                                             goto got_it;
340                                     else
341                                             tmp = doevery;
342                             }
343                             else
344                                     tmp = 1;
345                             s++;
346                     }
347                     break;
348                 case SPACE:
349                     while (s < strend) {
350                             if (isSPACE(*s)) {
351                                     if (tmp && regtry(prog, s))
352                                             goto got_it;
353                                     else
354                                             tmp = doevery;
355                             }
356                             else
357                                     tmp = 1;
358                             s++;
359                     }
360                     break;
361                 case NSPACE:
362                     while (s < strend) {
363                             if (!isSPACE(*s)) {
364                                     if (tmp && regtry(prog, s))
365                                             goto got_it;
366                                     else
367                                             tmp = doevery;
368                             }
369                             else
370                                     tmp = 1;
371                             s++;
372                     }
373                     break;
374                 case DIGIT:
375                     while (s < strend) {
376                             if (isDIGIT(*s)) {
377                                     if (tmp && regtry(prog, s))
378                                             goto got_it;
379                                     else
380                                             tmp = doevery;
381                             }
382                             else
383                                     tmp = 1;
384                             s++;
385                     }
386                     break;
387                 case NDIGIT:
388                     while (s < strend) {
389                             if (!isDIGIT(*s)) {
390                                     if (tmp && regtry(prog, s))
391                                             goto got_it;
392                                     else
393                                             tmp = doevery;
394                             }
395                             else
396                                     tmp = 1;
397                             s++;
398                     }
399                     break;
400                 }
401         }
402         else {
403                 if (minlen)
404                     dontbother = minlen - 1;
405                 strend -= dontbother;
406                 /* We don't know much -- general case. */
407                 do {
408                         if (regtry(prog, s))
409                                 goto got_it;
410                 } while (s++ < strend);
411         }
412
413         /* Failure. */
414         goto phooey;
415
416     got_it:
417         if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
418                 strend += dontbother;   /* uncheat */
419                 if (safebase)                   /* no need for $digit later */
420                     s = strbeg;
421                 else if (strbeg != prog->subbase) {
422                     i = strend - string + (stringarg - strbeg);
423                     s = nsavestr(strbeg,i);     /* so $digit will work later */
424                     if (prog->subbase)
425                             Safefree(prog->subbase);
426                     prog->subbase = s;
427                     prog->subend = s+i;
428                 }
429                 else
430                     s = prog->subbase;
431                 s += (stringarg - strbeg);
432                 for (i = 0; i <= prog->nparens; i++) {
433                         if (prog->endp[i]) {
434                             prog->startp[i] = s + (prog->startp[i] - string);
435                             prog->endp[i] = s + (prog->endp[i] - string);
436                         }
437                 }
438                 if (prog->do_folding)
439                         Safefree(string);
440         }
441         return(1);
442
443     phooey:
444         if (prog->do_folding)
445                 Safefree(string);
446         return(0);
447 }
448
449 /*
450  - regtry - try match at specific point
451  */
452 static int                      /* 0 failure, 1 success */
453 regtry(prog, string)
454 regexp *prog;
455 char *string;
456 {
457         register int i;
458         register char **sp;
459         register char **ep;
460
461         reginput = string;
462         regstartp = prog->startp;
463         regendp = prog->endp;
464         reglastparen = &prog->lastparen;
465         prog->lastparen = 0;
466
467         sp = prog->startp;
468         ep = prog->endp;
469         if (prog->nparens) {
470                 for (i = prog->nparens; i >= 0; i--) {
471                         *sp++ = NULL;
472                         *ep++ = NULL;
473                 }
474         }
475         if (regmatch(prog->program + 1) && reginput >= regtill) {
476                 prog->startp[0] = string;
477                 prog->endp[0] = reginput;
478                 return(1);
479         } else
480                 return(0);
481 }
482
483 /*
484  - regmatch - main matching routine
485  *
486  * Conceptually the strategy is simple:  check to see whether the current
487  * node matches, call self recursively to see whether the rest matches,
488  * and then act accordingly.  In practice we make some effort to avoid
489  * recursion, in particular by going through "ordinary" nodes (that don't
490  * need to know whether the rest of the match failed) by a loop instead of
491  * by recursion.
492  */
493 /* [lwall] I've hoisted the register declarations to the outer block in order to
494  * maybe save a little bit of pushing and popping on the stack.  It also takes
495  * advantage of machines that use a register save mask on subroutine entry.
496  */
497 static int                      /* 0 failure, 1 success */
498 regmatch(prog)
499 char *prog;
500 {
501         register char *scan;    /* Current node. */
502         char *next;             /* Next node. */
503         register int nextchar;
504         register int n;         /* no or next */
505         register int ln;        /* len or last */
506         register char *s;       /* operand or save */
507         register char *locinput = reginput;
508
509         nextchar = *locinput;
510         scan = prog;
511 #ifdef DEBUGGING
512         if (scan != NULL && regnarrate)
513                 fprintf(stderr, "%s(\n", regprop(scan));
514 #endif
515         while (scan != NULL) {
516 #ifdef DEBUGGING
517                 if (regnarrate)
518                         fprintf(stderr, "%s...\n", regprop(scan));
519 #endif
520
521 #ifdef REGALIGN
522                 next = scan + NEXT(scan);
523                 if (next == scan)
524                     next = NULL;
525 #else
526                 next = regnext(scan);
527 #endif
528
529                 switch (OP(scan)) {
530                 case BOL:
531                         if (locinput == regbol ? regprev == '\n' :
532                             ((nextchar || locinput < regeol) &&
533                               locinput[-1] == '\n') )
534                         {
535                                 /* regtill = regbol; */
536                                 break;
537                         }
538                         return(0);
539                 case EOL:
540                         if ((nextchar || locinput < regeol) && nextchar != '\n')
541                                 return(0);
542                         if (!multiline && regeol - locinput > 1)
543                                 return 0;
544                         /* regtill = regbol; */
545                         break;
546                 case ANY:
547                         if ((nextchar == '\0' && locinput >= regeol) ||
548                           nextchar == '\n')
549                                 return(0);
550                         nextchar = *++locinput;
551                         break;
552                 case EXACTLY:
553                         s = OPERAND(scan);
554                         ln = *s++;
555                         /* Inline the first character, for speed. */
556                         if (*s != nextchar)
557                                 return(0);
558                         if (regeol - locinput < ln)
559                                 return 0;
560                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
561                                 return(0);
562                         locinput += ln;
563                         nextchar = *locinput;
564                         break;
565                 case ANYOF:
566                         s = OPERAND(scan);
567                         if (nextchar < 0)
568                                 nextchar = UCHARAT(locinput);
569                         if (s[nextchar >> 3] & (1 << (nextchar&7)))
570                                 return(0);
571                         if (!nextchar && locinput >= regeol)
572                                 return 0;
573                         nextchar = *++locinput;
574                         break;
575                 case ALNUM:
576                         if (!nextchar)
577                                 return(0);
578                         if (!isALNUM(nextchar))
579                                 return(0);
580                         nextchar = *++locinput;
581                         break;
582                 case NALNUM:
583                         if (!nextchar && locinput >= regeol)
584                                 return(0);
585                         if (isALNUM(nextchar))
586                                 return(0);
587                         nextchar = *++locinput;
588                         break;
589                 case NBOUND:
590                 case BOUND:
591                         if (locinput == regbol) /* was last char in word? */
592                                 ln = isALNUM(regprev);
593                         else 
594                                 ln = isALNUM(locinput[-1]);
595                         n = isALNUM(nextchar); /* is next char in word? */
596                         if ((ln == n) == (OP(scan) == BOUND))
597                                 return(0);
598                         break;
599                 case SPACE:
600                         if (!nextchar && locinput >= regeol)
601                                 return(0);
602                         if (!isSPACE(nextchar))
603                                 return(0);
604                         nextchar = *++locinput;
605                         break;
606                 case NSPACE:
607                         if (!nextchar)
608                                 return(0);
609                         if (isSPACE(nextchar))
610                                 return(0);
611                         nextchar = *++locinput;
612                         break;
613                 case DIGIT:
614                         if (!isDIGIT(nextchar))
615                                 return(0);
616                         nextchar = *++locinput;
617                         break;
618                 case NDIGIT:
619                         if (!nextchar && locinput >= regeol)
620                                 return(0);
621                         if (isDIGIT(nextchar))
622                                 return(0);
623                         nextchar = *++locinput;
624                         break;
625                 case REF:
626                         n = ARG1(scan);  /* which paren pair */
627                         s = regmystartp[n];
628                         if (!s)
629                             return(0);
630                         if (!regmyendp[n])
631                             return(0);
632                         if (s == regmyendp[n])
633                             break;
634                         /* Inline the first character, for speed. */
635                         if (*s != nextchar)
636                                 return(0);
637                         ln = regmyendp[n] - s;
638                         if (locinput + ln > regeol)
639                                 return 0;
640                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
641                                 return(0);
642                         locinput += ln;
643                         nextchar = *locinput;
644                         break;
645
646                 case NOTHING:
647                         break;
648                 case BACK:
649                         break;
650                 case OPEN:
651                         n = ARG1(scan);  /* which paren pair */
652                         reginput = locinput;
653
654                         regmystartp[n] = locinput;      /* for REF */
655                         if (regmatch(next)) {
656                                 /*
657                                  * Don't set startp if some later
658                                  * invocation of the same parentheses
659                                  * already has.
660                                  */
661                                 if (regstartp[n] == NULL)
662                                         regstartp[n] = locinput;
663                                 return(1);
664                         } else
665                                 return(0);
666                         /* NOTREACHED */
667                 case CLOSE: {
668                                 n = ARG1(scan);  /* which paren pair */
669                                 reginput = locinput;
670
671                                 regmyendp[n] = locinput;        /* for REF */
672                                 if (regmatch(next)) {
673                                         /*
674                                          * Don't set endp if some later
675                                          * invocation of the same parentheses
676                                          * already has.
677                                          */
678                                         if (regendp[n] == NULL) {
679                                                 regendp[n] = locinput;
680                                                 if (n > *reglastparen)
681                                                     *reglastparen = n;
682                                         }
683                                         return(1);
684                                 } else
685                                         return(0);
686                         }
687                         /*NOTREACHED*/
688                 case BRANCH: {
689                                 if (OP(next) != BRANCH)         /* No choice. */
690                                         next = NEXTOPER(scan);  /* Avoid recursion. */
691                                 else {
692                                         do {
693                                                 reginput = locinput;
694                                                 if (regmatch(NEXTOPER(scan)))
695                                                         return(1);
696 #ifdef REGALIGN
697                                                 if (n = NEXT(scan))
698                                                     scan += n;
699                                                 else
700                                                     scan = NULL;
701 #else
702                                                 scan = regnext(scan);
703 #endif
704                                         } while (scan != NULL && OP(scan) == BRANCH);
705                                         return(0);
706                                         /* NOTREACHED */
707                                 }
708                         }
709                         break;
710                 case CURLY:
711                         ln = ARG1(scan);  /* min to match */
712                         n  = ARG2(scan);  /* max to match */
713                         scan = NEXTOPER(scan) + 4;
714                         goto repeat;
715                 case STAR:
716                         ln = 0;
717                         n = 0;
718                         scan = NEXTOPER(scan);
719                         goto repeat;
720                 case PLUS:
721                         /*
722                          * Lookahead to avoid useless match attempts
723                          * when we know what character comes next.
724                          */
725                         ln = 1;
726                         n = 0;
727                         scan = NEXTOPER(scan);
728                     repeat:
729                         if (OP(next) == EXACTLY)
730                                 nextchar = *(OPERAND(next)+1);
731                         else
732                                 nextchar = -1000;
733                         reginput = locinput;
734                         n = regrepeat(scan, n);
735                         if (!multiline && OP(next) == EOL && ln < n)
736                             ln = n;                     /* why back off? */
737                         while (n >= ln) {
738                                 /* If it could work, try it. */
739                                 if (nextchar == -1000 || *reginput == nextchar)
740                                         if (regmatch(next))
741                                                 return(1);
742                                 /* Couldn't or didn't -- back up. */
743                                 n--;
744                                 reginput = locinput + n;
745                         }
746                         return(0);
747                 case END:
748                         reginput = locinput; /* put where regtry can find it */
749                         return(1);      /* Success! */
750                 default:
751                         printf("%x %d\n",scan,scan[1]);
752                         FAIL("regexp memory corruption");
753                 }
754
755                 scan = next;
756         }
757
758         /*
759          * We get here only if there's trouble -- normally "case END" is
760          * the terminating point.
761          */
762         FAIL("corrupted regexp pointers");
763         /*NOTREACHED*/
764 #ifdef lint
765         return 0;
766 #endif
767 }
768
769 /*
770  - regrepeat - repeatedly match something simple, report how many
771  */
772 /*
773  * [This routine now assumes that it will only match on things of length 1.
774  * That was true before, but now we assume scan - reginput is the count,
775  * rather than incrementing count on every character.]
776  */
777 static int
778 regrepeat(p, max)
779 char *p;
780 int max;
781 {
782         register char *scan;
783         register char *opnd;
784         register int c;
785         register char *loceol = regeol;
786
787         scan = reginput;
788         if (max && max < loceol - scan)
789             loceol = scan + max;
790         opnd = OPERAND(p);
791         switch (OP(p)) {
792         case ANY:
793                 while (scan < loceol && *scan != '\n')
794                         scan++;
795                 break;
796         case EXACTLY:           /* length of string is 1 */
797                 opnd++;
798                 while (scan < loceol && *opnd == *scan)
799                         scan++;
800                 break;
801         case ANYOF:
802                 c = UCHARAT(scan);
803                 while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
804                         scan++;
805                         c = UCHARAT(scan);
806                 }
807                 break;
808         case ALNUM:
809                 while (scan < loceol && isALNUM(*scan))
810                         scan++;
811                 break;
812         case NALNUM:
813                 while (scan < loceol && !isALNUM(*scan))
814                         scan++;
815                 break;
816         case SPACE:
817                 while (scan < loceol && isSPACE(*scan))
818                         scan++;
819                 break;
820         case NSPACE:
821                 while (scan < loceol && !isSPACE(*scan))
822                         scan++;
823                 break;
824         case DIGIT:
825                 while (scan < loceol && isDIGIT(*scan))
826                         scan++;
827                 break;
828         case NDIGIT:
829                 while (scan < loceol && !isDIGIT(*scan))
830                         scan++;
831                 break;
832         default:                /* Oh dear.  Called inappropriately. */
833                 FAIL("internal regexp foulup");
834                 /* NOTREACHED */
835         }
836
837         c = scan - reginput;
838         reginput = scan;
839
840         return(c);
841 }
842
843 /*
844  - regnext - dig the "next" pointer out of a node
845  *
846  * [Note, when REGALIGN is defined there are two places in regmatch()
847  * that bypass this code for speed.]
848  */
849 char *
850 regnext(p)
851 register char *p;
852 {
853         register int offset;
854
855         if (p == &regdummy)
856                 return(NULL);
857
858         offset = NEXT(p);
859         if (offset == 0)
860                 return(NULL);
861
862 #ifdef REGALIGN
863         return(p+offset);
864 #else
865         if (OP(p) == BACK)
866                 return(p-offset);
867         else
868                 return(p+offset);
869 #endif
870 }