perl 3.0 patch #38 (combined patch)
[p5sagit/p5-mst-13.2.git] / regexec.c
1 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
2  * confused with the original package (see point 3 below).  Thanks, Henry!
3  */
4
5 /* Additional note: this code is very heavily munged from Henry's version
6  * in places.  In some spots I've traded clarity for efficiency, so don't
7  * blame Henry for some of the lack of readability.
8  */
9
10 /* $Header: regexec.c,v 3.0.1.5 90/10/16 10:25:36 lwall Locked $
11  *
12  * $Log:        regexec.c,v $
13  * Revision 3.0.1.5  90/10/16  10:25:36  lwall
14  * patch29: /^pat/ occasionally matched in middle of string when $* = 0
15  * patch29: /.{n,m}$/ could match with fewer than n characters remaining
16  * patch29: /\d{9}/ could match more than 9 characters
17  * 
18  * Revision 3.0.1.4  90/08/09  05:12:03  lwall
19  * patch19: sped up /x+y/ patterns greatly by not retrying on every x
20  * patch19: inhibited backoff on patterns anchored to the end like /\s+$/
21  * patch19: sped up {m,n} on simple items
22  * patch19: $' broke on embedded nulls
23  * patch19: $ will now only match at end of string if $* == 0
24  * 
25  * Revision 3.0.1.3  90/02/28  18:14:39  lwall
26  * patch9: /[\200-\377]/ didn't work on machines with signed chars
27  * patch9: \d, \w, and \s could misfire on characters with high bit set
28  * patch9: /\bfoo/i didn't work
29  * 
30  * Revision 3.0.1.2  89/12/21  20:16:27  lwall
31  * patch7: certain patterns didn't match correctly at end of string
32  * 
33  * Revision 3.0.1.1  89/11/11  04:52:04  lwall
34  * patch2: /\b$foo/ didn't work
35  * 
36  * Revision 3.0  89/10/18  15:22:53  lwall
37  * 3.0 baseline
38  * 
39  */
40
41 /*
42  * regcomp and regexec -- regsub and regerror are not used in perl
43  *
44  *      Copyright (c) 1986 by University of Toronto.
45  *      Written by Henry Spencer.  Not derived from licensed software.
46  *
47  *      Permission is granted to anyone to use this software for any
48  *      purpose on any computer system, and to redistribute it freely,
49  *      subject to the following restrictions:
50  *
51  *      1. The author is not responsible for the consequences of use of
52  *              this software, no matter how awful, even if they arise
53  *              from defects in it.
54  *
55  *      2. The origin of this software must not be misrepresented, either
56  *              by explicit claim or by omission.
57  *
58  *      3. Altered versions must be plainly marked as such, and must not
59  *              be misrepresented as being the original software.
60  *
61  ****    Alterations to Henry's code are...
62  ****
63  ****    Copyright (c) 1989, Larry Wall
64  ****
65  ****    You may distribute under the terms of the GNU General Public License
66  ****    as specified in the README file that comes with the perl 3.0 kit.
67  *
68  * Beware that some of this code is subtly aware of the way operator
69  * precedence is structured in regular expressions.  Serious changes in
70  * regular-expression syntax might require a total rethink.
71  */
72 #include "EXTERN.h"
73 #include "perl.h"
74 #include "regcomp.h"
75
76 #ifndef STATIC
77 #define STATIC  static
78 #endif
79
80 #ifdef DEBUGGING
81 int regnarrate = 0;
82 #endif
83
84 #define isALNUM(c) (isascii(c) && (isalpha(c) || isdigit(c) || c == '_'))
85 #define isSPACE(c) (isascii(c) && isspace(c))
86 #define isDIGIT(c) (isascii(c) && isdigit(c))
87 #define isUPPER(c) (isascii(c) && isupper(c))
88
89 /*
90  * regexec and friends
91  */
92
93 /*
94  * Global work variables for regexec().
95  */
96 static char *regprecomp;
97 static char *reginput;          /* String-input pointer. */
98 static char regprev;            /* char before regbol, \n if none */
99 static char *regbol;            /* Beginning of input, for ^ check. */
100 static char *regeol;            /* End of input, for $ check. */
101 static char **regstartp;        /* Pointer to startp array. */
102 static char **regendp;          /* Ditto for endp. */
103 static char *reglastparen;      /* Similarly for lastparen. */
104 static char *regtill;
105
106 static char *regmystartp[10];   /* For remembering backreferences. */
107 static char *regmyendp[10];
108
109 /*
110  * Forwards.
111  */
112 STATIC int regtry();
113 STATIC int regmatch();
114 STATIC int regrepeat();
115
116 extern int multiline;
117
118 /*
119  - regexec - match a regexp against a string
120  */
121 int
122 regexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
123 register regexp *prog;
124 char *stringarg;
125 register char *strend;  /* pointer to null at end of string */
126 char *strbeg;   /* real beginning of string */
127 int minend;     /* end of match must be at least minend after stringarg */
128 STR *screamer;
129 int safebase;   /* no need to remember string in subbase */
130 {
131         register char *s;
132         register int i;
133         register char *c;
134         register char *string = stringarg;
135         register int tmp;
136         int minlen = 0;         /* must match at least this many chars */
137         int dontbother = 0;     /* how many characters not to try at end */
138
139         /* Be paranoid... */
140         if (prog == NULL || string == NULL) {
141                 fatal("NULL regexp parameter");
142                 return(0);
143         }
144
145         if (string == strbeg)   /* is ^ valid at stringarg? */
146             regprev = '\n';
147         else {
148             regprev = stringarg[-1];
149             if (!multiline && regprev == '\n')
150                 regprev = '\0';         /* force ^ to NOT match */
151         }
152         regprecomp = prog->precomp;
153         /* Check validity of program. */
154         if (UCHARAT(prog->program) != MAGIC) {
155                 FAIL("corrupted regexp program");
156         }
157
158         if (prog->do_folding) {
159                 safebase = FALSE;
160                 i = strend - string;
161                 New(1101,c,i+1,char);
162                 (void)bcopy(string, c, i+1);
163                 string = c;
164                 strend = string + i;
165                 for (s = string; s < strend; s++)
166                         if (isUPPER(*s))
167                                 *s = tolower(*s);
168         }
169
170         /* If there is a "must appear" string, look for it. */
171         s = string;
172         if (prog->regmust != Nullstr) {
173                 if (stringarg == strbeg && screamer) {
174                         if (screamfirst[prog->regmust->str_rare] >= 0)
175                                 s = screaminstr(screamer,prog->regmust);
176                         else
177                                 s = Nullch;
178                 }
179 #ifndef lint
180                 else
181                         s = fbminstr((unsigned char*)s, (unsigned char*)strend,
182                             prog->regmust);
183 #endif
184                 if (!s) {
185                         ++prog->regmust->str_u.str_useful;      /* hooray */
186                         goto phooey;    /* not present */
187                 }
188                 else if (prog->regback >= 0) {
189                         s -= prog->regback;
190                         if (s < string)
191                             s = string;
192                         minlen = prog->regback + prog->regmust->str_cur;
193                 }
194                 else if (--prog->regmust->str_u.str_useful < 0) { /* boo */
195                         str_free(prog->regmust);
196                         prog->regmust = Nullstr;        /* disable regmust */
197                         s = string;
198                 }
199                 else {
200                         s = string;
201                         minlen = prog->regmust->str_cur;
202                 }
203         }
204
205         /* Mark beginning of line for ^ . */
206         regbol = string;
207
208         /* Mark end of line for $ (and such) */
209         regeol = strend;
210
211         /* see how far we have to get to not match where we matched before */
212         regtill = string+minend;
213
214         /* Simplest case:  anchored match need be tried only once. */
215         /*  [unless multiline is set] */
216         if (prog->reganch & 1) {
217                 if (regtry(prog, string))
218                         goto got_it;
219                 else if (multiline) {
220                         if (minlen)
221                             dontbother = minlen - 1;
222                         strend -= dontbother;
223                         /* for multiline we only have to try after newlines */
224                         if (s > string)
225                             s--;
226                         while (s < strend) {
227                             if (*s++ == '\n') {
228                                 if (s < strend && regtry(prog, s))
229                                     goto got_it;
230                             }
231                         }
232                 }
233                 goto phooey;
234         }
235
236         /* Messy cases:  unanchored match. */
237         if (prog->regstart) {
238                 if (prog->reganch & 2) {        /* we have /x+whatever/ */
239                     /* it must be a one character string */
240                     i = prog->regstart->str_ptr[0];
241                     while (s < strend) {
242                             if (*s == i) {
243                                     if (regtry(prog, s))
244                                             goto got_it;
245                                     s++;
246                                     while (s < strend && *s == i)
247                                         s++;
248                             }
249                             s++;
250                     }
251                 }
252                 else if (prog->regstart->str_pok == 3) {
253                     /* We know what string it must start with. */
254 #ifndef lint
255                     while ((s = fbminstr((unsigned char*)s,
256                       (unsigned char*)strend, prog->regstart)) != NULL)
257 #else
258                     while (s = Nullch)
259 #endif
260                     {
261                             if (regtry(prog, s))
262                                     goto got_it;
263                             s++;
264                     }
265                 }
266                 else {
267                     c = prog->regstart->str_ptr;
268                     while ((s = ninstr(s, strend,
269                       c, c + prog->regstart->str_cur )) != NULL) {
270                             if (regtry(prog, s))
271                                     goto got_it;
272                             s++;
273                     }
274                 }
275                 goto phooey;
276         }
277         if (c = prog->regstclass) {
278                 int doevery = (prog->reganch & 2) == 0;
279
280                 if (minlen)
281                     dontbother = minlen - 1;
282                 strend -= dontbother;   /* don't bother with what can't match */
283                 tmp = 1;
284                 /* We know what class it must start with. */
285                 switch (OP(c)) {
286                 case ANYOF:
287                     c = OPERAND(c);
288                     while (s < strend) {
289                             i = UCHARAT(s);
290                             if (!(c[i >> 3] & (1 << (i&7)))) {
291                                     if (tmp && regtry(prog, s))
292                                             goto got_it;
293                                     else
294                                             tmp = doevery;
295                             }
296                             else
297                                     tmp = 1;
298                             s++;
299                     }
300                     break;
301                 case BOUND:
302                     if (minlen)
303                         dontbother++,strend--;
304                     if (s != string) {
305                         i = s[-1];
306                         tmp = isALNUM(i);
307                     }
308                     else
309                         tmp = isALNUM(regprev); /* assume not alphanumeric */
310                     while (s < strend) {
311                             i = *s;
312                             if (tmp != isALNUM(i)) {
313                                     tmp = !tmp;
314                                     if (regtry(prog, s))
315                                             goto got_it;
316                             }
317                             s++;
318                     }
319                     if ((minlen || tmp) && regtry(prog,s))
320                             goto got_it;
321                     break;
322                 case NBOUND:
323                     if (minlen)
324                         dontbother++,strend--;
325                     if (s != string) {
326                         i = s[-1];
327                         tmp = isALNUM(i);
328                     }
329                     else
330                         tmp = isALNUM(regprev); /* assume not alphanumeric */
331                     while (s < strend) {
332                             i = *s;
333                             if (tmp != isALNUM(i))
334                                     tmp = !tmp;
335                             else if (regtry(prog, s))
336                                     goto got_it;
337                             s++;
338                     }
339                     if ((minlen || !tmp) && regtry(prog,s))
340                             goto got_it;
341                     break;
342                 case ALNUM:
343                     while (s < strend) {
344                             i = *s;
345                             if (isALNUM(i)) {
346                                     if (tmp && regtry(prog, s))
347                                             goto got_it;
348                                     else
349                                             tmp = doevery;
350                             }
351                             else
352                                     tmp = 1;
353                             s++;
354                     }
355                     break;
356                 case NALNUM:
357                     while (s < strend) {
358                             i = *s;
359                             if (!isALNUM(i)) {
360                                     if (tmp && regtry(prog, s))
361                                             goto got_it;
362                                     else
363                                             tmp = doevery;
364                             }
365                             else
366                                     tmp = 1;
367                             s++;
368                     }
369                     break;
370                 case SPACE:
371                     while (s < strend) {
372                             if (isSPACE(*s)) {
373                                     if (tmp && regtry(prog, s))
374                                             goto got_it;
375                                     else
376                                             tmp = doevery;
377                             }
378                             else
379                                     tmp = 1;
380                             s++;
381                     }
382                     break;
383                 case NSPACE:
384                     while (s < strend) {
385                             if (!isSPACE(*s)) {
386                                     if (tmp && regtry(prog, s))
387                                             goto got_it;
388                                     else
389                                             tmp = doevery;
390                             }
391                             else
392                                     tmp = 1;
393                             s++;
394                     }
395                     break;
396                 case DIGIT:
397                     while (s < strend) {
398                             if (isDIGIT(*s)) {
399                                     if (tmp && regtry(prog, s))
400                                             goto got_it;
401                                     else
402                                             tmp = doevery;
403                             }
404                             else
405                                     tmp = 1;
406                             s++;
407                     }
408                     break;
409                 case NDIGIT:
410                     while (s < strend) {
411                             if (!isDIGIT(*s)) {
412                                     if (tmp && regtry(prog, s))
413                                             goto got_it;
414                                     else
415                                             tmp = doevery;
416                             }
417                             else
418                                     tmp = 1;
419                             s++;
420                     }
421                     break;
422                 }
423         }
424         else {
425                 if (minlen)
426                     dontbother = minlen - 1;
427                 strend -= dontbother;
428                 /* We don't know much -- general case. */
429                 do {
430                         if (regtry(prog, s))
431                                 goto got_it;
432                 } while (s++ < strend);
433         }
434
435         /* Failure. */
436         goto phooey;
437
438     got_it:
439         if ((!safebase && (prog->nparens || sawampersand)) || prog->do_folding){
440                 strend += dontbother;   /* uncheat */
441                 if (safebase)                   /* no need for $digit later */
442                     s = strbeg;
443                 else if (strbeg != prog->subbase) {
444                     i = strend - string + (stringarg - strbeg);
445                     s = nsavestr(strbeg,i);     /* so $digit will work later */
446                     if (prog->subbase)
447                             Safefree(prog->subbase);
448                     prog->subbase = s;
449                     prog->subend = s+i;
450                 }
451                 else
452                     s = prog->subbase;
453                 s += (stringarg - strbeg);
454                 for (i = 0; i <= prog->nparens; i++) {
455                         if (prog->endp[i]) {
456                             prog->startp[i] = s + (prog->startp[i] - string);
457                             prog->endp[i] = s + (prog->endp[i] - string);
458                         }
459                 }
460                 if (prog->do_folding)
461                         Safefree(string);
462         }
463         return(1);
464
465     phooey:
466         if (prog->do_folding)
467                 Safefree(string);
468         return(0);
469 }
470
471 /*
472  - regtry - try match at specific point
473  */
474 static int                      /* 0 failure, 1 success */
475 regtry(prog, string)
476 regexp *prog;
477 char *string;
478 {
479         register int i;
480         register char **sp;
481         register char **ep;
482
483         reginput = string;
484         regstartp = prog->startp;
485         regendp = prog->endp;
486         reglastparen = &prog->lastparen;
487         prog->lastparen = 0;
488
489         sp = prog->startp;
490         ep = prog->endp;
491         if (prog->nparens) {
492                 for (i = NSUBEXP; i > 0; i--) {
493                         *sp++ = NULL;
494                         *ep++ = NULL;
495                 }
496         }
497         if (regmatch(prog->program + 1) && reginput >= regtill) {
498                 prog->startp[0] = string;
499                 prog->endp[0] = reginput;
500                 return(1);
501         } else
502                 return(0);
503 }
504
505 /*
506  - regmatch - main matching routine
507  *
508  * Conceptually the strategy is simple:  check to see whether the current
509  * node matches, call self recursively to see whether the rest matches,
510  * and then act accordingly.  In practice we make some effort to avoid
511  * recursion, in particular by going through "ordinary" nodes (that don't
512  * need to know whether the rest of the match failed) by a loop instead of
513  * by recursion.
514  */
515 /* [lwall] I've hoisted the register declarations to the outer block in order to
516  * maybe save a little bit of pushing and popping on the stack.  It also takes
517  * advantage of machines that use a register save mask on subroutine entry.
518  */
519 static int                      /* 0 failure, 1 success */
520 regmatch(prog)
521 char *prog;
522 {
523         register char *scan;    /* Current node. */
524         char *next;             /* Next node. */
525         register int nextchar;
526         register int n;         /* no or next */
527         register int ln;        /* len or last */
528         register char *s;       /* operand or save */
529         register char *locinput = reginput;
530
531         nextchar = *locinput;
532         scan = prog;
533 #ifdef DEBUGGING
534         if (scan != NULL && regnarrate)
535                 fprintf(stderr, "%s(\n", regprop(scan));
536 #endif
537         while (scan != NULL) {
538 #ifdef DEBUGGING
539                 if (regnarrate)
540                         fprintf(stderr, "%s...\n", regprop(scan));
541 #endif
542
543 #ifdef REGALIGN
544                 next = scan + NEXT(scan);
545                 if (next == scan)
546                     next = NULL;
547 #else
548                 next = regnext(scan);
549 #endif
550
551                 switch (OP(scan)) {
552                 case BOL:
553                         if (locinput == regbol ? regprev == '\n' :
554                             ((nextchar || locinput < regeol) &&
555                               locinput[-1] == '\n') )
556                         {
557                                 regtill = regbol;
558                                 break;
559                         }
560                         return(0);
561                 case EOL:
562                         if ((nextchar || locinput < regeol) && nextchar != '\n')
563                                 return(0);
564                         if (!multiline && regeol - locinput > 1)
565                                 return 0;
566                         regtill = regbol;
567                         break;
568                 case ANY:
569                         if ((nextchar == '\0' && locinput >= regeol) ||
570                           nextchar == '\n')
571                                 return(0);
572                         nextchar = *++locinput;
573                         break;
574                 case EXACTLY:
575                         s = OPERAND(scan);
576                         ln = *s++;
577                         /* Inline the first character, for speed. */
578                         if (*s != nextchar)
579                                 return(0);
580                         if (regeol - locinput < ln)
581                                 return 0;
582                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
583                                 return(0);
584                         locinput += ln;
585                         nextchar = *locinput;
586                         break;
587                 case ANYOF:
588                         s = OPERAND(scan);
589                         if (nextchar < 0)
590                                 nextchar = UCHARAT(locinput);
591                         if (s[nextchar >> 3] & (1 << (nextchar&7)))
592                                 return(0);
593                         nextchar = *++locinput;
594                         if (!nextchar && locinput > regeol)
595                                 return 0;
596                         break;
597                 case ALNUM:
598                         if (!nextchar)
599                                 return(0);
600                         if (!isALNUM(nextchar))
601                                 return(0);
602                         nextchar = *++locinput;
603                         break;
604                 case NALNUM:
605                         if (!nextchar && locinput >= regeol)
606                                 return(0);
607                         if (isALNUM(nextchar))
608                                 return(0);
609                         nextchar = *++locinput;
610                         break;
611                 case NBOUND:
612                 case BOUND:
613                         if (locinput == regbol) /* was last char in word? */
614                                 ln = isALNUM(regprev);
615                         else 
616                                 ln = isALNUM(locinput[-1]);
617                         n = isALNUM(nextchar); /* is next char in word? */
618                         if ((ln == n) == (OP(scan) == BOUND))
619                                 return(0);
620                         break;
621                 case SPACE:
622                         if (!nextchar && locinput >= regeol)
623                                 return(0);
624                         if (!isSPACE(nextchar))
625                                 return(0);
626                         nextchar = *++locinput;
627                         break;
628                 case NSPACE:
629                         if (!nextchar)
630                                 return(0);
631                         if (isSPACE(nextchar))
632                                 return(0);
633                         nextchar = *++locinput;
634                         break;
635                 case DIGIT:
636                         if (!isDIGIT(nextchar))
637                                 return(0);
638                         nextchar = *++locinput;
639                         break;
640                 case NDIGIT:
641                         if (!nextchar && locinput >= regeol)
642                                 return(0);
643                         if (isDIGIT(nextchar))
644                                 return(0);
645                         nextchar = *++locinput;
646                         break;
647                 case REF:
648                 case REF+1:
649                 case REF+2:
650                 case REF+3:
651                 case REF+4:
652                 case REF+5:
653                 case REF+6:
654                 case REF+7:
655                 case REF+8:
656                 case REF+9:
657                         n = OP(scan) - REF;
658                         s = regmystartp[n];
659                         if (!s)
660                             return(0);
661                         if (!regmyendp[n])
662                             return(0);
663                         if (s == regmyendp[n])
664                             break;
665                         /* Inline the first character, for speed. */
666                         if (*s != nextchar)
667                                 return(0);
668                         ln = regmyendp[n] - s;
669                         if (locinput + ln > regeol)
670                                 return 0;
671                         if (ln > 1 && bcmp(s, locinput, ln) != 0)
672                                 return(0);
673                         locinput += ln;
674                         nextchar = *locinput;
675                         break;
676
677                 case NOTHING:
678                         break;
679                 case BACK:
680                         break;
681                 case OPEN+1:
682                 case OPEN+2:
683                 case OPEN+3:
684                 case OPEN+4:
685                 case OPEN+5:
686                 case OPEN+6:
687                 case OPEN+7:
688                 case OPEN+8:
689                 case OPEN+9:
690                         n = OP(scan) - OPEN;
691                         reginput = locinput;
692
693                         regmystartp[n] = locinput;      /* for REF */
694                         if (regmatch(next)) {
695                                 /*
696                                  * Don't set startp if some later
697                                  * invocation of the same parentheses
698                                  * already has.
699                                  */
700                                 if (regstartp[n] == NULL)
701                                         regstartp[n] = locinput;
702                                 return(1);
703                         } else
704                                 return(0);
705                         /* NOTREACHED */
706                 case CLOSE+1:
707                 case CLOSE+2:
708                 case CLOSE+3:
709                 case CLOSE+4:
710                 case CLOSE+5:
711                 case CLOSE+6:
712                 case CLOSE+7:
713                 case CLOSE+8:
714                 case CLOSE+9: {
715                                 n = OP(scan) - CLOSE;
716                                 reginput = locinput;
717
718                                 regmyendp[n] = locinput;        /* for REF */
719                                 if (regmatch(next)) {
720                                         /*
721                                          * Don't set endp if some later
722                                          * invocation of the same parentheses
723                                          * already has.
724                                          */
725                                         if (regendp[n] == NULL) {
726                                                 regendp[n] = locinput;
727                                                 if (n > *reglastparen)
728                                                     *reglastparen = n;
729                                         }
730                                         return(1);
731                                 } else
732                                         return(0);
733                         }
734                         /*NOTREACHED*/
735                 case BRANCH: {
736                                 if (OP(next) != BRANCH)         /* No choice. */
737                                         next = NEXTOPER(scan);  /* Avoid recursion. */
738                                 else {
739                                         do {
740                                                 reginput = locinput;
741                                                 if (regmatch(NEXTOPER(scan)))
742                                                         return(1);
743 #ifdef REGALIGN
744                                                 if (n = NEXT(scan))
745                                                     scan += n;
746                                                 else
747                                                     scan = NULL;
748 #else
749                                                 scan = regnext(scan);
750 #endif
751                                         } while (scan != NULL && OP(scan) == BRANCH);
752                                         return(0);
753                                         /* NOTREACHED */
754                                 }
755                         }
756                         break;
757                 case CURLY:
758                         ln = ARG1(scan);  /* min to match */
759                         n  = ARG2(scan);  /* max to match */
760                         scan = NEXTOPER(scan) + 4;
761                         goto repeat;
762                 case STAR:
763                         ln = 0;
764                         n = 0;
765                         scan = NEXTOPER(scan);
766                         goto repeat;
767                 case PLUS:
768                         /*
769                          * Lookahead to avoid useless match attempts
770                          * when we know what character comes next.
771                          */
772                         ln = 1;
773                         n = 0;
774                         scan = NEXTOPER(scan);
775                     repeat:
776                         if (OP(next) == EXACTLY)
777                                 nextchar = *(OPERAND(next)+1);
778                         else
779                                 nextchar = -1000;
780                         reginput = locinput;
781                         n = regrepeat(scan, n);
782                         if (!multiline && OP(next) == EOL && ln < n)
783                             ln = n;                     /* why back off? */
784                         while (n >= ln) {
785                                 /* If it could work, try it. */
786                                 if (nextchar == -1000 || *reginput == nextchar)
787                                         if (regmatch(next))
788                                                 return(1);
789                                 /* Couldn't or didn't -- back up. */
790                                 n--;
791                                 reginput = locinput + n;
792                         }
793                         return(0);
794                 case END:
795                         reginput = locinput; /* put where regtry can find it */
796                         return(1);      /* Success! */
797                 default:
798                         printf("%x %d\n",scan,scan[1]);
799                         FAIL("regexp memory corruption");
800                 }
801
802                 scan = next;
803         }
804
805         /*
806          * We get here only if there's trouble -- normally "case END" is
807          * the terminating point.
808          */
809         FAIL("corrupted regexp pointers");
810         /*NOTREACHED*/
811 #ifdef lint
812         return 0;
813 #endif
814 }
815
816 /*
817  - regrepeat - repeatedly match something simple, report how many
818  */
819 /*
820  * [This routine now assumes that it will only match on things of length 1.
821  * That was true before, but now we assume scan - reginput is the count,
822  * rather than incrementing count on every character.]
823  */
824 static int
825 regrepeat(p, max)
826 char *p;
827 int max;
828 {
829         register char *scan;
830         register char *opnd;
831         register int c;
832         register char *loceol = regeol;
833
834         scan = reginput;
835         if (max && max < loceol - scan)
836             loceol = scan + max;
837         opnd = OPERAND(p);
838         switch (OP(p)) {
839         case ANY:
840                 while (scan < loceol && *scan != '\n')
841                         scan++;
842                 break;
843         case EXACTLY:           /* length of string is 1 */
844                 opnd++;
845                 while (scan < loceol && *opnd == *scan)
846                         scan++;
847                 break;
848         case ANYOF:
849                 c = UCHARAT(scan);
850                 while (scan < loceol && !(opnd[c >> 3] & (1 << (c & 7)))) {
851                         scan++;
852                         c = UCHARAT(scan);
853                 }
854                 break;
855         case ALNUM:
856                 while (scan < loceol && isALNUM(*scan))
857                         scan++;
858                 break;
859         case NALNUM:
860                 while (scan < loceol && !isALNUM(*scan))
861                         scan++;
862                 break;
863         case SPACE:
864                 while (scan < loceol && isSPACE(*scan))
865                         scan++;
866                 break;
867         case NSPACE:
868                 while (scan < loceol && !isSPACE(*scan))
869                         scan++;
870                 break;
871         case DIGIT:
872                 while (scan < loceol && isDIGIT(*scan))
873                         scan++;
874                 break;
875         case NDIGIT:
876                 while (scan < loceol && !isDIGIT(*scan))
877                         scan++;
878                 break;
879         default:                /* Oh dear.  Called inappropriately. */
880                 FAIL("internal regexp foulup");
881                 /* NOTREACHED */
882         }
883
884         c = scan - reginput;
885         reginput = scan;
886
887         return(c);
888 }
889
890 /*
891  - regnext - dig the "next" pointer out of a node
892  *
893  * [Note, when REGALIGN is defined there are two places in regmatch()
894  * that bypass this code for speed.]
895  */
896 char *
897 regnext(p)
898 register char *p;
899 {
900         register int offset;
901
902         if (p == &regdummy)
903                 return(NULL);
904
905         offset = NEXT(p);
906         if (offset == 0)
907                 return(NULL);
908
909 #ifdef REGALIGN
910         return(p+offset);
911 #else
912         if (OP(p) == BACK)
913                 return(p-offset);
914         else
915                 return(p+offset);
916 #endif
917 }