Move init of global mutexes/cond vars earlier.
[p5sagit/p5-mst-13.2.git] / regexec.c
CommitLineData
a0d0e21e 1/* regexec.c
2 */
3
4/*
5 * "One Ring to rule them all, One Ring to find them..."
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
f0fcb552 22/*SUPPRESS 112*/
a687059c 23/*
e50aee73 24 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 25 *
26 * Copyright (c) 1986 by University of Toronto.
27 * Written by Henry Spencer. Not derived from licensed software.
28 *
29 * Permission is granted to anyone to use this software for any
30 * purpose on any computer system, and to redistribute it freely,
31 * subject to the following restrictions:
32 *
33 * 1. The author is not responsible for the consequences of use of
34 * this software, no matter how awful, even if they arise
35 * from defects in it.
36 *
37 * 2. The origin of this software must not be misrepresented, either
38 * by explicit claim or by omission.
39 *
40 * 3. Altered versions must be plainly marked as such, and must not
41 * be misrepresented as being the original software.
42 *
43 **** Alterations to Henry's code are...
44 ****
9607fc9c 45 **** Copyright (c) 1991-1997, Larry Wall
a687059c 46 ****
9ef589d8 47 **** You may distribute under the terms of either the GNU General Public
48 **** License or the Artistic License, as specified in the README file.
a687059c 49 *
50 * Beware that some of this code is subtly aware of the way operator
51 * precedence is structured in regular expressions. Serious changes in
52 * regular-expression syntax might require a total rethink.
53 */
54#include "EXTERN.h"
55#include "perl.h"
56#include "regcomp.h"
57
58#ifndef STATIC
59#define STATIC static
60#endif
61
62#ifdef DEBUGGING
a0d0e21e 63static I32 regnarrate = 0;
64static char* regprogram = 0;
a687059c 65#endif
66
a0d0e21e 67/* Current curly descriptor */
68typedef struct curcur CURCUR;
69struct curcur {
70 int parenfloor; /* how far back to strip paren data */
71 int cur; /* how many instances of scan we've matched */
72 int min; /* the minimal number of scans to match */
73 int max; /* the maximal number of scans to match */
74 int minmod; /* whether to work our way up or down */
75 char * scan; /* the thing to match */
76 char * next; /* what has to match after it */
77 char * lastloc; /* where we started matching this scan */
78 CURCUR * oldcc; /* current curly before we started this one */
79};
80
81static CURCUR* regcc;
82
83typedef I32 CHECKPOINT;
84
55497cff 85static CHECKPOINT regcppush _((I32 parenfloor));
86static char * regcppop _((void));
a0d0e21e 87
55497cff 88static CHECKPOINT
a0d0e21e 89regcppush(parenfloor)
90I32 parenfloor;
91{
11343788 92 dTHR;
a0d0e21e 93 int retval = savestack_ix;
94 int i = (regsize - parenfloor) * 3;
95 int p;
96
97 SSCHECK(i + 5);
98 for (p = regsize; p > parenfloor; p--) {
99 SSPUSHPTR(regendp[p]);
100 SSPUSHPTR(regstartp[p]);
101 SSPUSHINT(p);
102 }
103 SSPUSHINT(regsize);
104 SSPUSHINT(*reglastparen);
105 SSPUSHPTR(reginput);
106 SSPUSHINT(i + 3);
107 SSPUSHINT(SAVEt_REGCONTEXT);
108 return retval;
109}
110
55497cff 111static char *
a0d0e21e 112regcppop()
113{
11343788 114 dTHR;
a0d0e21e 115 I32 i = SSPOPINT;
116 U32 paren = 0;
117 char *input;
118 char *tmps;
119 assert(i == SAVEt_REGCONTEXT);
120 i = SSPOPINT;
121 input = (char *) SSPOPPTR;
122 *reglastparen = SSPOPINT;
123 regsize = SSPOPINT;
124 for (i -= 3; i > 0; i -= 3) {
125 paren = (U32)SSPOPINT;
126 regstartp[paren] = (char *) SSPOPPTR;
127 tmps = (char*)SSPOPPTR;
128 if (paren <= *reglastparen)
129 regendp[paren] = tmps;
130 }
131 for (paren = *reglastparen + 1; paren <= regnpar; paren++) {
132 if (paren > regsize)
133 regstartp[paren] = Nullch;
134 regendp[paren] = Nullch;
135 }
136 return input;
137}
138
139#define regcpblow(cp) leave_scope(cp)
140
a687059c 141/*
e50aee73 142 * pregexec and friends
a687059c 143 */
144
145/*
a687059c 146 * Forwards.
147 */
a0d0e21e 148
149static I32 regmatch _((char *prog));
150static I32 regrepeat _((char *p, I32 max));
151static I32 regtry _((regexp *prog, char *startpos));
bbce6d69 152static bool reginclass _((char *p, I32 c));
153
154static bool regtainted; /* tainted information used? */
a687059c 155
156/*
e50aee73 157 - pregexec - match a regexp against a string
a687059c 158 */
79072805 159I32
e50aee73 160pregexec(prog, stringarg, strend, strbeg, minend, screamer, safebase)
a687059c 161register regexp *prog;
162char *stringarg;
163register char *strend; /* pointer to null at end of string */
164char *strbeg; /* real beginning of string */
79072805 165I32 minend; /* end of match must be at least minend after stringarg */
166SV *screamer;
167I32 safebase; /* no need to remember string in subbase */
a687059c 168{
a0d0e21e 169 register char *s;
a0d0e21e 170 register char *c;
171 register char *startpos = stringarg;
172 register I32 tmp;
173 I32 minlen = 0; /* must match at least this many chars */
174 I32 dontbother = 0; /* how many characters not to try at end */
175 CURCUR cc;
a687059c 176
a0d0e21e 177 cc.cur = 0;
4633a7c4 178 cc.oldcc = 0;
a0d0e21e 179 regcc = &cc;
180
181#ifdef DEBUGGING
182 regnarrate = debug & 512;
183 regprogram = prog->program;
184#endif
185
186 /* Be paranoid... */
187 if (prog == NULL || startpos == NULL) {
188 croak("NULL regexp parameter");
189 return 0;
190 }
191
192 if (startpos == strbeg) /* is ^ valid at stringarg? */
193 regprev = '\n';
194 else {
195 regprev = stringarg[-1];
196 if (!multiline && regprev == '\n')
197 regprev = '\0'; /* force ^ to NOT match */
198 }
bbce6d69 199
a0d0e21e 200 regprecomp = prog->precomp;
a0d0e21e 201 /* Check validity of program. */
202 if (UCHARAT(prog->program) != MAGIC) {
203 FAIL("corrupted regexp program");
204 }
205
bbce6d69 206 regnpar = prog->nparens;
207 regtainted = FALSE;
a0d0e21e 208
209 /* If there is a "must appear" string, look for it. */
210 s = startpos;
211 if (prog->regmust != Nullsv &&
774d564b 212 !(prog->reganch & ROPT_ANCH_GPOS) &&
213 (!(prog->reganch & ROPT_ANCH_BOL)
a0d0e21e 214 || (multiline && prog->regback >= 0)) )
215 {
216 if (stringarg == strbeg && screamer) {
217 if (screamfirst[BmRARE(prog->regmust)] >= 0)
218 s = screaminstr(screamer,prog->regmust);
219 else
220 s = Nullch;
0a12ae7d 221 }
a0d0e21e 222 else
223 s = fbm_instr((unsigned char*)s, (unsigned char*)strend,
224 prog->regmust);
225 if (!s) {
226 ++BmUSEFUL(prog->regmust); /* hooray */
227 goto phooey; /* not present */
a687059c 228 }
a0d0e21e 229 else if (prog->regback >= 0) {
230 s -= prog->regback;
231 if (s < startpos)
232 s = startpos;
233 minlen = prog->regback + SvCUR(prog->regmust);
a687059c 234 }
a0d0e21e 235 else if (!prog->naughty && --BmUSEFUL(prog->regmust) < 0) { /* boo */
236 SvREFCNT_dec(prog->regmust);
237 prog->regmust = Nullsv; /* disable regmust */
238 s = startpos;
239 }
240 else {
241 s = startpos;
242 minlen = SvCUR(prog->regmust);
a687059c 243 }
a0d0e21e 244 }
a687059c 245
a0d0e21e 246 /* Mark beginning of line for ^ . */
247 regbol = startpos;
a687059c 248
a0d0e21e 249 /* Mark end of line for $ (and such) */
250 regeol = strend;
a687059c 251
a0d0e21e 252 /* see how far we have to get to not match where we matched before */
253 regtill = startpos+minend;
a687059c 254
a0d0e21e 255 /* Simplest case: anchored match need be tried only once. */
774d564b 256 /* [unless only anchor is BOL and multiline is set] */
a0d0e21e 257 if (prog->reganch & ROPT_ANCH) {
258 if (regtry(prog, startpos))
259 goto got_it;
774d564b 260 else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
261 (multiline || (prog->reganch & ROPT_IMPLICIT)))
262 {
a0d0e21e 263 if (minlen)
264 dontbother = minlen - 1;
265 strend -= dontbother;
266 /* for multiline we only have to try after newlines */
267 if (s > startpos)
268 s--;
269 while (s < strend) {
270 if (*s++ == '\n') {
271 if (s < strend && regtry(prog, s))
272 goto got_it;
273 }
35c8bce7 274 }
35c8bce7 275 }
a0d0e21e 276 goto phooey;
277 }
35c8bce7 278
a0d0e21e 279 /* Messy cases: unanchored match. */
280 if (prog->regstart) {
281 if (prog->reganch & ROPT_SKIP) { /* we have /x+whatever/ */
282 /* it must be a one character string */
bbce6d69 283 char ch = SvPVX(prog->regstart)[0];
a0d0e21e 284 while (s < strend) {
bbce6d69 285 if (*s == ch) {
a0d0e21e 286 if (regtry(prog, s))
a687059c 287 goto got_it;
a0d0e21e 288 s++;
bbce6d69 289 while (s < strend && *s == ch)
a0d0e21e 290 s++;
a687059c 291 }
a0d0e21e 292 s++;
293 }
a687059c 294 }
47109bfb 295 else if (SvTYPE(prog->regstart) == SVt_PVBM) {
a0d0e21e 296 /* We know what string it must start with. */
297 while ((s = fbm_instr((unsigned char*)s,
298 (unsigned char*)strend, prog->regstart)) != NULL)
299 {
300 if (regtry(prog, s))
301 goto got_it;
302 s++;
303 }
304 }
47109bfb 305 else { /* Optimized fbm_instr: */
a0d0e21e 306 c = SvPVX(prog->regstart);
307 while ((s = ninstr(s, strend, c, c + SvCUR(prog->regstart))) != NULL)
308 {
309 if (regtry(prog, s))
310 goto got_it;
311 s++;
312 }
313 }
314 goto phooey;
315 }
316 /*SUPPRESS 560*/
317 if (c = prog->regstclass) {
318 I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
a687059c 319
a0d0e21e 320 if (minlen)
321 dontbother = minlen - 1;
322 strend -= dontbother; /* don't bother with what can't match */
323 tmp = 1;
324 /* We know what class it must start with. */
325 switch (OP(c)) {
326 case ANYOF:
327 c = OPERAND(c);
328 while (s < strend) {
bbce6d69 329 if (reginclass(c, *s)) {
a0d0e21e 330 if (tmp && regtry(prog, s))
331 goto got_it;
332 else
333 tmp = doevery;
a687059c 334 }
a0d0e21e 335 else
336 tmp = 1;
337 s++;
338 }
339 break;
bbce6d69 340 case BOUNDL:
341 regtainted = TRUE;
342 /* FALL THROUGH */
a0d0e21e 343 case BOUND:
344 if (minlen)
345 dontbother++,strend--;
bbce6d69 346 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 347 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 348 while (s < strend) {
95bac841 349 if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
a0d0e21e 350 tmp = !tmp;
351 if (regtry(prog, s))
352 goto got_it;
a687059c 353 }
a0d0e21e 354 s++;
355 }
356 if ((minlen || tmp) && regtry(prog,s))
357 goto got_it;
358 break;
bbce6d69 359 case NBOUNDL:
360 regtainted = TRUE;
361 /* FALL THROUGH */
a0d0e21e 362 case NBOUND:
363 if (minlen)
364 dontbother++,strend--;
bbce6d69 365 tmp = (s != startpos) ? UCHARAT(s - 1) : regprev;
95bac841 366 tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
a0d0e21e 367 while (s < strend) {
95bac841 368 if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
a0d0e21e 369 tmp = !tmp;
370 else if (regtry(prog, s))
371 goto got_it;
372 s++;
373 }
374 if ((minlen || !tmp) && regtry(prog,s))
375 goto got_it;
376 break;
377 case ALNUM:
378 while (s < strend) {
bbce6d69 379 if (isALNUM(*s)) {
380 if (tmp && regtry(prog, s))
381 goto got_it;
382 else
383 tmp = doevery;
384 }
385 else
386 tmp = 1;
387 s++;
388 }
389 break;
390 case ALNUML:
391 regtainted = TRUE;
392 while (s < strend) {
393 if (isALNUM_LC(*s)) {
a0d0e21e 394 if (tmp && regtry(prog, s))
395 goto got_it;
a687059c 396 else
a0d0e21e 397 tmp = doevery;
398 }
399 else
400 tmp = 1;
401 s++;
402 }
403 break;
404 case NALNUM:
405 while (s < strend) {
bbce6d69 406 if (!isALNUM(*s)) {
407 if (tmp && regtry(prog, s))
408 goto got_it;
409 else
410 tmp = doevery;
411 }
412 else
413 tmp = 1;
414 s++;
415 }
416 break;
417 case NALNUML:
418 regtainted = TRUE;
419 while (s < strend) {
420 if (!isALNUM_LC(*s)) {
a0d0e21e 421 if (tmp && regtry(prog, s))
422 goto got_it;
a687059c 423 else
a0d0e21e 424 tmp = doevery;
a687059c 425 }
a0d0e21e 426 else
427 tmp = 1;
428 s++;
429 }
430 break;
431 case SPACE:
432 while (s < strend) {
433 if (isSPACE(*s)) {
434 if (tmp && regtry(prog, s))
435 goto got_it;
436 else
437 tmp = doevery;
2304df62 438 }
a0d0e21e 439 else
440 tmp = 1;
441 s++;
442 }
443 break;
bbce6d69 444 case SPACEL:
445 regtainted = TRUE;
446 while (s < strend) {
447 if (isSPACE_LC(*s)) {
448 if (tmp && regtry(prog, s))
449 goto got_it;
450 else
451 tmp = doevery;
452 }
453 else
454 tmp = 1;
455 s++;
456 }
457 break;
a0d0e21e 458 case NSPACE:
459 while (s < strend) {
460 if (!isSPACE(*s)) {
461 if (tmp && regtry(prog, s))
462 goto got_it;
463 else
464 tmp = doevery;
a687059c 465 }
a0d0e21e 466 else
467 tmp = 1;
468 s++;
469 }
470 break;
bbce6d69 471 case NSPACEL:
472 regtainted = TRUE;
473 while (s < strend) {
474 if (!isSPACE_LC(*s)) {
475 if (tmp && regtry(prog, s))
476 goto got_it;
477 else
478 tmp = doevery;
479 }
480 else
481 tmp = 1;
482 s++;
483 }
484 break;
a0d0e21e 485 case DIGIT:
486 while (s < strend) {
487 if (isDIGIT(*s)) {
488 if (tmp && regtry(prog, s))
489 goto got_it;
490 else
491 tmp = doevery;
2b69d0c2 492 }
a0d0e21e 493 else
494 tmp = 1;
495 s++;
496 }
497 break;
498 case NDIGIT:
499 while (s < strend) {
500 if (!isDIGIT(*s)) {
501 if (tmp && regtry(prog, s))
502 goto got_it;
503 else
504 tmp = doevery;
a687059c 505 }
a0d0e21e 506 else
507 tmp = 1;
508 s++;
509 }
510 break;
a687059c 511 }
a0d0e21e 512 }
513 else {
514 if (minlen)
515 dontbother = minlen - 1;
516 strend -= dontbother;
517 /* We don't know much -- general case. */
518 do {
519 if (regtry(prog, s))
520 goto got_it;
521 } while (s++ < strend);
522 }
523
524 /* Failure. */
525 goto phooey;
a687059c 526
a0d0e21e 527got_it:
420218e7 528 strend += dontbother; /* uncheat */
a0d0e21e 529 prog->subbeg = strbeg;
530 prog->subend = strend;
bbce6d69 531 prog->exec_tainted = regtainted;
5f05dabc 532
533 /* make sure $`, $&, $', and $digit will work later */
137443ea 534 if (strbeg != prog->subbase) {
535 if (safebase) {
536 if (prog->subbase) {
537 Safefree(prog->subbase);
538 prog->subbase = Nullch;
539 }
540 }
541 else {
542 I32 i = strend - startpos + (stringarg - strbeg);
543 s = savepvn(strbeg, i);
544 Safefree(prog->subbase);
545 prog->subbase = s;
546 prog->subbeg = prog->subbase;
547 prog->subend = prog->subbase + i;
548 s = prog->subbase + (stringarg - strbeg);
549 for (i = 0; i <= prog->nparens; i++) {
550 if (prog->endp[i]) {
551 prog->startp[i] = s + (prog->startp[i] - startpos);
552 prog->endp[i] = s + (prog->endp[i] - startpos);
553 }
a0d0e21e 554 }
555 }
a0d0e21e 556 }
557 return 1;
558
559phooey:
a0d0e21e 560 return 0;
a687059c 561}
562
563/*
564 - regtry - try match at specific point
565 */
79072805 566static I32 /* 0 failure, 1 success */
a0d0e21e 567regtry(prog, startpos)
a687059c 568regexp *prog;
a0d0e21e 569char *startpos;
a687059c 570{
a0d0e21e 571 register I32 i;
572 register char **sp;
573 register char **ep;
574
575 reginput = startpos;
576 regstartp = prog->startp;
577 regendp = prog->endp;
578 reglastparen = &prog->lastparen;
579 prog->lastparen = 0;
580 regsize = 0;
581
582 sp = prog->startp;
583 ep = prog->endp;
584 if (prog->nparens) {
585 for (i = prog->nparens; i >= 0; i--) {
586 *sp++ = NULL;
587 *ep++ = NULL;
a687059c 588 }
a0d0e21e 589 }
590 if (regmatch(prog->program + 1) && reginput >= regtill) {
591 prog->startp[0] = startpos;
592 prog->endp[0] = reginput;
593 return 1;
594 }
595 else
596 return 0;
a687059c 597}
598
599/*
600 - regmatch - main matching routine
601 *
602 * Conceptually the strategy is simple: check to see whether the current
603 * node matches, call self recursively to see whether the rest matches,
604 * and then act accordingly. In practice we make some effort to avoid
605 * recursion, in particular by going through "ordinary" nodes (that don't
606 * need to know whether the rest of the match failed) by a loop instead of
607 * by recursion.
608 */
609/* [lwall] I've hoisted the register declarations to the outer block in order to
610 * maybe save a little bit of pushing and popping on the stack. It also takes
611 * advantage of machines that use a register save mask on subroutine entry.
612 */
79072805 613static I32 /* 0 failure, 1 success */
a687059c 614regmatch(prog)
615char *prog;
616{
a0d0e21e 617 register char *scan; /* Current node. */
618 char *next; /* Next node. */
619 register I32 nextchar;
620 register I32 n; /* no or next */
621 register I32 ln; /* len or last */
622 register char *s; /* operand or save */
623 register char *locinput = reginput;
bbce6d69 624 register I32 c1, c2; /* case fold search */
a0d0e21e 625 int minmod = 0;
4633a7c4 626#ifdef DEBUGGING
627 static int regindent = 0;
628 regindent++;
629#endif
a0d0e21e 630
bbce6d69 631 nextchar = UCHARAT(locinput);
a0d0e21e 632 scan = prog;
633 while (scan != NULL) {
a687059c 634#ifdef DEBUGGING
4633a7c4 635#define sayYES goto yes
636#define sayNO goto no
637#define saySAME(x) if (x) goto yes; else goto no
638 if (regnarrate) {
46fc3d4c 639 SV *prop = sv_newmortal();
640 regprop(prop, scan);
641 PerlIO_printf(Perl_debug_log, "%*s%2d%-8.8s\t<%.10s>\n",
642 regindent*2, "", scan - regprogram,
643 SvPVX(prop), locinput);
4633a7c4 644 }
645#else
646#define sayYES return 1
647#define sayNO return 0
648#define saySAME(x) return x
a687059c 649#endif
650
651#ifdef REGALIGN
a0d0e21e 652 next = scan + NEXT(scan);
653 if (next == scan)
654 next = NULL;
a687059c 655#else
a0d0e21e 656 next = regnext(scan);
a687059c 657#endif
658
a0d0e21e 659 switch (OP(scan)) {
660 case BOL:
661 if (locinput == regbol
662 ? regprev == '\n'
663 : ((nextchar || locinput < regeol) && locinput[-1] == '\n') )
664 {
665 /* regtill = regbol; */
666 break;
667 }
4633a7c4 668 sayNO;
a0d0e21e 669 case MBOL:
670 if (locinput == regbol
671 ? regprev == '\n'
672 : ((nextchar || locinput < regeol) && locinput[-1] == '\n') )
673 {
674 break;
675 }
4633a7c4 676 sayNO;
a0d0e21e 677 case SBOL:
678 if (locinput == regbol && regprev == '\n')
679 break;
4633a7c4 680 sayNO;
774d564b 681 case GPOS:
a0d0e21e 682 if (locinput == regbol)
683 break;
4633a7c4 684 sayNO;
a0d0e21e 685 case EOL:
686 if (multiline)
687 goto meol;
688 else
689 goto seol;
690 case MEOL:
691 meol:
692 if ((nextchar || locinput < regeol) && nextchar != '\n')
4633a7c4 693 sayNO;
a0d0e21e 694 break;
695 case SEOL:
696 seol:
697 if ((nextchar || locinput < regeol) && nextchar != '\n')
4633a7c4 698 sayNO;
a0d0e21e 699 if (regeol - locinput > 1)
4633a7c4 700 sayNO;
a0d0e21e 701 break;
702 case SANY:
703 if (!nextchar && locinput >= regeol)
4633a7c4 704 sayNO;
bbce6d69 705 nextchar = UCHARAT(++locinput);
a0d0e21e 706 break;
707 case ANY:
708 if (!nextchar && locinput >= regeol || nextchar == '\n')
4633a7c4 709 sayNO;
bbce6d69 710 nextchar = UCHARAT(++locinput);
a0d0e21e 711 break;
bbce6d69 712 case EXACT:
a0d0e21e 713 s = OPERAND(scan);
714 ln = *s++;
715 /* Inline the first character, for speed. */
95bac841 716 if (UCHARAT(s) != nextchar)
4633a7c4 717 sayNO;
a0d0e21e 718 if (regeol - locinput < ln)
4633a7c4 719 sayNO;
36477c24 720 if (ln > 1 && memNE(s, locinput, ln))
4633a7c4 721 sayNO;
a0d0e21e 722 locinput += ln;
bbce6d69 723 nextchar = UCHARAT(locinput);
724 break;
725 case EXACTFL:
726 regtainted = TRUE;
727 /* FALL THROUGH */
728 case EXACTF:
729 s = OPERAND(scan);
730 ln = *s++;
731 /* Inline the first character, for speed. */
732 if (UCHARAT(s) != nextchar &&
733 UCHARAT(s) != ((OP(scan) == EXACTF)
734 ? fold : fold_locale)[nextchar])
735 sayNO;
736 if (regeol - locinput < ln)
737 sayNO;
5f05dabc 738 if (ln > 1 && (OP(scan) == EXACTF
739 ? ibcmp(s, locinput, ln)
740 : ibcmp_locale(s, locinput, ln)))
bbce6d69 741 sayNO;
742 locinput += ln;
743 nextchar = UCHARAT(locinput);
a0d0e21e 744 break;
745 case ANYOF:
746 s = OPERAND(scan);
747 if (nextchar < 0)
748 nextchar = UCHARAT(locinput);
bbce6d69 749 if (!reginclass(s, nextchar))
4633a7c4 750 sayNO;
a0d0e21e 751 if (!nextchar && locinput >= regeol)
4633a7c4 752 sayNO;
bbce6d69 753 nextchar = UCHARAT(++locinput);
a0d0e21e 754 break;
bbce6d69 755 case ALNUML:
756 regtainted = TRUE;
757 /* FALL THROUGH */
a0d0e21e 758 case ALNUM:
759 if (!nextchar)
4633a7c4 760 sayNO;
bbce6d69 761 if (!(OP(scan) == ALNUM
762 ? isALNUM(nextchar) : isALNUM_LC(nextchar)))
4633a7c4 763 sayNO;
bbce6d69 764 nextchar = UCHARAT(++locinput);
a0d0e21e 765 break;
bbce6d69 766 case NALNUML:
767 regtainted = TRUE;
768 /* FALL THROUGH */
a0d0e21e 769 case NALNUM:
770 if (!nextchar && locinput >= regeol)
4633a7c4 771 sayNO;
bbce6d69 772 if (OP(scan) == NALNUM
773 ? isALNUM(nextchar) : isALNUM_LC(nextchar))
4633a7c4 774 sayNO;
bbce6d69 775 nextchar = UCHARAT(++locinput);
a0d0e21e 776 break;
bbce6d69 777 case BOUNDL:
778 case NBOUNDL:
779 regtainted = TRUE;
780 /* FALL THROUGH */
a0d0e21e 781 case BOUND:
bbce6d69 782 case NBOUND:
783 /* was last char in word? */
784 ln = (locinput != regbol) ? UCHARAT(locinput - 1) : regprev;
785 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
786 ln = isALNUM(ln);
787 n = isALNUM(nextchar);
788 }
789 else {
790 ln = isALNUM_LC(ln);
791 n = isALNUM_LC(nextchar);
792 }
95bac841 793 if (((!ln) == (!n)) == (OP(scan) == BOUND || OP(scan) == BOUNDL))
4633a7c4 794 sayNO;
a0d0e21e 795 break;
bbce6d69 796 case SPACEL:
797 regtainted = TRUE;
798 /* FALL THROUGH */
a0d0e21e 799 case SPACE:
800 if (!nextchar && locinput >= regeol)
4633a7c4 801 sayNO;
bbce6d69 802 if (!(OP(scan) == SPACE
803 ? isSPACE(nextchar) : isSPACE_LC(nextchar)))
4633a7c4 804 sayNO;
bbce6d69 805 nextchar = UCHARAT(++locinput);
a0d0e21e 806 break;
bbce6d69 807 case NSPACEL:
808 regtainted = TRUE;
809 /* FALL THROUGH */
a0d0e21e 810 case NSPACE:
811 if (!nextchar)
4633a7c4 812 sayNO;
bbce6d69 813 if (OP(scan) == SPACE
814 ? isSPACE(nextchar) : isSPACE_LC(nextchar))
4633a7c4 815 sayNO;
bbce6d69 816 nextchar = UCHARAT(++locinput);
a0d0e21e 817 break;
818 case DIGIT:
819 if (!isDIGIT(nextchar))
4633a7c4 820 sayNO;
bbce6d69 821 nextchar = UCHARAT(++locinput);
a0d0e21e 822 break;
823 case NDIGIT:
824 if (!nextchar && locinput >= regeol)
4633a7c4 825 sayNO;
a0d0e21e 826 if (isDIGIT(nextchar))
4633a7c4 827 sayNO;
bbce6d69 828 nextchar = UCHARAT(++locinput);
a0d0e21e 829 break;
c8756f30 830 case REFFL:
831 regtainted = TRUE;
832 /* FALL THROUGH */
a0d0e21e 833 case REF:
c8756f30 834 case REFF:
a0d0e21e 835 n = ARG1(scan); /* which paren pair */
836 s = regstartp[n];
837 if (!s)
4633a7c4 838 sayNO;
a0d0e21e 839 if (!regendp[n])
4633a7c4 840 sayNO;
a0d0e21e 841 if (s == regendp[n])
842 break;
843 /* Inline the first character, for speed. */
c8756f30 844 if (UCHARAT(s) != nextchar &&
845 (OP(scan) == REF ||
846 (UCHARAT(s) != ((OP(scan) == REFF
847 ? fold : fold_locale)[nextchar]))))
4633a7c4 848 sayNO;
a0d0e21e 849 ln = regendp[n] - s;
850 if (locinput + ln > regeol)
4633a7c4 851 sayNO;
c8756f30 852 if (ln > 1 && (OP(scan) == REF
853 ? memNE(s, locinput, ln)
854 : (OP(scan) == REFF
855 ? ibcmp(s, locinput, ln)
856 : ibcmp_locale(s, locinput, ln))))
4633a7c4 857 sayNO;
a0d0e21e 858 locinput += ln;
bbce6d69 859 nextchar = UCHARAT(locinput);
a0d0e21e 860 break;
861
862 case NOTHING:
863 break;
864 case BACK:
865 break;
866 case OPEN:
867 n = ARG1(scan); /* which paren pair */
868 regstartp[n] = locinput;
869 if (n > regsize)
870 regsize = n;
871 break;
872 case CLOSE:
873 n = ARG1(scan); /* which paren pair */
874 regendp[n] = locinput;
875 if (n > *reglastparen)
876 *reglastparen = n;
877 break;
878 case CURLYX: {
11343788 879 dTHR;
a0d0e21e 880 CURCUR cc;
881 CHECKPOINT cp = savestack_ix;
882 cc.oldcc = regcc;
883 regcc = &cc;
884 cc.parenfloor = *reglastparen;
885 cc.cur = -1;
886 cc.min = ARG1(scan);
887 cc.max = ARG2(scan);
888 cc.scan = NEXTOPER(scan) + 4;
889 cc.next = next;
890 cc.minmod = minmod;
891 cc.lastloc = 0;
892 reginput = locinput;
893 n = regmatch(PREVOPER(next)); /* start on the WHILEM */
894 regcpblow(cp);
895 regcc = cc.oldcc;
4633a7c4 896 saySAME(n);
a0d0e21e 897 }
898 /* NOT REACHED */
899 case WHILEM: {
900 /*
901 * This is really hard to understand, because after we match
902 * what we're trying to match, we must make sure the rest of
903 * the RE is going to match for sure, and to do that we have
904 * to go back UP the parse tree by recursing ever deeper. And
905 * if it fails, we have to reset our parent's current state
906 * that we can try again after backing off.
907 */
908
5f05dabc 909 CHECKPOINT cp;
a0d0e21e 910 CURCUR* cc = regcc;
4633a7c4 911 n = cc->cur + 1; /* how many we know we matched */
a0d0e21e 912 reginput = locinput;
913
4633a7c4 914#ifdef DEBUGGING
915 if (regnarrate)
68dc0745 916 PerlIO_printf(Perl_debug_log, "%*s %ld %lx\n", regindent*2, "",
917 (long)n, (long)cc);
4633a7c4 918#endif
919
a0d0e21e 920 /* If degenerate scan matches "", assume scan done. */
921
579cf2c3 922 if (locinput == cc->lastloc && n >= cc->min) {
a0d0e21e 923 regcc = cc->oldcc;
924 ln = regcc->cur;
925 if (regmatch(cc->next))
4633a7c4 926 sayYES;
a0d0e21e 927 regcc->cur = ln;
928 regcc = cc;
4633a7c4 929 sayNO;
a0d0e21e 930 }
931
932 /* First just match a string of min scans. */
933
934 if (n < cc->min) {
935 cc->cur = n;
936 cc->lastloc = locinput;
4633a7c4 937 if (regmatch(cc->scan))
938 sayYES;
939 cc->cur = n - 1;
940 sayNO;
a0d0e21e 941 }
942
943 /* Prefer next over scan for minimal matching. */
944
945 if (cc->minmod) {
946 regcc = cc->oldcc;
947 ln = regcc->cur;
5f05dabc 948 cp = regcppush(cc->parenfloor);
949 if (regmatch(cc->next)) {
950 regcpblow(cp);
4633a7c4 951 sayYES; /* All done. */
5f05dabc 952 }
953 regcppop();
a0d0e21e 954 regcc->cur = ln;
955 regcc = cc;
956
957 if (n >= cc->max) /* Maximum greed exceeded? */
4633a7c4 958 sayNO;
a687059c 959
a0d0e21e 960 /* Try scanning more and see if it helps. */
961 reginput = locinput;
962 cc->cur = n;
963 cc->lastloc = locinput;
5f05dabc 964 cp = regcppush(cc->parenfloor);
965 if (regmatch(cc->scan)) {
966 regcpblow(cp);
4633a7c4 967 sayYES;
5f05dabc 968 }
969 regcppop();
4633a7c4 970 cc->cur = n - 1;
971 sayNO;
a0d0e21e 972 }
973
974 /* Prefer scan over next for maximal matching. */
975
976 if (n < cc->max) { /* More greed allowed? */
5f05dabc 977 cp = regcppush(cc->parenfloor);
a0d0e21e 978 cc->cur = n;
979 cc->lastloc = locinput;
5f05dabc 980 if (regmatch(cc->scan)) {
981 regcpblow(cp);
4633a7c4 982 sayYES;
5f05dabc 983 }
a0d0e21e 984 regcppop(); /* Restore some previous $<digit>s? */
985 reginput = locinput;
986 }
987
988 /* Failed deeper matches of scan, so see if this one works. */
989 regcc = cc->oldcc;
990 ln = regcc->cur;
991 if (regmatch(cc->next))
4633a7c4 992 sayYES;
a0d0e21e 993 regcc->cur = ln;
994 regcc = cc;
4633a7c4 995 cc->cur = n - 1;
996 sayNO;
a0d0e21e 997 }
998 /* NOT REACHED */
999 case BRANCH: {
1000 if (OP(next) != BRANCH) /* No choice. */
1001 next = NEXTOPER(scan);/* Avoid recursion. */
1002 else {
748a9306 1003 int lastparen = *reglastparen;
a0d0e21e 1004 do {
1005 reginput = locinput;
1006 if (regmatch(NEXTOPER(scan)))
4633a7c4 1007 sayYES;
748a9306 1008 for (n = *reglastparen; n > lastparen; n--)
1009 regendp[n] = 0;
1010 *reglastparen = n;
1011
a687059c 1012#ifdef REGALIGN
a0d0e21e 1013 /*SUPPRESS 560*/
1014 if (n = NEXT(scan))
1015 scan += n;
1016 else
1017 scan = NULL;
a687059c 1018#else
a0d0e21e 1019 scan = regnext(scan);
79072805 1020#endif
a0d0e21e 1021 } while (scan != NULL && OP(scan) == BRANCH);
4633a7c4 1022 sayNO;
a0d0e21e 1023 /* NOTREACHED */
a687059c 1024 }
a0d0e21e 1025 }
1026 break;
1027 case MINMOD:
1028 minmod = 1;
1029 break;
1030 case CURLY:
1031 ln = ARG1(scan); /* min to match */
1032 n = ARG2(scan); /* max to match */
1033 scan = NEXTOPER(scan) + 4;
1034 goto repeat;
1035 case STAR:
1036 ln = 0;
1037 n = 32767;
1038 scan = NEXTOPER(scan);
1039 goto repeat;
1040 case PLUS:
1041 /*
1042 * Lookahead to avoid useless match attempts
1043 * when we know what character comes next.
1044 */
1045 ln = 1;
1046 n = 32767;
1047 scan = NEXTOPER(scan);
1048 repeat:
bbce6d69 1049 if (regkind[(U8)OP(next)] == EXACT) {
1050 c1 = UCHARAT(OPERAND(next) + 1);
1051 if (OP(next) == EXACTF)
1052 c2 = fold[c1];
1053 else if (OP(next) == EXACTFL)
1054 c2 = fold_locale[c1];
1055 else
1056 c2 = c1;
1057 }
a0d0e21e 1058 else
bbce6d69 1059 c1 = c2 = -1000;
a0d0e21e 1060 reginput = locinput;
1061 if (minmod) {
1062 minmod = 0;
1063 if (ln && regrepeat(scan, ln) < ln)
4633a7c4 1064 sayNO;
8e07c86e 1065 while (n >= ln || (n == 32767 && ln > 0)) { /* ln overflow ? */
a0d0e21e 1066 /* If it could work, try it. */
bbce6d69 1067 if (c1 == -1000 ||
1068 UCHARAT(reginput) == c1 ||
1069 UCHARAT(reginput) == c2)
1070 {
a0d0e21e 1071 if (regmatch(next))
4633a7c4 1072 sayYES;
bbce6d69 1073 }
a0d0e21e 1074 /* Couldn't or didn't -- back up. */
748a9306 1075 reginput = locinput + ln;
a0d0e21e 1076 if (regrepeat(scan, 1)) {
1077 ln++;
1078 reginput = locinput + ln;
1079 }
1080 else
4633a7c4 1081 sayNO;
a0d0e21e 1082 }
1083 }
1084 else {
1085 n = regrepeat(scan, n);
1086 if (ln < n && regkind[(U8)OP(next)] == EOL &&
1087 (!multiline || OP(next) == SEOL))
1088 ln = n; /* why back off? */
1089 while (n >= ln) {
1090 /* If it could work, try it. */
bbce6d69 1091 if (c1 == -1000 ||
1092 UCHARAT(reginput) == c1 ||
1093 UCHARAT(reginput) == c2)
1094 {
a0d0e21e 1095 if (regmatch(next))
4633a7c4 1096 sayYES;
bbce6d69 1097 }
a0d0e21e 1098 /* Couldn't or didn't -- back up. */
1099 n--;
1100 reginput = locinput + n;
1101 }
1102 }
4633a7c4 1103 sayNO;
a0d0e21e 1104 case SUCCEED:
1105 case END:
1106 reginput = locinput; /* put where regtry can find it */
4633a7c4 1107 sayYES; /* Success! */
a0d0e21e 1108 case IFMATCH:
1109 reginput = locinput;
1110 scan = NEXTOPER(scan);
1111 if (!regmatch(scan))
4633a7c4 1112 sayNO;
a0d0e21e 1113 break;
1114 case UNLESSM:
1115 reginput = locinput;
1116 scan = NEXTOPER(scan);
1117 if (regmatch(scan))
4633a7c4 1118 sayNO;
a0d0e21e 1119 break;
1120 default:
c030ccd9 1121 PerlIO_printf(PerlIO_stderr(), "%lx %d\n",
1122 (unsigned long)scan, scan[1]);
a0d0e21e 1123 FAIL("regexp memory corruption");
a687059c 1124 }
a0d0e21e 1125 scan = next;
1126 }
a687059c 1127
a0d0e21e 1128 /*
1129 * We get here only if there's trouble -- normally "case END" is
1130 * the terminating point.
1131 */
1132 FAIL("corrupted regexp pointers");
1133 /*NOTREACHED*/
4633a7c4 1134 sayNO;
1135
1136yes:
1137#ifdef DEBUGGING
1138 regindent--;
1139#endif
1140 return 1;
1141
1142no:
1143#ifdef DEBUGGING
1144 regindent--;
1145#endif
a0d0e21e 1146 return 0;
a687059c 1147}
1148
1149/*
1150 - regrepeat - repeatedly match something simple, report how many
1151 */
1152/*
1153 * [This routine now assumes that it will only match on things of length 1.
1154 * That was true before, but now we assume scan - reginput is the count,
1155 * rather than incrementing count on every character.]
1156 */
79072805 1157static I32
00bf170e 1158regrepeat(p, max)
a687059c 1159char *p;
79072805 1160I32 max;
a687059c 1161{
a0d0e21e 1162 register char *scan;
1163 register char *opnd;
1164 register I32 c;
1165 register char *loceol = regeol;
1166
1167 scan = reginput;
1168 if (max != 32767 && max < loceol - scan)
1169 loceol = scan + max;
1170 opnd = OPERAND(p);
1171 switch (OP(p)) {
1172 case ANY:
1173 while (scan < loceol && *scan != '\n')
1174 scan++;
1175 break;
1176 case SANY:
1177 scan = loceol;
1178 break;
bbce6d69 1179 case EXACT: /* length of string is 1 */
1180 c = UCHARAT(++opnd);
1181 while (scan < loceol && UCHARAT(scan) == c)
1182 scan++;
1183 break;
1184 case EXACTF: /* length of string is 1 */
1185 c = UCHARAT(++opnd);
1186 while (scan < loceol &&
1187 (UCHARAT(scan) == c || UCHARAT(scan) == fold[c]))
1188 scan++;
1189 break;
1190 case EXACTFL: /* length of string is 1 */
1191 regtainted = TRUE;
1192 c = UCHARAT(++opnd);
1193 while (scan < loceol &&
1194 (UCHARAT(scan) == c || UCHARAT(scan) == fold_locale[c]))
a0d0e21e 1195 scan++;
1196 break;
1197 case ANYOF:
bbce6d69 1198 while (scan < loceol && reginclass(opnd, *scan))
a0d0e21e 1199 scan++;
a0d0e21e 1200 break;
1201 case ALNUM:
1202 while (scan < loceol && isALNUM(*scan))
1203 scan++;
1204 break;
bbce6d69 1205 case ALNUML:
1206 regtainted = TRUE;
1207 while (scan < loceol && isALNUM_LC(*scan))
1208 scan++;
1209 break;
a0d0e21e 1210 case NALNUM:
1211 while (scan < loceol && !isALNUM(*scan))
1212 scan++;
1213 break;
bbce6d69 1214 case NALNUML:
1215 regtainted = TRUE;
1216 while (scan < loceol && !isALNUM_LC(*scan))
1217 scan++;
1218 break;
a0d0e21e 1219 case SPACE:
1220 while (scan < loceol && isSPACE(*scan))
1221 scan++;
1222 break;
bbce6d69 1223 case SPACEL:
1224 regtainted = TRUE;
1225 while (scan < loceol && isSPACE_LC(*scan))
1226 scan++;
1227 break;
a0d0e21e 1228 case NSPACE:
1229 while (scan < loceol && !isSPACE(*scan))
1230 scan++;
1231 break;
bbce6d69 1232 case NSPACEL:
1233 regtainted = TRUE;
1234 while (scan < loceol && !isSPACE_LC(*scan))
1235 scan++;
1236 break;
a0d0e21e 1237 case DIGIT:
1238 while (scan < loceol && isDIGIT(*scan))
1239 scan++;
1240 break;
1241 case NDIGIT:
1242 while (scan < loceol && !isDIGIT(*scan))
1243 scan++;
1244 break;
1245 default: /* Called on something of 0 width. */
1246 break; /* So match right here or not at all. */
1247 }
a687059c 1248
a0d0e21e 1249 c = scan - reginput;
1250 reginput = scan;
a687059c 1251
a0d0e21e 1252 return(c);
a687059c 1253}
1254
1255/*
bbce6d69 1256 - regclass - determine if a character falls into a character class
1257 */
1258
1259static bool
1260reginclass(p, c)
1261register char *p;
1262register I32 c;
1263{
1264 char flags = *p;
1265 bool match = FALSE;
1266
1267 c &= 0xFF;
1268 if (p[1 + (c >> 3)] & (1 << (c & 7)))
1269 match = TRUE;
1270 else if (flags & ANYOF_FOLD) {
1271 I32 cf;
1272 if (flags & ANYOF_LOCALE) {
1273 regtainted = TRUE;
1274 cf = fold_locale[c];
1275 }
1276 else
1277 cf = fold[c];
1278 if (p[1 + (cf >> 3)] & (1 << (cf & 7)))
1279 match = TRUE;
1280 }
1281
1282 if (!match && (flags & ANYOF_ISA)) {
1283 regtainted = TRUE;
1284
1285 if (((flags & ANYOF_ALNUML) && isALNUM_LC(c)) ||
1286 ((flags & ANYOF_NALNUML) && !isALNUM_LC(c)) ||
1287 ((flags & ANYOF_SPACEL) && isSPACE_LC(c)) ||
1288 ((flags & ANYOF_NSPACEL) && !isSPACE_LC(c)))
1289 {
1290 match = TRUE;
1291 }
1292 }
1293
1294 return match ^ ((flags & ANYOF_INVERT) != 0);
1295}
1296
1297/*
a687059c 1298 - regnext - dig the "next" pointer out of a node
1299 *
1300 * [Note, when REGALIGN is defined there are two places in regmatch()
1301 * that bypass this code for speed.]
1302 */
1303char *
1304regnext(p)
1305register char *p;
1306{
a0d0e21e 1307 register I32 offset;
a687059c 1308
a0d0e21e 1309 if (p == &regdummy)
1310 return(NULL);
a687059c 1311
a0d0e21e 1312 offset = NEXT(p);
1313 if (offset == 0)
1314 return(NULL);
a687059c 1315
1316#ifdef REGALIGN
a0d0e21e 1317 return(p+offset);
a687059c 1318#else
a0d0e21e 1319 if (OP(p) == BACK)
1320 return(p-offset);
1321 else
1322 return(p+offset);
a687059c 1323#endif
1324}