change#3612 was buggy and failed to build Tk; applied Ilya's
[p5sagit/p5-mst-13.2.git] / regcomp.c
CommitLineData
a0d0e21e 1/* regcomp.c
2 */
3
4/*
5 * "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
6 */
7
a687059c 8/* NOTE: this is derived from Henry Spencer's regexp code, and should not be
9 * confused with the original package (see point 3 below). Thanks, Henry!
10 */
11
12/* Additional note: this code is very heavily munged from Henry's version
13 * in places. In some spots I've traded clarity for efficiency, so don't
14 * blame Henry for some of the lack of readability.
15 */
16
e50aee73 17/* The names of the functions have been changed from regcomp and
18 * regexec to pregcomp and pregexec in order to avoid conflicts
19 * with the POSIX routines of the same names.
20*/
21
b9d5759e 22#ifdef PERL_EXT_RE_BUILD
23/* need to replace pregcomp et al, so enable that */
24# ifndef PERL_IN_XSUB_RE
25# define PERL_IN_XSUB_RE
26# endif
27/* need access to debugger hooks */
cad2e5aa 28# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
b9d5759e 29# define DEBUGGING
30# endif
31#endif
32
33#ifdef PERL_IN_XSUB_RE
d06ea78c 34/* We *really* need to overwrite these symbols: */
56953603 35# define Perl_pregcomp my_regcomp
36# define Perl_regdump my_regdump
37# define Perl_regprop my_regprop
d06ea78c 38# define Perl_pregfree my_regfree
cad2e5aa 39# define Perl_re_intuit_string my_re_intuit_string
40/* *These* symbols are masked to allow static link. */
d06ea78c 41# define Perl_regnext my_regnext
f0b8d043 42# define Perl_save_re_context my_save_re_context
d88dccdf 43# define Perl_reginitcolors my_reginitcolors
56953603 44#endif
45
f0fcb552 46/*SUPPRESS 112*/
a687059c 47/*
e50aee73 48 * pregcomp and pregexec -- regsub and regerror are not used in perl
a687059c 49 *
50 * Copyright (c) 1986 by University of Toronto.
51 * Written by Henry Spencer. Not derived from licensed software.
52 *
53 * Permission is granted to anyone to use this software for any
54 * purpose on any computer system, and to redistribute it freely,
55 * subject to the following restrictions:
56 *
57 * 1. The author is not responsible for the consequences of use of
58 * this software, no matter how awful, even if they arise
59 * from defects in it.
60 *
61 * 2. The origin of this software must not be misrepresented, either
62 * by explicit claim or by omission.
63 *
64 * 3. Altered versions must be plainly marked as such, and must not
65 * be misrepresented as being the original software.
66 *
67 *
68 **** Alterations to Henry's code are...
69 ****
4eb8286e 70 **** Copyright (c) 1991-1999, Larry Wall
a687059c 71 ****
9ef589d8 72 **** You may distribute under the terms of either the GNU General Public
73 **** License or the Artistic License, as specified in the README file.
74
a687059c 75 *
76 * Beware that some of this code is subtly aware of the way operator
77 * precedence is structured in regular expressions. Serious changes in
78 * regular-expression syntax might require a total rethink.
79 */
80#include "EXTERN.h"
864dbfa3 81#define PERL_IN_REGCOMP_C
a687059c 82#include "perl.h"
d06ea78c 83
b9d5759e 84#ifndef PERL_IN_XSUB_RE
d06ea78c 85# include "INTERN.h"
86#endif
c277df42 87
88#define REG_COMP_C
a687059c 89#include "regcomp.h"
90
d4cce5f1 91#ifdef op
11343788 92#undef op
d4cce5f1 93#endif /* op */
11343788 94
fe14fcc3 95#ifdef MSDOS
96# if defined(BUGGY_MSC6)
97 /* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
98 # pragma optimize("a",off)
99 /* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
100 # pragma optimize("w",on )
101# endif /* BUGGY_MSC6 */
102#endif /* MSDOS */
103
a687059c 104#ifndef STATIC
105#define STATIC static
106#endif
107
/*
 * Quantifier tests used by the parser.
 *
 * ISMULT1(c): true when the character c is one of the single-character
 *             quantifiers '*', '+' or '?'.
 * ISMULT2(s): true when the character pointed to by s is one of those
 *             quantifiers, or is a '{' for which regcurly(s) returns true.
 *
 * Both macros evaluate their argument more than once, so do not pass an
 * expression with side effects.  The argument of ISMULT2 is fully
 * parenthesised so that pointer expressions such as `p + 1' are
 * dereferenced as a whole.
 */
#define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
#define	ISMULT2(s)	((*(s)) == '*' || (*(s)) == '+' || (*(s)) == '?' || \
	((*(s)) == '{' && regcurly(s)))
2b69d0c2 111#ifdef atarist
112#define PERL_META "^$.[()|?+*\\"
113#else
a687059c 114#define META "^$.[()|?+*\\"
2b69d0c2 115#endif
a687059c 116
35c8bce7 117#ifdef SPSTART
118#undef SPSTART /* dratted cpp namespace... */
119#endif
a687059c 120/*
121 * Flags to be passed up and down.
122 */
a687059c 123#define WORST 0 /* Worst case. */
821b33a5 124#define HASWIDTH 0x1 /* Known to match non-null strings. */
a0d0e21e 125#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
126#define SPSTART 0x4 /* Starts with * or +. */
127#define TRYAGAIN 0x8 /* Weeded out a declaration. */
a687059c 128
129/*
e50aee73 130 * Forward declarations for pregcomp()'s friends.
a687059c 131 */
a0d0e21e 132
cd488c12 133static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
134 0, 0, 0 };
c277df42 135
136#define SF_BEFORE_EOL (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
137#define SF_BEFORE_SEOL 0x1
138#define SF_BEFORE_MEOL 0x2
139#define SF_FIX_BEFORE_EOL (SF_FIX_BEFORE_SEOL|SF_FIX_BEFORE_MEOL)
140#define SF_FL_BEFORE_EOL (SF_FL_BEFORE_SEOL|SF_FL_BEFORE_MEOL)
141
09b7f37c 142#ifdef NO_UNARY_PLUS
143# define SF_FIX_SHIFT_EOL (0+2)
144# define SF_FL_SHIFT_EOL (0+4)
145#else
146# define SF_FIX_SHIFT_EOL (+2)
147# define SF_FL_SHIFT_EOL (+4)
148#endif
c277df42 149
150#define SF_FIX_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FIX_SHIFT_EOL)
151#define SF_FIX_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FIX_SHIFT_EOL)
152
153#define SF_FL_BEFORE_SEOL (SF_BEFORE_SEOL << SF_FL_SHIFT_EOL)
154#define SF_FL_BEFORE_MEOL (SF_BEFORE_MEOL << SF_FL_SHIFT_EOL) /* 0x20 */
155#define SF_IS_INF 0x40
156#define SF_HAS_PAR 0x80
157#define SF_IN_PAR 0x100
158#define SF_HAS_EVAL 0x200
4bfe0158 159#define SCF_DO_SUBSTR 0x400
c277df42 160
a0ed51b3 161#define RF_utf8 8
162#define UTF (PL_reg_flags & RF_utf8)
163#define LOC (PL_regflags & PMf_LOCALE)
164#define FOLD (PL_regflags & PMf_FOLD)
165
b8c5462f 166#define OOB_CHAR8 1234
167#define OOB_UTF8 123456
168
/*
 * Character-based (rather than byte-based) measurements.  Both select the
 * UTF-8 aware routine when UTF is true and the plain byte computation
 * otherwise.
 *
 * CHR_SVLEN(sv):  length of sv (sv_len_utf8() or SvCUR()).
 * CHR_DIST(a,b):  distance from b to a (utf8_distance() or a - b).
 *
 * Arguments are parenthesised in the byte branch so that expression
 * arguments (e.g. `p, q - 1') keep their intended grouping.
 */
#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : ((a) - (b)))
171
51371543 172static void clear_re(pTHXo_ void *r);
4327152a 173
174STATIC void
cea2e8a9 175S_scan_commit(pTHX_ scan_data_t *data)
c277df42 176{
c485e607 177 dTHR;
a0ed51b3 178 STRLEN l = CHR_SVLEN(data->last_found);
179 STRLEN old_l = CHR_SVLEN(*data->longest);
c277df42 180
181 if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
182 sv_setsv(*data->longest, data->last_found);
183 if (*data->longest == data->longest_fixed) {
184 data->offset_fixed = l ? data->last_start_min : data->pos_min;
185 if (data->flags & SF_BEFORE_EOL)
186 data->flags
187 |= ((data->flags & SF_BEFORE_EOL) << SF_FIX_SHIFT_EOL);
188 else
189 data->flags &= ~SF_FIX_BEFORE_EOL;
a0ed51b3 190 }
191 else {
c277df42 192 data->offset_float_min = l ? data->last_start_min : data->pos_min;
193 data->offset_float_max = (l
194 ? data->last_start_max
195 : data->pos_min + data->pos_delta);
196 if (data->flags & SF_BEFORE_EOL)
197 data->flags
198 |= ((data->flags & SF_BEFORE_EOL) << SF_FL_SHIFT_EOL);
199 else
200 data->flags &= ~SF_FL_BEFORE_EOL;
201 }
202 }
203 SvCUR_set(data->last_found, 0);
204 data->last_end = -1;
205 data->flags &= ~SF_BEFORE_EOL;
206}
207
c277df42 208/* Stops at toplevel WHILEM as well as at `last'. At end *scanp is set
209 to the position after last scanned or to NULL. */
210
76e3520e 211STATIC I32
cea2e8a9 212S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags)
c277df42 213 /* scanp: Start here (read-write). */
214 /* deltap: Write maxlen-minlen here. */
215 /* last: Stop before this one. */
216{
5c0ca799 217 dTHR;
c277df42 218 I32 min = 0, pars = 0, code;
219 regnode *scan = *scanp, *next;
220 I32 delta = 0;
221 int is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
aca2d497 222 int is_inf_internal = 0; /* The studied chunk is infinite */
c277df42 223 I32 is_par = OP(scan) == OPEN ? ARG(scan) : 0;
224 scan_data_t data_fake;
225
226 while (scan && OP(scan) != END && scan < last) {
227 /* Peephole optimizer: */
228
22c35a8c 229 if (PL_regkind[(U8)OP(scan)] == EXACT) {
c277df42 230 regnode *n = regnext(scan);
231 U32 stringok = 1;
232#ifdef DEBUGGING
233 regnode *stop = scan;
234#endif
235
236 next = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
237 /* Skip NOTHING, merge EXACT*. */
238 while (n &&
22c35a8c 239 ( PL_regkind[(U8)OP(n)] == NOTHING ||
c277df42 240 (stringok && (OP(n) == OP(scan))))
241 && NEXT_OFF(n)
242 && NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX) {
243 if (OP(n) == TAIL || n > next)
244 stringok = 0;
22c35a8c 245 if (PL_regkind[(U8)OP(n)] == NOTHING) {
c277df42 246 NEXT_OFF(scan) += NEXT_OFF(n);
247 next = n + NODE_STEP_REGNODE;
248#ifdef DEBUGGING
249 if (stringok)
250 stop = n;
251#endif
252 n = regnext(n);
a0ed51b3 253 }
254 else {
c277df42 255 int oldl = *OPERAND(scan);
256 regnode *nnext = regnext(n);
257
258 if (oldl + *OPERAND(n) > U8_MAX)
259 break;
260 NEXT_OFF(scan) += NEXT_OFF(n);
261 *OPERAND(scan) += *OPERAND(n);
262 next = n + (*OPERAND(n) + 2 - 1)/sizeof(regnode) + 2;
263 /* Now we can overwrite *n : */
264 Move(OPERAND(n) + 1, OPERAND(scan) + oldl + 1,
265 *OPERAND(n) + 1, char);
266#ifdef DEBUGGING
267 if (stringok)
268 stop = next - 1;
269#endif
270 n = nnext;
271 }
272 }
273#ifdef DEBUGGING
274 /* Allow dumping */
275 n = scan + (*OPERAND(scan) + 2 - 1)/sizeof(regnode) + 2;
276 while (n <= stop) {
ca04da08 277 /* Purify reports a benign UMR here sometimes, because we
278 * don't initialize the OP() slot of a node when that node
279 * is occupied by just the trailing null of the string in
280 * an EXACT node */
22c35a8c 281 if (PL_regkind[(U8)OP(n)] != NOTHING || OP(n) == NOTHING) {
c277df42 282 OP(n) = OPTIMIZED;
283 NEXT_OFF(n) = 0;
284 }
285 n++;
286 }
287#endif
288
289 }
290 if (OP(scan) != CURLYX) {
048cfca1 291 int max = (reg_off_by_arg[OP(scan)]
292 ? I32_MAX
293 /* I32 may be smaller than U16 on CRAYs! */
294 : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
c277df42 295 int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
296 int noff;
297 regnode *n = scan;
298
299 /* Skip NOTHING and LONGJMP. */
300 while ((n = regnext(n))
22c35a8c 301 && ((PL_regkind[(U8)OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
c277df42 302 || ((OP(n) == LONGJMP) && (noff = ARG(n))))
303 && off + noff < max)
304 off += noff;
305 if (reg_off_by_arg[OP(scan)])
306 ARG(scan) = off;
307 else
308 NEXT_OFF(scan) = off;
309 }
310 if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
311 || OP(scan) == IFTHEN || OP(scan) == SUSPEND) {
312 next = regnext(scan);
313 code = OP(scan);
314
315 if (OP(next) == code || code == IFTHEN || code == SUSPEND) {
316 I32 max1 = 0, min1 = I32_MAX, num = 0;
317
318 if (flags & SCF_DO_SUBSTR)
319 scan_commit(data);
320 while (OP(scan) == code) {
321 I32 deltanext, minnext;
322
323 num++;
324 data_fake.flags = 0;
325 next = regnext(scan);
326 scan = NEXTOPER(scan);
327 if (code != BRANCH)
328 scan = NEXTOPER(scan);
329 /* We suppose the run is continuous, last=next...*/
330 minnext = study_chunk(&scan, &deltanext, next,
331 &data_fake, 0);
332 if (min1 > minnext)
333 min1 = minnext;
334 if (max1 < minnext + deltanext)
335 max1 = minnext + deltanext;
336 if (deltanext == I32_MAX)
aca2d497 337 is_inf = is_inf_internal = 1;
c277df42 338 scan = next;
339 if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
340 pars++;
405ff068 341 if (data && (data_fake.flags & SF_HAS_EVAL))
c277df42 342 data->flags |= SF_HAS_EVAL;
343 if (code == SUSPEND)
344 break;
345 }
346 if (code == IFTHEN && num < 2) /* Empty ELSE branch */
347 min1 = 0;
348 if (flags & SCF_DO_SUBSTR) {
349 data->pos_min += min1;
350 data->pos_delta += max1 - min1;
351 if (max1 != min1 || is_inf)
352 data->longest = &(data->longest_float);
353 }
354 min += min1;
355 delta += max1 - min1;
a0ed51b3 356 }
357 else if (code == BRANCHJ) /* single branch is optimized. */
c277df42 358 scan = NEXTOPER(NEXTOPER(scan));
359 else /* single branch is optimized. */
360 scan = NEXTOPER(scan);
361 continue;
a0ed51b3 362 }
363 else if (OP(scan) == EXACT) {
364 I32 l = *OPERAND(scan);
365 if (UTF) {
366 unsigned char *s = (unsigned char *)(OPERAND(scan)+1);
367 unsigned char *e = s + l;
368 I32 newl = 0;
369 while (s < e) {
370 newl++;
371 s += UTF8SKIP(s);
372 }
373 l = newl;
374 }
375 min += l;
c277df42 376 if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
c277df42 377 /* The code below prefers earlier match for fixed
378 offset, later match for variable offset. */
379 if (data->last_end == -1) { /* Update the start info. */
380 data->last_start_min = data->pos_min;
381 data->last_start_max = is_inf
382 ? I32_MAX : data->pos_min + data->pos_delta;
383 }
a0ed51b3 384 sv_catpvn(data->last_found, (char *)(OPERAND(scan)+1), *OPERAND(scan));
c277df42 385 data->last_end = data->pos_min + l;
386 data->pos_min += l; /* As in the first entry. */
387 data->flags &= ~SF_BEFORE_EOL;
388 }
a0ed51b3 389 }
22c35a8c 390 else if (PL_regkind[(U8)OP(scan)] == EXACT) {
a0ed51b3 391 I32 l = *OPERAND(scan);
c277df42 392 if (flags & SCF_DO_SUBSTR)
393 scan_commit(data);
a0ed51b3 394 if (UTF) {
395 unsigned char *s = (unsigned char *)(OPERAND(scan)+1);
396 unsigned char *e = s + l;
397 I32 newl = 0;
398 while (s < e) {
399 newl++;
400 s += UTF8SKIP(s);
401 }
402 l = newl;
403 }
404 min += l;
c277df42 405 if (data && (flags & SCF_DO_SUBSTR))
a0ed51b3 406 data->pos_min += l;
407 }
4d61ec05 408 else if (strchr((char*)PL_varies,OP(scan))) {
c277df42 409 I32 mincount, maxcount, minnext, deltanext, pos_before, fl;
410 regnode *oscan = scan;
411
22c35a8c 412 switch (PL_regkind[(U8)OP(scan)]) {
c277df42 413 case WHILEM:
414 scan = NEXTOPER(scan);
415 goto finish;
416 case PLUS:
417 if (flags & SCF_DO_SUBSTR) {
418 next = NEXTOPER(scan);
419 if (OP(next) == EXACT) {
420 mincount = 1;
421 maxcount = REG_INFTY;
422 next = regnext(scan);
423 scan = NEXTOPER(scan);
424 goto do_curly;
425 }
426 }
427 if (flags & SCF_DO_SUBSTR)
428 data->pos_min++;
429 min++;
430 /* Fall through. */
431 case STAR:
aca2d497 432 is_inf = is_inf_internal = 1;
c277df42 433 scan = regnext(scan);
434 if (flags & SCF_DO_SUBSTR) {
435 scan_commit(data);
436 data->longest = &(data->longest_float);
437 }
438 goto optimize_curly_tail;
439 case CURLY:
440 mincount = ARG1(scan);
441 maxcount = ARG2(scan);
442 next = regnext(scan);
443 scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
444 do_curly:
445 if (flags & SCF_DO_SUBSTR) {
446 if (mincount == 0) scan_commit(data);
447 pos_before = data->pos_min;
448 }
449 if (data) {
450 fl = data->flags;
451 data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
452 if (is_inf)
453 data->flags |= SF_IS_INF;
454 }
455 /* This will finish on WHILEM, setting scan, or on NULL: */
456 minnext = study_chunk(&scan, &deltanext, last, data,
457 mincount == 0
458 ? (flags & ~SCF_DO_SUBSTR) : flags);
459 if (!scan) /* It was not CURLYX, but CURLY. */
460 scan = next;
599cee73 461 if (ckWARN(WARN_UNSAFE) && (minnext + deltanext == 0)
821b33a5 462 && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
17feb5d5 463 && maxcount <= REG_INFTY/3) /* Complement check for big count */
cea2e8a9 464 Perl_warner(aTHX_ WARN_UNSAFE, "Strange *+?{} on zero-length expression");
c277df42 465 min += minnext * mincount;
aca2d497 466 is_inf_internal |= (maxcount == REG_INFTY
467 && (minnext + deltanext) > 0
468 || deltanext == I32_MAX);
469 is_inf |= is_inf_internal;
c277df42 470 delta += (minnext + deltanext) * maxcount - minnext * mincount;
471
472 /* Try powerful optimization CURLYX => CURLYN. */
c277df42 473 if ( OP(oscan) == CURLYX && data
474 && data->flags & SF_IN_PAR
475 && !(data->flags & SF_HAS_EVAL)
476 && !deltanext && minnext == 1 ) {
477 /* Try to optimize to CURLYN. */
478 regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
479 regnode *nxt1 = nxt, *nxt2;
480
481 /* Skip open. */
482 nxt = regnext(nxt);
4d61ec05 483 if (!strchr((char*)PL_simple,OP(nxt))
22c35a8c 484 && !(PL_regkind[(U8)OP(nxt)] == EXACT
c277df42 485 && *OPERAND(nxt) == 1))
486 goto nogo;
487 nxt2 = nxt;
488 nxt = regnext(nxt);
489 if (OP(nxt) != CLOSE)
490 goto nogo;
491 /* Now we know that nxt2 is the only contents: */
492 oscan->flags = ARG(nxt);
493 OP(oscan) = CURLYN;
494 OP(nxt1) = NOTHING; /* was OPEN. */
495#ifdef DEBUGGING
496 OP(nxt1 + 1) = OPTIMIZED; /* was count. */
497 NEXT_OFF(nxt1+ 1) = 0; /* just for consistancy. */
498 NEXT_OFF(nxt2) = 0; /* just for consistancy with CURLY. */
499 OP(nxt) = OPTIMIZED; /* was CLOSE. */
500 OP(nxt + 1) = OPTIMIZED; /* was count. */
501 NEXT_OFF(nxt+ 1) = 0; /* just for consistancy. */
502#endif
503 }
c277df42 504 nogo:
505
506 /* Try optimization CURLYX => CURLYM. */
507 if ( OP(oscan) == CURLYX && data
c277df42 508 && !(data->flags & SF_HAS_PAR)
c277df42 509 && !(data->flags & SF_HAS_EVAL)
510 && !deltanext ) {
511 /* XXXX How to optimize if data == 0? */
512 /* Optimize to a simpler form. */
513 regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
514 regnode *nxt2;
515
516 OP(oscan) = CURLYM;
517 while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
518 && (OP(nxt2) != WHILEM))
519 nxt = nxt2;
520 OP(nxt2) = SUCCEED; /* Whas WHILEM */
c277df42 521 /* Need to optimize away parenths. */
522 if (data->flags & SF_IN_PAR) {
523 /* Set the parenth number. */
524 regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
525
526 if (OP(nxt) != CLOSE)
527 FAIL("panic opt close");
528 oscan->flags = ARG(nxt);
529 OP(nxt1) = OPTIMIZED; /* was OPEN. */
530 OP(nxt) = OPTIMIZED; /* was CLOSE. */
531#ifdef DEBUGGING
532 OP(nxt1 + 1) = OPTIMIZED; /* was count. */
533 OP(nxt + 1) = OPTIMIZED; /* was count. */
534 NEXT_OFF(nxt1 + 1) = 0; /* just for consistancy. */
535 NEXT_OFF(nxt + 1) = 0; /* just for consistancy. */
536#endif
537#if 0
538 while ( nxt1 && (OP(nxt1) != WHILEM)) {
539 regnode *nnxt = regnext(nxt1);
540
541 if (nnxt == nxt) {
542 if (reg_off_by_arg[OP(nxt1)])
543 ARG_SET(nxt1, nxt2 - nxt1);
544 else if (nxt2 - nxt1 < U16_MAX)
545 NEXT_OFF(nxt1) = nxt2 - nxt1;
546 else
547 OP(nxt) = NOTHING; /* Cannot beautify */
548 }
549 nxt1 = nnxt;
550 }
551#endif
552 /* Optimize again: */
553 study_chunk(&nxt1, &deltanext, nxt, NULL, 0);
a0ed51b3 554 }
555 else
c277df42 556 oscan->flags = 0;
c277df42 557 }
558 if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
559 pars++;
560 if (flags & SCF_DO_SUBSTR) {
561 SV *last_str = Nullsv;
562 int counted = mincount != 0;
563
564 if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */
565 I32 b = pos_before >= data->last_start_min
566 ? pos_before : data->last_start_min;
567 STRLEN l;
568 char *s = SvPV(data->last_found, l);
a0ed51b3 569 I32 old = b - data->last_start_min;
570
571 if (UTF)
572 old = utf8_hop((U8*)s, old) - (U8*)s;
c277df42 573
a0ed51b3 574 l -= old;
c277df42 575 /* Get the added string: */
79cb57f6 576 last_str = newSVpvn(s + old, l);
c277df42 577 if (deltanext == 0 && pos_before == b) {
578 /* What was added is a constant string */
579 if (mincount > 1) {
580 SvGROW(last_str, (mincount * l) + 1);
581 repeatcpy(SvPVX(last_str) + l,
582 SvPVX(last_str), l, mincount - 1);
583 SvCUR(last_str) *= mincount;
584 /* Add additional parts. */
585 SvCUR_set(data->last_found,
586 SvCUR(data->last_found) - l);
587 sv_catsv(data->last_found, last_str);
588 data->last_end += l * (mincount - 1);
589 }
590 }
591 }
592 /* It is counted once already... */
593 data->pos_min += minnext * (mincount - counted);
594 data->pos_delta += - counted * deltanext +
595 (minnext + deltanext) * maxcount - minnext * mincount;
596 if (mincount != maxcount) {
597 scan_commit(data);
598 if (mincount && last_str) {
599 sv_setsv(data->last_found, last_str);
600 data->last_end = data->pos_min;
601 data->last_start_min =
a0ed51b3 602 data->pos_min - CHR_SVLEN(last_str);
c277df42 603 data->last_start_max = is_inf
604 ? I32_MAX
605 : data->pos_min + data->pos_delta
a0ed51b3 606 - CHR_SVLEN(last_str);
c277df42 607 }
608 data->longest = &(data->longest_float);
609 }
aca2d497 610 SvREFCNT_dec(last_str);
c277df42 611 }
405ff068 612 if (data && (fl & SF_HAS_EVAL))
c277df42 613 data->flags |= SF_HAS_EVAL;
614 optimize_curly_tail:
c277df42 615 if (OP(oscan) != CURLYX) {
22c35a8c 616 while (PL_regkind[(U8)OP(next = regnext(oscan))] == NOTHING
c277df42 617 && NEXT_OFF(next))
618 NEXT_OFF(oscan) += NEXT_OFF(next);
619 }
c277df42 620 continue;
621 default: /* REF only? */
622 if (flags & SCF_DO_SUBSTR) {
623 scan_commit(data);
624 data->longest = &(data->longest_float);
625 }
aca2d497 626 is_inf = is_inf_internal = 1;
c277df42 627 break;
628 }
a0ed51b3 629 }
4d61ec05 630 else if (strchr((char*)PL_simple,OP(scan)) || PL_regkind[(U8)OP(scan)] == ANYUTF8) {
c277df42 631 if (flags & SCF_DO_SUBSTR) {
632 scan_commit(data);
633 data->pos_min++;
634 }
635 min++;
a0ed51b3 636 }
22c35a8c 637 else if (PL_regkind[(U8)OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
c277df42 638 data->flags |= (OP(scan) == MEOL
639 ? SF_BEFORE_MEOL
640 : SF_BEFORE_SEOL);
a0ed51b3 641 }
22c35a8c 642 else if (PL_regkind[(U8)OP(scan)] == BRANCHJ
c277df42 643 && (scan->flags || data)
644 && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
645 I32 deltanext, minnext;
646 regnode *nscan;
647
648 data_fake.flags = 0;
649 next = regnext(scan);
650 nscan = NEXTOPER(NEXTOPER(scan));
651 minnext = study_chunk(&nscan, &deltanext, last, &data_fake, 0);
652 if (scan->flags) {
653 if (deltanext) {
654 FAIL("variable length lookbehind not implemented");
a0ed51b3 655 }
656 else if (minnext > U8_MAX) {
c277df42 657 FAIL2("lookbehind longer than %d not implemented", U8_MAX);
658 }
659 scan->flags = minnext;
660 }
661 if (data && data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
662 pars++;
405ff068 663 if (data && (data_fake.flags & SF_HAS_EVAL))
c277df42 664 data->flags |= SF_HAS_EVAL;
a0ed51b3 665 }
666 else if (OP(scan) == OPEN) {
c277df42 667 pars++;
a0ed51b3 668 }
669 else if (OP(scan) == CLOSE && ARG(scan) == is_par) {
c277df42 670 next = regnext(scan);
671
672 if ( next && (OP(next) != WHILEM) && next < last)
c277df42 673 is_par = 0; /* Disable optimization */
a0ed51b3 674 }
675 else if (OP(scan) == EVAL) {
c277df42 676 if (data)
677 data->flags |= SF_HAS_EVAL;
678 }
0f5d15d6 679 else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded */
680 if (flags & SCF_DO_SUBSTR) {
681 scan_commit(data);
682 data->longest = &(data->longest_float);
683 }
684 is_inf = is_inf_internal = 1;
685 }
c277df42 686 /* Else: zero-length, ignore. */
687 scan = regnext(scan);
688 }
689
690 finish:
691 *scanp = scan;
aca2d497 692 *deltap = is_inf_internal ? I32_MAX : delta;
c277df42 693 if (flags & SCF_DO_SUBSTR && is_inf)
694 data->pos_delta = I32_MAX - data->pos_min;
695 if (is_par > U8_MAX)
696 is_par = 0;
697 if (is_par && pars==1 && data) {
698 data->flags |= SF_IN_PAR;
699 data->flags &= ~SF_HAS_PAR;
a0ed51b3 700 }
701 else if (pars && data) {
c277df42 702 data->flags |= SF_HAS_PAR;
703 data->flags &= ~SF_IN_PAR;
704 }
705 return min;
706}
707
76e3520e 708STATIC I32
cea2e8a9 709S_add_data(pTHX_ I32 n, char *s)
c277df42 710{
5c0ca799 711 dTHR;
3280af22 712 if (PL_regcomp_rx->data) {
713 Renewc(PL_regcomp_rx->data,
714 sizeof(*PL_regcomp_rx->data) + sizeof(void*) * (PL_regcomp_rx->data->count + n - 1),
c277df42 715 char, struct reg_data);
3280af22 716 Renew(PL_regcomp_rx->data->what, PL_regcomp_rx->data->count + n, U8);
717 PL_regcomp_rx->data->count += n;
a0ed51b3 718 }
719 else {
3280af22 720 Newc(1207, PL_regcomp_rx->data, sizeof(*PL_regcomp_rx->data) + sizeof(void*) * (n - 1),
c277df42 721 char, struct reg_data);
3280af22 722 New(1208, PL_regcomp_rx->data->what, n, U8);
723 PL_regcomp_rx->data->count = n;
c277df42 724 }
3280af22 725 Copy(s, PL_regcomp_rx->data->what + PL_regcomp_rx->data->count - n, n, U8);
726 return PL_regcomp_rx->data->count - n;
c277df42 727}
728
d88dccdf 729void
864dbfa3 730Perl_reginitcolors(pTHX)
d88dccdf 731{
732 dTHR;
733 int i = 0;
734 char *s = PerlEnv_getenv("PERL_RE_COLORS");
735
736 if (s) {
737 PL_colors[0] = s = savepv(s);
738 while (++i < 6) {
739 s = strchr(s, '\t');
740 if (s) {
741 *s = '\0';
742 PL_colors[i] = ++s;
743 }
744 else
c712d376 745 PL_colors[i] = s = "";
d88dccdf 746 }
747 } else {
748 while (i < 6)
749 PL_colors[i++] = "";
750 }
751 PL_colorset = 1;
752}
753
a687059c 754/*
e50aee73 755 - pregcomp - compile a regular expression into internal code
a687059c 756 *
757 * We can't allocate space until we know how big the compiled form will be,
758 * but we can't compile it (and thus know how big it is) until we've got a
759 * place to put the code. So we cheat: we compile it twice, once with code
760 * generation turned off and size counting turned on, and once "for real".
761 * This also means that we don't allocate space until we are sure that the
762 * thing really will compile successfully, and we never have to move the
763 * code and thus invalidate pointers into it. (Note that it has to be in
764 * one piece because free() must be able to free it all.) [NB: not true in perl]
765 *
766 * Beware that the optimization-preparation code in here knows about some
767 * of the structure of the compiled regexp. [I'll say.]
768 */
769regexp *
864dbfa3 770Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
a687059c 771{
5c0ca799 772 dTHR;
a0d0e21e 773 register regexp *r;
c277df42 774 regnode *scan;
775 SV **longest;
776 SV *longest_fixed;
777 SV *longest_float;
778 regnode *first;
a0d0e21e 779 I32 flags;
a0d0e21e 780 I32 minlen = 0;
781 I32 sawplus = 0;
782 I32 sawopen = 0;
783
784 if (exp == NULL)
c277df42 785 FAIL("NULL regexp argument");
a0d0e21e 786
e24b16f9 787 if (PL_curcop == &PL_compiling ? (PL_hints & HINT_UTF8) : IN_UTF8)
a0ed51b3 788 PL_reg_flags |= RF_utf8;
789 else
790 PL_reg_flags = 0;
791
3280af22 792 PL_regprecomp = savepvn(exp, xend - exp);
35ef4773 793 DEBUG_r(if (!PL_colorset) reginitcolors());
794 DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling%s RE `%s%*s%s'\n",
d88dccdf 795 PL_colors[4],PL_colors[5],PL_colors[0],
796 xend - exp, PL_regprecomp, PL_colors[1]));
3280af22 797 PL_regflags = pm->op_pmflags;
798 PL_regsawback = 0;
bbce6d69 799
3280af22 800 PL_regseen = 0;
801 PL_seen_zerolen = *exp == '^' ? -1 : 0;
802 PL_seen_evals = 0;
803 PL_extralen = 0;
c277df42 804
bbce6d69 805 /* First pass: determine size, legality. */
3280af22 806 PL_regcomp_parse = exp;
807 PL_regxend = xend;
808 PL_regnaughty = 0;
809 PL_regnpar = 1;
810 PL_regsize = 0L;
811 PL_regcode = &PL_regdummy;
22c35a8c 812 regc((U8)REG_MAGIC, (char*)PL_regcode);
a0d0e21e 813 if (reg(0, &flags) == NULL) {
3280af22 814 Safefree(PL_regprecomp);
815 PL_regprecomp = Nullch;
a0d0e21e 816 return(NULL);
817 }
3280af22 818 DEBUG_r(PerlIO_printf(Perl_debug_log, "size %d ", PL_regsize));
c277df42 819
c277df42 820 /* Small enough for pointer-storage convention?
821 If extralen==0, this means that we will not need long jumps. */
3280af22 822 if (PL_regsize >= 0x10000L && PL_extralen)
823 PL_regsize += PL_extralen;
c277df42 824 else
3280af22 825 PL_extralen = 0;
a0d0e21e 826
bbce6d69 827 /* Allocate space and initialize. */
3280af22 828 Newc(1001, r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode),
c277df42 829 char, regexp);
a0d0e21e 830 if (r == NULL)
831 FAIL("regexp out of space");
c277df42 832 r->refcnt = 1;
bbce6d69 833 r->prelen = xend - exp;
3280af22 834 r->precomp = PL_regprecomp;
cf93c79d 835 r->subbeg = NULL;
836 r->reganch = pm->op_pmflags & PMf_COMPILETIME;
4327152a 837 r->nparens = PL_regnpar - 1; /* set early to validate backrefs */
838
839 r->substrs = 0; /* Useful during FAIL. */
840 r->startp = 0; /* Useful during FAIL. */
841 r->endp = 0; /* Useful during FAIL. */
842
3280af22 843 PL_regcomp_rx = r;
bbce6d69 844
845 /* Second pass: emit code. */
3280af22 846 PL_regcomp_parse = exp;
847 PL_regxend = xend;
848 PL_regnaughty = 0;
849 PL_regnpar = 1;
850 PL_regcode = r->program;
2cd61cdb 851 /* Store the count of eval-groups for security checks: */
3280af22 852 PL_regcode->next_off = ((PL_seen_evals > U16_MAX) ? U16_MAX : PL_seen_evals);
22c35a8c 853 regc((U8)REG_MAGIC, (char*) PL_regcode++);
c277df42 854 r->data = 0;
a0d0e21e 855 if (reg(0, &flags) == NULL)
856 return(NULL);
857
858 /* Dig out information for optimizations. */
cf93c79d 859 r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
3280af22 860 pm->op_pmflags = PL_regflags;
a0ed51b3 861 if (UTF)
862 r->reganch |= ROPT_UTF8;
c277df42 863 r->regstclass = NULL;
a0ed51b3 864 if (PL_regnaughty >= 10) /* Probably an expensive pattern. */
865 r->reganch |= ROPT_NAUGHTY;
c277df42 866 scan = r->program + 1; /* First BRANCH. */
2779dcf1 867
868 /* XXXX To minimize changes to RE engine we always allocate
869 3-units-long substrs field. */
870 Newz(1004, r->substrs, 1, struct reg_substr_data);
871
c277df42 872 if (OP(scan) != BRANCH) { /* Only one top-level choice. */
873 scan_data_t data;
874 I32 fake;
c5254dd6 875 STRLEN longest_float_length, longest_fixed_length;
a0d0e21e 876
c277df42 877 StructCopy(&zero_scan_data, &data, scan_data_t);
a0d0e21e 878 first = scan;
c277df42 879 /* Skip introductions and multiplicators >= 1. */
a0d0e21e 880 while ((OP(first) == OPEN && (sawopen = 1)) ||
881 (OP(first) == BRANCH && OP(regnext(first)) != BRANCH) ||
882 (OP(first) == PLUS) ||
883 (OP(first) == MINMOD) ||
22c35a8c 884 (PL_regkind[(U8)OP(first)] == CURLY && ARG1(first) > 0) ) {
a0d0e21e 885 if (OP(first) == PLUS)
886 sawplus = 1;
887 else
888 first += regarglen[(U8)OP(first)];
889 first = NEXTOPER(first);
a687059c 890 }
891
a0d0e21e 892 /* Starting-point info. */
893 again:
c277df42 894 if (OP(first) == EXACT); /* Empty, get anchored substr later. */
4d61ec05 895 else if (strchr((char*)PL_simple+4,OP(first)))
a0d0e21e 896 r->regstclass = first;
22c35a8c 897 else if (PL_regkind[(U8)OP(first)] == BOUND ||
898 PL_regkind[(U8)OP(first)] == NBOUND)
a0d0e21e 899 r->regstclass = first;
22c35a8c 900 else if (PL_regkind[(U8)OP(first)] == BOL) {
cad2e5aa 901 r->reganch |= (OP(first) == MBOL
902 ? ROPT_ANCH_MBOL
903 : (OP(first) == SBOL
904 ? ROPT_ANCH_SBOL
905 : ROPT_ANCH_BOL));
a0d0e21e 906 first = NEXTOPER(first);
774d564b 907 goto again;
908 }
909 else if (OP(first) == GPOS) {
910 r->reganch |= ROPT_ANCH_GPOS;
911 first = NEXTOPER(first);
912 goto again;
a0d0e21e 913 }
914 else if ((OP(first) == STAR &&
22c35a8c 915 PL_regkind[(U8)OP(NEXTOPER(first))] == REG_ANY) &&
a0d0e21e 916 !(r->reganch & ROPT_ANCH) )
917 {
918 /* turn .* into ^.* with an implied $*=1 */
cad2e5aa 919 int type = OP(NEXTOPER(first));
920
921 if (type == REG_ANY || type == ANYUTF8)
922 type = ROPT_ANCH_MBOL;
923 else
924 type = ROPT_ANCH_SBOL;
925
926 r->reganch |= type | ROPT_IMPLICIT;
a0d0e21e 927 first = NEXTOPER(first);
774d564b 928 goto again;
a0d0e21e 929 }
cad2e5aa 930 if (sawplus && (!sawopen || !PL_regsawback)
931 && !(PL_regseen & REG_SEEN_EVAL)) /* May examine pos and $& */
932 /* x+ must match at the 1st pos of run of x's */
933 r->reganch |= ROPT_SKIP;
a0d0e21e 934
c277df42 935 /* Scan is after the zeroth branch, first is atomic matcher. */
936 DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n",
937 first - scan + 1));
a0d0e21e 938 /*
939 * If there's something expensive in the r.e., find the
940 * longest literal string that must appear and make it the
941 * regmust. Resolve ties in favor of later strings, since
942 * the regstart check works with the beginning of the r.e.
943 * and avoiding duplication strengthens checking. Not a
944 * strong reason, but sufficient in the absence of others.
945 * [Now we resolve ties in favor of the earlier string if
c277df42 946 * it happens that c_offset_min has been invalidated, since the
a0d0e21e 947 * earlier string may buy us something the later one won't.]
948 */
a0d0e21e 949 minlen = 0;
a687059c 950
79cb57f6 951 data.longest_fixed = newSVpvn("",0);
952 data.longest_float = newSVpvn("",0);
953 data.last_found = newSVpvn("",0);
c277df42 954 data.longest = &(data.longest_fixed);
955 first = scan;
956
3280af22 957 minlen = study_chunk(&first, &fake, scan + PL_regsize, /* Up to end */
c277df42 958 &data, SCF_DO_SUBSTR);
3280af22 959 if ( PL_regnpar == 1 && data.longest == &(data.longest_fixed)
c277df42 960 && data.last_start_min == 0 && data.last_end > 0
3280af22 961 && !PL_seen_zerolen
962 && (!(PL_regseen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS)))
c277df42 963 r->reganch |= ROPT_CHECK_ALL;
964 scan_commit(&data);
965 SvREFCNT_dec(data.last_found);
966
a0ed51b3 967 longest_float_length = CHR_SVLEN(data.longest_float);
c5254dd6 968 if (longest_float_length
c277df42 969 || (data.flags & SF_FL_BEFORE_EOL
970 && (!(data.flags & SF_FL_BEFORE_MEOL)
3280af22 971 || (PL_regflags & PMf_MULTILINE)))) {
cf93c79d 972 int t;
973
a0ed51b3 974 if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
aca2d497 975 && data.offset_fixed == data.offset_float_min
976 && SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
977 goto remove_float; /* As in (a)+. */
978
c277df42 979 r->float_substr = data.longest_float;
980 r->float_min_offset = data.offset_float_min;
981 r->float_max_offset = data.offset_float_max;
cf93c79d 982 t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
983 && (!(data.flags & SF_FL_BEFORE_MEOL)
984 || (PL_regflags & PMf_MULTILINE)));
985 fbm_compile(r->float_substr, t ? FBMcf_TAIL : 0);
a0ed51b3 986 }
987 else {
aca2d497 988 remove_float:
c277df42 989 r->float_substr = Nullsv;
990 SvREFCNT_dec(data.longest_float);
c5254dd6 991 longest_float_length = 0;
a0d0e21e 992 }
c277df42 993
a0ed51b3 994 longest_fixed_length = CHR_SVLEN(data.longest_fixed);
c5254dd6 995 if (longest_fixed_length
c277df42 996 || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
997 && (!(data.flags & SF_FIX_BEFORE_MEOL)
3280af22 998 || (PL_regflags & PMf_MULTILINE)))) {
cf93c79d 999 int t;
1000
c277df42 1001 r->anchored_substr = data.longest_fixed;
1002 r->anchored_offset = data.offset_fixed;
cf93c79d 1003 t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
1004 && (!(data.flags & SF_FIX_BEFORE_MEOL)
1005 || (PL_regflags & PMf_MULTILINE)));
1006 fbm_compile(r->anchored_substr, t ? FBMcf_TAIL : 0);
a0ed51b3 1007 }
1008 else {
c277df42 1009 r->anchored_substr = Nullsv;
1010 SvREFCNT_dec(data.longest_fixed);
c5254dd6 1011 longest_fixed_length = 0;
a0d0e21e 1012 }
c277df42 1013
1014 /* A temporary algorithm prefers floated substr to fixed one to dig more info. */
c5254dd6 1015 if (longest_fixed_length > longest_float_length) {
c277df42 1016 r->check_substr = r->anchored_substr;
1017 r->check_offset_min = r->check_offset_max = r->anchored_offset;
1018 if (r->reganch & ROPT_ANCH_SINGLE)
1019 r->reganch |= ROPT_NOSCAN;
a0ed51b3 1020 }
1021 else {
c277df42 1022 r->check_substr = r->float_substr;
1023 r->check_offset_min = data.offset_float_min;
1024 r->check_offset_max = data.offset_float_max;
a0d0e21e 1025 }
cad2e5aa 1026 if (r->check_substr) {
1027 r->reganch |= RE_USE_INTUIT;
1028 if (SvTAIL(r->check_substr))
1029 r->reganch |= RE_INTUIT_TAIL;
1030 }
a0ed51b3 1031 }
1032 else {
c277df42 1033 /* Several toplevels. Best we can is to set minlen. */
1034 I32 fake;
1035
1036 DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
1037 scan = r->program + 1;
3280af22 1038 minlen = study_chunk(&scan, &fake, scan + PL_regsize, NULL, 0);
c277df42 1039 r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
a0d0e21e 1040 }
1041
a0d0e21e 1042 r->minlen = minlen;
3280af22 1043 if (PL_regseen & REG_SEEN_GPOS)
c277df42 1044 r->reganch |= ROPT_GPOS_SEEN;
3280af22 1045 if (PL_regseen & REG_SEEN_LOOKBEHIND)
c277df42 1046 r->reganch |= ROPT_LOOKBEHIND_SEEN;
3280af22 1047 if (PL_regseen & REG_SEEN_EVAL)
ce862d02 1048 r->reganch |= ROPT_EVAL_SEEN;
cf93c79d 1049 Newz(1002, r->startp, PL_regnpar, I32);
1050 Newz(1002, r->endp, PL_regnpar, I32);
a0d0e21e 1051 DEBUG_r(regdump(r));
1052 return(r);
a687059c 1053}
1054
1055/*
1056 - reg - regular expression, i.e. main body or parenthesized thing
1057 *
1058 * Caller must absorb opening parenthesis.
1059 *
1060 * Combining parenthesis handling with the base level of regular expression
1061 * is a trifle forced, but the need to tie the tails of the branches to what
1062 * follows makes it hard to avoid.
1063 */
76e3520e 1064STATIC regnode *
cea2e8a9 1065S_reg(pTHX_ I32 paren, I32 *flagp)
c277df42 1066 /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter.
 *
 * Parses the alternatives of one group and links them to a closing node.
 *
 * paren  0 for the top-level expression (closed with an END node) and
 *        non-zero when the caller has already consumed a '('.  For a
 *        "(?X" group it is overwritten below with the character X
 *        (lookbehind "(?<!" is remapped to ',') and that value later
 *        selects the closing node and any wrapper node.
 * flagp  Output.  Cleared on entry, then HASWIDTH / SPSTART (and SIMPLE
 *        for a branch-less "(?:...)") are OR-ed in from the branches.
 *        Set to TRYAGAIN when NULL is returned for a construct that emits
 *        no node ("(?#...)" comments and flag-only groups).
 *
 * Returns the head node of the group, or NULL (see TRYAGAIN above, or
 * when regbranch() itself returns NULL).
 */
a687059c 1067{
5c0ca799 1068 dTHR;
c277df42 1069 register regnode *ret; /* Will be the head of the group. */
1070 register regnode *br;
1071 register regnode *lastbr;
1072 register regnode *ender = 0;
a0d0e21e 1073 register I32 parno = 0;
3280af22 1074 I32 flags, oregflags = PL_regflags, have_branch = 0, open = 0;
c277df42 1075 char c;
a0d0e21e 1076
821b33a5 1077 *flagp = 0; /* Tentatively. */
a0d0e21e 1078
1079 /* Make an OPEN node, if parenthesized. */
1080 if (paren) {
3280af22 1081 if (*PL_regcomp_parse == '?') {
/* Flags switched on / off by an inline "(?flags-flags)" sequence;
 * flagsp points at whichever set is currently being filled. */
ca9dfc88 1082 U16 posflags = 0, negflags = 0;
1083 U16 *flagsp = &posflags;
0f5d15d6 1084 int logical = 0;
ca9dfc88 1085
3280af22 1086 PL_regcomp_parse++;
1087 paren = *PL_regcomp_parse++;
c277df42 1088 ret = NULL; /* For look-ahead/behind. */
a0d0e21e 1089 switch (paren) {
/* "(?<=" / "(?<!": lookbehind.  The negative form is remapped to ','
 * so the code at the end of the function can tell the two apart. */
c277df42 1090 case '<':
3280af22 1091 PL_regseen |= REG_SEEN_LOOKBEHIND;
1092 if (*PL_regcomp_parse == '!')
c277df42 1093 paren = ',';
3280af22 1094 if (*PL_regcomp_parse != '=' && *PL_regcomp_parse != '!')
c277df42 1095 goto unknown;
3280af22 1096 PL_regcomp_parse++;
/* Deliberate fall-through: lookbehind shares the lookahead bookkeeping. */
a0d0e21e 1097 case '=':
1098 case '!':
3280af22 1099 PL_seen_zerolen++;
/* Deliberate fall-through: nothing more to do before the branches. */
c277df42 1100 case ':':
1101 case '>':
a0d0e21e 1102 break;
1103 case '$':
1104 case '@':
c277df42 1105 FAIL2("Sequence (?%c...) not implemented", (int)paren);
a0d0e21e 1106 break;
/* "(?#...)": skip to the closing ')' and ask the caller to retry. */
1107 case '#':
3280af22 1108 while (*PL_regcomp_parse && *PL_regcomp_parse != ')')
1109 PL_regcomp_parse++;
1110 if (*PL_regcomp_parse != ')')
c277df42 1111 FAIL("Sequence (?#... not terminated");
a0d0e21e 1112 nextchar();
1113 *flagp = TRYAGAIN;
1114 return NULL;
/* "(?p{...})": handled as an eval group whose EVAL node is wrapped in a
 * LOGICAL node with flags 2.  NOTE(review): the character consumed
 * after 'p' is not checked to be '{' -- confirm that is intended. */
0f5d15d6 1115 case 'p':
1116 logical = 1;
1117 paren = *PL_regcomp_parse++;
1118 /* FALL THROUGH */
/* "(?{...})": embedded code block. */
c277df42 1119 case '{':
1120 {
1121 dTHR;
1122 I32 count = 1, n = 0;
1123 char c;
3280af22 1124 char *s = PL_regcomp_parse;
c277df42 1125 SV *sv;
1126 OP_4tree *sop, *rop;
1127
3280af22 1128 PL_seen_zerolen++;
1129 PL_regseen |= REG_SEEN_EVAL;
/* Find the matching '}' by counting brace depth; a backslash makes the
 * following character be skipped without being counted. */
1130 while (count && (c = *PL_regcomp_parse)) {
1131 if (c == '\\' && PL_regcomp_parse[1])
1132 PL_regcomp_parse++;
c277df42 1133 else if (c == '{')
1134 count++;
1135 else if (c == '}')
1136 count--;
3280af22 1137 PL_regcomp_parse++;
c277df42 1138 }
3280af22 1139 if (*PL_regcomp_parse != ')')
c277df42 1140 FAIL("Sequence (?{...}) not terminated or not {}-balanced");
/* Emit pass: compile the text between the braces and store the three
 * results in consecutive data slots starting at index n. */
1141 if (!SIZE_ONLY) {
1142 AV *av;
1143
3280af22 1144 if (PL_regcomp_parse - 1 - s)
79cb57f6 1145 sv = newSVpvn(s, PL_regcomp_parse - 1 - s);
c277df42 1146 else
79cb57f6 1147 sv = newSVpvn("", 0);
c277df42 1148
1149 rop = sv_compile_2op(sv, &sop, "re", &av);
1150
dfad63ad 1151 n = add_data(3, "nop");
3280af22 1152 PL_regcomp_rx->data->data[n] = (void*)rop;
dfad63ad 1153 PL_regcomp_rx->data->data[n+1] = (void*)sop;
1154 PL_regcomp_rx->data->data[n+2] = (void*)av;
c277df42 1155 SvREFCNT_dec(sv);
a0ed51b3 1156 }
e24b16f9 1157 else { /* First pass */
1158 if (PL_reginterp_cnt < ++PL_seen_evals
1159 && PL_curcop != &PL_compiling)
2cd61cdb 1160 /* No compiled RE interpolated, has runtime
1161 components ===> unsafe. */
1162 FAIL("Eval-group not allowed at runtime, use re 'eval'");
3280af22 1163 if (PL_tainted)
cc6b7395 1164 FAIL("Eval-group in insecure regular expression");
c277df42 1165 }
1166
1167 nextchar();
0f5d15d6 1168 if (logical) {
1169 ret = reg_node(LOGICAL);
1170 if (!SIZE_ONLY)
1171 ret->flags = 2;
1172 regtail(ret, reganode(EVAL, n));
1173 return ret;
1174 }
c277df42 1175 return reganode(EVAL, n);
1176 }
/* "(?(condition)yes|no)": conditional.  The condition is either a
 * lookaround / eval group (wrapped in LOGICAL with flags 1) or a group
 * number (GROUPP). */
1177 case '(':
1178 {
3280af22 1179 if (PL_regcomp_parse[0] == '?') {
1180 if (PL_regcomp_parse[1] == '=' || PL_regcomp_parse[1] == '!'
1181 || PL_regcomp_parse[1] == '<'
1182 || PL_regcomp_parse[1] == '{') { /* Lookahead or eval. */
c277df42 1183 I32 flag;
1184
1185 ret = reg_node(LOGICAL);
0f5d15d6 1186 if (!SIZE_ONLY)
1187 ret->flags = 1;
c277df42 1188 regtail(ret, reg(1, &flag));
1189 goto insert_if;
1190 }
/* NOTE(review): a '?' followed by any other character leaves this
 * if/else chain without failing and runs into "case 0" below, so the
 * error reported is "Sequence (? incomplete". */
a0ed51b3 1191 }
1192 else if (PL_regcomp_parse[0] >= '1' && PL_regcomp_parse[0] <= '9' ) {
3280af22 1193 parno = atoi(PL_regcomp_parse++);
c277df42 1194
3280af22 1195 while (isDIGIT(*PL_regcomp_parse))
1196 PL_regcomp_parse++;
c277df42 1197 ret = reganode(GROUPP, parno);
1198 if ((c = *nextchar()) != ')')
1199 FAIL2("Switch (?(number%c not recognized", c);
/* Shared tail for both kinds of condition; the LOGICAL path above jumps
 * here.  Emits IFTHEN, the "yes" branch, and an optional "no" branch. */
1200 insert_if:
1201 regtail(ret, reganode(IFTHEN, 0));
1202 br = regbranch(&flags, 1);
1203 if (br == NULL)
1204 br = reganode(LONGJMP, 0);
1205 else
1206 regtail(br, reganode(LONGJMP, 0));
1207 c = *nextchar();
d1b80229 1208 if (flags&HASWIDTH)
1209 *flagp |= HASWIDTH;
c277df42 1210 if (c == '|') {
1211 lastbr = reganode(IFTHEN, 0); /* Fake one for optimizer. */
1212 regbranch(&flags, 1);
1213 regtail(ret, lastbr);
d1b80229 1214 if (flags&HASWIDTH)
1215 *flagp |= HASWIDTH;
c277df42 1216 c = *nextchar();
a0ed51b3 1217 }
1218 else
c277df42 1219 lastbr = NULL;
1220 if (c != ')')
1221 FAIL("Switch (?(condition)... contains too many branches");
/* Both arms (or the condition itself when there is no "no" arm) are
 * pointed at a common TAIL node. */
1222 ender = reg_node(TAIL);
1223 regtail(br, ender);
1224 if (lastbr) {
1225 regtail(lastbr, ender);
1226 regtail(NEXTOPER(NEXTOPER(lastbr)), ender);
a0ed51b3 1227 }
1228 else
c277df42 1229 regtail(ret, ender);
1230 return ret;
a0ed51b3 1231 }
1232 else {
3280af22 1233 FAIL2("Unknown condition for (?(%.2s", PL_regcomp_parse);
c277df42 1234 }
1235 }
1b1626e4 1236 case 0:
c277df42 1237 FAIL("Sequence (? incomplete");
1b1626e4 1238 break;
/* Anything else is read as inline flags: "(?imsx-imsx)" or
 * "(?imsx-imsx:...)". */
a0d0e21e 1239 default:
3280af22 1240 --PL_regcomp_parse;
ca9dfc88 1241 parse_flags:
/* 'o' is accepted here but not passed to pmflag(). */
3280af22 1242 while (*PL_regcomp_parse && strchr("iogcmsx", *PL_regcomp_parse)) {
1243 if (*PL_regcomp_parse != 'o')
1244 pmflag(flagsp, *PL_regcomp_parse);
1245 ++PL_regcomp_parse;
ca9dfc88 1246 }
/* After a '-', subsequent letters accumulate in negflags instead. */
3280af22 1247 if (*PL_regcomp_parse == '-') {
ca9dfc88 1248 flagsp = &negflags;
3280af22 1249 ++PL_regcomp_parse;
ca9dfc88 1250 goto parse_flags;
48c036b1 1251 }
3280af22 1252 PL_regflags |= posflags;
1253 PL_regflags &= ~negflags;
1254 if (*PL_regcomp_parse == ':') {
1255 PL_regcomp_parse++;
ca9dfc88 1256 paren = ':';
1257 break;
1258 }
/* Flag-only group: must close immediately.  No node is emitted and the
 * modified PL_regflags are left in place when returning from here. */
c277df42 1259 unknown:
3280af22 1260 if (*PL_regcomp_parse != ')')
1261 FAIL2("Sequence (?%c...) not recognized", *PL_regcomp_parse);
a0d0e21e 1262 nextchar();
1263 *flagp = TRYAGAIN;
1264 return NULL;
1265 }
1266 }
1267 else {
/* Plain capturing group: take the next group number and emit OPEN. */
3280af22 1268 parno = PL_regnpar;
1269 PL_regnpar++;
a0d0e21e 1270 ret = reganode(OPEN, parno);
c277df42 1271 open = 1;
a0d0e21e 1272 }
a0ed51b3 1273 }
1274 else
a0d0e21e 1275 ret = NULL;
1276
1277 /* Pick up the branches, linking them together. */
c277df42 1278 br = regbranch(&flags, 1);
a0d0e21e 1279 if (br == NULL)
1280 return(NULL);
/* The first alternative only gets a BRANCH (or BRANCHJ) node in front of
 * it when a '|' actually follows. */
3280af22 1281 if (*PL_regcomp_parse == '|') {
1282 if (!SIZE_ONLY && PL_extralen) {
c277df42 1283 reginsert(BRANCHJ, br);
a0ed51b3 1284 }
1285 else
c277df42 1286 reginsert(BRANCH, br);
1287 have_branch = 1;
1288 if (SIZE_ONLY)
3280af22 1289 PL_extralen += 1; /* For BRANCHJ-BRANCH. */
a0ed51b3 1290 }
1291 else if (paren == ':') {
c277df42 1292 *flagp |= flags&SIMPLE;
1293 }
1294 if (open) { /* Starts with OPEN. */
1295 regtail(ret, br); /* OPEN -> first. */
a0ed51b3 1296 }
1297 else if (paren != '?') /* Not Conditional */
a0d0e21e 1298 ret = br;
821b33a5 1299 if (flags&HASWIDTH)
1300 *flagp |= HASWIDTH;
a0d0e21e 1301 *flagp |= flags&SPSTART;
c277df42 1302 lastbr = br;
/* Remaining alternatives: each is chained onto the previous BRANCH. */
3280af22 1303 while (*PL_regcomp_parse == '|') {
1304 if (!SIZE_ONLY && PL_extralen) {
c277df42 1305 ender = reganode(LONGJMP,0);
1306 regtail(NEXTOPER(NEXTOPER(lastbr)), ender); /* Append to the previous. */
1307 }
1308 if (SIZE_ONLY)
3280af22 1309 PL_extralen += 2; /* Account for LONGJMP. */
a0d0e21e 1310 nextchar();
c277df42 1311 br = regbranch(&flags, 0);
a687059c 1312 if (br == NULL)
a0d0e21e 1313 return(NULL);
c277df42 1314 regtail(lastbr, br); /* BRANCH -> BRANCH. */
1315 lastbr = br;
821b33a5 1316 if (flags&HASWIDTH)
1317 *flagp |= HASWIDTH;
a687059c 1318 *flagp |= flags&SPSTART;
a0d0e21e 1319 }
1320
/* A branch-less "(?:...)" needs no closing node at all. */
c277df42 1321 if (have_branch || paren != ':') {
1322 /* Make a closing node, and hook it on the end. */
1323 switch (paren) {
1324 case ':':
1325 ender = reg_node(TAIL);
1326 break;
1327 case 1:
1328 ender = reganode(CLOSE, parno);
1329 break;
/* Lookarounds drop HASWIDTH from *flagp and then share SUCCEED with the
 * "(?>...)" group. */
1330 case '<':
c277df42 1331 case ',':
1332 case '=':
1333 case '!':
c277df42 1334 *flagp &= ~HASWIDTH;
821b33a5 1335 /* FALL THROUGH */
1336 case '>':
1337 ender = reg_node(SUCCEED);
c277df42 1338 break;
1339 case 0:
1340 ender = reg_node(END);
1341 break;
1342 }
1343 regtail(lastbr, ender);
a0d0e21e 1344
c277df42 1345 if (have_branch) {
1346 /* Hook the tails of the branches to the closing node. */
1347 for (br = ret; br != NULL; br = regnext(br)) {
1348 regoptail(br, ender);
1349 }
1350 }
a0d0e21e 1351 }
c277df42 1352
/* Wrap lookaround / "(?>...)" groups in their controlling node.  The
 * position of paren within parens[] encodes the choice:
 *   odd index ('!' and ',')    -> UNLESSM, even index -> IFMATCH;
 *   index > 1 ('<', ',', '>')  -> flag = 1;
 *   '>' is then overridden to SUSPEND with flag 0. */
1353 {
1354 char *p;
1355 static char parens[] = "=!<,>";
1356
1357 if (paren && (p = strchr(parens, paren))) {
1358 int node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
1359 int flag = (p - parens) > 1;
1360
1361 if (paren == '>')
1362 node = SUSPEND, flag = 0;
1363 reginsert(node,ret);
c277df42 1364 ret->flags = flag;
c277df42 1365 regtail(ret, reg_node(TAIL));
1366 }
a0d0e21e 1367 }
1368
1369 /* Check for proper termination. */
/* Leaving a parenthesised group restores the flags that were in effect
 * on entry, so inline flags set inside it do not leak out. */
ce3e6498 1370 if (paren) {
1371 PL_regflags = oregflags;
1372 if (PL_regcomp_parse >= PL_regxend || *nextchar() != ')') {
1373 FAIL("unmatched () in regexp");
1374 }
a0ed51b3 1375 }
1376 else if (!paren && PL_regcomp_parse < PL_regxend) {
3280af22 1377 if (*PL_regcomp_parse == ')') {
a0d0e21e 1378 FAIL("unmatched () in regexp");
a0ed51b3 1379 }
1380 else
a0d0e21e 1381 FAIL("junk on end of regexp"); /* "Can't happen". */
1382 /* NOTREACHED */
1383 }
a687059c 1384
a0d0e21e 1385 return(ret);
a687059c 1386}
1387
1388/*
1389 - regbranch - one alternative of an | operator
1390 *
1391 * Implements the concatenation operator.
1392 */
76e3520e 1393STATIC regnode *
cea2e8a9 1394S_regbranch(pTHX_ I32 *flagp, I32 first)
a687059c 1395{
5c0ca799 1396 dTHR;
c277df42 1397 register regnode *ret;
1398 register regnode *chain = NULL;
1399 register regnode *latest;
1400 I32 flags = 0, c = 0;
a0d0e21e 1401
c277df42 1402 if (first)
1403 ret = NULL;
1404 else {
3280af22 1405 if (!SIZE_ONLY && PL_extralen)
c277df42 1406 ret = reganode(BRANCHJ,0);
1407 else
1408 ret = reg_node(BRANCH);
1409 }
1410
1411 if (!first && SIZE_ONLY)
3280af22 1412 PL_extralen += 1; /* BRANCHJ */
c277df42 1413
1414 *flagp = WORST; /* Tentatively. */
a0d0e21e 1415
3280af22 1416 PL_regcomp_parse--;
a0d0e21e 1417 nextchar();
3280af22 1418 while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != '|' && *PL_regcomp_parse != ')') {
a0d0e21e 1419 flags &= ~TRYAGAIN;
1420 latest = regpiece(&flags);
1421 if (latest == NULL) {
1422 if (flags & TRYAGAIN)
1423 continue;
1424 return(NULL);
a0ed51b3 1425 }
1426 else if (ret == NULL)
c277df42 1427 ret = latest;
a0d0e21e 1428 *flagp |= flags&HASWIDTH;
c277df42 1429 if (chain == NULL) /* First piece. */
a0d0e21e 1430 *flagp |= flags&SPSTART;
1431 else {
3280af22 1432 PL_regnaughty++;
a0d0e21e 1433 regtail(chain, latest);
a687059c 1434 }
a0d0e21e 1435 chain = latest;
c277df42 1436 c++;
1437 }
1438 if (chain == NULL) { /* Loop ran zero times. */
1439 chain = reg_node(NOTHING);
1440 if (ret == NULL)
1441 ret = chain;
1442 }
1443 if (c == 1) {
1444 *flagp |= flags&SIMPLE;
a0d0e21e 1445 }
a687059c 1446
a0d0e21e 1447 return(ret);
a687059c 1448}
1449
1450/*
1451 - regpiece - something followed by possible [*+?]
1452 *
1453 * Note that the branching code sequences used for ? and the general cases
1454 * of * and + are somewhat optimized: they use the same NOTHING node as
1455 * both the endmarker for their branch list and the body of the last branch.
1456 * It might seem that this node could be dispensed with entirely, but the
1457 * endmarker role is not redundant.
1458 */
76e3520e 1459STATIC regnode *
cea2e8a9 1460S_regpiece(pTHX_ I32 *flagp)
a687059c 1461{
5c0ca799 1462 dTHR;
c277df42 1463 register regnode *ret;
a0d0e21e 1464 register char op;
1465 register char *next;
1466 I32 flags;
3280af22 1467 char *origparse = PL_regcomp_parse;
a0d0e21e 1468 char *maxpos;
1469 I32 min;
c277df42 1470 I32 max = REG_INFTY;
a0d0e21e 1471
1472 ret = regatom(&flags);
1473 if (ret == NULL) {
1474 if (flags & TRYAGAIN)
1475 *flagp |= TRYAGAIN;
1476 return(NULL);
1477 }
1478
3280af22 1479 op = *PL_regcomp_parse;
a0d0e21e 1480
3280af22 1481 if (op == '{' && regcurly(PL_regcomp_parse)) {
1482 next = PL_regcomp_parse + 1;
a0d0e21e 1483 maxpos = Nullch;
1484 while (isDIGIT(*next) || *next == ',') {
1485 if (*next == ',') {
1486 if (maxpos)
1487 break;
1488 else
1489 maxpos = next;
a687059c 1490 }
a0d0e21e 1491 next++;
1492 }
1493 if (*next == '}') { /* got one */
1494 if (!maxpos)
1495 maxpos = next;
3280af22 1496 PL_regcomp_parse++;
1497 min = atoi(PL_regcomp_parse);
a0d0e21e 1498 if (*maxpos == ',')
1499 maxpos++;
1500 else
3280af22 1501 maxpos = PL_regcomp_parse;
a0d0e21e 1502 max = atoi(maxpos);
1503 if (!max && *maxpos != '0')
c277df42 1504 max = REG_INFTY; /* meaning "infinity" */
1505 else if (max >= REG_INFTY)
1506 FAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
3280af22 1507 PL_regcomp_parse = next;
a0d0e21e 1508 nextchar();
1509
1510 do_curly:
1511 if ((flags&SIMPLE)) {
3280af22 1512 PL_regnaughty += 2 + PL_regnaughty / 2;
a0d0e21e 1513 reginsert(CURLY, ret);
1514 }
1515 else {
3280af22 1516 PL_regnaughty += 4 + PL_regnaughty; /* compound interest */
c277df42 1517 regtail(ret, reg_node(WHILEM));
3280af22 1518 if (!SIZE_ONLY && PL_extralen) {
c277df42 1519 reginsert(LONGJMP,ret);
1520 reginsert(NOTHING,ret);
1521 NEXT_OFF(ret) = 3; /* Go over LONGJMP. */
1522 }
a0d0e21e 1523 reginsert(CURLYX,ret);
3280af22 1524 if (!SIZE_ONLY && PL_extralen)
c277df42 1525 NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
1526 regtail(ret, reg_node(NOTHING));
1527 if (SIZE_ONLY)
3280af22 1528 PL_extralen += 3;
a0d0e21e 1529 }
c277df42 1530 ret->flags = 0;
a0d0e21e 1531
1532 if (min > 0)
821b33a5 1533 *flagp = WORST;
1534 if (max > 0)
1535 *flagp |= HASWIDTH;
a0d0e21e 1536 if (max && max < min)
c277df42 1537 FAIL("Can't do {n,m} with n > m");
1538 if (!SIZE_ONLY) {
1539 ARG1_SET(ret, min);
1540 ARG2_SET(ret, max);
a687059c 1541 }
a687059c 1542
a0d0e21e 1543 goto nest_check;
a687059c 1544 }
a0d0e21e 1545 }
a687059c 1546
a0d0e21e 1547 if (!ISMULT1(op)) {
1548 *flagp = flags;
a687059c 1549 return(ret);
a0d0e21e 1550 }
bb20fd44 1551
c277df42 1552#if 0 /* Now runtime fix should be reliable. */
bb20fd44 1553 if (!(flags&HASWIDTH) && op != '?')
c277df42 1554 FAIL("regexp *+ operand could be empty");
1555#endif
bb20fd44 1556
a0d0e21e 1557 nextchar();
1558
821b33a5 1559 *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
a0d0e21e 1560
1561 if (op == '*' && (flags&SIMPLE)) {
1562 reginsert(STAR, ret);
c277df42 1563 ret->flags = 0;
3280af22 1564 PL_regnaughty += 4;
a0d0e21e 1565 }
1566 else if (op == '*') {
1567 min = 0;
1568 goto do_curly;
a0ed51b3 1569 }
1570 else if (op == '+' && (flags&SIMPLE)) {
a0d0e21e 1571 reginsert(PLUS, ret);
c277df42 1572 ret->flags = 0;
3280af22 1573 PL_regnaughty += 3;
a0d0e21e 1574 }
1575 else if (op == '+') {
1576 min = 1;
1577 goto do_curly;
a0ed51b3 1578 }
1579 else if (op == '?') {
a0d0e21e 1580 min = 0; max = 1;
1581 goto do_curly;
1582 }
1583 nest_check:
17feb5d5 1584 if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) {
cea2e8a9 1585 Perl_warner(aTHX_ WARN_UNSAFE, "%.*s matches null string many times",
3280af22 1586 PL_regcomp_parse - origparse, origparse);
a0d0e21e 1587 }
1588
3280af22 1589 if (*PL_regcomp_parse == '?') {
a0d0e21e 1590 nextchar();
1591 reginsert(MINMOD, ret);
c277df42 1592 regtail(ret, ret + NODE_STEP_REGNODE);
a0d0e21e 1593 }
3280af22 1594 if (ISMULT2(PL_regcomp_parse))
a0d0e21e 1595 FAIL("nested *?+ in regexp");
1596
1597 return(ret);
a687059c 1598}
1599
1600/*
1601 - regatom - the lowest level
1602 *
1603 * Optimization: gobbles an entire sequence of ordinary characters so that
1604 * it can turn them into a single node, which is smaller to store and
1605 * faster to run. Backslashed characters are exceptions, each becoming a
1606 * separate node; the code is simpler that way and it's not worth fixing.
1607 *
1608 * [Yes, it is worth fixing, some scripts can run twice the speed.]
1609 */
76e3520e 1610STATIC regnode *
cea2e8a9 1611S_regatom(pTHX_ I32 *flagp)
a687059c 1612{
5c0ca799 1613 dTHR;
c277df42 1614 register regnode *ret = 0;
a0d0e21e 1615 I32 flags;
1616
1617 *flagp = WORST; /* Tentatively. */
1618
1619tryagain:
3280af22 1620 switch (*PL_regcomp_parse) {
a0d0e21e 1621 case '^':
3280af22 1622 PL_seen_zerolen++;
a0d0e21e 1623 nextchar();
3280af22 1624 if (PL_regflags & PMf_MULTILINE)
c277df42 1625 ret = reg_node(MBOL);
3280af22 1626 else if (PL_regflags & PMf_SINGLELINE)
c277df42 1627 ret = reg_node(SBOL);
a0d0e21e 1628 else
c277df42 1629 ret = reg_node(BOL);
a0d0e21e 1630 break;
1631 case '$':
3280af22 1632 if (PL_regcomp_parse[1])
1633 PL_seen_zerolen++;
a0d0e21e 1634 nextchar();
3280af22 1635 if (PL_regflags & PMf_MULTILINE)
c277df42 1636 ret = reg_node(MEOL);
3280af22 1637 else if (PL_regflags & PMf_SINGLELINE)
c277df42 1638 ret = reg_node(SEOL);
a0d0e21e 1639 else
c277df42 1640 ret = reg_node(EOL);
a0d0e21e 1641 break;
1642 case '.':
1643 nextchar();
a0ed51b3 1644 if (UTF) {
1645 if (PL_regflags & PMf_SINGLELINE)
1646 ret = reg_node(SANYUTF8);
1647 else
1648 ret = reg_node(ANYUTF8);
1649 *flagp |= HASWIDTH;
1650 }
1651 else {
1652 if (PL_regflags & PMf_SINGLELINE)
1653 ret = reg_node(SANY);
1654 else
22c35a8c 1655 ret = reg_node(REG_ANY);
a0ed51b3 1656 *flagp |= HASWIDTH|SIMPLE;
1657 }
3280af22 1658 PL_regnaughty++;
a0d0e21e 1659 break;
1660 case '[':
3280af22 1661 PL_regcomp_parse++;
a0ed51b3 1662 ret = (UTF ? regclassutf8() : regclass());
a14b48bc 1663 if (*PL_regcomp_parse != ']')
1664 FAIL("unmatched [] in regexp");
1665 nextchar();
a0d0e21e 1666 *flagp |= HASWIDTH|SIMPLE;
1667 break;
1668 case '(':
1669 nextchar();
1670 ret = reg(1, &flags);
1671 if (ret == NULL) {
1672 if (flags & TRYAGAIN)
1673 goto tryagain;
1674 return(NULL);
1675 }
c277df42 1676 *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
a0d0e21e 1677 break;
1678 case '|':
1679 case ')':
1680 if (flags & TRYAGAIN) {
1681 *flagp |= TRYAGAIN;
1682 return NULL;
1683 }
3280af22 1684 FAIL2("internal urp in regexp at /%s/", PL_regcomp_parse);
a0d0e21e 1685 /* Supposed to be caught earlier. */
1686 break;
85afd4ae 1687 case '{':
3280af22 1688 if (!regcurly(PL_regcomp_parse)) {
1689 PL_regcomp_parse++;
85afd4ae 1690 goto defchar;
1691 }
1692 /* FALL THROUGH */
a0d0e21e 1693 case '?':
1694 case '+':
1695 case '*':
3115e423 1696 FAIL("?+*{} follows nothing in regexp");
a0d0e21e 1697 break;
1698 case '\\':
3280af22 1699 switch (*++PL_regcomp_parse) {
a0d0e21e 1700 case 'A':
3280af22 1701 PL_seen_zerolen++;
c277df42 1702 ret = reg_node(SBOL);
a0d0e21e 1703 *flagp |= SIMPLE;
1704 nextchar();
1705 break;
1706 case 'G':
c277df42 1707 ret = reg_node(GPOS);
3280af22 1708 PL_regseen |= REG_SEEN_GPOS;
a0d0e21e 1709 *flagp |= SIMPLE;
1710 nextchar();
1711 break;
1712 case 'Z':
c277df42 1713 ret = reg_node(SEOL);
a0d0e21e 1714 *flagp |= SIMPLE;
1715 nextchar();
1716 break;
b85d18e9 1717 case 'z':
1718 ret = reg_node(EOS);
1719 *flagp |= SIMPLE;
3280af22 1720 PL_seen_zerolen++; /* Do not optimize RE away */
b85d18e9 1721 nextchar();
1722 break;
a0ed51b3 1723 case 'C':
1724 ret = reg_node(SANY);
1725 *flagp |= HASWIDTH|SIMPLE;
1726 nextchar();
1727 break;
1728 case 'X':
1729 ret = reg_node(CLUMP);
1730 *flagp |= HASWIDTH;
1731 nextchar();
1732 if (UTF && !PL_utf8_mark)
dfe13c55 1733 is_utf8_mark((U8*)"~"); /* preload table */
a0ed51b3 1734 break;
a0d0e21e 1735 case 'w':
a0ed51b3 1736 ret = reg_node(
1737 UTF
1738 ? (LOC ? ALNUMLUTF8 : ALNUMUTF8)
1739 : (LOC ? ALNUML : ALNUM));
a0d0e21e 1740 *flagp |= HASWIDTH|SIMPLE;
1741 nextchar();
a0ed51b3 1742 if (UTF && !PL_utf8_alnum)
dfe13c55 1743 is_utf8_alnum((U8*)"a"); /* preload table */
a0d0e21e 1744 break;
1745 case 'W':
a0ed51b3 1746 ret = reg_node(
1747 UTF
1748 ? (LOC ? NALNUMLUTF8 : NALNUMUTF8)
1749 : (LOC ? NALNUML : NALNUM));
a0d0e21e 1750 *flagp |= HASWIDTH|SIMPLE;
1751 nextchar();
a0ed51b3 1752 if (UTF && !PL_utf8_alnum)
dfe13c55 1753 is_utf8_alnum((U8*)"a"); /* preload table */
a0d0e21e 1754 break;
1755 case 'b':
3280af22 1756 PL_seen_zerolen++;
f5c9036e 1757 PL_regseen |= REG_SEEN_LOOKBEHIND;
a0ed51b3 1758 ret = reg_node(
1759 UTF
1760 ? (LOC ? BOUNDLUTF8 : BOUNDUTF8)
1761 : (LOC ? BOUNDL : BOUND));
a0d0e21e 1762 *flagp |= SIMPLE;
1763 nextchar();
a0ed51b3 1764 if (UTF && !PL_utf8_alnum)
dfe13c55 1765 is_utf8_alnum((U8*)"a"); /* preload table */
a0d0e21e 1766 break;
1767 case 'B':
3280af22 1768 PL_seen_zerolen++;
f5c9036e 1769 PL_regseen |= REG_SEEN_LOOKBEHIND;
a0ed51b3 1770 ret = reg_node(
1771 UTF
1772 ? (LOC ? NBOUNDLUTF8 : NBOUNDUTF8)
1773 : (LOC ? NBOUNDL : NBOUND));
a0d0e21e 1774 *flagp |= SIMPLE;
1775 nextchar();
a0ed51b3 1776 if (UTF && !PL_utf8_alnum)
dfe13c55 1777 is_utf8_alnum((U8*)"a"); /* preload table */
a0d0e21e 1778 break;
1779 case 's':
a0ed51b3 1780 ret = reg_node(
1781 UTF
1782 ? (LOC ? SPACELUTF8 : SPACEUTF8)
1783 : (LOC ? SPACEL : SPACE));
a0d0e21e 1784 *flagp |= HASWIDTH|SIMPLE;
1785 nextchar();
a0ed51b3 1786 if (UTF && !PL_utf8_space)
dfe13c55 1787 is_utf8_space((U8*)" "); /* preload table */
a0d0e21e 1788 break;
1789 case 'S':
a0ed51b3 1790 ret = reg_node(
1791 UTF
1792 ? (LOC ? NSPACELUTF8 : NSPACEUTF8)
1793 : (LOC ? NSPACEL : NSPACE));
a0d0e21e 1794 *flagp |= HASWIDTH|SIMPLE;
1795 nextchar();
a0ed51b3 1796 if (UTF && !PL_utf8_space)
dfe13c55 1797 is_utf8_space((U8*)" "); /* preload table */
a0d0e21e 1798 break;
1799 case 'd':
a0ed51b3 1800 ret = reg_node(UTF ? DIGITUTF8 : DIGIT);
a0d0e21e 1801 *flagp |= HASWIDTH|SIMPLE;
1802 nextchar();
a0ed51b3 1803 if (UTF && !PL_utf8_digit)
dfe13c55 1804 is_utf8_digit((U8*)"1"); /* preload table */
a0d0e21e 1805 break;
1806 case 'D':
a0ed51b3 1807 ret = reg_node(UTF ? NDIGITUTF8 : NDIGIT);
a0d0e21e 1808 *flagp |= HASWIDTH|SIMPLE;
1809 nextchar();
a0ed51b3 1810 if (UTF && !PL_utf8_digit)
dfe13c55 1811 is_utf8_digit((U8*)"1"); /* preload table */
a0d0e21e 1812 break;
a14b48bc 1813 case 'p':
1814 case 'P':
1815 { /* a lovely hack--pretend we saw [\pX] instead */
1816 char* oldregxend = PL_regxend;
1817
1818 if (PL_regcomp_parse[1] == '{') {
1819 PL_regxend = strchr(PL_regcomp_parse, '}');
1820 if (!PL_regxend)
1821 FAIL("Missing right brace on \\p{}");
1822 PL_regxend++;
1823 }
1824 else
1825 PL_regxend = PL_regcomp_parse + 2;
1826 PL_regcomp_parse--;
1827
1828 ret = regclassutf8();
1829
1830 PL_regxend = oldregxend;
1831 PL_regcomp_parse--;
1832 nextchar();
1833 *flagp |= HASWIDTH|SIMPLE;
1834 }
1835 break;
a0d0e21e 1836 case 'n':
1837 case 'r':
1838 case 't':
1839 case 'f':
1840 case 'e':
1841 case 'a':
1842 case 'x':
1843 case 'c':
1844 case '0':
1845 goto defchar;
1846 case '1': case '2': case '3': case '4':
1847 case '5': case '6': case '7': case '8': case '9':
1848 {
3280af22 1849 I32 num = atoi(PL_regcomp_parse);
a0d0e21e 1850
3280af22 1851 if (num > 9 && num >= PL_regnpar)
a0d0e21e 1852 goto defchar;
1853 else {
3280af22 1854 if (!SIZE_ONLY && num > PL_regcomp_rx->nparens)
ef64f398 1855 FAIL("reference to nonexistent group");
3280af22 1856 PL_regsawback = 1;
a0ed51b3 1857 ret = reganode(FOLD
1858 ? (LOC ? REFFL : REFF)
c8756f30 1859 : REF, num);
a0d0e21e 1860 *flagp |= HASWIDTH;
3280af22 1861 while (isDIGIT(*PL_regcomp_parse))
1862 PL_regcomp_parse++;
1863 PL_regcomp_parse--;
a0d0e21e 1864 nextchar();
1865 }
1866 }
1867 break;
1868 case '\0':
3280af22 1869 if (PL_regcomp_parse >= PL_regxend)
a0d0e21e 1870 FAIL("trailing \\ in regexp");
1871 /* FALL THROUGH */
1872 default:
c9f97d15 1873 /* Do not generate `unrecognized' warnings here, we fall
1874 back into the quick-grab loop below */
a0d0e21e 1875 goto defchar;
1876 }
1877 break;
4633a7c4 1878
1879 case '#':
3280af22 1880 if (PL_regflags & PMf_EXTENDED) {
1881 while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != '\n') PL_regcomp_parse++;
1882 if (PL_regcomp_parse < PL_regxend)
4633a7c4 1883 goto tryagain;
1884 }
1885 /* FALL THROUGH */
1886
a0d0e21e 1887 default: {
1888 register I32 len;
a0ed51b3 1889 register UV ender;
a0d0e21e 1890 register char *p;
c277df42 1891 char *oldp, *s;
a0d0e21e 1892 I32 numlen;
1893
3280af22 1894 PL_regcomp_parse++;
a0d0e21e 1895
1896 defchar:
a0ed51b3 1897 ret = reg_node(FOLD
1898 ? (LOC ? EXACTFL : EXACTF)
bbce6d69 1899 : EXACT);
161b471a 1900 s = (char *) OPERAND(ret);
c277df42 1901 regc(0, s++); /* save spot for len */
3280af22 1902 for (len = 0, p = PL_regcomp_parse - 1;
1903 len < 127 && p < PL_regxend;
a0d0e21e 1904 len++)
1905 {
1906 oldp = p;
5b5a24f7 1907
3280af22 1908 if (PL_regflags & PMf_EXTENDED)
1909 p = regwhite(p, PL_regxend);
a0d0e21e 1910 switch (*p) {
1911 case '^':
1912 case '$':
1913 case '.':
1914 case '[':
1915 case '(':
1916 case ')':
1917 case '|':
1918 goto loopdone;
1919 case '\\':
1920 switch (*++p) {
1921 case 'A':
1922 case 'G':
1923 case 'Z':
b85d18e9 1924 case 'z':
a0d0e21e 1925 case 'w':
1926 case 'W':
1927 case 'b':
1928 case 'B':
1929 case 's':
1930 case 'S':
1931 case 'd':
1932 case 'D':
a14b48bc 1933 case 'p':
1934 case 'P':
a0d0e21e 1935 --p;
1936 goto loopdone;
1937 case 'n':
1938 ender = '\n';
1939 p++;
a687059c 1940 break;
a0d0e21e 1941 case 'r':
1942 ender = '\r';
1943 p++;
a687059c 1944 break;
a0d0e21e 1945 case 't':
1946 ender = '\t';
1947 p++;
a687059c 1948 break;
a0d0e21e 1949 case 'f':
1950 ender = '\f';
1951 p++;
a687059c 1952 break;
a0d0e21e 1953 case 'e':
1954 ender = '\033';
1955 p++;
a687059c 1956 break;
a0d0e21e 1957 case 'a':
1958 ender = '\007';
1959 p++;
a687059c 1960 break;
a0d0e21e 1961 case 'x':
a0ed51b3 1962 if (*++p == '{') {
1963 char* e = strchr(p, '}');
1964
1965 if (!e)
1966 FAIL("Missing right brace on \\x{}");
1967 else if (UTF) {
1968 ender = scan_hex(p + 1, e - p, &numlen);
1969 if (numlen + len >= 127) { /* numlen is generous */
1970 p--;
1971 goto loopdone;
1972 }
1973 p = e + 1;
1974 }
1975 else
1976 FAIL("Can't use \\x{} without 'use utf8' declaration");
1977 }
1978 else {
1979 ender = scan_hex(p, 2, &numlen);
1980 p += numlen;
1981 }
a687059c 1982 break;
a0d0e21e 1983 case 'c':
1984 p++;
bbce6d69 1985 ender = UCHARAT(p++);
1986 ender = toCTRL(ender);
a687059c 1987 break;
a0d0e21e 1988 case '0': case '1': case '2': case '3':case '4':
1989 case '5': case '6': case '7': case '8':case '9':
1990 if (*p == '0' ||
3280af22 1991 (isDIGIT(p[1]) && atoi(p) >= PL_regnpar) ) {
a0d0e21e 1992 ender = scan_oct(p, 3, &numlen);
1993 p += numlen;
1994 }
1995 else {
1996 --p;
1997 goto loopdone;
a687059c 1998 }
1999 break;
a0d0e21e 2000 case '\0':
3280af22 2001 if (p >= PL_regxend)
a687059c 2002 FAIL("trailing \\ in regexp");
2003 /* FALL THROUGH */
a0d0e21e 2004 default:
c9f97d15 2005 if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && isALPHA(*p))
cea2e8a9 2006 Perl_warner(aTHX_ WARN_UNSAFE,
c9f97d15 2007 "/%.127s/: Unrecognized escape \\%c passed through",
2008 PL_regprecomp,
2009 *p);
a0ed51b3 2010 goto normal_default;
a0d0e21e 2011 }
2012 break;
a687059c 2013 default:
a0ed51b3 2014 normal_default:
2015 if ((*p & 0xc0) == 0xc0 && UTF) {
dfe13c55 2016 ender = utf8_to_uv((U8*)p, &numlen);
a0ed51b3 2017 p += numlen;
2018 }
2019 else
2020 ender = *p++;
a0d0e21e 2021 break;
a687059c 2022 }
3280af22 2023 if (PL_regflags & PMf_EXTENDED)
2024 p = regwhite(p, PL_regxend);
a0ed51b3 2025 if (UTF && FOLD) {
2026 if (LOC)
2027 ender = toLOWER_LC_uni(ender);
2028 else
2029 ender = toLOWER_uni(ender);
2030 }
a0d0e21e 2031 if (ISMULT2(p)) { /* Back off on ?+*. */
2032 if (len)
2033 p = oldp;
a0ed51b3 2034 else if (ender >= 0x80 && UTF) {
2035 reguni(ender, s, &numlen);
2036 s += numlen;
2037 len += numlen;
2038 }
a0d0e21e 2039 else {
2040 len++;
c277df42 2041 regc(ender, s++);
a0d0e21e 2042 }
2043 break;
a687059c 2044 }
a0ed51b3 2045 if (ender >= 0x80 && UTF) {
2046 reguni(ender, s, &numlen);
2047 s += numlen;
2048 len += numlen - 1;
2049 }
2050 else
2051 regc(ender, s++);
a0d0e21e 2052 }
2053 loopdone:
3280af22 2054 PL_regcomp_parse = p - 1;
a0d0e21e 2055 nextchar();
2056 if (len < 0)
2057 FAIL("internal disaster in regexp");
2058 if (len > 0)
2059 *flagp |= HASWIDTH;
2060 if (len == 1)
2061 *flagp |= SIMPLE;
c277df42 2062 if (!SIZE_ONLY)
a0d0e21e 2063 *OPERAND(ret) = len;
c277df42 2064 regc('\0', s++);
2065 if (SIZE_ONLY) {
3280af22 2066 PL_regsize += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
a0ed51b3 2067 }
2068 else {
3280af22 2069 PL_regcode += (len + 2 + sizeof(regnode) - 1) / sizeof(regnode);
c277df42 2070 }
a687059c 2071 }
a0d0e21e 2072 break;
2073 }
a687059c 2074
a0d0e21e 2075 return(ret);
a687059c 2076}
2077
873ef191 2078STATIC char *
cea2e8a9 2079S_regwhite(pTHX_ char *p, char *e)
5b5a24f7 2080{
2081 while (p < e) {
2082 if (isSPACE(*p))
2083 ++p;
2084 else if (*p == '#') {
2085 do {
2086 p++;
2087 } while (p < e && *p != '\n');
2088 }
2089 else
2090 break;
2091 }
2092 return p;
2093}
2094
b8c5462f 2095/* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
2096 Character classes ([:foo:]) can also be negated ([:^foo:]).
2097 Returns a named class id (ANYOF_XXX) if successful, -1 otherwise.
2098 Equivalence classes ([=foo=]) and composites ([.foo.]) are parsed,
2099 but trigger warnings because they are currently unimplemented. */
2100STATIC I32
cea2e8a9 2101S_regpposixcc(pTHX_ I32 value)
620e46c5 2102{
11b8faa4 2103 dTHR;
620e46c5 2104 char *posixcc = 0;
b8c5462f 2105 I32 namedclass = -1;
620e46c5 2106
2107 if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
2108 /* I smell either [: or [= or [. -- POSIX has been here, right? */
2109 (*PL_regcomp_parse == ':' ||
2110 *PL_regcomp_parse == '=' ||
2111 *PL_regcomp_parse == '.')) {
2112 char c = *PL_regcomp_parse;
2113 char* s = PL_regcomp_parse++;
2114
2115 while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != c)
2116 PL_regcomp_parse++;
2117 if (PL_regcomp_parse == PL_regxend)
2118 /* Grandfather lone [:, [=, [. */
2119 PL_regcomp_parse = s;
2120 else {
b8c5462f 2121 char* t = PL_regcomp_parse++; /* skip over the c */
2122
2123 if (*PL_regcomp_parse == ']') {
2124 PL_regcomp_parse++; /* skip over the ending ] */
2125 posixcc = s + 1;
2126 if (*s == ':') {
2127 I32 complement = *posixcc == '^' ? *posixcc++ : 0;
2128 I32 skip = 5; /* the most common skip */
2129
2130 switch (*posixcc) {
2131 case 'a':
2132 if (strnEQ(posixcc, "alnum", 5))
2133 namedclass =
2134 complement ? ANYOF_NALNUMC : ANYOF_ALNUMC;
2135 else if (strnEQ(posixcc, "alpha", 5))
2136 namedclass =
2137 complement ? ANYOF_NALPHA : ANYOF_ALPHA;
2138 else if (strnEQ(posixcc, "ascii", 5))
2139 namedclass =
2140 complement ? ANYOF_NASCII : ANYOF_ASCII;
2141 break;
2142 case 'c':
2143 if (strnEQ(posixcc, "cntrl", 5))
2144 namedclass =
2145 complement ? ANYOF_NCNTRL : ANYOF_CNTRL;
2146 break;
2147 case 'd':
2148 if (strnEQ(posixcc, "digit", 5))
2149 namedclass =
2150 complement ? ANYOF_NDIGIT : ANYOF_DIGIT;
2151 break;
2152 case 'g':
2153 if (strnEQ(posixcc, "graph", 5))
2154 namedclass =
2155 complement ? ANYOF_NGRAPH : ANYOF_GRAPH;
2156 break;
2157 case 'l':
2158 if (strnEQ(posixcc, "lower", 5))
2159 namedclass =
2160 complement ? ANYOF_NLOWER : ANYOF_LOWER;
2161 break;
2162 case 'p':
2163 if (strnEQ(posixcc, "print", 5))
2164 namedclass =
2165 complement ? ANYOF_NPRINT : ANYOF_PRINT;
2166 else if (strnEQ(posixcc, "punct", 5))
2167 namedclass =
2168 complement ? ANYOF_NPUNCT : ANYOF_PUNCT;
2169 break;
2170 case 's':
2171 if (strnEQ(posixcc, "space", 5))
2172 namedclass =
2173 complement ? ANYOF_NSPACE : ANYOF_SPACE;
2174 case 'u':
2175 if (strnEQ(posixcc, "upper", 5))
2176 namedclass =
2177 complement ? ANYOF_NUPPER : ANYOF_UPPER;
2178 break;
2179 case 'w': /* this is not POSIX, this is the Perl \w */
2180 if (strnEQ(posixcc, "word", 4)) {
2181 namedclass =
2182 complement ? ANYOF_NALNUM : ANYOF_ALNUM;
2183 skip = 4;
2184 }
2185 break;
2186 case 'x':
2187 if (strnEQ(posixcc, "xdigit", 6)) {
2188 namedclass =
2189 complement ? ANYOF_NXDIGIT : ANYOF_XDIGIT;
2190 skip = 6;
2191 }
2192 break;
2193 }
2194 if ((namedclass == -1 ||
2195 !(posixcc + skip + 2 < PL_regxend &&
2196 (posixcc[skip] == ':' &&
2197 posixcc[skip + 1] == ']'))))
2198 Perl_croak(aTHX_ "Character class [:%.*s:] unknown",
2199 t - s - 1, s + 1);
2200 } else if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY)
2201 /* [[=foo=]] and [[.foo.]] are still future. */
cea2e8a9 2202 Perl_warner(aTHX_ WARN_UNSAFE,
b8c5462f 2203 "Character class syntax [%c %c] is reserved for future extensions", c, c);
2204 } else {
2205 /* Maternal grandfather:
2206 * "[:" ending in ":" but not in ":]" */
767d463e 2207 PL_regcomp_parse = s;
2208 }
620e46c5 2209 }
2210 }
2211
b8c5462f 2212 return namedclass;
2213}
2214
2215STATIC void
2216S_checkposixcc(pTHX)
2217{
2218 if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY &&
2219 (*PL_regcomp_parse == ':' ||
2220 *PL_regcomp_parse == '=' ||
2221 *PL_regcomp_parse == '.')) {
2222 char *s = PL_regcomp_parse;
2223 char c = *s++;
2224
2225 while(*s && isALNUM(*s))
2226 s++;
2227 if (*s && c == *s && s[1] == ']') {
2228 Perl_warner(aTHX_ WARN_UNSAFE,
2229 "Character class syntax [%c %c] belongs inside character classes", c, c);
2230 if (c == '=' || c == '.')
2231 Perl_warner(aTHX_ WARN_UNSAFE,
2232 "Character class syntax [%c %c] is reserved for future extensions", c, c);
2233 }
2234 }
620e46c5 2235}
2236
/*
 - regclass - parse a bracketed character class into an ANYOF node
 *
 * Reads the class body from PL_regcomp_parse and stops at the
 * terminating ']' (which is not consumed here) or at PL_regxend.
 * Emits an ANYOF node via reg_node() whose operand starts out as
 * ANYOF_SIZE zero bytes, and returns that node.
 *
 * A leading '^' sets ANYOF_INVERT; FOLD and LOC set ANYOF_FOLD and
 * ANYOF_LOCALE.  A ']' or '-' in first position is taken literally.
 * Inside the class, backslash escapes, POSIX [:name:] classes (via
 * regpposixcc()) and a-b ranges are recognised.
 *
 * Everything that touches the flags or bitmap is guarded by
 * !SIZE_ONLY; in the sizing pass only PL_regsize is advanced (by
 * ANY_SKIP) while the pattern text is still scanned.
 */
76e3520e 2237STATIC regnode *
cea2e8a9 2238S_regclass(pTHX)
a687059c 2239{
5c0ca799 2240 dTHR;
c277df42 2241 register char *opnd, *s;
a0ed51b3 2242 register I32 value;
/* lastvalue: previous character seen; OOB_CHAR8 is presumably an
 * out-of-band "none yet" sentinel -- TODO confirm its definition. */
b8c5462f 2243 register I32 lastvalue = OOB_CHAR8;
/* range: set to 1 after "x-" has been seen and the upper end is pending */
a0d0e21e 2244 register I32 range = 0;
c277df42 2245 register regnode *ret;
a0d0e21e 2246 register I32 def;
 2247 I32 numlen;
b8c5462f 2248 I32 namedclass;
a0d0e21e 2249
/* opnd is the start of the node's operand; zero all ANYOF_SIZE bytes
 * of it (regc() writes nothing during the sizing pass). */
3280af22 2250 s = opnd = (char *) OPERAND(PL_regcode);
c277df42 2251 ret = reg_node(ANYOF);
b8c5462f 2252 for (value = 0; value < ANYOF_SIZE; value++)
c277df42 2253 regc(0, s++);
3280af22 2254 if (*PL_regcomp_parse == '^') { /* Complement of range. */
 2255 PL_regnaughty++;
 2256 PL_regcomp_parse++;
c277df42 2257 if (!SIZE_ONLY)
b8c5462f 2258 ANYOF_FLAGS(opnd) |= ANYOF_INVERT;
bbce6d69 2259 }
/* Reserve room for the operand: in the code buffer when emitting,
 * in the size counter when sizing. */
c277df42 2260 if (!SIZE_ONLY) {
3280af22 2261 PL_regcode += ANY_SKIP;
a0ed51b3 2262 if (FOLD)
b8c5462f 2263 ANYOF_FLAGS(opnd) |= ANYOF_FOLD;
a0ed51b3 2264 if (LOC)
b8c5462f 2265 ANYOF_FLAGS(opnd) |= ANYOF_LOCALE;
a0ed51b3 2266 }
 2267 else {
3280af22 2268 PL_regsize += ANY_SKIP;
a0d0e21e 2269 }
b8c5462f 2270
/* Warn if the class body itself starts out looking like ":name:]" etc. */
 2271 checkposixcc();
 2272
3280af22 2273 if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
a0d0e21e 2274 goto skipcond; /* allow 1st char to be ] or - */
3280af22 2275 while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
a0d0e21e 2276 skipcond:
b8c5462f 2277 namedclass = -1;
a0ed51b3 2278 value = UCHARAT(PL_regcomp_parse++);
620e46c5 2279 if (value == '[')
b8c5462f 2280 namedclass = regpposixcc(value);
620e46c5 2281 else if (value == '\\') {
/* Backslash escape: either a named class (\w \s \d and negations)
 * or a single character value.  Unlisted escapes leave value as the
 * character following the backslash. */
a0ed51b3 2282 value = UCHARAT(PL_regcomp_parse++);
 2283 switch (value) {
b8c5462f 2284 case 'w': namedclass = ANYOF_ALNUM; break;
 2285 case 'W': namedclass = ANYOF_NALNUM; break;
 2286 case 's': namedclass = ANYOF_SPACE; break;
 2287 case 'S': namedclass = ANYOF_NSPACE; break;
 2288 case 'd': namedclass = ANYOF_DIGIT; break;
 2289 case 'D': namedclass = ANYOF_NDIGIT; break;
 2290 case 'n': value = '\n'; break;
 2291 case 'r': value = '\r'; break;
 2292 case 't': value = '\t'; break;
 2293 case 'f': value = '\f'; break;
 2294 case 'b': value = '\b'; break;
 2295 case 'e': value = '\033'; break;
 2296 case 'a': value = '\007'; break;
 2297 case 'x':
 2298 value = scan_hex(PL_regcomp_parse, 2, &numlen);
 2299 PL_regcomp_parse += numlen;
 2300 break;
 2301 case 'c':
 2302 value = UCHARAT(PL_regcomp_parse++);
 2303 value = toCTRL(value);
 2304 break;
 2305 case '0': case '1': case '2': case '3': case '4':
 2306 case '5': case '6': case '7': case '8': case '9':
/* back up so the digit just read is part of the octal number */
 2307 value = scan_oct(--PL_regcomp_parse, 3, &numlen);
 2308 PL_regcomp_parse += numlen;
 2309 break;
 2310 }
 2311 }
/* Named class (emit pass only).  Every case follows the same pattern:
 * under LOC just record the class bit with ANYOF_CLASS_SET; otherwise
 * set the bitmap bit of each code 0..255 for which the class predicate
 * holds (or fails, for the negated ANYOF_N... variants).
 * Note that the fill loops reuse `value' as their counter. */
 2312 if (!SIZE_ONLY && namedclass > -1) {
 2313 switch (namedclass) {
 2314 case ANYOF_ALNUM:
 2315 if (LOC)
 2316 ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
 2317 else {
 2318 for (value = 0; value < 256; value++)
 2319 if (isALNUM(value))
 2320 ANYOF_BITMAP_SET(opnd, value);
bbce6d69 2321 }
b8c5462f 2322 break;
 2323 case ANYOF_NALNUM:
 2324 if (LOC)
 2325 ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
 2326 else {
 2327 for (value = 0; value < 256; value++)
 2328 if (!isALNUM(value))
 2329 ANYOF_BITMAP_SET(opnd, value);
bbce6d69 2330 }
b8c5462f 2331 break;
 2332 case ANYOF_SPACE:
 2333 if (LOC)
 2334 ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
 2335 else {
 2336 for (value = 0; value < 256; value++)
 2337 if (isSPACE(value))
 2338 ANYOF_BITMAP_SET(opnd, value);
bbce6d69 2339 }
b8c5462f 2340 break;
 2341 case ANYOF_NSPACE:
 2342 if (LOC)
 2343 ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
 2344 else {
 2345 for (value = 0; value < 256; value++)
 2346 if (!isSPACE(value))
 2347 ANYOF_BITMAP_SET(opnd, value);
bbce6d69 2348 }
b8c5462f 2349 break;
 2350 case ANYOF_DIGIT:
 2351 if (LOC)
 2352 ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
 2353 else {
a0ed51b3 2354 for (value = '0'; value <= '9'; value++)
b8c5462f 2355 ANYOF_BITMAP_SET(opnd, value);
ae5c130c 2356 }
b8c5462f 2357 break;
 2358 case ANYOF_NDIGIT:
 2359 if (LOC)
 2360 ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
 2361 else {
a0ed51b3 2362 for (value = 0; value < '0'; value++)
b8c5462f 2363 ANYOF_BITMAP_SET(opnd, value);
a0ed51b3 2364 for (value = '9' + 1; value < 256; value++)
b8c5462f 2365 ANYOF_BITMAP_SET(opnd, value);
 2366 }
 2367 break;
 2368 case ANYOF_NALNUMC:
 2369 if (LOC)
 2370 ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
 2371 else {
 2372 for (value = 0; value < 256; value++)
 2373 if (!isALNUMC(value))
 2374 ANYOF_BITMAP_SET(opnd, value);
 2375 }
 2376 break;
 2377 case ANYOF_ALNUMC:
 2378 if (LOC)
 2379 ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
 2380 else {
 2381 for (value = 0; value < 256; value++)
 2382 if (isALNUMC(value))
 2383 ANYOF_BITMAP_SET(opnd, value);
 2384 }
 2385 break;
 2386 case ANYOF_ALPHA:
 2387 if (LOC)
 2388 ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
 2389 else {
 2390 for (value = 0; value < 256; value++)
 2391 if (isALPHA(value))
 2392 ANYOF_BITMAP_SET(opnd, value);
ae5c130c 2393 }
a0d0e21e 2394 break;
b8c5462f 2395 case ANYOF_NALPHA:
 2396 if (LOC)
 2397 ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
 2398 else {
 2399 for (value = 0; value < 256; value++)
 2400 if (!isALPHA(value))
 2401 ANYOF_BITMAP_SET(opnd, value);
 2402 }
a0d0e21e 2403 break;
b8c5462f 2404 case ANYOF_ASCII:
 2405 if (LOC)
 2406 ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
 2407 else {
 2408 for (value = 0; value < 128; value++)
 2409 ANYOF_BITMAP_SET(opnd, value);
 2410 }
a0d0e21e 2411 break;
b8c5462f 2412 case ANYOF_NASCII:
 2413 if (LOC)
 2414 ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
 2415 else {
 2416 for (value = 128; value < 256; value++)
 2417 ANYOF_BITMAP_SET(opnd, value);
 2418 }
a0d0e21e 2419 break;
b8c5462f 2420 case ANYOF_CNTRL:
 2421 if (LOC)
 2422 ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
 2423 else {
 2424 for (value = 0; value < 256; value++)
 2425 if (isCNTRL(value))
 2426 ANYOF_BITMAP_SET(opnd, value);
 2427 }
/* redundant: the same assignment is made for every class after the switch */
 2428 lastvalue = OOB_CHAR8;
a0d0e21e 2429 break;
b8c5462f 2430 case ANYOF_NCNTRL:
 2431 if (LOC)
 2432 ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
 2433 else {
 2434 for (value = 0; value < 256; value++)
 2435 if (!isCNTRL(value))
 2436 ANYOF_BITMAP_SET(opnd, value);
 2437 }
a0d0e21e 2438 break;
b8c5462f 2439 case ANYOF_GRAPH:
 2440 if (LOC)
 2441 ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
 2442 else {
 2443 for (value = 0; value < 256; value++)
 2444 if (isGRAPH(value))
 2445 ANYOF_BITMAP_SET(opnd, value);
 2446 }
a0d0e21e 2447 break;
b8c5462f 2448 case ANYOF_NGRAPH:
 2449 if (LOC)
 2450 ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
 2451 else {
 2452 for (value = 0; value < 256; value++)
 2453 if (!isGRAPH(value))
 2454 ANYOF_BITMAP_SET(opnd, value);
 2455 }
a0d0e21e 2456 break;
b8c5462f 2457 case ANYOF_LOWER:
 2458 if (LOC)
 2459 ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
 2460 else {
 2461 for (value = 0; value < 256; value++)
 2462 if (isLOWER(value))
 2463 ANYOF_BITMAP_SET(opnd, value);
 2464 }
a0d0e21e 2465 break;
b8c5462f 2466 case ANYOF_NLOWER:
 2467 if (LOC)
 2468 ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
 2469 else {
 2470 for (value = 0; value < 256; value++)
 2471 if (!isLOWER(value))
 2472 ANYOF_BITMAP_SET(opnd, value);
 2473 }
 2474 break;
 2475 case ANYOF_PRINT:
 2476 if (LOC)
 2477 ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
 2478 else {
 2479 for (value = 0; value < 256; value++)
 2480 if (isPRINT(value))
 2481 ANYOF_BITMAP_SET(opnd, value);
 2482 }
 2483 break;
 2484 case ANYOF_NPRINT:
 2485 if (LOC)
 2486 ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
 2487 else {
 2488 for (value = 0; value < 256; value++)
 2489 if (!isPRINT(value))
 2490 ANYOF_BITMAP_SET(opnd, value);
 2491 }
 2492 break;
 2493 case ANYOF_PUNCT:
 2494 if (LOC)
 2495 ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
 2496 else {
 2497 for (value = 0; value < 256; value++)
 2498 if (isPUNCT(value))
 2499 ANYOF_BITMAP_SET(opnd, value);
 2500 }
 2501 break;
 2502 case ANYOF_NPUNCT:
 2503 if (LOC)
 2504 ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
 2505 else {
 2506 for (value = 0; value < 256; value++)
 2507 if (!isPUNCT(value))
 2508 ANYOF_BITMAP_SET(opnd, value);
 2509 }
 2510 break;
 2511 case ANYOF_UPPER:
 2512 if (LOC)
 2513 ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
 2514 else {
 2515 for (value = 0; value < 256; value++)
 2516 if (isUPPER(value))
 2517 ANYOF_BITMAP_SET(opnd, value);
 2518 }
 2519 break;
 2520 case ANYOF_NUPPER:
 2521 if (LOC)
 2522 ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
 2523 else {
 2524 for (value = 0; value < 256; value++)
 2525 if (!isUPPER(value))
 2526 ANYOF_BITMAP_SET(opnd, value);
 2527 }
 2528 break;
 2529 case ANYOF_XDIGIT:
 2530 if (LOC)
 2531 ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
 2532 else {
 2533 for (value = 0; value < 256; value++)
 2534 if (isXDIGIT(value))
 2535 ANYOF_BITMAP_SET(opnd, value);
 2536 }
 2537 break;
 2538 case ANYOF_NXDIGIT:
 2539 if (LOC)
 2540 ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
 2541 else {
 2542 for (value = 0; value < 256; value++)
 2543 if (!isXDIGIT(value))
 2544 ANYOF_BITMAP_SET(opnd, value);
 2545 }
 2546 break;
 2547 default:
 2548 FAIL("invalid [::] class in regexp");
a0d0e21e 2549 break;
 2550 }
b8c5462f 2551 if (LOC)
 2552 ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
 2553 lastvalue = OOB_CHAR8;
a0d0e21e 2554 }
/* Ordinary character (and, in the sizing pass, also the character that
 * stood for a named class): either it closes a pending range or it may
 * open a new one. */
b8c5462f 2555 else
a0d0e21e 2556 if (range) {
a0ed51b3 2557 if (lastvalue > value)
a0d0e21e 2558 FAIL("invalid [] range in regexp");
 2559 range = 0;
 2560 }
 2561 else {
a0ed51b3 2562 lastvalue = value;
/* "x-" followed by something other than ']' starts a range; a '-'
 * directly before ']' is left to be read as a literal. */
3280af22 2563 if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
 2564 PL_regcomp_parse[1] != ']') {
 2565 PL_regcomp_parse++;
a0d0e21e 2566 range = 1;
 2567 continue; /* do it next time */
 2568 }
a687059c 2569 }
/* Set the bits for lastvalue..value (a single bit when they are equal). */
ae5c130c 2570 if (!SIZE_ONLY) {
/* Without ASCIIish, a range whose ends are both lowercase (or both
 * uppercase) sets only the codes in between that are letters of that
 * case -- presumably because letters are not contiguous in such
 * character sets (e.g. EBCDIC); TODO confirm. */
8ada0baa 2571#ifndef ASCIIish
 2572 if ((isLOWER(lastvalue) && isLOWER(value)) ||
c8dba6f3 2573 (isUPPER(lastvalue) && isUPPER(value)))
 2574 {
 2575 I32 i;
8ada0baa 2576 if (isLOWER(lastvalue)) {
 2577 for (i = lastvalue; i <= value; i++)
 2578 if (isLOWER(i))
b8c5462f 2579 ANYOF_BITMAP_SET(opnd, i);
8ada0baa 2580 } else {
 2581 for (i = lastvalue; i <= value; i++)
 2582 if (isUPPER(i))
b8c5462f 2583 ANYOF_BITMAP_SET(opnd, i);
8ada0baa 2584 }
 2585 }
 2586 else
 2587#endif
 2588 for ( ; lastvalue <= value; lastvalue++)
b8c5462f 2589 ANYOF_BITMAP_SET(opnd, lastvalue);
8ada0baa 2590 }
/* NOTE(review): the named-class branch above also falls through to
 * here.  At that point `value' holds whatever the bitmap-fill loop
 * left in it, so this assignment replaces the OOB_CHAR8 just stored in
 * lastvalue, and `range' is not cleared by that branch.  A class such
 * as [a-\d] therefore looks like it is handled differently in the
 * sizing and emit passes -- confirm before relying on it. */
a0ed51b3 2591 lastvalue = value;
a0d0e21e 2592 }
ae5c130c 2593 /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
/* When ANYOF_FOLD is the only flag set (ignoring ANYOF_INVERT), add
 * the PL_fold[] counterpart of every set bit and drop the flag. */
b8c5462f 2594 if (!SIZE_ONLY &&
 2595 (ANYOF_FLAGS(opnd) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
a0ed51b3 2596 for (value = 0; value < 256; ++value) {
b8c5462f 2597 if (ANYOF_BITMAP_TEST(opnd, value)) {
22c35a8c 2598 I32 cf = PL_fold[value];
b8c5462f 2599 ANYOF_BITMAP_SET(opnd, cf);
ae5c130c 2600 }
 2601 }
b8c5462f 2602 ANYOF_FLAGS(opnd) &= ~ANYOF_FOLD;
ae5c130c 2603 }
 2604 /* optimize inverted simple patterns (e.g. [^a-z]) */
/* When ANYOF_INVERT is the only flag set, flip the bitmap bytes in
 * place (XOR with ANYOF_FLAGS_ALL) and clear the flags. */
b8c5462f 2605 if (!SIZE_ONLY && (ANYOF_FLAGS(opnd) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
 2606 for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
 2607 opnd[ANYOF_BITMAP_OFFSET + value] ^= ANYOF_FLAGS_ALL;
 2608 ANYOF_FLAGS(opnd) = 0;
ae5c130c 2609 }
a0d0e21e 2610 return ret;
 2611}
2612
/*
 - regclassutf8 - parse a bracketed character class into an ANYOFUTF8 node
 *
 * Reads the class body from PL_regcomp_parse, decoding characters with
 * utf8_to_uv(), and stops at the terminating ']' (not consumed here)
 * or at PL_regxend.  Instead of a bitmap, the emit pass builds a text
 * list in listsv, one line per item:
 *   "+utf8::Name\n" / "!utf8::Name\n"  for named classes and \p / \P
 *   "%04x\t%04x\n"                     for a range
 *   "%04x\n"                           for a single character
 * The list is handed to swash_init(); the result is stored in the
 * regexp's data array and its index becomes the node's second
 * argument, the flags (ANYOF_INVERT / ANYOF_FOLD / ANYOF_LOCALE) its
 * first.  The node is emitted after parsing and returned.
 * In the sizing pass the text is scanned but listsv is never created
 * or touched.
 */
a0ed51b3 2613STATIC regnode *
cea2e8a9 2614S_regclassutf8(pTHX)
a0ed51b3 2615{
b8c5462f 2616 dTHR;
a0ed51b3 2617 register char *opnd, *e;
 2618 register U32 value;
/* lastvalue: previous character; OOB_UTF8 is presumably an out-of-band
 * "none yet" sentinel -- TODO confirm its definition. */
b8c5462f 2619 register U32 lastvalue = OOB_UTF8;
/* range: set to 1 after "x-" has been seen and the upper end is pending */
a0ed51b3 2620 register I32 range = 0;
 2621 register regnode *ret;
 2622 I32 numlen;
 2623 I32 n;
/* listsv is only assigned (and only used) when !SIZE_ONLY */
 2624 SV *listsv;
 2625 U8 flags = 0;
b8c5462f 2626 I32 namedclass;
a0ed51b3 2627
 2628 if (*PL_regcomp_parse == '^') { /* Complement of range. */
 2629 PL_regnaughty++;
 2630 PL_regcomp_parse++;
 2631 if (!SIZE_ONLY)
 2632 flags |= ANYOF_INVERT;
 2633 }
 2634 if (!SIZE_ONLY) {
 2635 if (FOLD)
 2636 flags |= ANYOF_FOLD;
 2637 if (LOC)
 2638 flags |= ANYOF_LOCALE;
/* the list text starts with this 10-byte comment line */
79cb57f6 2639 listsv = newSVpvn("# comment\n",10);
a0ed51b3 2640 }
 2641
/* Warn if the class body itself starts out looking like ":name:]" etc. */
b8c5462f 2642 checkposixcc();
 2643
a0ed51b3 2644 if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
 2645 goto skipcond; /* allow 1st char to be ] or - */
 2646
 2647 while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
 2648 skipcond:
b8c5462f 2649 namedclass = -1;
dfe13c55 2650 value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
a0ed51b3 2651 PL_regcomp_parse += numlen;
 2652
620e46c5 2653 if (value == '[')
b8c5462f 2654 namedclass = regpposixcc(value);
620e46c5 2655 else if (value == '\\') {
dfe13c55 2656 value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
a0ed51b3 2657 PL_regcomp_parse += numlen;
 2658 switch (value) {
b8c5462f 2659 case 'w': namedclass = ANYOF_ALNUM; break;
 2660 case 'W': namedclass = ANYOF_NALNUM; break;
 2661 case 's': namedclass = ANYOF_SPACE; break;
 2662 case 'S': namedclass = ANYOF_NSPACE; break;
 2663 case 'd': namedclass = ANYOF_DIGIT; break;
 2664 case 'D': namedclass = ANYOF_NDIGIT; break;
/* \p{Name} / \pX add the property, \P{Name} / \PX its negation.
 * With braces the name is the text between '{' and '}'; without,
 * it is the single byte that follows.
 * NOTE(review): strchr() is not bounded by PL_regxend; it relies on
 * the pattern buffer being NUL-terminated -- confirm. */
a0ed51b3 2665 case 'p':
 2666 case 'P':
 2667 if (*PL_regcomp_parse == '{') {
 2668 e = strchr(PL_regcomp_parse++, '}');
 2669 if (!e)
 2670 FAIL("Missing right brace on \\p{}");
 2671 n = e - PL_regcomp_parse;
 2672 }
 2673 else {
 2674 e = PL_regcomp_parse;
 2675 n = 1;
 2676 }
 2677 if (!SIZE_ONLY) {
 2678 if (value == 'p')
b8c5462f 2679 Perl_sv_catpvf(aTHX_ listsv,
 2680 "+utf8::%.*s\n", n, PL_regcomp_parse);
a0ed51b3 2681 else
cea2e8a9 2682 Perl_sv_catpvf(aTHX_ listsv,
b8c5462f 2683 "!utf8::%.*s\n", n, PL_regcomp_parse);
a0ed51b3 2684 }
/* resume after the '}' (or after the single name byte) */
 2685 PL_regcomp_parse = e + 1;
b8c5462f 2686 lastvalue = OOB_UTF8;
a0ed51b3 2687 continue;
b8c5462f 2688 case 'n': value = '\n'; break;
 2689 case 'r': value = '\r'; break;
 2690 case 't': value = '\t'; break;
 2691 case 'f': value = '\f'; break;
 2692 case 'b': value = '\b'; break;
 2693 case 'e': value = '\033'; break;
 2694 case 'a': value = '\007'; break;
/* \x{...} takes every hex digit up to the '}'; plain \x takes at most two */
a0ed51b3 2695 case 'x':
 2696 if (*PL_regcomp_parse == '{') {
 2697 e = strchr(PL_regcomp_parse++, '}');
 2698 if (!e)
 2699 FAIL("Missing right brace on \\x{}");
b8c5462f 2700 value = scan_hex(PL_regcomp_parse,
 2701 e - PL_regcomp_parse,
 2702 &numlen);
a0ed51b3 2703 PL_regcomp_parse = e + 1;
 2704 }
 2705 else {
 2706 value = scan_hex(PL_regcomp_parse, 2, &numlen);
 2707 PL_regcomp_parse += numlen;
 2708 }
 2709 break;
 2710 case 'c':
 2711 value = UCHARAT(PL_regcomp_parse++);
 2712 value = toCTRL(value);
 2713 break;
 2714 case '0': case '1': case '2': case '3': case '4':
 2715 case '5': case '6': case '7': case '8': case '9':
/* back up so the digit just read is part of the octal number */
 2716 value = scan_oct(--PL_regcomp_parse, 3, &numlen);
 2717 PL_regcomp_parse += numlen;
 2718 break;
 2719 }
 2720 }
/* Named class (emit pass only): append the matching "+utf8::IsXxx" line,
 * or "!utf8::IsXxx" for the negated ANYOF_N... variants.  An id not
 * listed in the switch appends nothing. */
b8c5462f 2721 if (!SIZE_ONLY && namedclass > -1) {
 2722 switch (namedclass) {
 2723 case ANYOF_ALNUM:
 2724 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
 2725 case ANYOF_NALNUM:
 2726 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
 2727 case ANYOF_ALNUMC:
 2728 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
 2729 case ANYOF_NALNUMC:
 2730 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
 2731 case ANYOF_ALPHA:
 2732 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
 2733 case ANYOF_NALPHA:
 2734 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
 2735 case ANYOF_ASCII:
 2736 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
 2737 case ANYOF_NASCII:
 2738 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
 2739 case ANYOF_CNTRL:
 2740 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
 2741 case ANYOF_NCNTRL:
 2742 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
 2743 case ANYOF_GRAPH:
 2744 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
 2745 case ANYOF_NGRAPH:
 2746 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
 2747 case ANYOF_DIGIT:
 2748 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
 2749 case ANYOF_NDIGIT:
 2750 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
 2751 case ANYOF_LOWER:
 2752 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
 2753 case ANYOF_NLOWER:
 2754 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
 2755 case ANYOF_PRINT:
 2756 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
 2757 case ANYOF_NPRINT:
 2758 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
 2759 case ANYOF_PUNCT:
 2760 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
 2761 case ANYOF_NPUNCT:
 2762 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
 2763 case ANYOF_SPACE:
 2764 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
 2765 case ANYOF_NSPACE:
 2766 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
 2767 case ANYOF_UPPER:
 2768 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
 2769 case ANYOF_NUPPER:
 2770 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
 2771 case ANYOF_XDIGIT:
 2772 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
 2773 case ANYOF_NXDIGIT:
 2774 Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
 2775 }
 2776 }
/* Ordinary character (and, in the sizing pass, also the character that
 * stood for a named class): close a pending range, open a new one, or
 * record a single character. */
 2777 else
 2778 if (range) {
a0ed51b3 2779 if (lastvalue > value)
 2780 FAIL("invalid [] range in regexp");
 2781 if (!SIZE_ONLY)
cea2e8a9 2782 Perl_sv_catpvf(aTHX_ listsv, "%04x\t%04x\n", lastvalue, value);
a0ed51b3 2783 lastvalue = value;
 2784 range = 0;
 2785 }
 2786 else {
 2787 lastvalue = value;
/* "x-" followed by something other than ']' starts a range; a '-'
 * directly before ']' is left to be read as a literal. */
 2788 if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
 2789 PL_regcomp_parse[1] != ']') {
 2790 PL_regcomp_parse++;
 2791 range = 1;
 2792 continue; /* do it next time */
 2793 }
 2794 if (!SIZE_ONLY)
cea2e8a9 2795 Perl_sv_catpvf(aTHX_ listsv, "%04x\n", value);
a0ed51b3 2796 }
 2797 }
a0ed51b3 2798
/* The node is emitted only now, after the whole class has been read. */
 2799 ret = reganode(ANYOFUTF8, 0);
 2800
 2801 if (!SIZE_ONLY) {
/* Build the swash from the list, drop our reference to the list text,
 * and keep the swash in a fresh slot of the regexp's data array. */
 2802 SV *rv = swash_init("utf8", "", listsv, 1, 0);
 2803 SvREFCNT_dec(listsv);
 2804 n = add_data(1,"s");
 2805 PL_regcomp_rx->data->data[n] = (void*)rv;
 2806 ARG1_SET(ret, flags);
 2807 ARG2_SET(ret, n);
 2808 }
 2809
 2810 return ret;
 2811}
2812
76e3520e 2813STATIC char*
cea2e8a9 2814S_nextchar(pTHX)
a0d0e21e 2815{
5c0ca799 2816 dTHR;
3280af22 2817 char* retval = PL_regcomp_parse++;
a0d0e21e 2818
4633a7c4 2819 for (;;) {
3280af22 2820 if (*PL_regcomp_parse == '(' && PL_regcomp_parse[1] == '?' &&
2821 PL_regcomp_parse[2] == '#') {
2822 while (*PL_regcomp_parse && *PL_regcomp_parse != ')')
2823 PL_regcomp_parse++;
2824 PL_regcomp_parse++;
4633a7c4 2825 continue;
2826 }
3280af22 2827 if (PL_regflags & PMf_EXTENDED) {
2828 if (isSPACE(*PL_regcomp_parse)) {
2829 PL_regcomp_parse++;
748a9306 2830 continue;
2831 }
3280af22 2832 else if (*PL_regcomp_parse == '#') {
2833 while (*PL_regcomp_parse && *PL_regcomp_parse != '\n')
2834 PL_regcomp_parse++;
2835 PL_regcomp_parse++;
748a9306 2836 continue;
2837 }
748a9306 2838 }
4633a7c4 2839 return retval;
a0d0e21e 2840 }
a687059c 2841}
2842
2843/*
c277df42 2844- reg_node - emit a node
a0d0e21e 2845*/
76e3520e 2846STATIC regnode * /* Location. */
cea2e8a9 2847S_reg_node(pTHX_ U8 op)
a687059c 2848{
5c0ca799 2849 dTHR;
c277df42 2850 register regnode *ret;
2851 register regnode *ptr;
a687059c 2852
3280af22 2853 ret = PL_regcode;
c277df42 2854 if (SIZE_ONLY) {
6b88bc9c 2855 SIZE_ALIGN(PL_regsize);
3280af22 2856 PL_regsize += 1;
a0d0e21e 2857 return(ret);
2858 }
a687059c 2859
c277df42 2860 NODE_ALIGN_FILL(ret);
a0d0e21e 2861 ptr = ret;
c277df42 2862 FILL_ADVANCE_NODE(ptr, op);
3280af22 2863 PL_regcode = ptr;
a687059c 2864
a0d0e21e 2865 return(ret);
a687059c 2866}
2867
2868/*
a0d0e21e 2869- reganode - emit a node with an argument
2870*/
76e3520e 2871STATIC regnode * /* Location. */
cea2e8a9 2872S_reganode(pTHX_ U8 op, U32 arg)
fe14fcc3 2873{
5c0ca799 2874 dTHR;
c277df42 2875 register regnode *ret;
2876 register regnode *ptr;
fe14fcc3 2877
3280af22 2878 ret = PL_regcode;
c277df42 2879 if (SIZE_ONLY) {
6b88bc9c 2880 SIZE_ALIGN(PL_regsize);
3280af22 2881 PL_regsize += 2;
a0d0e21e 2882 return(ret);
2883 }
fe14fcc3 2884
c277df42 2885 NODE_ALIGN_FILL(ret);
a0d0e21e 2886 ptr = ret;
c277df42 2887 FILL_ADVANCE_NODE_ARG(ptr, op, arg);
3280af22 2888 PL_regcode = ptr;
fe14fcc3 2889
a0d0e21e 2890 return(ret);
fe14fcc3 2891}
2892
2893/*
a0ed51b3 2894- regc - emit (if appropriate) a Unicode character
2895*/
2896STATIC void
cea2e8a9 2897S_reguni(pTHX_ UV uv, char* s, I32* lenp)
a0ed51b3 2898{
c485e607 2899 dTHR;
a0ed51b3 2900 if (SIZE_ONLY) {
dfe13c55 2901 U8 tmpbuf[10];
a0ed51b3 2902 *lenp = uv_to_utf8(tmpbuf, uv) - tmpbuf;
2903 }
2904 else
dfe13c55 2905 *lenp = uv_to_utf8((U8*)s, uv) - (U8*)s;
a0ed51b3 2906
2907}
2908
2909/*
a0d0e21e 2910- regc - emit (if appropriate) a byte of code
2911*/
76e3520e 2912STATIC void
cea2e8a9 2913S_regc(pTHX_ U8 b, char* s)
a687059c 2914{
5c0ca799 2915 dTHR;
c277df42 2916 if (!SIZE_ONLY)
2917 *s = b;
a687059c 2918}
2919
2920/*
a0d0e21e 2921- reginsert - insert an operator in front of already-emitted operand
2922*
2923* Means relocating the operand.
2924*/
76e3520e 2925STATIC void
cea2e8a9 2926S_reginsert(pTHX_ U8 op, regnode *opnd)
a687059c 2927{
5c0ca799 2928 dTHR;
c277df42 2929 register regnode *src;
2930 register regnode *dst;
2931 register regnode *place;
2932 register int offset = regarglen[(U8)op];
2933
22c35a8c 2934/* (PL_regkind[(U8)op] == CURLY ? EXTRA_STEP_2ARGS : 0); */
c277df42 2935
2936 if (SIZE_ONLY) {
3280af22 2937 PL_regsize += NODE_STEP_REGNODE + offset;
a0d0e21e 2938 return;
2939 }
a687059c 2940
3280af22 2941 src = PL_regcode;
2942 PL_regcode += NODE_STEP_REGNODE + offset;
2943 dst = PL_regcode;
a0d0e21e 2944 while (src > opnd)
c277df42 2945 StructCopy(--src, --dst, regnode);
a0d0e21e 2946
2947 place = opnd; /* Op node, where operand used to be. */
c277df42 2948 src = NEXTOPER(place);
2949 FILL_ADVANCE_NODE(place, op);
2950 Zero(src, offset, regnode);
a687059c 2951}
2952
2953/*
c277df42 2954- regtail - set the next-pointer at the end of a node chain of p to val.
a0d0e21e 2955*/
76e3520e 2956STATIC void
cea2e8a9 2957S_regtail(pTHX_ regnode *p, regnode *val)
a687059c 2958{
5c0ca799 2959 dTHR;
c277df42 2960 register regnode *scan;
2961 register regnode *temp;
a0d0e21e 2962 register I32 offset;
2963
c277df42 2964 if (SIZE_ONLY)
a0d0e21e 2965 return;
2966
2967 /* Find last node. */
2968 scan = p;
2969 for (;;) {
2970 temp = regnext(scan);
2971 if (temp == NULL)
2972 break;
2973 scan = temp;
2974 }
a687059c 2975
c277df42 2976 if (reg_off_by_arg[OP(scan)]) {
2977 ARG_SET(scan, val - scan);
a0ed51b3 2978 }
2979 else {
c277df42 2980 NEXT_OFF(scan) = val - scan;
2981 }
a687059c 2982}
2983
2984/*
a0d0e21e 2985- regoptail - regtail on operand of first argument; nop if operandless
2986*/
76e3520e 2987STATIC void
cea2e8a9 2988S_regoptail(pTHX_ regnode *p, regnode *val)
a687059c 2989{
5c0ca799 2990 dTHR;
a0d0e21e 2991 /* "Operandless" and "op != BRANCH" are synonymous in practice. */
c277df42 2992 if (p == NULL || SIZE_ONLY)
2993 return;
22c35a8c 2994 if (PL_regkind[(U8)OP(p)] == BRANCH) {
c277df42 2995 regtail(NEXTOPER(p), val);
a0ed51b3 2996 }
22c35a8c 2997 else if ( PL_regkind[(U8)OP(p)] == BRANCHJ) {
c277df42 2998 regtail(NEXTOPER(NEXTOPER(p)), val);
a0ed51b3 2999 }
3000 else
a0d0e21e 3001 return;
a687059c 3002}
3003
3004/*
3005 - regcurly - a little FSA that accepts {\d+,?\d*}
3006 */
79072805 3007STATIC I32
cea2e8a9 3008S_regcurly(pTHX_ register char *s)
a687059c 3009{
3010 if (*s++ != '{')
3011 return FALSE;
f0fcb552 3012 if (!isDIGIT(*s))
a687059c 3013 return FALSE;
f0fcb552 3014 while (isDIGIT(*s))
a687059c 3015 s++;
3016 if (*s == ',')
3017 s++;
f0fcb552 3018 while (isDIGIT(*s))
a687059c 3019 s++;
3020 if (*s != '}')
3021 return FALSE;
3022 return TRUE;
3023}
3024
a687059c 3025
76e3520e 3026STATIC regnode *
cea2e8a9 3027S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
c277df42 3028{
35ff7856 3029#ifdef DEBUGGING
f248d071 3030 register U8 op = EXACT; /* Arbitrary non-END op. */
c277df42 3031 register regnode *next, *onode;
3032
3033 while (op != END && (!last || node < last)) {
3034 /* While that wasn't END last time... */
3035
3036 NODE_ALIGN(node);
3037 op = OP(node);
3038 if (op == CLOSE)
3039 l--;
3040 next = regnext(node);
3041 /* Where, what. */
3042 if (OP(node) == OPTIMIZED)
3043 goto after_print;
3044 regprop(sv, node);
54dc92de 3045 PerlIO_printf(Perl_debug_log, "%4d:%*s%s", node - start,
c277df42 3046 2*l + 1, "", SvPVX(sv));
3047 if (next == NULL) /* Next ptr. */
3048 PerlIO_printf(Perl_debug_log, "(0)");
3049 else
3050 PerlIO_printf(Perl_debug_log, "(%d)", next - start);
3051 (void)PerlIO_putc(Perl_debug_log, '\n');
3052 after_print:
22c35a8c 3053 if (PL_regkind[(U8)op] == BRANCHJ) {
c277df42 3054 register regnode *nnode = (OP(next) == LONGJMP
3055 ? regnext(next)
3056 : next);
3057 if (last && nnode > last)
3058 nnode = last;
3059 node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
a0ed51b3 3060 }
22c35a8c 3061 else if (PL_regkind[(U8)op] == BRANCH) {
c277df42 3062 node = dumpuntil(start, NEXTOPER(node), next, sv, l + 1);
a0ed51b3 3063 }
3064 else if ( op == CURLY) { /* `next' might be very big: optimizer */
c277df42 3065 node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
3066 NEXTOPER(node) + EXTRA_STEP_2ARGS + 1, sv, l + 1);
a0ed51b3 3067 }
22c35a8c 3068 else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
c277df42 3069 node = dumpuntil(start, NEXTOPER(node) + EXTRA_STEP_2ARGS,
3070 next, sv, l + 1);
a0ed51b3 3071 }
3072 else if ( op == PLUS || op == STAR) {
c277df42 3073 node = dumpuntil(start, NEXTOPER(node), NEXTOPER(node) + 1, sv, l + 1);
a0ed51b3 3074 }
3075 else if (op == ANYOF) {
c277df42 3076 node = NEXTOPER(node);
3077 node += ANY_SKIP;
a0ed51b3 3078 }
22c35a8c 3079 else if (PL_regkind[(U8)op] == EXACT) {
c277df42 3080 /* Literal string, where present. */
3081 node += ((*OPERAND(node)) + 2 + sizeof(regnode) - 1) / sizeof(regnode);
3082 node = NEXTOPER(node);
a0ed51b3 3083 }
3084 else {
c277df42 3085 node = NEXTOPER(node);
3086 node += regarglen[(U8)op];
3087 }
3088 if (op == CURLYX || op == OPEN)
3089 l++;
3090 else if (op == WHILEM)
3091 l--;
3092 }
17c3b450 3093#endif /* DEBUGGING */
c277df42 3094 return node;
3095}
3096
a687059c 3097/*
fd181c75 3098 - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
a687059c 3099 */
3100void
864dbfa3 3101Perl_regdump(pTHX_ regexp *r)
a687059c 3102{
35ff7856 3103#ifdef DEBUGGING
5c0ca799 3104 dTHR;
46fc3d4c 3105 SV *sv = sv_newmortal();
a687059c 3106
c277df42 3107 (void)dumpuntil(r->program, r->program + 1, NULL, sv, 0);
a0d0e21e 3108
3109 /* Header fields of interest. */
c277df42 3110 if (r->anchored_substr)
3111 PerlIO_printf(Perl_debug_log, "anchored `%s%s%s'%s at %d ",
3280af22 3112 PL_colors[0],
c277df42 3113 SvPVX(r->anchored_substr),
3280af22 3114 PL_colors[1],
c277df42 3115 SvTAIL(r->anchored_substr) ? "$" : "",
3116 r->anchored_offset);
3117 if (r->float_substr)
3118 PerlIO_printf(Perl_debug_log, "floating `%s%s%s'%s at %d..%u ",
3280af22 3119 PL_colors[0],
c277df42 3120 SvPVX(r->float_substr),
3280af22 3121 PL_colors[1],
c277df42 3122 SvTAIL(r->float_substr) ? "$" : "",
3123 r->float_min_offset, r->float_max_offset);
3124 if (r->check_substr)
3125 PerlIO_printf(Perl_debug_log,
3126 r->check_substr == r->float_substr
3127 ? "(checking floating" : "(checking anchored");
3128 if (r->reganch & ROPT_NOSCAN)
3129 PerlIO_printf(Perl_debug_log, " noscan");
3130 if (r->reganch & ROPT_CHECK_ALL)
3131 PerlIO_printf(Perl_debug_log, " isall");
3132 if (r->check_substr)
3133 PerlIO_printf(Perl_debug_log, ") ");
3134
46fc3d4c 3135 if (r->regstclass) {
3136 regprop(sv, r->regstclass);
3137 PerlIO_printf(Perl_debug_log, "stclass `%s' ", SvPVX(sv));
3138 }
774d564b 3139 if (r->reganch & ROPT_ANCH) {
3140 PerlIO_printf(Perl_debug_log, "anchored");
3141 if (r->reganch & ROPT_ANCH_BOL)
3142 PerlIO_printf(Perl_debug_log, "(BOL)");
c277df42 3143 if (r->reganch & ROPT_ANCH_MBOL)
3144 PerlIO_printf(Perl_debug_log, "(MBOL)");
cad2e5aa 3145 if (r->reganch & ROPT_ANCH_SBOL)
3146 PerlIO_printf(Perl_debug_log, "(SBOL)");
774d564b 3147 if (r->reganch & ROPT_ANCH_GPOS)
3148 PerlIO_printf(Perl_debug_log, "(GPOS)");
3149 PerlIO_putc(Perl_debug_log, ' ');
3150 }
c277df42 3151 if (r->reganch & ROPT_GPOS_SEEN)
3152 PerlIO_printf(Perl_debug_log, "GPOS ");
a0d0e21e 3153 if (r->reganch & ROPT_SKIP)
760ac839 3154 PerlIO_printf(Perl_debug_log, "plus ");
a0d0e21e 3155 if (r->reganch & ROPT_IMPLICIT)
760ac839 3156 PerlIO_printf(Perl_debug_log, "implicit ");
760ac839 3157 PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen);
ce862d02 3158 if (r->reganch & ROPT_EVAL_SEEN)
3159 PerlIO_printf(Perl_debug_log, "with eval ");
760ac839 3160 PerlIO_printf(Perl_debug_log, "\n");
17c3b450 3161#endif /* DEBUGGING */
a687059c 3162}
3163
3164/*
a0d0e21e 3165- regprop - printable representation of opcode
3166*/
46fc3d4c 3167void
864dbfa3 3168Perl_regprop(pTHX_ SV *sv, regnode *o)
a687059c 3169{
35ff7856 3170#ifdef DEBUGGING
5c0ca799 3171 dTHR;
9b155405 3172 register int k;
a0d0e21e 3173
54dc92de 3174 sv_setpvn(sv, "", 0);
9b155405 3175 if (OP(o) >= reg_num) /* regnode.type is unsigned */
a0d0e21e 3176 FAIL("corrupted regexp opcode");
9b155405 3177 sv_catpv(sv, (char*)reg_name[OP(o)]); /* Take off const! */
3178
3179 k = PL_regkind[(U8)OP(o)];
3180
3181 if (k == EXACT)
cea2e8a9 3182 Perl_sv_catpvf(aTHX_ sv, " <%s%s%s>", PL_colors[0], OPERAND(o) + 1, PL_colors[1]);
9b155405 3183 else if (k == CURLY) {
3184 if (OP(o) == CURLYM || OP(o) == CURLYN)
cea2e8a9 3185 Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
3186 Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
a0d0e21e 3187 }
9b155405 3188 else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
cea2e8a9 3189 Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */
9b155405 3190 else if (k == LOGICAL)
cea2e8a9 3191 Perl_sv_catpvf(aTHX_ sv, "[%d]", ARG(o)); /* 2: embedded, otherwise 1 */
9b155405 3192 else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
cea2e8a9 3193 Perl_sv_catpvf(aTHX_ sv, "[-%d]", o->flags);
17c3b450 3194#endif /* DEBUGGING */
35ff7856 3195}
a687059c 3196
cad2e5aa 3197SV *
3198Perl_re_intuit_string(pTHX_ regexp *prog)
3199{ /* Assume that RE_INTUIT is set */
3200 DEBUG_r(
3201 { STRLEN n_a;
3202 char *s = SvPV(prog->check_substr,n_a);
3203
3204 if (!PL_colorset) reginitcolors();
3205 PerlIO_printf(Perl_debug_log,
3206 "%sUsing REx substr:%s `%s%.60s%s%s'\n",
3207 PL_colors[4],PL_colors[5],PL_colors[0],
3208 s,
3209 PL_colors[1],
3210 (strlen(s) > 60 ? "..." : ""));
3211 } );
3212
3213 return prog->check_substr;
3214}
3215
2b69d0c2 3216void
864dbfa3 3217Perl_pregfree(pTHX_ struct regexp *r)
a687059c 3218{
5c0ca799 3219 dTHR;
cad2e5aa 3220 DEBUG_r(PerlIO_printf(Perl_debug_log,
3221 "%sFreeing REx:%s `%s%.60s%s%s'\n",
3222 PL_colors[4],PL_colors[5],PL_colors[0],
3223 r->precomp,
3224 PL_colors[1],
3225 (strlen(r->precomp) > 60 ? "..." : "")));
3226
3227
c277df42 3228 if (!r || (--r->refcnt > 0))
a0d0e21e 3229 return;
c277df42 3230 if (r->precomp)
a0d0e21e 3231 Safefree(r->precomp);
cf93c79d 3232 if (RX_MATCH_COPIED(r))
3233 Safefree(r->subbeg);
a193d654 3234 if (r->substrs) {
3235 if (r->anchored_substr)
3236 SvREFCNT_dec(r->anchored_substr);
3237 if (r->float_substr)
3238 SvREFCNT_dec(r->float_substr);
2779dcf1 3239 Safefree(r->substrs);
a193d654 3240 }
c277df42 3241 if (r->data) {
3242 int n = r->data->count;
dfad63ad 3243 AV* new_comppad = NULL;
3244 AV* old_comppad;
3245 SV** old_curpad;
3246
c277df42 3247 while (--n >= 0) {
3248 switch (r->data->what[n]) {
3249 case 's':
3250 SvREFCNT_dec((SV*)r->data->data[n]);
3251 break;
dfad63ad 3252 case 'p':
3253 new_comppad = (AV*)r->data->data[n];
3254 break;
c277df42 3255 case 'o':
dfad63ad 3256 if (new_comppad == NULL)
cea2e8a9 3257 Perl_croak(aTHX_ "panic: pregfree comppad");
dfad63ad 3258 old_comppad = PL_comppad;
3259 old_curpad = PL_curpad;
3260 PL_comppad = new_comppad;
3261 PL_curpad = AvARRAY(new_comppad);
c277df42 3262 op_free((OP_4tree*)r->data->data[n]);
dfad63ad 3263 PL_comppad = old_comppad;
3264 PL_curpad = old_curpad;
3265 SvREFCNT_dec((SV*)new_comppad);
3266 new_comppad = NULL;
c277df42 3267 break;
3268 case 'n':
3269 break;
3270 default:
3271 FAIL2("panic: regfree data code '%c'", r->data->what[n]);
3272 }
3273 }
3274 Safefree(r->data->what);
3275 Safefree(r->data);
a0d0e21e 3276 }
3277 Safefree(r->startp);
3278 Safefree(r->endp);
3279 Safefree(r);
a687059c 3280}
c277df42 3281
3282/*
3283 - regnext - dig the "next" pointer out of a node
3284 *
3285 * [Note, when REGALIGN is defined there are two places in regmatch()
3286 * that bypass this code for speed.]
3287 */
3288regnode *
864dbfa3 3289Perl_regnext(pTHX_ register regnode *p)
c277df42 3290{
5c0ca799 3291 dTHR;
c277df42 3292 register I32 offset;
3293
3280af22 3294 if (p == &PL_regdummy)
c277df42 3295 return(NULL);
3296
3297 offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
3298 if (offset == 0)
3299 return(NULL);
3300
c277df42 3301 return(p+offset);
c277df42 3302}
3303
01f988be 3304STATIC void
cea2e8a9 3305S_re_croak2(pTHX_ const char* pat1,const char* pat2,...)
c277df42 3306{
3307 va_list args;
3308 STRLEN l1 = strlen(pat1);
3309 STRLEN l2 = strlen(pat2);
3310 char buf[512];
06bf62c7 3311 SV *msv;
c277df42 3312 char *message;
3313
3314 if (l1 > 510)
3315 l1 = 510;
3316 if (l1 + l2 > 510)
3317 l2 = 510 - l1;
3318 Copy(pat1, buf, l1 , char);
3319 Copy(pat2, buf + l1, l2 , char);
3b818b81 3320 buf[l1 + l2] = '\n';
3321 buf[l1 + l2 + 1] = '\0';
8736538c 3322#ifdef I_STDARG
3323 /* ANSI variant takes additional second argument */
c277df42 3324 va_start(args, pat2);
8736538c 3325#else
3326 va_start(args);
3327#endif
06bf62c7 3328 msv = mess(buf, &args);
c277df42 3329 va_end(args);
06bf62c7 3330 message = SvPV(msv,l1);
c277df42 3331 if (l1 > 512)
3332 l1 = 512;
3333 Copy(message, buf, l1 , char);
3334 buf[l1] = '\0'; /* Overwrite \n */
cea2e8a9 3335 Perl_croak(aTHX_ "%s", buf);
c277df42 3336}
a0ed51b3 3337
3338/* XXX Here's a total kludge. But we need to re-enter for swash routines. */
3339
/*
 * save_re_context - pass each regex-engine global (the PL_reg* variables
 * used by regcomp.c and regexec.c, plus PL_bostr and PL_curcop) to the
 * SAVE* macro matching its type, so that the engine can be re-entered.
 * Presumably the saved values are restored when the enclosing save scope
 * is left -- confirm against the SAVE* macro definitions.
 *
 * PL_reg_start_tmp and PL_reg_start_tmpl are additionally reset to 0 once
 * they have been saved.
 */
3340void
864dbfa3 3341Perl_save_re_context(pTHX)
c485e607 3342{
3343 dTHR;
a0ed51b3 3344 SAVEPPTR(PL_bostr);
3345 SAVEPPTR(PL_regprecomp); /* uncompiled string. */
3346 SAVEI32(PL_regnpar); /* () count. */
3347 SAVEI32(PL_regsize); /* Code size. */
3348 SAVEI16(PL_regflags); /* are we folding, multilining? */
3349 SAVEPPTR(PL_reginput); /* String-input pointer. */
3350 SAVEPPTR(PL_regbol); /* Beginning of input, for ^ check. */
3351 SAVEPPTR(PL_regeol); /* End of input, for $ check. */
3352 SAVESPTR(PL_regstartp); /* Pointer to startp array. */
3353 SAVESPTR(PL_regendp); /* Ditto for endp. */
3354 SAVESPTR(PL_reglastparen); /* Similarly for lastparen. */
3355 SAVEPPTR(PL_regtill); /* How far we are required to go. */
3356 SAVEI32(PL_regprev); /* char before regbol, \n if none */
3357 SAVESPTR(PL_reg_start_tmp); /* from regexec.c */
3358 PL_reg_start_tmp = 0;
/* NOTE(review): PL_reg_start_tmp was set to 0 on the line above, so the
 * value handed to SAVEFREEPV here is a null pointer -- confirm that this
 * is intended and that a buffer allocated later is not leaked. */
3359 SAVEFREEPV(PL_reg_start_tmp);
3360 SAVEI32(PL_reg_start_tmpl); /* from regexec.c */
3361 PL_reg_start_tmpl = 0;
3362 SAVESPTR(PL_regdata);
3363 SAVEI32(PL_reg_flags); /* from regexec.c */
3364 SAVEI32(PL_reg_eval_set); /* from regexec.c */
3365 SAVEI32(PL_regnarrate); /* from regexec.c */
3366 SAVESPTR(PL_regprogram); /* from regexec.c */
3367 SAVEINT(PL_regindent); /* from regexec.c */
3368 SAVESPTR(PL_regcc); /* from regexec.c */
3369 SAVESPTR(PL_curcop);
3370 SAVESPTR(PL_regcomp_rx); /* from regcomp.c */
3371 SAVEI32(PL_regseen); /* from regcomp.c */
3372 SAVEI32(PL_regsawback); /* Did we see \1, ...? */
3373 SAVEI32(PL_regnaughty); /* How bad is this pattern? */
3374 SAVESPTR(PL_regcode); /* Code-emit pointer; &regdummy = don't */
3375 SAVEPPTR(PL_regxend); /* End of input for compile */
3376 SAVEPPTR(PL_regcomp_parse); /* Input-scan pointer. */
54b6e2fa 3377 SAVESPTR(PL_reg_call_cc); /* from regexec.c */
3378 SAVESPTR(PL_reg_re); /* from regexec.c */
3379 SAVEPPTR(PL_reg_ganch); /* from regexec.c */
3380 SAVESPTR(PL_reg_sv); /* from regexec.c */
3381 SAVESPTR(PL_reg_magic); /* from regexec.c */
3382 SAVEI32(PL_reg_oldpos); /* from regexec.c */
3383 SAVESPTR(PL_reg_oldcurpm); /* from regexec.c */
3384 SAVESPTR(PL_reg_curpm); /* from regexec.c */
/* PL_reg_starttry is saved only in DEBUGGING builds. */
3385#ifdef DEBUGGING
3386 SAVEPPTR(PL_reg_starttry); /* from regexec.c */
3387#endif
a0ed51b3 3388}
51371543 3389
3390#ifdef PERL_OBJECT
3391#define NO_XSLOCKS
3392#include "XSUB.h"
3393#undef this
3394#define this pPerl
3395#endif
3396
3397static void
3398clear_re(pTHXo_ void *r)
3399{
3400 ReREFCNT_dec((regexp *)r);
3401}
3402