* "A fair jaw-cracker dwarf-language must be." --Samwise Gamgee
*/
+/* This file contains functions for compiling a regular expression. See
+ * also regexec.c which funnily enough, contains functions for executing
+ * a regular expression.
+ */
+
/* NOTE: this is derived from Henry Spencer's regexp code, and should not
* confused with the original package (see point 3 below). Thanks, Henry!
*/
*
**** Alterations to Henry's code are...
****
- **** Copyright (c) 1991-2002, Larry Wall
+ **** Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+ **** 2000, 2001, 2002, 2003, by Larry Wall and others
****
**** You may distribute under the terms of either the GNU General Public
**** License or the Artistic License, as specified in the README file.
#endif /* op */
#ifdef MSDOS
-# if defined(BUGGY_MSC6)
+# if defined(BUGGY_MSC6)
/* MSC 6.00A breaks on op/regexp.t test 85 unless we turn this off */
- # pragma optimize("a",off)
+# pragma optimize("a",off)
/* But MSC 6.00A is happy with 'w', for aliases only across function calls*/
- # pragma optimize("w",on )
-# endif /* BUGGY_MSC6 */
+# pragma optimize("w",on )
+# endif /* BUGGY_MSC6 */
#endif /* MSDOS */
#ifndef STATIC
STRLEN old_l = CHR_SVLEN(*data->longest);
if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
- sv_setsv(*data->longest, data->last_found);
+ SvSetMagicSV(*data->longest, data->last_found);
if (*data->longest == data->longest_fixed) {
data->offset_fixed = l ? data->last_start_min : data->pos_min;
if (data->flags & SF_BEFORE_EOL)
}
}
SvCUR_set(data->last_found, 0);
+ {
+ SV * sv = data->last_found;
+ MAGIC *mg =
+ SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
+ if (mg && mg->mg_len > 0)
+ mg->mg_len = 0;
+ }
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
}
if (!(and_with->flags & ANYOF_EOS))
cl->flags &= ~ANYOF_EOS;
- if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE) {
+ if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
+ !(and_with->flags & ANYOF_INVERT)) {
cl->flags &= ~ANYOF_UNICODE_ALL;
cl->flags |= ANYOF_UNICODE;
ARG_SET(cl, ARG(and_with));
}
- if (!(and_with->flags & ANYOF_UNICODE_ALL))
+ if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
+ !(and_with->flags & ANYOF_INVERT))
cl->flags &= ~ANYOF_UNICODE_ALL;
- if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)))
+ if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) &&
+ !(and_with->flags & ANYOF_INVERT))
cl->flags &= ~ANYOF_UNICODE;
}
? I32_MAX : data->pos_min + data->pos_delta;
}
sv_catpvn(data->last_found, STRING(scan), STR_LEN(scan));
+ {
+ SV * sv = data->last_found;
+ MAGIC *mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+ mg_find(sv, PERL_MAGIC_utf8) : NULL;
+ if (mg && mg->mg_len >= 0)
+ mg->mg_len += utf8_length((U8*)STRING(scan),
+ (U8*)STRING(scan)+STR_LEN(scan));
+ }
if (UTF)
SvUTF8_on(data->last_found);
data->last_end = data->pos_min + l;
if ( OP(oscan) == CURLYX && data
&& !(data->flags & SF_HAS_PAR)
&& !(data->flags & SF_HAS_EVAL)
- && !deltanext ) {
+ && !deltanext /* atom is fixed width */
+ && minnext != 0 /* CURLYM can't handle zero width */
+ ) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
SvCUR_set(data->last_found,
SvCUR(data->last_found) - l);
sv_catsv(data->last_found, last_str);
+ {
+ SV * sv = data->last_found;
+ MAGIC *mg =
+ SvUTF8(sv) && SvMAGICAL(sv) ?
+ mg_find(sv, PERL_MAGIC_utf8) : NULL;
+ if (mg && mg->mg_len >= 0)
+ mg->mg_len += CHR_SVLEN(last_str);
+ }
data->last_end += l * (mincount - 1);
}
} else {
r->prelen = xend - exp;
r->precomp = savepvn(RExC_precomp, r->prelen);
r->subbeg = NULL;
+#ifdef PERL_COPY_ON_WRITE
+ r->saved_copy = Nullsv;
+#endif
r->reganch = pm->op_pmflags & PMf_COMPILETIME;
r->nparens = RExC_npar - 1; /* set early to validate backrefs */
}
else { /* First pass */
if (PL_reginterp_cnt < ++RExC_seen_evals
- && PL_curcop != &PL_compiling)
+ && IN_PERL_RUNTIME)
/* No compiled RE interpolated, has runtime
components ===> unsafe. */
FAIL("Eval-group not allowed at runtime, use re 'eval'");
if (PL_tainting && PL_tainted)
FAIL("Eval-group in insecure regular expression");
+ if (IN_PERL_COMPILETIME)
+ PL_cv_has_eval = 1;
}
-
+
nextchar(pRExC_state);
if (logical) {
ret = reg_node(pRExC_state, LOGICAL);
if (paren == '>')
node = SUSPEND, flag = 0;
reginsert(pRExC_state, node,ret);
- Set_Node_Offset(ret, oregcomp_parse);
- Set_Node_Length(ret, RExC_parse - oregcomp_parse + 2);
+ Set_Node_Cur_Length(ret);
+ Set_Node_Offset(ret, parse_start + 1);
ret->flags = flag;
regtail(pRExC_state, ret, reg_node(pRExC_state, TAIL));
}
{
register regnode *ret = 0;
I32 flags;
- char *parse_start = 0;
+ char *parse_start = RExC_parse;
*flagp = WORST; /* Tentatively. */
default:
/* Do not generate `unrecognized' warnings here, we fall
back into the quick-grab loop below */
+ parse_start--;
goto defchar;
}
break;
ender = grok_hex(p + 1, &numlen, &flags, NULL);
if (ender > 0xff)
RExC_utf8 = 1;
- /* numlen is generous */
- if (numlen + len >= 127) {
- p--;
- goto loopdone;
- }
p = e + 1;
}
}
}
if (len > 0)
*flagp |= HASWIDTH;
- if (len == 1)
+ if (len == 1 && UNI_IS_INVARIANT(ender))
*flagp |= SIMPLE;
if (!SIZE_ONLY)
STR_LEN(ret) = len;
}
RExC_parse = e + 1;
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
- continue;
+ namedclass = ANYOF_MAX; /* no official name, but it's named */
+ break;
case 'n': value = '\n'; break;
case 'r': value = '\r'; break;
case 't': value = '\t'; break;
}
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n");
break;
+ case ANYOF_MAX:
+ /* this is to handle \p and \P */
+ break;
default:
vFAIL("Invalid [::] class");
break;
for (;;) {
if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
RExC_parse[2] == '#') {
- while (*RExC_parse && *RExC_parse != ')')
+ while (*RExC_parse != ')') {
+ if (RExC_parse == RExC_end)
+ FAIL("Sequence (?#... not terminated");
RExC_parse++;
+ }
RExC_parse++;
continue;
}
continue;
}
else if (*RExC_parse == '#') {
- while (*RExC_parse && *RExC_parse != '\n')
- RExC_parse++;
- RExC_parse++;
+ while (RExC_parse < RExC_end)
+ if (*RExC_parse++ == '\n') break;
continue;
}
}
RExC_parse - RExC_start,
RExC_offsets[0]));
Set_Node_Offset(place, RExC_parse);
+ Set_Node_Length(place, 1);
}
src = NEXTOPER(place);
FILL_ADVANCE_NODE(place, op);
else if (k == ANYOF) {
int i, rangestart = -1;
U8 flags = ANYOF_FLAGS(o);
- const char * const anyofs[] = { /* Should be syncronized with
+ const char * const anyofs[] = { /* Should be synchronized with
* ANYOF_ #xdefines in regcomp.h */
"\\w",
"\\W",
Safefree(r->precomp);
if (r->offsets) /* 20010421 MJD */
Safefree(r->offsets);
- if (RX_MATCH_COPIED(r))
- Safefree(r->subbeg);
+ RX_MATCH_COPY_FREE(r);
+#ifdef PERL_COPY_ON_WRITE
+ if (r->saved_copy)
+ SvREFCNT_dec(r->saved_copy);
+#endif
if (r->substrs) {
if (r->anchored_substr)
SvREFCNT_dec(r->anchored_substr);
if (l1 > 512)
l1 = 512;
Copy(message, buf, l1 , char);
- buf[l1] = '\0'; /* Overwrite \n */
+ buf[l1-1] = '\0'; /* Overwrite \n */
Perl_croak(aTHX_ "%s", buf);
}
void
Perl_save_re_context(pTHX)
{
-#if 0
- SAVEPPTR(RExC_precomp); /* uncompiled string. */
- SAVEI32(RExC_npar); /* () count. */
- SAVEI32(RExC_size); /* Code size. */
- SAVEI32(RExC_flags); /* are we folding, multilining? */
- SAVEVPTR(RExC_rx); /* from regcomp.c */
- SAVEI32(RExC_seen); /* from regcomp.c */
- SAVEI32(RExC_sawback); /* Did we see \1, ...? */
- SAVEI32(RExC_naughty); /* How bad is this pattern? */
- SAVEVPTR(RExC_emit); /* Code-emit pointer; ®dummy = don't */
- SAVEPPTR(RExC_end); /* End of input for compile */
- SAVEPPTR(RExC_parse); /* Input-scan pointer. */
-#endif
-
SAVEI32(PL_reg_flags); /* from regexec.c */
SAVEPPTR(PL_bostr);
SAVEPPTR(PL_reginput); /* String-input pointer. */
SAVEVPTR(PL_regstartp); /* Pointer to startp array. */
SAVEVPTR(PL_regendp); /* Ditto for endp. */
SAVEVPTR(PL_reglastparen); /* Similarly for lastparen. */
+ SAVEVPTR(PL_reglastcloseparen); /* Similarly for lastcloseparen. */
SAVEPPTR(PL_regtill); /* How far we are required to go. */
SAVEGENERICPV(PL_reg_start_tmp); /* from regexec.c */
PL_reg_start_tmp = 0;
SAVEI32(PL_reg_oldpos); /* from regexec.c */
SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */
SAVEVPTR(PL_reg_curpm); /* from regexec.c */
+ SAVEPPTR(PL_reg_oldsaved); /* old saved substr during match */
+ PL_reg_oldsaved = Nullch;
+ SAVEI32(PL_reg_oldsavedlen); /* old length of saved substr during match */
+ PL_reg_oldsavedlen = 0;
+#ifdef PERL_COPY_ON_WRITE
+ SAVESPTR(PL_nrs);
+ PL_nrs = Nullsv;
+#endif
+ SAVEI32(PL_reg_maxiter); /* max wait until caching pos */
+ PL_reg_maxiter = 0;
+ SAVEI32(PL_reg_leftiter); /* wait until caching pos */
+ PL_reg_leftiter = 0;
+ SAVEGENERICPV(PL_reg_poscache); /* cache of pos of WHILEM */
+ PL_reg_poscache = Nullch;
+ SAVEI32(PL_reg_poscache_size); /* size of pos cache of WHILEM */
+ PL_reg_poscache_size = 0;
+ SAVEPPTR(PL_regprecomp); /* uncompiled string. */
SAVEI32(PL_regnpar); /* () count. */
SAVEI32(PL_regsize); /* from regexec.c */