(int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
-#define vWARN(loc,m) STMT_START { \
+#define ckWARNreg(loc,m) STMT_START { \
const IV offset = loc - RExC_precomp; \
- Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
- m, (int)offset, RExC_precomp, RExC_precomp + offset); \
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
+ (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
-#define vWARNdep(loc,m) STMT_START { \
+#define ckWARNregdep(loc,m) STMT_START { \
const IV offset = loc - RExC_precomp; \
- Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
- "%s" REPORT_LOCATION, \
- m, (int)offset, RExC_precomp, RExC_precomp + offset); \
+ Perl_ck_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
+ m REPORT_LOCATION, \
+ (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
-
-#define vWARN2(loc, m, a1) STMT_START { \
+#define ckWARN2reg(loc, m, a1) STMT_START { \
const IV offset = loc - RExC_precomp; \
- Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
a1, (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
+#define ckWARN3reg(loc, m, a1, a2) STMT_START { \
+ const IV offset = loc - RExC_precomp; \
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
+ a1, a2, (int)offset, RExC_precomp, RExC_precomp + offset); \
+} STMT_END
+
#define vWARN4(loc, m, a1, a2, a3) STMT_START { \
const IV offset = loc - RExC_precomp; \
Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
} STMT_END
+#define ckWARN4reg(loc, m, a1, a2, a3) STMT_START { \
+ const IV offset = loc - RExC_precomp; \
+ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
+ a1, a2, a3, (int)offset, RExC_precomp, RExC_precomp + offset); \
+} STMT_END
+
#define vWARN5(loc, m, a1, a2, a3, a4) STMT_START { \
const IV offset = loc - RExC_precomp; \
Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
(next_is_eval || !(mincount == 0 && maxcount == 1))
&& (minnext == 0) && (deltanext == 0)
&& data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
- && maxcount <= REG_INFTY/3 /* Complement check for big count */
- && ckWARN(WARN_REGEXP))
+ && maxcount <= REG_INFTY/3) /* Complement check for big count */
{
- vWARN(RExC_parse,
- "Quantifier unexpected on zero-length expression");
+ ckWARNreg(RExC_parse,
+ "Quantifier unexpected on zero-length expression");
}
min += minnext * mincount;
data->whilem_c = data_fake.whilem_c;
}
if (f & SCF_DO_STCLASS_AND) {
- const int was = (data->start_class->flags & ANYOF_EOS);
-
- cl_and(data->start_class, &intrnl);
- if (was)
- data->start_class->flags |= ANYOF_EOS;
+ if (flags & SCF_DO_STCLASS_OR) {
+ /* OR before, AND after: ideally we would recurse with
+ * data_fake to get the AND applied by study of the
+ * remainder of the pattern, and then derecurse;
+ * *** HACK *** for now just treat as "no information".
+ * See [perl #56690].
+ */
+ cl_init(pRExC_state, data->start_class);
+ } else {
+ /* AND before and after: combine and continue */
+ const int was = (data->start_class->flags & ANYOF_EOS);
+
+ cl_and(data->start_class, &intrnl);
+ if (was)
+ data->start_class->flags |= ANYOF_EOS;
+ }
}
}
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
break;
case KEEPCOPY_PAT_MOD: /* 'p' */
if (flagsp == &negflags) {
- if (SIZE_ONLY && ckWARN(WARN_REGEXP))
- vWARN(RExC_parse + 1,"Useless use of (?-p)");
+ if (SIZE_ONLY)
+ ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
} else {
*flagsp |= RXf_PMf_KEEPCOPY;
}
/* Pick up the branches, linking them together. */
parse_start = RExC_parse; /* MJD */
br = regbranch(pRExC_state, &flags, 1,depth+1);
+
+ if (freeze_paren) {
+ if (RExC_npar > after_freeze)
+ after_freeze = RExC_npar;
+ RExC_npar = freeze_paren;
+ }
+
/* branch_len = (paren != 0); */
if (br == NULL)
goto do_curly;
}
nest_check:
- if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3 && ckWARN(WARN_REGEXP)) {
- vWARN3(RExC_parse,
- "%.*s matches null string many times",
- (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
- origparse);
+ if (!SIZE_ONLY && !(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
+ ckWARN3reg(RExC_parse,
+ "%.*s matches null string many times",
+ (int)(RExC_parse >= origparse ? RExC_parse - origparse : 0),
+ origparse);
}
if (RExC_parse < RExC_end && *RExC_parse == '?') {
/* reg_namedseq(pRExC_state,UVp)
This is expected to be called by a parser routine that has
- recognized'\N' and needs to handle the rest. RExC_parse is
+ recognized '\N' and needs to handle the rest. RExC_parse is
expected to point at the first char following the N at the time
of the call.
be returned to indicate failure. (This will NOT be a valid pointer
to a regnode.)
- If value is null then it is assumed that we are parsing normal text
+ If valuep is null then it is assumed that we are parsing normal text
and inserts a new EXACT node into the program containing the resolved
string and returns a pointer to the new node. If the string is
zerolength a NOTHING node is emitted.
-
+
On success RExC_parse is set to the char following the endbrace.
Parsing failures will generate a fatal errorvia vFAIL(...)
*/
STATIC regnode *
-S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep)
+S_reg_namedseq(pTHX_ RExC_state_t *pRExC_state, UV *valuep, I32 *flagp)
{
char * name; /* start of the content of the name */
char * endbrace; /* endbrace following the name */
PERL_ARGS_ASSERT_REG_NAMEDSEQ;
- if (*RExC_parse != '{') {
- vFAIL("Missing braces on \\N{}");
+ if (*RExC_parse != '{' ||
+ (*RExC_parse == '{' && RExC_parse[1]
+ && strchr("0123456789", RExC_parse[1])))
+ {
+ GET_RE_DEBUG_FLAGS_DECL;
+ if (valuep)
+ /* no bare \N in a charclass */
+ vFAIL("Missing braces on \\N{}");
+ GET_RE_DEBUG_FLAGS;
+ nextchar(pRExC_state);
+ ret = reg_node(pRExC_state, REG_ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ RExC_naughty++;
+ RExC_parse--;
+ Set_Node_Length(ret, 1); /* MJD */
+ return ret;
}
name = RExC_parse+1;
endbrace = strchr(RExC_parse, '}');
sv_name = newSVpvn(name, endbrace - name);
if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
- vFAIL2("Constant(\\N{%s}) unknown: "
+ vFAIL2("Constant(\\N{%" SVf "}) unknown: "
"(possibly a missing \"use charnames ...\")",
- SvPVX(sv_name));
+ SVfARG(sv_name));
}
if (!cvp || !SvOK(*cvp)) { /* when $^H{charnames} = undef; */
- vFAIL2("Constant(\\N{%s}): "
- "$^H{charnames} is not defined",SvPVX(sv_name));
+ vFAIL2("Constant(\\N{%" SVf "}): "
+ "$^H{charnames} is not defined", SVfARG(sv_name));
}
LEAVE ;
if ( !sv_str || !SvOK(sv_str) ) {
- vFAIL2("Constant(\\N{%s}): Call to &{$^H{charnames}} "
- "did not return a defined value",SvPVX(sv_name));
+ vFAIL2("Constant(\\N{%" SVf "}): Call to &{$^H{charnames}} "
+ "did not return a defined value", SVfARG(sv_name));
}
if (hv_store_ent( RExC_charnames, sv_name, sv_str, 0))
cached = 1;
*valuep = (UV)*p;
/* warn if we havent used the whole string? */
}
- if (numlen<len && SIZE_ONLY && ckWARN(WARN_REGEXP)) {
- vWARN2(RExC_parse,
- "Ignoring excess chars from \\N{%s} in character class",
- SvPVX(sv_name)
+ if (numlen<len && SIZE_ONLY) {
+ ckWARN2reg(RExC_parse,
+ "Ignoring excess chars from \\N{%" SVf "} in character class",
+ SVfARG(sv_name)
);
}
- } else if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
- vWARN2(RExC_parse,
- "Ignoring zero length \\N{%s} in character class",
- SvPVX(sv_name)
+ } else if (SIZE_ONLY) {
+ ckWARN2reg(RExC_parse,
+ "Ignoring zero length \\N{%" SVf "} in character class",
+ SVfARG(sv_name)
);
}
if (sv_name)
}
break;
case 'N':
- /* Handle \N{NAME} here and not below because it can be
+ /* Handle \N and \N{NAME} here and not below because it can be
multicharacter. join_exact() will join them up later on.
Also this makes sure that things like /\N{BLAH}+/ and
\N{BLAH} being multi char Just Happen. dmq*/
++RExC_parse;
- ret= reg_namedseq(pRExC_state, NULL);
+ ret= reg_namedseq(pRExC_state, NULL, flagp);
break;
case 'k': /* Handle \k<NAME> and \k'NAME' */
parse_named_seq:
* single-byte range. For now, warn only when
* it ends up modulo */
if (SIZE_ONLY && ender >= 0x100
- && ! UTF && ! PL_encoding
- && ckWARN2(WARN_DEPRECATED, WARN_REGEXP)) {
- vWARNdep(p, "Use of octal value above 377 is deprecated");
+ && ! UTF && ! PL_encoding) {
+ ckWARNregdep(p, "Use of octal value above 377 is deprecated");
}
p += numlen;
}
{
SV* enc = PL_encoding;
ender = reg_recode((const char)(U8)ender, &enc);
- if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
- vWARN(p, "Invalid escape in the specified encoding");
+ if (!enc && SIZE_ONLY)
+ ckWARNreg(p, "Invalid escape in the specified encoding");
RExC_utf8 = 1;
}
break;
FAIL("Trailing \\");
/* FALL THROUGH */
default:
- if (!SIZE_ONLY&& isALPHA(*p) && ckWARN(WARN_REGEXP))
- vWARN2(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
+ if (!SIZE_ONLY&& isALPHA(*p))
+ ckWARN2reg(p + 1, "Unrecognized escape \\%c passed through", UCHARAT(p));
goto normal_default;
}
break;
while (isALNUM(*s))
s++;
if (*s && c == *s && s[1] == ']') {
- if (ckWARN(WARN_REGEXP))
- vWARN3(s+2,
- "POSIX syntax [%c %c] belongs inside character classes",
- c, c);
+ ckWARN3reg(s+2,
+ "POSIX syntax [%c %c] belongs inside character classes",
+ c, c);
/* [[=foo=]] and [[.foo.]] are still future. */
if (POSIXCC_NOTYET(c)) {
from earlier versions, OTOH that behaviour was broken
as well. */
UV v; /* value is register so we cant & it /grrr */
- if (reg_namedseq(pRExC_state, &v)) {
+ if (reg_namedseq(pRExC_state, &v, NULL)) {
goto parseit;
}
value= v;
{
SV* enc = PL_encoding;
value = reg_recode((const char)(U8)value, &enc);
- if (!enc && SIZE_ONLY && ckWARN(WARN_REGEXP))
- vWARN(RExC_parse,
- "Invalid escape in the specified encoding");
+ if (!enc && SIZE_ONLY)
+ ckWARNreg(RExC_parse,
+ "Invalid escape in the specified encoding");
break;
}
default:
- if (!SIZE_ONLY && isALPHA(value) && ckWARN(WARN_REGEXP))
- vWARN2(RExC_parse,
- "Unrecognized escape \\%c in character class passed through",
- (int)value);
+ if (!SIZE_ONLY && isALPHA(value))
+ ckWARN2reg(RExC_parse,
+ "Unrecognized escape \\%c in character class passed through",
+ (int)value);
break;
}
} /* end of \blah */
/* a bad range like a-\d, a-[:digit:] ? */
if (range) {
if (!SIZE_ONLY) {
- if (ckWARN(WARN_REGEXP)) {
- const int w =
- RExC_parse >= rangebegin ?
- RExC_parse - rangebegin : 0;
- vWARN4(RExC_parse,
+ const int w =
+ RExC_parse >= rangebegin ?
+ RExC_parse - rangebegin : 0;
+ ckWARN4reg(RExC_parse,
"False [] range \"%*.*s\"",
w, w, rangebegin);
- }
+
if (prevvalue < 256) {
ANYOF_BITMAP_SET(ret, prevvalue);
ANYOF_BITMAP_SET(ret, '-');
if (r->saved_copy)
SvREFCNT_dec(r->saved_copy);
#endif
- Safefree(r->swap);
Safefree(r->offs);
}
a case of zero-ing that, rather than copying the current length. */
SvPV_set(ret_x, RX_WRAPPED(rx));
SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
- StructCopy(&(r->xpv_cur), &(ret->xpv_cur), struct regexp_allocated);
+ memcpy(&(ret->xpv_cur), &(r->xpv_cur),
+ sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
SvLEN_set(ret_x, 0);
Newx(ret->offs, npar, regexp_paren_pair);
Copy(r->offs, ret->offs, npar, regexp_paren_pair);
ret->saved_copy = NULL;
#endif
ret->mother_re = rx;
- ret->swap = NULL;
return ret_x;
}