#define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv))
#define CHR_DIST(a,b) (UTF ? utf8_distance(a,b) : a - b)
+
+/* length of regex to show in messages that don't mark a position within */
+#define RegexLengthToShowInErrorMessages 127
+
+/*
+ * If MARKER[12] are adjusted, be sure to adjust the constants at the top
+ * of t/op/regmesg.t, the tests in t/op/re_tests, and those in
+ * op/pragma/warn/regcomp.
+ */
+#define MARKER1 "HERE" /* marker as it appears in the description */
+#define MARKER2 " << HERE " /* marker as it appears within the regex */
+
+#define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/"
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
+ * arg. Show regex, up to a maximum length. If it's too long, chop and add
+ * "...".
+ */
+#define FAIL(msg) \
+ STMT_START { \
+ char *ellipses = ""; \
+ unsigned len = strlen(PL_regprecomp); \
+ \
+ if (!SIZE_ONLY) \
+ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \
+ \
+ if (len > RegexLengthToShowInErrorMessages) { \
+ /* chop 10 shorter than the max, to ensure meaning of "..." */ \
+ len = RegexLengthToShowInErrorMessages - 10; \
+ ellipses = "..."; \
+ } \
+ Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
+ msg, (int)len, PL_regprecomp, ellipses); \
+ } STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
+ * args. Show regex, up to a maximum length. If it's too long, chop and add
+ * "...".
+ */
+#define FAIL2(pat,msg) \
+ STMT_START { \
+ char *ellipses = ""; \
+ unsigned len = strlen(PL_regprecomp); \
+ \
+ if (!SIZE_ONLY) \
+ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \
+ \
+ if (len > RegexLengthToShowInErrorMessages) { \
+ /* chop 10 shorter than the max, to ensure meaning of "..." */ \
+ len = RegexLengthToShowInErrorMessages - 10; \
+ ellipses = "..."; \
+ } \
+ S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \
+ msg, (int)len, PL_regprecomp, ellipses); \
+ } STMT_END
+
+
+/*
+ * Simple_vFAIL -- like FAIL, but marks the current location in the scan
+ */
+#define Simple_vFAIL(m) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
+ \
+ Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
+ m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL()
+ */
+#define vFAIL(m) \
+ STMT_START { \
+ if (!SIZE_ONLY) \
+ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \
+ Simple_vFAIL(m); \
+ } STMT_END
+
+/*
+ * Like Simple_vFAIL(), but accepts two arguments.
+ */
+#define Simple_vFAIL2(m,a1) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
+ \
+ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL2().
+ */
+#define vFAIL2(m,a1) \
+ STMT_START { \
+ if (!SIZE_ONLY) \
+ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \
+ Simple_vFAIL2(m, a1); \
+ } STMT_END
+
+
+/*
+ * Like Simple_vFAIL(), but accepts three arguments.
+ */
+#define Simple_vFAIL3(m, a1, a2) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
+ \
+ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+/*
+ * Calls SAVEDESTRUCTOR_X if needed, then Simple_vFAIL3().
+ */
+#define vFAIL3(m,a1,a2) \
+ STMT_START { \
+ if (!SIZE_ONLY) \
+ SAVEDESTRUCTOR_X(clear_re,(void*)PL_regcomp_rx); \
+ Simple_vFAIL3(m, a1, a2); \
+ } STMT_END
+
+/*
+ * Like Simple_vFAIL(), but accepts four arguments.
+ */
+#define Simple_vFAIL4(m, a1, a2, a3) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
+ \
+ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+/*
+ * Like Simple_vFAIL(), but accepts five arguments.
+ */
+#define Simple_vFAIL5(m, a1, a2, a3, a4) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
+ S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+
+#define vWARN(loc,m) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
+ Perl_warner(aTHX_ WARN_REGEXP, "%s" REPORT_LOCATION,\
+ m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END \
+
+
+#define vWARN2(loc, m, a1) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
+ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
+ a1, \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+#define vWARN3(loc, m, a1, a2) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp) - (PL_regxend - (loc)); \
+ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION, \
+ a1, a2, \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+#define vWARN4(loc, m, a1, a2, a3) \
+ STMT_START { \
+ unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
+ Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
+ a1, a2, a3, \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
+ } STMT_END
+
+
+
/* Allow for side effects in s */
#define REGC(c,s) STMT_START { if (!SIZE_ONLY) *(s) = (c); else (s);} STMT_END
{
int value;
- for (value = 0; value < ANYOF_MAX; value += 2)
+ for (value = 0; value <= ANYOF_MAX; value += 2)
if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
return 1;
for (value = 0; value < 256; ++value)
if (ckWARN(WARN_REGEXP) && (minnext + deltanext == 0)
&& !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
&& maxcount <= REG_INFTY/3) /* Complement check for big count */
- Perl_warner(aTHX_ WARN_REGEXP,
- "Strange *+?{} on zero-length expression");
+ {
+ vWARN(PL_regcomp_parse,
+ "Quantifier unexpected on zero-length expression");
+ }
+
min += minnext * mincount;
is_inf_internal |= ((maxcount == REG_INFTY
&& (minnext + deltanext) > 0)
regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
if (OP(nxt) != CLOSE)
- FAIL("panic opt close");
+ FAIL("Panic opt close");
oscan->flags = ARG(nxt);
OP(nxt1) = OPTIMIZED; /* was OPEN. */
OP(nxt) = OPTIMIZED; /* was CLOSE. */
sv_catsv(data->last_found, last_str);
data->last_end += l * (mincount - 1);
}
+ } else {
+ /* start offset must point into the last copy */
+ data->last_start_min += minnext * (mincount - 1);
+ data->last_start_max += is_inf ? 0 : (maxcount - 1)
+ * (minnext + data->pos_delta);
}
}
/* It is counted once already... */
minnext = study_chunk(&nscan, &deltanext, last, &data_fake, f);
if (scan->flags) {
if (deltanext) {
- FAIL("variable length lookbehind not implemented");
+ vFAIL("Variable length lookbehind not implemented");
}
else if (minnext > U8_MAX) {
- FAIL2("lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
+ vFAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
}
scan->flags = minnext;
}
if (data)
data->flags |= SF_HAS_EVAL;
}
- else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded */
+ else if (OP(scan) == LOGICAL && scan->flags == 2) { /* Embedded follows */
if (flags & SCF_DO_SUBSTR) {
scan_commit(data);
data->longest = &(data->longest_float);
is_inf = is_inf_internal = 1;
if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
cl_anything(data->start_class);
+ flags &= ~SCF_DO_STCLASS;
}
/* Else: zero-length, ignore. */
scan = regnext(scan);
PL_colorset = 1;
}
+
/*
- pregcomp - compile a regular expression into internal code
*
Newc(1001, r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode),
char, regexp);
if (r == NULL)
- FAIL("regexp out of space");
+ FAIL("Regexp out of space");
+
#ifdef DEBUGGING
/* avoid reading uninitialized memory in DEBUGGING code in study_chunk() */
Zero(r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode), char);
register regnode *ender = 0;
register I32 parno = 0;
I32 flags, oregflags = PL_regflags, have_branch = 0, open = 0;
+ char *oregcomp_parse = PL_regcomp_parse;
char c;
*flagp = 0; /* Tentatively. */
U16 posflags = 0, negflags = 0;
U16 *flagsp = &posflags;
int logical = 0;
+ char *seqstart = PL_regcomp_parse;
PL_regcomp_parse++;
paren = *PL_regcomp_parse++;
break;
case '$':
case '@':
- FAIL2("Sequence (?%c...) not implemented", (int)paren);
+ vFAIL2("Sequence (?%c...) not implemented", (int)paren);
break;
case '#':
while (*PL_regcomp_parse && *PL_regcomp_parse != ')')
*flagp = TRYAGAIN;
return NULL;
case 'p':
- Perl_warner(aTHX_ WARN_REGEXP, "(?p{}) is deprecated - use (??{})");
+ if (SIZE_ONLY)
+ vWARN(PL_regcomp_parse, "(?p{}) is deprecated - use (??{})");
/* FALL THROUGH*/
case '?':
logical = 1;
PL_regcomp_parse++;
}
if (*PL_regcomp_parse != ')')
- FAIL("Sequence (?{...}) not terminated or not {}-balanced");
+ {
+ PL_regcomp_parse = s;
+ vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+ }
if (!SIZE_ONLY) {
AV *av;
else
sv = newSVpvn("", 0);
+ ENTER;
+ Perl_save_re_context(aTHX);
rop = sv_compile_2op(sv, &sop, "re", &av);
+ LEAVE;
n = add_data(3, "nop");
PL_regcomp_rx->data->data[n] = (void*)rop;
PL_regcomp_parse++;
ret = reganode(GROUPP, parno);
if ((c = *nextchar()) != ')')
- FAIL2("Switch (?(number%c not recognized", c);
+ vFAIL("Switch condition not recognized");
insert_if:
regtail(ret, reganode(IFTHEN, 0));
br = regbranch(&flags, 1);
else
lastbr = NULL;
if (c != ')')
- FAIL("Switch (?(condition)... contains too many branches");
+ vFAIL("Switch (?(condition)... contains too many branches");
ender = reg_node(TAIL);
regtail(br, ender);
if (lastbr) {
return ret;
}
else {
- FAIL2("Unknown condition for (?(%.2s", PL_regcomp_parse);
+ vFAIL2("Unknown switch condition (?(%.2s", PL_regcomp_parse);
}
}
case 0:
- FAIL("Sequence (? incomplete");
+ PL_regcomp_parse--; /* for vFAIL to print correctly */
+ vFAIL("Sequence (? incomplete");
break;
default:
--PL_regcomp_parse;
break;
}
unknown:
- if (*PL_regcomp_parse != ')')
- FAIL2("Sequence (?%c...) not recognized", *PL_regcomp_parse);
+ if (*PL_regcomp_parse != ')') {
+ PL_regcomp_parse++;
+ vFAIL3("Sequence (%.*s...) not recognized", PL_regcomp_parse-seqstart, seqstart);
+ }
nextchar();
*flagp = TRYAGAIN;
return NULL;
if (paren) {
PL_regflags = oregflags;
if (PL_regcomp_parse >= PL_regxend || *nextchar() != ')') {
- FAIL("unmatched () in regexp");
+ PL_regcomp_parse = oregcomp_parse;
+ vFAIL("Unmatched (");
}
}
else if (!paren && PL_regcomp_parse < PL_regxend) {
if (*PL_regcomp_parse == ')') {
- FAIL("unmatched () in regexp");
+ PL_regcomp_parse++;
+ vFAIL("Unmatched )");
}
else
- FAIL("junk on end of regexp"); /* "Can't happen". */
+ FAIL("Junk on end of regexp"); /* "Can't happen". */
/* NOTREACHED */
}
if (!max && *maxpos != '0')
max = REG_INFTY; /* meaning "infinity" */
else if (max >= REG_INFTY)
- FAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
PL_regcomp_parse = next;
nextchar();
if (max > 0)
*flagp |= HASWIDTH;
if (max && max < min)
- FAIL("Can't do {n,m} with n > m");
+ vFAIL("Can't do {n,m} with n > m");
if (!SIZE_ONLY) {
ARG1_SET(ret, min);
ARG2_SET(ret, max);
}
#if 0 /* Now runtime fix should be reliable. */
+
+ /* if this is reinstated, don't forget to put this back into perldiag:
+
+ =item Regexp *+ operand could be empty at {#} in regex m/%s/
+
+ (F) The part of the regexp subject to either the * or + quantifier
+ could match an empty string. The {#} shows in the regular
+ expression about where the problem was discovered.
+
+ */
+
if (!(flags&HASWIDTH) && op != '?')
- FAIL("regexp *+ operand could be empty");
+ vFAIL("Regexp *+ operand could be empty");
#endif
nextchar();
}
nest_check:
if (ckWARN(WARN_REGEXP) && !SIZE_ONLY && !(flags&HASWIDTH) && max > REG_INFTY/3) {
- Perl_warner(aTHX_ WARN_REGEXP, "%.*s matches null string many times",
- PL_regcomp_parse - origparse, origparse);
+ vWARN3(PL_regcomp_parse,
+ "%.*s matches null string many times",
+ PL_regcomp_parse - origparse,
+ origparse);
}
if (*PL_regcomp_parse == '?') {
reginsert(MINMOD, ret);
regtail(ret, ret + NODE_STEP_REGNODE);
}
- if (ISMULT2(PL_regcomp_parse))
- FAIL("nested *?+ in regexp");
+ if (ISMULT2(PL_regcomp_parse)) {
+ PL_regcomp_parse++;
+ vFAIL("Nested quantifiers");
+ }
return(ret);
}
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*
- * [Yes, it is worth fixing, some scripts can run twice the speed.]
- */
+ * [Yes, it is worth fixing, some scripts can run twice the speed.] */
STATIC regnode *
S_regatom(pTHX_ I32 *flagp)
{
ret = reg_node(BOL);
break;
case '$':
- if (PL_regcomp_parse[1])
- PL_seen_zerolen++;
nextchar();
+ if (*PL_regcomp_parse)
+ PL_seen_zerolen++;
if (PL_regflags & PMf_MULTILINE)
ret = reg_node(MEOL);
else if (PL_regflags & PMf_SINGLELINE)
PL_regnaughty++;
break;
case '[':
- PL_regcomp_parse++;
+ {
+ char *oregcomp_parse = ++PL_regcomp_parse;
ret = (UTF ? regclassutf8() : regclass());
- if (*PL_regcomp_parse != ']')
- FAIL("unmatched [] in regexp");
+ if (*PL_regcomp_parse != ']') {
+ PL_regcomp_parse = oregcomp_parse;
+ vFAIL("Unmatched [");
+ }
nextchar();
*flagp |= HASWIDTH|SIMPLE;
break;
+ }
case '(':
nextchar();
ret = reg(1, &flags);
if (ret == NULL) {
- if (flags & TRYAGAIN)
+ if (flags & TRYAGAIN) {
+ if (PL_regcomp_parse == PL_regxend) {
+ /* Make parent create an empty node if needed. */
+ *flagp |= TRYAGAIN;
+ return(NULL);
+ }
goto tryagain;
+ }
return(NULL);
}
*flagp |= flags&(HASWIDTH|SPSTART|SIMPLE);
*flagp |= TRYAGAIN;
return NULL;
}
- FAIL2("internal urp in regexp at /%s/", PL_regcomp_parse);
+ vFAIL("Internal urp");
/* Supposed to be caught earlier. */
break;
case '{':
case '?':
case '+':
case '*':
- FAIL("?+*{} follows nothing in regexp");
+ PL_regcomp_parse++;
+ vFAIL("Quantifier follows nothing");
break;
case '\\':
switch (*++PL_regcomp_parse) {
if (PL_regcomp_parse[1] == '{') {
PL_regxend = strchr(PL_regcomp_parse, '}');
- if (!PL_regxend)
- FAIL("Missing right brace on \\p{}");
+ if (!PL_regxend) {
+ PL_regcomp_parse += 2;
+ PL_regxend = oldregxend;
+ vFAIL("Missing right brace on \\p{}");
+ }
PL_regxend++;
}
else
if (num > 9 && num >= PL_regnpar)
goto defchar;
else {
+ while (isDIGIT(*PL_regcomp_parse))
+ PL_regcomp_parse++;
+
if (!SIZE_ONLY && num > PL_regcomp_rx->nparens)
- FAIL("reference to nonexistent group");
+ vFAIL("Reference to nonexistent group");
PL_regsawback = 1;
ret = reganode(FOLD
? (LOC ? REFFL : REFF)
: REF, num);
*flagp |= HASWIDTH;
- while (isDIGIT(*PL_regcomp_parse))
- PL_regcomp_parse++;
PL_regcomp_parse--;
nextchar();
}
break;
case '\0':
if (PL_regcomp_parse >= PL_regxend)
- FAIL("trailing \\ in regexp");
+ FAIL("Trailing \\");
/* FALL THROUGH */
default:
/* Do not generate `unrecognized' warnings here, we fall
/* FALL THROUGH */
default: {
- register I32 len;
+ register STRLEN len;
register UV ender;
register char *p;
char *oldp, *s;
- I32 numlen;
+ STRLEN numlen;
PL_regcomp_parse++;
if (*++p == '{') {
char* e = strchr(p, '}');
- if (!e)
- FAIL("Missing right brace on \\x{}");
+ if (!e) {
+ PL_regcomp_parse = p + 1;
+ vFAIL("Missing right brace on \\x{}");
+ }
else if (UTF) {
- ender = (UV)scan_hex(p + 1, e - p, &numlen);
- if (numlen + len >= 127) { /* numlen is generous */
+ numlen = 1; /* allow underscores */
+ ender = (UV)scan_hex(p + 1, e - p - 1, &numlen);
+ /* numlen is generous */
+ if (numlen + len >= 127) {
p--;
goto loopdone;
}
p = e + 1;
}
else
- FAIL("Can't use \\x{} without 'use utf8' declaration");
+ {
+ PL_regcomp_parse = e + 1;
+ vFAIL("Can't use \\x{} without 'use utf8' declaration");
+ }
+
}
else {
+ numlen = 0; /* disallow underscores */
ender = (UV)scan_hex(p, 2, &numlen);
p += numlen;
}
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
(isDIGIT(p[1]) && atoi(p) >= PL_regnpar) ) {
+ numlen = 0; /* disallow underscores */
ender = (UV)scan_oct(p, 3, &numlen);
p += numlen;
}
break;
case '\0':
if (p >= PL_regxend)
- FAIL("trailing \\ in regexp");
+ FAIL("Trailing \\");
/* FALL THROUGH */
default:
if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(*p))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: Unrecognized escape \\%c passed through",
- PL_regprecomp,
- *p);
+ vWARN2(p +1, "Unrecognized escape \\%c passed through", *p);
goto normal_default;
}
break;
default:
normal_default:
if ((*p & 0xc0) == 0xc0 && UTF) {
- ender = utf8_to_uv((U8*)p, &numlen);
+ ender = utf8_to_uv((U8*)p, PL_regxend - p,
+ &numlen, 0);
p += numlen;
}
else
PL_regcomp_parse = p - 1;
nextchar();
if (len < 0)
- FAIL("internal disaster in regexp");
+ vFAIL("Internal disaster");
if (len > 0)
*flagp |= HASWIDTH;
if (len == 1)
namedclass =
complement ? ANYOF_NASCII : ANYOF_ASCII;
break;
+ case 'b':
+ if (strnEQ(posixcc, "blank", 5))
+ namedclass =
+ complement ? ANYOF_NBLANK : ANYOF_BLANK;
+ break;
case 'c':
if (strnEQ(posixcc, "cntrl", 5))
namedclass =
case 's':
if (strnEQ(posixcc, "space", 5))
namedclass =
- complement ? ANYOF_NSPACE : ANYOF_SPACE;
+ complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
+ break;
case 'u':
if (strnEQ(posixcc, "upper", 5))
namedclass =
if (namedclass == OOB_NAMEDCLASS ||
posixcc[skip] != ':' ||
posixcc[skip+1] != ']')
- Perl_croak(aTHX_
- "Character class [:%.*s:] unknown",
- t - s - 1, s + 1);
- } else if (ckWARN(WARN_REGEXP) && !SIZE_ONLY)
+ {
+ Simple_vFAIL3("POSIX class [:%.*s:] unknown",
+ t - s - 1, s + 1);
+ }
+ } else if (!SIZE_ONLY) {
/* [[=foo=]] and [[.foo.]] are still future. */
- Perl_warner(aTHX_ WARN_REGEXP,
- "Character class syntax [%c %c] is reserved for future extensions", c, c);
+
+ /* adjust PL_regcomp_parse so the warning shows after
+ the class closes */
+ while (*PL_regcomp_parse && *PL_regcomp_parse != ']')
+ PL_regcomp_parse++;
+ Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
+ }
} else {
/* Maternal grandfather:
* "[:" ending in ":" but not in ":]" */
while(*s && isALNUM(*s))
s++;
if (*s && c == *s && s[1] == ']') {
- Perl_warner(aTHX_ WARN_REGEXP,
- "Character class syntax [%c %c] belongs inside character classes", c, c);
+ vWARN3(s+2, "POSIX syntax [%c %c] belongs inside character classes", c, c);
+
+ /* [[=foo=]] and [[.foo.]] are still future. */
if (c == '=' || c == '.')
- Perl_warner(aTHX_ WARN_REGEXP,
- "Character class syntax [%c %c] is reserved for future extensions", c, c);
+ {
+ /* adjust PL_regcomp_parse so the error shows after
+ the class closes */
+ while (*PL_regcomp_parse && *PL_regcomp_parse++ != ']')
+ ;
+ Simple_vFAIL3("POSIX syntax [%c %c] is reserved for future extensions", c, c);
+ }
}
}
}
register I32 lastvalue = OOB_CHAR8;
register I32 range = 0;
register regnode *ret;
- I32 numlen;
+ STRLEN numlen;
I32 namedclass;
char *rangebegin;
bool need_class = 0;
else if (value == '\\') {
value = UCHARAT(PL_regcomp_parse++);
/* Some compilers cannot handle switching on 64-bit integer
- * values, therefore value cannot be an UV. --jhi */
+ * values, therefore the 'value' cannot be an UV. --jhi */
switch (value) {
case 'w': namedclass = ANYOF_ALNUM; break;
case 'W': namedclass = ANYOF_NALNUM; break;
case 'a': value = '\057'; break;
#endif
case 'x':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
break;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
default:
if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: Unrecognized escape \\%c in character class passed through",
- PL_regprecomp,
- (int)value);
+
+ vWARN2(PL_regcomp_parse, "Unrecognized escape \\%c in character class passed through", (int)value);
break;
}
}
if (range) { /* a-\d, a-[:digit:] */
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: false [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ vWARN4(PL_regcomp_parse,
+ "False [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
ANYOF_BITMAP_SET(ret, lastvalue);
ANYOF_BITMAP_SET(ret, '-');
}
#endif /* EBCDIC */
}
break;
+ case ANYOF_BLANK:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_BLANK);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isBLANK(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NBLANK:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isBLANK(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
case ANYOF_CNTRL:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
ANYOF_BITMAP_SET(ret, value);
}
break;
+ case ANYOF_PSXSPC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPSXSPC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NPSXSPC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPSXSPC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
case ANYOF_PUNCT:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
}
break;
default:
- FAIL("invalid [::] class in regexp");
+ vFAIL("Invalid [::] class");
break;
}
if (LOC)
}
if (range) {
if (lastvalue > value) /* b-a */ {
- Perl_croak(aTHX_
- "/%.127s/: invalid [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ Simple_vFAIL4("Invalid [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
}
range = 0;
}
PL_regcomp_parse++;
if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
if (ckWARN(WARN_REGEXP))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: false [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ vWARN4(PL_regcomp_parse,
+ "False [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
if (!SIZE_ONLY)
ANYOF_BITMAP_SET(ret, '-');
} else
register U32 lastvalue = OOB_UTF8;
register I32 range = 0;
register regnode *ret;
- I32 numlen;
+ STRLEN numlen;
I32 n;
SV *listsv;
U8 flags = 0;
namedclass = OOB_NAMEDCLASS;
if (!range)
rangebegin = PL_regcomp_parse;
- value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+ value = utf8_to_uv((U8*)PL_regcomp_parse,
+ PL_regxend - PL_regcomp_parse,
+ &numlen, 0);
PL_regcomp_parse += numlen;
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
- value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+ value = (U32)utf8_to_uv((U8*)PL_regcomp_parse,
+ PL_regxend - PL_regcomp_parse,
+ &numlen, 0);
PL_regcomp_parse += numlen;
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore value cannot be an UV. Yes, this will
if (*PL_regcomp_parse == '{') {
e = strchr(PL_regcomp_parse++, '}');
if (!e)
- FAIL("Missing right brace on \\p{}");
+ vFAIL("Missing right brace on \\p{}");
n = e - PL_regcomp_parse;
}
else {
case 'x':
if (*PL_regcomp_parse == '{') {
e = strchr(PL_regcomp_parse++, '}');
- if (!e)
- FAIL("Missing right brace on \\x{}");
+ if (!e)
+ vFAIL("Missing right brace on \\x{}");
+ numlen = 1; /* allow underscores */
value = (UV)scan_hex(PL_regcomp_parse,
e - PL_regcomp_parse,
&numlen);
PL_regcomp_parse = e + 1;
}
else {
+ numlen = 0; /* disallow underscores */
value = (UV)scan_hex(PL_regcomp_parse, 2, &numlen);
PL_regcomp_parse += numlen;
}
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
+ numlen = 0; /* disallow underscores */
value = (UV)scan_oct(--PL_regcomp_parse, 3, &numlen);
PL_regcomp_parse += numlen;
break;
default:
if (!SIZE_ONLY && ckWARN(WARN_REGEXP) && isALPHA(value))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: Unrecognized escape \\%c in character class passed through",
- PL_regprecomp,
- (int)value);
+ vWARN2(PL_regcomp_parse,
+ "Unrecognized escape \\%c in character class passed through",
+ (int)value);
break;
}
}
if (range) { /* a-\d, a-[:digit:] */
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: false [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ vWARN4(PL_regcomp_parse,
+ "False [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
Perl_sv_catpvf(aTHX_ listsv,
/* 0x002D is Unicode for '-' */
"%04"UVxf"\n002D\n", (UV)lastvalue);
case ANYOF_NPUNCT:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
case ANYOF_SPACE:
+ case ANYOF_PSXSPC:
+ case ANYOF_BLANK:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
case ANYOF_NSPACE:
+ case ANYOF_NPSXSPC:
+ case ANYOF_NBLANK:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
case ANYOF_UPPER:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
}
if (range) {
if (lastvalue > value) { /* b-a */
- Perl_croak(aTHX_
- "/%.127s/: invalid [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ Simple_vFAIL4("invalid [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
}
range = 0;
}
PL_regcomp_parse++;
if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
if (ckWARN(WARN_REGEXP))
- Perl_warner(aTHX_ WARN_REGEXP,
- "/%.127s/: false [] range \"%*.*s\" in regexp",
- PL_regprecomp,
- PL_regcomp_parse - rangebegin,
- PL_regcomp_parse - rangebegin,
- rangebegin);
+ vWARN4(PL_regcomp_parse,
+ "False [] range \"%*.*s\"",
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
if (!SIZE_ONLY)
Perl_sv_catpvf(aTHX_ listsv,
/* 0x002D is Unicode for '-' */
- reguni - emit (if appropriate) a Unicode character
*/
STATIC void
-S_reguni(pTHX_ UV uv, char* s, I32* lenp)
+S_reguni(pTHX_ UV uv, char* s, STRLEN* lenp)
{
dTHR;
if (SIZE_ONLY) {
sv_setpvn(sv, "", 0);
if (OP(o) >= reg_num) /* regnode.type is unsigned */
- FAIL("corrupted regexp opcode");
+ FAIL("Corrupted regexp opcode");
sv_catpv(sv, (char*)reg_name[OP(o)]); /* Take off const! */
k = PL_regkind[(U8)OP(o)];
else if (k == ANYOF) {
int i, rangestart = -1;
const char * const out[] = { /* Should be syncronized with
- a table in regcomp.h */
+ ANYOF_ #xdefines in regcomp.h */
"\\w",
"\\W",
"\\s",
"[:punct:]",
"[:^punct:]",
"[:upper:]",
- "[:!upper:]",
+ "[:^upper:]",
"[:xdigit:]",
- "[:^xdigit:]"
+ "[:^xdigit:]",
+ "[:space:]",
+ "[:^space:]",
+ "[:blank:]",
+ "[:^blank:]"
};
if (o->flags & ANYOF_LOCALE)
Perl_croak(aTHX_ "panic: pregfree comppad");
old_comppad = PL_comppad;
old_curpad = PL_curpad;
- PL_comppad = new_comppad;
- PL_curpad = AvARRAY(new_comppad);
+ /* Watch out for global destruction's random ordering. */
+ if (SvTYPE(new_comppad) == SVt_PVAV) {
+ PL_comppad = new_comppad;
+ PL_curpad = AvARRAY(new_comppad);
+ }
+ else
+ PL_curpad = NULL;
op_free((OP_4tree*)r->data->data[n]);
PL_comppad = old_comppad;
PL_curpad = old_curpad;