* of t/op/regmesg.t, the tests in t/op/re_tests, and those in
* op/pragma/warn/regcomp.
*/
-#define MARKER1 "=HERE=" /* marker as it appears in the description */
-#define MARKER2 " ===HERE=== " /* marker as it appears within the regex */
+#define MARKER1 "HERE" /* marker as it appears in the description */
+#define MARKER2 " << HERE " /* marker as it appears within the regex */
-#define REPORT_LOCATION " at " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/"
+#define REPORT_LOCATION " before " MARKER1 " mark in regex m/%.*s" MARKER2 "%s/"
/*
* Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
* arg. Show regex, up to a maximum length. If it's too long, chop and add
* "...".
*/
-#define FAIL(m) \
+#define FAIL(msg) \
STMT_START { \
- char *elipises = ""; \
+ char *ellipses = ""; \
unsigned len = strlen(PL_regprecomp); \
\
if (!SIZE_ONLY) \
if (len > RegexLengthToShowInErrorMessages) { \
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
len = RegexLengthToShowInErrorMessages - 10; \
- elipises = "..."; \
+ ellipses = "..."; \
} \
Perl_croak(aTHX_ "%s in regex m/%.*s%s/", \
- m, len, PL_regprecomp, elipises); \
+ msg, (int)len, PL_regprecomp, ellipses); \
} STMT_END
/*
* args. Show regex, up to a maximum length. If it's too long, chop and add
* "...".
*/
-#define FAIL2(pat,m) \
+#define FAIL2(pat,msg) \
STMT_START { \
- char *elipises = ""; \
+ char *ellipses = ""; \
unsigned len = strlen(PL_regprecomp); \
\
if (!SIZE_ONLY) \
if (len > RegexLengthToShowInErrorMessages) { \
/* chop 10 shorter than the max, to ensure meaning of "..." */ \
len = RegexLengthToShowInErrorMessages - 10; \
- elipises = "..."; \
+ ellipses = "..."; \
} \
S_re_croak2(aTHX_ pat, " in regex m/%.*s%s/", \
- m, len, PL_regprecomp, elipises); \
+ msg, (int)len, PL_regprecomp, ellipses); \
} STMT_END
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
\
Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
- m, offset, PL_regprecomp, PL_regprecomp + offset); \
+ m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
/*
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
\
S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, \
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
/*
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
\
S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, \
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
/*
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
\
S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3,\
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
/*
STMT_START { \
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-PL_regcomp_parse); \
S_re_croak2(aTHX_ m, REPORT_LOCATION, a1, a2, a3, a4,\
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
STMT_START { \
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
Perl_warner(aTHX_ WARN_REGEXP, "%s" REPORT_LOCATION,\
- m, offset, PL_regprecomp, PL_regprecomp + offset); \
+ m, (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END \
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
a1, \
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
#define vWARN3(loc, m, a1, a2) \
unsigned offset = strlen(PL_regprecomp) - (PL_regxend - (loc)); \
Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION, \
a1, a2, \
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
#define vWARN4(loc, m, a1, a2, a3) \
unsigned offset = strlen(PL_regprecomp)-(PL_regxend-(loc)); \
Perl_warner(aTHX_ WARN_REGEXP, m REPORT_LOCATION,\
a1, a2, a3, \
- offset, PL_regprecomp, PL_regprecomp + offset); \
+ (int)offset, PL_regprecomp, PL_regprecomp + offset); \
} STMT_END
{
int value;
- for (value = 0; value < ANYOF_MAX; value += 2)
+ for (value = 0; value <= ANYOF_MAX; value += 2)
if (ANYOF_CLASS_TEST(cl, value) && ANYOF_CLASS_TEST(cl, value + 1))
return 1;
for (value = 0; value < 256; ++value)
else
sv = newSVpvn("", 0);
+ ENTER;
+ Perl_save_re_context(aTHX);
rop = sv_compile_2op(sv, &sop, "re", &av);
+ LEAVE;
n = add_data(3, "nop");
PL_regcomp_rx->data->data[n] = (void*)rop;
ret = reg_node(BOL);
break;
case '$':
- if (PL_regcomp_parse[1])
- PL_seen_zerolen++;
nextchar();
+ if (*PL_regcomp_parse)
+ PL_seen_zerolen++;
if (PL_regflags & PMf_MULTILINE)
ret = reg_node(MEOL);
else if (PL_regflags & PMf_SINGLELINE)
/* FALL THROUGH */
default: {
- register I32 len;
+ register STRLEN len;
register UV ender;
register char *p;
char *oldp, *s;
- I32 numlen;
+ STRLEN numlen;
PL_regcomp_parse++;
default:
normal_default:
if ((*p & 0xc0) == 0xc0 && UTF) {
- ender = utf8_to_uv((U8*)p, &numlen);
+ ender = utf8_to_uv((U8*)p, PL_regxend - p,
+ &numlen, 0);
p += numlen;
}
else
namedclass =
complement ? ANYOF_NASCII : ANYOF_ASCII;
break;
+ case 'b':
+ if (strnEQ(posixcc, "blank", 5))
+ namedclass =
+ complement ? ANYOF_NBLANK : ANYOF_BLANK;
+ break;
case 'c':
if (strnEQ(posixcc, "cntrl", 5))
namedclass =
case 's':
if (strnEQ(posixcc, "space", 5))
namedclass =
- complement ? ANYOF_NSPACE : ANYOF_SPACE;
+ complement ? ANYOF_NPSXSPC : ANYOF_PSXSPC;
break;
case 'u':
if (strnEQ(posixcc, "upper", 5))
register I32 lastvalue = OOB_CHAR8;
register I32 range = 0;
register regnode *ret;
- I32 numlen;
+ STRLEN numlen;
I32 namedclass;
char *rangebegin;
bool need_class = 0;
else if (value == '\\') {
value = UCHARAT(PL_regcomp_parse++);
/* Some compilers cannot handle switching on 64-bit integer
- * values, therefore value cannot be an UV. --jhi */
+ * values, therefore the 'value' cannot be an UV. --jhi */
switch (value) {
case 'w': namedclass = ANYOF_ALNUM; break;
case 'W': namedclass = ANYOF_NALNUM; break;
#endif /* EBCDIC */
}
break;
+ case ANYOF_BLANK:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_BLANK);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isBLANK(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NBLANK:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NBLANK);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isBLANK(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
case ANYOF_CNTRL:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
ANYOF_BITMAP_SET(ret, value);
}
break;
+ case ANYOF_PSXSPC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_PSXSPC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPSXSPC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
+ case ANYOF_NPSXSPC:
+ if (LOC)
+ ANYOF_CLASS_SET(ret, ANYOF_NPSXSPC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPSXSPC(value))
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ break;
case ANYOF_PUNCT:
if (LOC)
ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
register U32 lastvalue = OOB_UTF8;
register I32 range = 0;
register regnode *ret;
- I32 numlen;
+ STRLEN numlen;
I32 n;
SV *listsv;
U8 flags = 0;
namedclass = OOB_NAMEDCLASS;
if (!range)
rangebegin = PL_regcomp_parse;
- value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+ value = utf8_to_uv((U8*)PL_regcomp_parse,
+ PL_regxend - PL_regcomp_parse,
+ &numlen, 0);
PL_regcomp_parse += numlen;
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
- value = (U32)utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
+ value = (U32)utf8_to_uv((U8*)PL_regcomp_parse,
+ PL_regxend - PL_regcomp_parse,
+ &numlen, 0);
PL_regcomp_parse += numlen;
/* Some compilers cannot handle switching on 64-bit integer
* values, therefore value cannot be an UV. Yes, this will
case ANYOF_NPUNCT:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
case ANYOF_SPACE:
+ case ANYOF_PSXSPC:
+ case ANYOF_BLANK:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
case ANYOF_NSPACE:
+ case ANYOF_NPSXSPC:
+ case ANYOF_NBLANK:
Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
case ANYOF_UPPER:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
- reguni - emit (if appropriate) a Unicode character
*/
STATIC void
-S_reguni(pTHX_ UV uv, char* s, I32* lenp)
+S_reguni(pTHX_ UV uv, char* s, STRLEN* lenp)
{
dTHR;
if (SIZE_ONLY) {
else if (k == ANYOF) {
int i, rangestart = -1;
const char * const out[] = { /* Should be syncronized with
- a table in regcomp.h */
+ ANYOF_ #xdefines in regcomp.h */
"\\w",
"\\W",
"\\s",
"[:punct:]",
"[:^punct:]",
"[:upper:]",
- "[:!upper:]",
+ "[:^upper:]",
"[:xdigit:]",
- "[:^xdigit:]"
+ "[:^xdigit:]",
+ "[:space:]",
+ "[:^space:]",
+ "[:blank:]",
+ "[:^blank:]"
};
if (o->flags & ANYOF_LOCALE)
Perl_croak(aTHX_ "panic: pregfree comppad");
old_comppad = PL_comppad;
old_curpad = PL_curpad;
- PL_comppad = new_comppad;
- PL_curpad = AvARRAY(new_comppad);
+ /* Watch out for global destruction's random ordering. */
+ if (SvTYPE(new_comppad) == SVt_PVAV) {
+ PL_comppad = new_comppad;
+ PL_curpad = AvARRAY(new_comppad);
+ }
+ else
+ PL_curpad = NULL;
op_free((OP_4tree*)r->data->data[n]);
PL_comppad = old_comppad;
PL_curpad = old_curpad;