bit in pmflags, to decide whether the pattern is UTF-8.
p4raw-id: //depot/perl@32851
pm = (PMOP*)o;
if (expr->op_type == OP_CONST) {
- SV * const pat = ((SVOP*)expr)->op_sv;
+ SV *pat = ((SVOP*)expr)->op_sv;
U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME;
if (o->op_flags & OPf_SPECIAL)
pm_flags |= RXf_SPLIT;
- if (DO_UTF8(pat))
- pm_flags |= RXf_UTF8;
+ if (DO_UTF8(pat)) {
+ assert (SvUTF8(pat));
+ } else if (SvUTF8(pat)) {
+ /* Not doing UTF-8, despite what the SV says. Is this only if we're
+ trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without the flag on, as
+ the compiler now honours the SvUTF8 flag on pat. */
+ STRLEN len;
+ const char *const p = SvPV(pat, len);
+ pat = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
if (PL_op->op_flags & OPf_SPECIAL)
PL_reginterp_cnt = I32_MAX; /* Mark as safe. */
- if (DO_UTF8(tmpstr))
- pm_flags |= RXf_UTF8;
+ if (DO_UTF8(tmpstr)) {
+ assert (SvUTF8(tmpstr));
+ } else if (SvUTF8(tmpstr)) {
+ /* Not doing UTF-8, despite what the SV says. Is this only if
+ we're trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without the flag on,
+ as the compiler now honours the SvUTF8 flag on tmpstr. */
+ STRLEN len;
+ const char *const p = SvPV(tmpstr, len);
+ tmpstr = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
if (eng)
PM_SETRE(pm, CALLREGCOMP_ENG(eng, tmpstr, pm_flags));
#endif
REGEXP *
-Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
+Perl_re_compile(pTHX_ const SV * const pattern, U32 pm_flags)
{
dVAR;
REGEXP *rx;
GET_RE_DEBUG_FLAGS_DECL;
DEBUG_r(if (!PL_colorset) reginitcolors());
- RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
+ RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+ assert(!(pm_flags & RXf_UTF8));
+ if (RExC_utf8)
+ pm_flags |= RXf_UTF8;
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
U32 pm_flags = 0;
const I32 osize = PL_regsize;
- if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
+ if (DO_UTF8(ret)) {
+ assert (SvUTF8(ret));
+ } else if (SvUTF8(ret)) {
+ /* Not doing UTF-8, despite what the SV says. Is
+ this only if we're trapped in use 'bytes'? */
+ /* Make a copy of the octet sequence, but without
+ the flag on, as the compiler now honours the
+ SvUTF8 flag on ret. */
+ STRLEN len;
+ const char *const p = SvPV(ret, len);
+ ret = newSVpvn_flags(p, len, SVs_TEMP);
+ }
+ assert(!(pm_flags & RXf_UTF8));
rx = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY