else /* @- */
i = s;
- if (i > 0 && PL_reg_match_utf8) {
+ if (i > 0 && RX_MATCH_UTF8(rx)) {
char *b = rx->subbeg;
if (b)
i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
{
i = t1 - s1;
getlen:
- if (i > 0 && PL_reg_match_utf8) {
+ if (i > 0 && RX_MATCH_UTF8(rx)) {
char *s = rx->subbeg + s1;
char *send = rx->subbeg + t1;
getrx:
if (i >= 0) {
sv_setpvn(sv, s, i);
- if (PL_reg_match_utf8 && is_utf8_string((U8*)s, i))
+ if (RX_MATCH_UTF8(rx) && is_utf8_string((U8*)s, i))
SvUTF8_on(sv);
else
SvUTF8_off(sv);
TAINT_IF((pm->op_pmflags & PMf_LOCALE) &&
(pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE)));
- PL_reg_match_utf8 = do_utf8;
+ RX_MATCH_UTF8_set(rx, do_utf8);
if (pm->op_pmreplroot) {
#ifdef USE_ITHREADS
register REGEXP *rx = cx->sb_rx;
rxres_restore(&cx->sb_rxres, rx);
- PL_reg_match_utf8 = SvUTF8(cx->sb_targ) ? 1 : 0;
+ RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ));
if (cx->sb_iters++) {
I32 saviters = cx->sb_iters;
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
TAINT_NOT;
- PL_reg_match_utf8 = DO_UTF8(TARG);
+ RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
/* PMdf_USED is set after a ?? matches once */
if (pm->op_pmdynflags & PMdf_USED) {
if (global) {
rx->subbeg = truebase;
rx->startp[0] = s - truebase;
- if (PL_reg_match_utf8) {
+ if (RX_MATCH_UTF8(rx)) {
char *t = (char*)utf8_hop((U8*)s, rx->minlen);
rx->endp[0] = t - truebase;
}
rxtainted |= 2;
TAINT_NOT;
- PL_reg_match_utf8 = DO_UTF8(TARG);
+ RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
force_it:
if (!pm || !s)
DIE(aTHX_ "panic: pp_subst");
strend = s + len;
- slen = PL_reg_match_utf8 ? utf8_length((U8*)s, (U8*)strend) : len;
+ slen = RX_MATCH_UTF8(rx) ? utf8_length((U8*)s, (U8*)strend) : len;
maxiters = 2 * slen + 10; /* We can match twice at each
position, once with zero-length,
second time with non-zero. */
char *i_strpos = strpos;
SV *dsv = PERL_DEBUG_PAD_ZERO(0);
#endif
+ RX_MATCH_UTF8_set(prog,do_utf8);
if (prog->reganch & ROPT_UTF8) {
DEBUG_r(PerlIO_printf(Perl_debug_log,
SV *dsv0 = PERL_DEBUG_PAD_ZERO(0);
SV *dsv1 = PERL_DEBUG_PAD_ZERO(1);
#endif
+ RX_MATCH_UTF8_set(prog,do_utf8);
PL_regcc = 0;
#define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */
#define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */
#define ROPT_TAINTED_SEEN 0x80000
+#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */
#define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */
#define RE_USE_INTUIT_ML 0x0200000
? RX_MATCH_COPIED_on(prog) \
: RX_MATCH_COPIED_off(prog))
+#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8)
+#define RX_MATCH_UTF8_set(prog, t) ((t) \
+ ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
+ : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+
#define REXEC_COPY_STR 0x01 /* Need to copy the string. */
#define REXEC_CHECKED 0x02 /* check_substr already checked. */
#define REXEC_SCREAM 0x04 /* use scream table. */
$| = 1;
-print "1..983\n";
+print "1..986\n";
BEGIN {
chdir 't' if -d 't';
ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
-# last test 983
+{
+ # [perl #18232]
+ "\x{100}" =~ /(.)/;
+ ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' );
+ { 'a' =~ /./; }
+ ok( $1 eq "\x{100}", '$1 is still utf-8' );
+ ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' );
+}
+
+# last test 984
+