From: Rafael Garcia-Suarez Date: Wed, 22 Jan 2003 22:35:56 +0000 (+0100) Subject: [perl #18232] [PATCH] store PL_reg_match_utf8 in reganch X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a30b2f1f43446bf5477e7baa5fee5d5c59659ce6;p=p5sagit%2Fp5-mst-13.2.git [perl #18232] [PATCH] store PL_reg_match_utf8 in reganch Message-Id: <20030122223556.57d597a3.rgarciasuarez@free.fr> p4raw-id: //depot/perl@18633 --- diff --git a/mg.c b/mg.c index fae5cda..0edd711 100644 --- a/mg.c +++ b/mg.c @@ -418,7 +418,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg) else /* @- */ i = s; - if (i > 0 && PL_reg_match_utf8) { + if (i > 0 && RX_MATCH_UTF8(rx)) { char *b = rx->subbeg; if (b) i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i)); @@ -459,7 +459,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) { i = t1 - s1; getlen: - if (i > 0 && PL_reg_match_utf8) { + if (i > 0 && RX_MATCH_UTF8(rx)) { char *s = rx->subbeg + s1; char *send = rx->subbeg + t1; @@ -707,7 +707,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) getrx: if (i >= 0) { sv_setpvn(sv, s, i); - if (PL_reg_match_utf8 && is_utf8_string((U8*)s, i)) + if (RX_MATCH_UTF8(rx) && is_utf8_string((U8*)s, i)) SvUTF8_on(sv); else SvUTF8_off(sv); diff --git a/pp.c b/pp.c index c9d1dc6..67fe7f6 100644 --- a/pp.c +++ b/pp.c @@ -4391,7 +4391,7 @@ PP(pp_split) TAINT_IF((pm->op_pmflags & PMf_LOCALE) && (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); - PL_reg_match_utf8 = do_utf8; + RX_MATCH_UTF8_set(rx, do_utf8); if (pm->op_pmreplroot) { #ifdef USE_ITHREADS diff --git a/pp_ctl.c b/pp_ctl.c index 7c0f8ba..a08e2b6 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -158,7 +158,7 @@ PP(pp_substcont) register REGEXP *rx = cx->sb_rx; rxres_restore(&cx->sb_rxres, rx); - PL_reg_match_utf8 = SvUTF8(cx->sb_targ) ? 1 : 0; + RX_MATCH_UTF8_set(rx, SvUTF8(cx->sb_targ)); if (cx->sb_iters++) { I32 saviters = cx->sb_iters; diff --git a/pp_hot.c b/pp_hot.c index 1a40441..8d9625b 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1180,7 +1180,7 @@ PP(pp_match) (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); TAINT_NOT; - PL_reg_match_utf8 = DO_UTF8(TARG); + RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); /* PMdf_USED is set after a ?? matches once */ if (pm->op_pmdynflags & PMdf_USED) { @@ -1355,7 +1355,7 @@ yup: /* Confirmed by INTUIT */ if (global) { rx->subbeg = truebase; rx->startp[0] = s - truebase; - if (PL_reg_match_utf8) { + if (RX_MATCH_UTF8(rx)) { char *t = (char*)utf8_hop((U8*)s, rx->minlen); rx->endp[0] = t - truebase; } @@ -1907,14 +1907,14 @@ PP(pp_subst) rxtainted |= 2; TAINT_NOT; - PL_reg_match_utf8 = DO_UTF8(TARG); + RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); force_it: if (!pm || !s) DIE(aTHX_ "panic: pp_subst"); strend = s + len; - slen = PL_reg_match_utf8 ? utf8_length((U8*)s, (U8*)strend) : len; + slen = RX_MATCH_UTF8(rx) ? utf8_length((U8*)s, (U8*)strend) : len; maxiters = 2 * slen + 10; /* We can match twice at each position, once with zero-length, second time with non-zero. */ diff --git a/regexec.c b/regexec.c index 157f426..59111a0 100644 --- a/regexec.c +++ b/regexec.c @@ -400,6 +400,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *i_strpos = strpos; SV *dsv = PERL_DEBUG_PAD_ZERO(0); #endif + RX_MATCH_UTF8_set(prog,do_utf8); if (prog->reganch & ROPT_UTF8) { DEBUG_r(PerlIO_printf(Perl_debug_log, @@ -1615,6 +1616,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * SV *dsv0 = PERL_DEBUG_PAD_ZERO(0); SV *dsv1 = PERL_DEBUG_PAD_ZERO(1); #endif + RX_MATCH_UTF8_set(prog,do_utf8); PL_regcc = 0; diff --git a/regexp.h b/regexp.h index 81552ba..0564054 100644 --- a/regexp.h +++ b/regexp.h @@ -71,6 +71,7 @@ typedef struct regexp { #define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */ #define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */ #define ROPT_TAINTED_SEEN 0x80000 +#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */ #define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */ #define RE_USE_INTUIT_ML 0x0200000 @@ -99,6 +100,13 @@ typedef struct regexp { ? RX_MATCH_COPIED_on(prog) \ : RX_MATCH_COPIED_off(prog)) +#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8_set(prog, t) ((t) \ + ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \ + : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0))) + #define REXEC_COPY_STR 0x01 /* Need to copy the string. */ #define REXEC_CHECKED 0x02 /* check_substr already checked. */ #define REXEC_SCREAM 0x04 /* use scream table. */ diff --git a/t/op/pat.t b/t/op/pat.t index 0868546..360892b 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..983\n"; +print "1..986\n"; BEGIN { chdir 't' if -d 't'; @@ -3081,5 +3081,15 @@ ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); -# last test 983 +{ + # [perl #18232] + "\x{100}" =~ /(.)/; + ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); + { 'a' =~ /./; } + ok( $1 eq "\x{100}", '$1 is still utf-8' ); + ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); +} + +# last test 984 +