From: Jarkko Hietaniemi Date: Sat, 18 Aug 2001 14:24:42 +0000 (+0000) Subject: New try for ID 20010407.006: detach the semantics X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d9f424b23bb434af43f899daf2cb6cfe42fe6e1a;p=p5sagit%2Fp5-mst-13.2.git New try for ID 20010407.006: detach the semantics "was the last match target UTF8" into its own variable. p4raw-id: //depot/perl@11717 --- diff --git a/embedvar.h b/embedvar.h index d0a7ec4..b5c6340 100644 --- a/embedvar.h +++ b/embedvar.h @@ -94,6 +94,7 @@ #define PL_reg_start_tmpl (vTHX->Treg_start_tmpl) #define PL_reg_starttry (vTHX->Treg_starttry) #define PL_reg_sv (vTHX->Treg_sv) +#define PL_reg_sv_utf8 (vTHX->Treg_sv_utf8) #define PL_reg_whilem_seen (vTHX->Treg_whilem_seen) #define PL_regbol (vTHX->Tregbol) #define PL_regcc (vTHX->Tregcc) @@ -812,6 +813,7 @@ #define PL_reg_start_tmpl (aTHXo->interp.Treg_start_tmpl) #define PL_reg_starttry (aTHXo->interp.Treg_starttry) #define PL_reg_sv (aTHXo->interp.Treg_sv) +#define PL_reg_sv_utf8 (aTHXo->interp.Treg_sv_utf8) #define PL_reg_whilem_seen (aTHXo->interp.Treg_whilem_seen) #define PL_regbol (aTHXo->interp.Tregbol) #define PL_regcc (aTHXo->interp.Tregcc) @@ -1519,6 +1521,7 @@ #define PL_reg_start_tmpl (aTHX->Treg_start_tmpl) #define PL_reg_starttry (aTHX->Treg_starttry) #define PL_reg_sv (aTHX->Treg_sv) +#define PL_reg_sv_utf8 (aTHX->Treg_sv_utf8) #define PL_reg_whilem_seen (aTHX->Treg_whilem_seen) #define PL_regbol (aTHX->Tregbol) #define PL_regcc (aTHX->Tregcc) @@ -1657,6 +1660,7 @@ #define PL_Treg_start_tmpl PL_reg_start_tmpl #define PL_Treg_starttry PL_reg_starttry #define PL_Treg_sv PL_reg_sv +#define PL_Treg_sv_utf8 PL_reg_sv_utf8 #define PL_Treg_whilem_seen PL_reg_whilem_seen #define PL_Tregbol PL_regbol #define PL_Tregcc PL_regcc diff --git a/mg.c b/mg.c index ea9650c..07869e0 100644 --- a/mg.c +++ b/mg.c @@ -392,7 +392,7 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg) else /* @- */ i = s; - if (i > 0 && DO_UTF8(PL_reg_sv)) { + if (i > 0 && PL_reg_sv_utf8) { char *b = rx->subbeg; if (b) i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i)); @@ -433,7 +433,7 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) { i = t1 - s1; getlen: - if (i > 0 && DO_UTF8(PL_reg_sv)) { + if (i > 0 && PL_reg_sv_utf8) { char *s = rx->subbeg + s1; char *send = rx->subbeg + t1; @@ -666,7 +666,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) PL_tainted = FALSE; } sv_setpvn(sv, s, i); - if (PL_reg_sv && DO_UTF8(PL_reg_sv) && is_utf8_string((U8*)s, i)) + if (PL_reg_sv_utf8 && is_utf8_string((U8*)s, i)) SvUTF8_on(sv); else SvUTF8_off(sv); diff --git a/perlapi.h b/perlapi.h index 6a5a6c7..8c9bb5c 100644 --- a/perlapi.h +++ b/perlapi.h @@ -772,6 +772,8 @@ START_EXTERN_C #define PL_reg_starttry (*Perl_Treg_starttry_ptr(aTHXo)) #undef PL_reg_sv #define PL_reg_sv (*Perl_Treg_sv_ptr(aTHXo)) +#undef PL_reg_sv_utf8 +#define PL_reg_sv_utf8 (*Perl_Treg_sv_utf8_ptr(aTHXo)) #undef PL_reg_whilem_seen #define PL_reg_whilem_seen (*Perl_Treg_whilem_seen_ptr(aTHXo)) #undef PL_regbol diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 57e3f5c..dc7f320 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -1566,7 +1566,7 @@ Found in file perl.c Shuts down a Perl interpreter. See L. - void perl_destruct(PerlInterpreter* interp) + int perl_destruct(PerlInterpreter* interp) =for hackers Found in file perl.c diff --git a/pp.c b/pp.c index c0148b3..e470d1c 100644 --- a/pp.c +++ b/pp.c @@ -4055,6 +4055,8 @@ PP(pp_split) TAINT_IF((pm->op_pmflags & PMf_LOCALE) && (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); + PL_reg_sv_utf8 = do_utf8; + if (pm->op_pmreplroot) { #ifdef USE_ITHREADS ary = GvAVn((GV*)PL_curpad[(PADOFFSET)pm->op_pmreplroot]); diff --git a/pp_hot.c b/pp_hot.c index 0f4a693..d219776 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1222,7 +1222,7 @@ PP(pp_match) TARG = DEFSV; EXTEND(SP,1); } - PL_reg_sv = TARG; + PUTBACK; /* EVAL blocks need stack_sp. */ s = SvPV(TARG, len); strend = s + len; @@ -1232,6 +1232,8 @@ PP(pp_match) (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); TAINT_NOT; + PL_reg_sv_utf8 = DO_UTF8(TARG); + if (pm->op_pmdynflags & PMdf_USED) { failure: if (gimme == G_ARRAY) @@ -1398,7 +1400,7 @@ yup: /* Confirmed by INTUIT */ if (global) { rx->subbeg = truebase; rx->startp[0] = s - truebase; - if (DO_UTF8(PL_reg_sv)) { + if (PL_reg_sv_utf8) { char *t = (char*)utf8_hop((U8*)s, rx->minlen); rx->endp[0] = t - truebase; } @@ -1898,7 +1900,6 @@ PP(pp_subst) STRLEN len; int force_on_match = 0; I32 oldsave = PL_savestack_ix; - bool do_utf8; STRLEN slen; /* known replacement string? */ @@ -1909,8 +1910,7 @@ PP(pp_subst) TARG = DEFSV; EXTEND(SP,1); } - PL_reg_sv = TARG; - do_utf8 = DO_UTF8(PL_reg_sv); + if (SvFAKE(TARG) && SvREADONLY(TARG)) sv_force_normal(TARG); if (SvREADONLY(TARG) @@ -1928,12 +1928,14 @@ PP(pp_subst) rxtainted |= 2; TAINT_NOT; + PL_reg_sv_utf8 = DO_UTF8(TARG); + force_it: if (!pm || !s) DIE(aTHX_ "panic: pp_subst"); strend = s + len; - slen = do_utf8 ? utf8_length((U8*)s, (U8*)strend) : len; + slen = PL_reg_sv_utf8 ? utf8_length((U8*)s, (U8*)strend) : len; maxiters = 2 * slen + 10; /* We can match twice at each position, once with zero-length, second time with non-zero. */ diff --git a/regcomp.c b/regcomp.c index 18aa057..9877658 100644 --- a/regcomp.c +++ b/regcomp.c @@ -4735,6 +4735,7 @@ Perl_save_re_context(pTHX) SAVEVPTR(PL_reg_re); /* from regexec.c */ SAVEPPTR(PL_reg_ganch); /* from regexec.c */ SAVESPTR(PL_reg_sv); /* from regexec.c */ + SAVEI32(PL_reg_sv_utf8); /* from regexec.c */ SAVEVPTR(PL_reg_magic); /* from regexec.c */ SAVEI32(PL_reg_oldpos); /* from regexec.c */ SAVEVPTR(PL_reg_oldcurpm); /* from regexec.c */ diff --git a/regexec.c b/regexec.c index 3f062ed..4a19958 100644 --- a/regexec.c +++ b/regexec.c @@ -107,17 +107,17 @@ */ #define CHR_SVLEN(sv) (UTF ? sv_len_utf8(sv) : SvCUR(sv)) -#define CHR_DIST(a,b) (DO_UTF8(PL_reg_sv) ? utf8_distance(a,b) : a - b) +#define CHR_DIST(a,b) (PL_reg_sv_utf8 ? utf8_distance(a,b) : a - b) #define reghop_c(pos,off) ((char*)reghop((U8*)pos, off)) #define reghopmaybe_c(pos,off) ((char*)reghopmaybe((U8*)pos, off)) -#define HOP(pos,off) (DO_UTF8(PL_reg_sv) ? reghop((U8*)pos, off) : (U8*)(pos + off)) -#define HOPMAYBE(pos,off) (DO_UTF8(PL_reg_sv) ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off)) +#define HOP(pos,off) (PL_reg_sv_utf8 ? reghop((U8*)pos, off) : (U8*)(pos + off)) +#define HOPMAYBE(pos,off) (PL_reg_sv_utf8 ? reghopmaybe((U8*)pos, off) : (U8*)(pos + off)) #define HOPc(pos,off) ((char*)HOP(pos,off)) #define HOPMAYBEc(pos,off) ((char*)HOPMAYBE(pos,off)) #define HOPBACK(pos, off) ( \ - (UTF && DO_UTF8(PL_reg_sv)) \ + (UTF && PL_reg_sv_utf8) \ ? reghopmaybe((U8*)pos, -off) \ : (pos - off >= PL_bostr) \ ? (U8*)(pos - off) \ @@ -127,8 +127,8 @@ #define reghop3_c(pos,off,lim) ((char*)reghop3((U8*)pos, off, (U8*)lim)) #define reghopmaybe3_c(pos,off,lim) ((char*)reghopmaybe3((U8*)pos, off, (U8*)lim)) -#define HOP3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) -#define HOPMAYBE3(pos,off,lim) (DO_UTF8(PL_reg_sv) ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) +#define HOP3(pos,off,lim) (PL_reg_sv_utf8 ? reghop3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) +#define HOPMAYBE3(pos,off,lim) (PL_reg_sv_utf8 ? reghopmaybe3((U8*)pos, off, (U8*)lim) : (U8*)(pos + off)) #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim)) #define HOPMAYBE3c(pos,off,lim) ((char*)HOPMAYBE3(pos,off,lim)) @@ -878,7 +878,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta unsigned int c2; char *e; register I32 tmp = 1; /* Scratch variable? */ - register bool do_utf8 = DO_UTF8(PL_reg_sv); + register bool do_utf8 = PL_reg_sv_utf8; /* We know what class it must start with. */ switch (OP(c)) { @@ -2009,7 +2009,7 @@ S_regmatch(pTHX_ regnode *prog) #if 0 I32 firstcp = PL_savestack_ix; #endif - register bool do_utf8 = DO_UTF8(PL_reg_sv); + register bool do_utf8 = PL_reg_sv_utf8; #ifdef DEBUGGING PL_regindent++; @@ -3590,7 +3590,7 @@ S_regrepeat(pTHX_ regnode *p, I32 max) register I32 c; register char *loceol = PL_regeol; register I32 hardcount = 0; - register bool do_utf8 = DO_UTF8(PL_reg_sv); + register bool do_utf8 = PL_reg_sv_utf8; scan = PL_reginput; if (max != REG_INFTY && max < loceol - scan) @@ -3829,7 +3829,7 @@ S_regrepeat_hard(pTHX_ regnode *p, I32 max, I32 *lp) return 0; start = PL_reginput; - if (DO_UTF8(PL_reg_sv)) { + if (PL_reg_sv_utf8) { while (PL_reginput < loceol && (scan = PL_reginput, res = regmatch(p))) { if (!count++) { l = 0; diff --git a/sv.c b/sv.c index d157f71..e0a242e 100644 --- a/sv.c +++ b/sv.c @@ -10259,6 +10259,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_reg_re = (regexp*)NULL; PL_reg_ganch = Nullch; PL_reg_sv = Nullsv; + PL_reg_sv_utf8 = FALSE; PL_reg_magic = (MAGIC*)NULL; PL_reg_oldpos = 0; PL_reg_oldcurpm = (PMOP*)NULL; diff --git a/t/op/pat.t b/t/op/pat.t index d2d3205..478e299 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..684\n"; +print "1..686\n"; BEGIN { chdir 't' if -d 't'; @@ -1987,3 +1987,22 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; $c = pos; print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; } + +{ + package ID_20010407_006; + + sub x { + "a\x{1234}"; + } + + my $x = x; + my $y; + + $x =~ /(..)/; $y = $1; + print "not " unless length($y) == 2 && $y eq $x; + print "ok 685\n" if length($y) == 2; + + x =~ /(..)/; $y = $1; + print "not " unless length($y) == 2 && $y eq $x; + print "ok 686\n"; +} diff --git a/thrdvar.h b/thrdvar.h index 8e999fc..2dd74a6 100644 --- a/thrdvar.h +++ b/thrdvar.h @@ -256,4 +256,8 @@ PERLVAR(i, struct thread_intern) PERLVAR(trailing_nul, char) /* For the sake of thrsv and oursv */ PERLVAR(thr_done, bool) /* True when the thread has finished */ + #endif /* USE_THREADS */ + +PERLVAR(Treg_sv_utf8, bool) /* was what we matched against utf8 */ +