From: Dan Kogai Date: Mon, 21 Oct 2002 17:36:02 +0000 (+0900) Subject: The Inaba patch for tr/// vs. use encoding X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d0063567ae6829f18fa94be9ac4f0b3986e32f5a;p=p5sagit%2Fp5-mst-13.2.git The Inaba patch for tr/// vs. use encoding Message-Id: <218B4434-E4D0-11D6-A668-0003939A104C@dan.co.jp> p4raw-id: //depot/perl@18058 --- diff --git a/mg.c b/mg.c index 8881f10..64f6497 100644 --- a/mg.c +++ b/mg.c @@ -1846,32 +1846,37 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) DEBUG_x(dump_all()); break; case '\005': /* ^E */ - if (*(mg->mg_ptr+1) == '\0') { + if (*(mg->mg_ptr+1) == '\0') { #ifdef MACOS_TRADITIONAL - gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv); + gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv); #else # ifdef VMS - set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); + set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); # else # ifdef WIN32 - SetLastError( SvIV(sv) ); + SetLastError( SvIV(sv) ); # else # ifdef OS2 - os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); + os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); # else - /* will anyone ever use this? */ - SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4); + /* will anyone ever use this? */ + SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4); # endif # endif # endif #endif - } - else if (strEQ(mg->mg_ptr+1, "NCODING")) { - if (PL_encoding) - sv_setsv(PL_encoding, sv); - else - PL_encoding = newSVsv(sv); - } + } + else if (strEQ(mg->mg_ptr+1, "NCODING")) { + if (PL_encoding) + SvREFCNT_dec(PL_encoding); + if (SvOK(sv) || SvGMAGICAL(sv)) { + PL_encoding = newSVsv(sv); + } + else { + PL_encoding = Nullsv; + } + } + break; case '\006': /* ^F */ PL_maxsysfd = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv); break; diff --git a/regcomp.c b/regcomp.c index c8b5d76..a4c2d43 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3304,25 +3304,27 @@ tryagain: /* If the encoding pragma is in effect recode the text of * any EXACT-kind nodes. */ if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) { - STRLEN oldlen = STR_LEN(ret); - SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); - - if (RExC_utf8) - SvUTF8_on(sv); - if (sv_utf8_downgrade(sv, TRUE)) { - char *s = sv_recode_to_utf8(sv, PL_encoding); - STRLEN newlen = SvCUR(sv); - - if (!SIZE_ONLY) { - DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", - (int)oldlen, STRING(ret), - (int)newlen, s)); - Copy(s, STRING(ret), newlen, char); - STR_LEN(ret) += newlen - oldlen; - RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); - } else - RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); - } + STRLEN oldlen = STR_LEN(ret); + SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen)); + + if (RExC_utf8) + SvUTF8_on(sv); + if (sv_utf8_downgrade(sv, TRUE)) { + char *s = sv_recode_to_utf8(sv, PL_encoding); + STRLEN newlen = SvCUR(sv); + + if (SvUTF8(sv)) + RExC_utf8 = 1; + if (!SIZE_ONLY) { + DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n", + (int)oldlen, STRING(ret), + (int)newlen, s)); + Copy(s, STRING(ret), newlen, char); + STR_LEN(ret) += newlen - oldlen; + RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen); + } else + RExC_size += STR_SZ(newlen) - STR_SZ(oldlen); + } } return(ret); diff --git a/sv.c b/sv.c index 35a7bd8..14dbc1e 100644 --- a/sv.c +++ b/sv.c @@ -10815,16 +10815,17 @@ char * Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) { if (SvPOK(sv) && !DO_UTF8(sv) && SvROK(encoding)) { - SV *uni; - STRLEN len; - char *s; - dSP; - ENTER; - SAVETMPS; - PUSHMARK(sp); - EXTEND(SP, 3); - XPUSHs(encoding); - XPUSHs(sv); + int vary = FALSE; + SV *uni; + STRLEN len; + char *s; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 3); + XPUSHs(encoding); + XPUSHs(sv); /* NI-S 2002/07/09 Passing sv_yes is wrong - it needs to be or'ed set of constants @@ -10833,23 +10834,32 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) Both will default the value - let them. - XPUSHs(&PL_sv_yes); + XPUSHs(&PL_sv_yes); */ - PUTBACK; - call_method("decode", G_SCALAR); - SPAGAIN; - uni = POPs; - PUTBACK; - s = SvPV(uni, len); - if (s != SvPVX(sv)) { - SvGROW(sv, len + 1); - Move(s, SvPVX(sv), len, char); - SvCUR_set(sv, len); - SvPVX(sv)[len] = 0; - } - FREETMPS; - LEAVE; - SvUTF8_on(sv); + PUTBACK; + call_method("decode", G_SCALAR); + SPAGAIN; + uni = POPs; + PUTBACK; + s = SvPV(uni, len); + { + U8 *t = (U8 *)s, *e = (U8 *)s + len; + while (t < e) { + if ((vary = !UTF8_IS_INVARIANT(*t++))) + break; + } + } + if (s != SvPVX(sv)) { + SvGROW(sv, len + 1); + Move(s, SvPVX(sv), len, char); + SvCUR_set(sv, len); + SvPVX(sv)[len] = 0; + } + FREETMPS; + LEAVE; + if (vary) + SvUTF8_on(sv); + SvUTF8_on(sv); } return SvPVX(sv); } diff --git a/toke.c b/toke.c index b4c886f..d95b0a7 100644 --- a/toke.c +++ b/toke.c @@ -1674,17 +1674,18 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); if (SvCUR(sv) >= SvLEN(sv)) - Perl_croak(aTHX_ "panic: constant overflowed allocated space"); + Perl_croak(aTHX_ "panic: constant overflowed allocated space"); SvPOK_on(sv); if (PL_encoding && !has_utf8) { - sv_recode_to_utf8(sv, PL_encoding); - has_utf8 = TRUE; + sv_recode_to_utf8(sv, PL_encoding); + if (SvUTF8(sv)) + has_utf8 = TRUE; } if (has_utf8) { SvUTF8_on(sv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { - PL_sublex_info.sub_op->op_private |= + PL_sublex_info.sub_op->op_private |= (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); } } @@ -7032,6 +7033,9 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) sv_catpvn(sv, s, 1); if (has_utf8) SvUTF8_on(sv); + else if (PL_encoding) + sv_recode_to_utf8(sv, PL_encoding); + PL_multi_end = CopLINE(PL_curcop); s++;