The Inaba patch for tr/// vs. use encoding
Dan Kogai [Mon, 21 Oct 2002 17:36:02 +0000 (02:36 +0900)]
Message-Id: <218B4434-E4D0-11D6-A668-0003939A104C@dan.co.jp>

p4raw-id: //depot/perl@18058

mg.c
regcomp.c
sv.c
toke.c

diff --git a/mg.c b/mg.c
index 8881f10..64f6497 100644 (file)
--- a/mg.c
+++ b/mg.c
@@ -1846,32 +1846,37 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
        DEBUG_x(dump_all());
        break;
     case '\005':  /* ^E */
-        if (*(mg->mg_ptr+1) == '\0') {
+       if (*(mg->mg_ptr+1) == '\0') {
 #ifdef MACOS_TRADITIONAL
-             gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
+           gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
 #else
 #  ifdef VMS
-             set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
+           set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
 #  else
 #    ifdef WIN32
-             SetLastError( SvIV(sv) );
+           SetLastError( SvIV(sv) );
 #    else
 #      ifdef OS2
-             os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
+           os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
 #      else
-             /* will anyone ever use this? */
-             SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4);
+           /* will anyone ever use this? */
+           SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4);
 #      endif
 #    endif
 #  endif
 #endif
-        }
-        else if (strEQ(mg->mg_ptr+1, "NCODING")) {
-            if (PL_encoding)
-                sv_setsv(PL_encoding, sv);
-            else
-                PL_encoding = newSVsv(sv);
-        }
+       }
+       else if (strEQ(mg->mg_ptr+1, "NCODING")) {
+           if (PL_encoding)
+               SvREFCNT_dec(PL_encoding);
+           if (SvOK(sv) || SvGMAGICAL(sv)) {
+               PL_encoding = newSVsv(sv);
+           }
+           else {
+               PL_encoding = Nullsv;
+           }
+       }
+       break;
     case '\006':       /* ^F */
        PL_maxsysfd = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
        break;
index c8b5d76..a4c2d43 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -3304,25 +3304,27 @@ tryagain:
     /* If the encoding pragma is in effect recode the text of
      * any EXACT-kind nodes. */
     if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
-        STRLEN oldlen = STR_LEN(ret);
-        SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
-
-        if (RExC_utf8)
-             SvUTF8_on(sv);
-        if (sv_utf8_downgrade(sv, TRUE)) {
-             char *s       = sv_recode_to_utf8(sv, PL_encoding);
-             STRLEN newlen = SvCUR(sv);
-        
-             if (!SIZE_ONLY) {
-                  DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
-                                        (int)oldlen, STRING(ret),
-                                        (int)newlen, s));
-                  Copy(s, STRING(ret), newlen, char);
-                  STR_LEN(ret) += newlen - oldlen;
-                  RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
-             } else
-                  RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
-        }
+       STRLEN oldlen = STR_LEN(ret);
+       SV *sv        = sv_2mortal(newSVpvn(STRING(ret), oldlen));
+
+       if (RExC_utf8)
+           SvUTF8_on(sv);
+       if (sv_utf8_downgrade(sv, TRUE)) {
+           char *s       = sv_recode_to_utf8(sv, PL_encoding);
+           STRLEN newlen = SvCUR(sv);
+
+           if (SvUTF8(sv))
+               RExC_utf8 = 1;
+           if (!SIZE_ONLY) {
+               DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
+                                     (int)oldlen, STRING(ret),
+                                     (int)newlen, s));
+               Copy(s, STRING(ret), newlen, char);
+               STR_LEN(ret) += newlen - oldlen;
+               RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
+           } else
+               RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
+       }
     }
 
     return(ret);
diff --git a/sv.c b/sv.c
index 35a7bd8..14dbc1e 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -10815,16 +10815,17 @@ char *
 Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
 {
     if (SvPOK(sv) && !DO_UTF8(sv) && SvROK(encoding)) {
-         SV *uni;
-         STRLEN len;
-         char *s;
-         dSP;
-         ENTER;
-         SAVETMPS;
-         PUSHMARK(sp);
-         EXTEND(SP, 3);
-         XPUSHs(encoding);
-         XPUSHs(sv);
+       int vary = FALSE;
+       SV *uni;
+       STRLEN len;
+       char *s;
+       dSP;
+       ENTER;
+       SAVETMPS;
+       PUSHMARK(sp);
+       EXTEND(SP, 3);
+       XPUSHs(encoding);
+       XPUSHs(sv);
 /* 
   NI-S 2002/07/09
   Passing sv_yes is wrong - it needs to be or'ed set of constants
@@ -10833,23 +10834,32 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
 
   Both will default the value - let them.
   
-         XPUSHs(&PL_sv_yes);
+       XPUSHs(&PL_sv_yes);
 */
-         PUTBACK;
-         call_method("decode", G_SCALAR);
-         SPAGAIN;
-         uni = POPs;
-         PUTBACK;
-         s = SvPV(uni, len);
-         if (s != SvPVX(sv)) {
-              SvGROW(sv, len + 1);
-              Move(s, SvPVX(sv), len, char);
-              SvCUR_set(sv, len);
-              SvPVX(sv)[len] = 0;      
-         }
-         FREETMPS;
-         LEAVE;
-         SvUTF8_on(sv);
+       PUTBACK;
+       call_method("decode", G_SCALAR);
+       SPAGAIN;
+       uni = POPs;
+       PUTBACK;
+       s = SvPV(uni, len);
+       {
+           U8 *t = (U8 *)s, *e = (U8 *)s + len;
+           while (t < e) {
+               if ((vary = !UTF8_IS_INVARIANT(*t++)))
+                   break;
+           }
+       }
+       if (s != SvPVX(sv)) {
+           SvGROW(sv, len + 1);
+           Move(s, SvPVX(sv), len, char);
+           SvCUR_set(sv, len);
+           SvPVX(sv)[len] = 0; 
+       }
+       FREETMPS;
+       LEAVE;
+       if (vary)
+           SvUTF8_on(sv);
+       SvUTF8_on(sv);
     }
     return SvPVX(sv);
 }
diff --git a/toke.c b/toke.c
index b4c886f..d95b0a7 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1674,17 +1674,18 @@ S_scan_const(pTHX_ char *start)
     *d = '\0';
     SvCUR_set(sv, d - SvPVX(sv));
     if (SvCUR(sv) >= SvLEN(sv))
-      Perl_croak(aTHX_ "panic: constant overflowed allocated space");
+       Perl_croak(aTHX_ "panic: constant overflowed allocated space");
 
     SvPOK_on(sv);
     if (PL_encoding && !has_utf8) {
-        sv_recode_to_utf8(sv, PL_encoding);
-        has_utf8 = TRUE;
+       sv_recode_to_utf8(sv, PL_encoding);
+       if (SvUTF8(sv))
+           has_utf8 = TRUE;
     }
     if (has_utf8) {
        SvUTF8_on(sv);
        if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
-               PL_sublex_info.sub_op->op_private |=
+           PL_sublex_info.sub_op->op_private |=
                    (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
        }
     }
@@ -7032,6 +7033,9 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
        sv_catpvn(sv, s, 1);
     if (has_utf8)
        SvUTF8_on(sv);
+    else if (PL_encoding)
+       sv_recode_to_utf8(sv, PL_encoding);
+
     PL_multi_end = CopLINE(PL_curcop);
     s++;