Further Unicode formats patching from Inaba Hiroto.
Jarkko Hietaniemi [Sun, 2 Mar 2003 07:21:36 +0000 (07:21 +0000)]
p4raw-id: //depot/perl@18794

MANIFEST
pp_ctl.c
t/uni/write.t [new file with mode: 0644]

index 341365f..6d583a2 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -2722,6 +2722,7 @@ t/uni/tr_eucjp.t          See if Unicode tr/// works
 t/uni/tr_sjis.t                        See if Unicode tr/// works
 t/uni/tr_utf8.t                        See if Unicode tr/// works
 t/uni/upper.t                  See if Unicode casing works
+t/uni/write.t                  See if Unicode formats work
 t/win32/longpath.t             Test if Win32::GetLongPathName() works
 t/win32/system.t               See if system works in Win*
 t/win32/system_tests           Test runner for system.t
index e22297e..763da06 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -357,6 +357,7 @@ PP(pp_formline)
     STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1;
     bool item_is_utf8 = FALSE;
     bool targ_is_utf8 = FALSE;
+    SV * nsv = Nullsv;
 
     if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
        if (SvREADONLY(tmpForm)) {
@@ -417,15 +418,10 @@ PP(pp_formline)
        case FF_LITERAL:
            arg = *fpc++;
            if (targ_is_utf8 && !SvUTF8(tmpForm)) {
-               while (arg--) {
-                   if (!NATIVE_IS_INVARIANT(*f)) {
-                       U8 ch = NATIVE_TO_ASCII(*f++);
-                       *t++ = (U8)UTF8_EIGHT_BIT_HI(ch);
-                       *t++ = (U8)UTF8_EIGHT_BIT_LO(ch);
-                   }
-                   else 
-                       *t++ = *f++;
-               }
+               SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+               *t = '\0';
+               sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv);
+               t = SvEND(PL_formtarget);
                break;
            }
            if (!targ_is_utf8 && DO_UTF8(tmpForm)) {
@@ -635,6 +631,21 @@ PP(pp_formline)
                }
                break;
            }
+           if (targ_is_utf8 && !item_is_utf8) {
+               SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+               *t = '\0';
+               sv_catpvn_utf8_upgrade(PL_formtarget, s, arg, nsv);
+               for (; t < SvEND(PL_formtarget); t++) {
+#ifdef EBCDIC
+                   int ch = *t++ = *s++;
+                   if (iscntrl(ch))
+#else
+                   if (!(*t & ~31))
+#endif
+                       *t = ' ';
+               }
+               break;
+           }
            while (arg--) {
 #ifdef EBCDIC
                int ch = *t++ = *s++;
diff --git a/t/uni/write.t b/t/uni/write.t
new file mode 100644 (file)
index 0000000..95c3bbb
--- /dev/null
@@ -0,0 +1,96 @@
+#!./perl -w
+use strict;
+
+BEGIN {
+    chdir 't' if -d 't';
+    @INC = qw(../lib .);
+    require "test.pl";
+}
+
+plan tests => 6;
+
+# Some tests for UTF8 and format/write
+
+our ($bitem1, $uitem1) = ("\x{ff}", "\x{100}");
+our ($bitem2, $uitem2) = ("\x{fe}", "\x{101}");
+our ($blite1, $ulite1) = ("\x{fd}", "\x{102}");
+our ($blite2, $ulite2) = ("\x{fc}", "\x{103}");
+our ($bmulti, $umulti) = ("\x{fb}\n\x{fa}\n\x{f9}\n",
+                         "\x{104}\n\x{105}\n\x{106}\n");
+
+sub fmwrtest {
+  no strict 'refs';
+  my ($out, $format, $expect, $name) = @_;
+  eval "format $out =\n$format.\n"; die $@ if $@;
+  open $out, '>:utf8', 'Uni_write.tmp' or die "Can't create Uni_write.tmp";
+  write $out;
+  close $out or die "Could not close $out: $!";
+
+  open UIN, '<:utf8', 'Uni_write.tmp' or die "Can't open Uni_write.tmp";;
+  my $result = do { local $/; <UIN>; };
+  close UIN;
+
+  is($result, $expect, $name);
+}
+
+fmwrtest OUT1 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (1)";
+$blite1 @<<
+\$uitem1
+$blite2 @<<
+\$bitem2
+EOFORMAT
+$blite1 $uitem1
+$blite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT2 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (2)";
+$blite1 @<<
+\$bitem1
+$blite2 @<<
+\$uitem2
+EOFORMAT
+$blite1 $bitem1
+$blite2 $uitem2
+EOEXPECT
+
+fmwrtest OUT3 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (1)";
+$ulite1 @<<
+\$bitem1
+$blite2 @<<
+\$bitem2
+EOFORMAT
+$ulite1 $bitem1
+$blite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT4 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (2)";
+$blite1 @<<
+\$bitem1
+$ulite2 @<<
+\$bitem2
+EOFORMAT
+$blite1 $bitem1
+$ulite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT5 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 multiline";
+$blite1
+@*
+\$umulti
+$blite2
+EOFORMAT
+$blite1
+$umulti$blite2
+EOEXPECT
+
+fmwrtest OUT6 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 multiline";
+$ulite1
+@*
+\$bmulti
+$blite2
+EOFORMAT
+$ulite1
+$bmulti$blite2
+EOEXPECT
+
+unlink 'Uni_write.tmp';