SYN SYN

[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t

index d1546fe..c3538c0 100755 (executable)
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -2,7 +2,7 @@
 
 BEGIN {
     chdir 't' if -d 't';
-    unshift @INC, '../lib';
+    @INC = '../lib';
     $ENV{PERL5LIB} = '../lib';
     if ( ord("\t") != 9 ) { # skip on ebcdic platforms
         print "1..0 # Skip utf8 tests on ebcdic platform.\n";
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..66\n";
+print "1..99\n";
 
 my $test = 1;
 
@@ -20,6 +20,12 @@ sub ok {
     print "ok $test\n";
 }
 
+sub nok {
+    my ($got,$expect) = @_;
+    print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
+    print "ok $test\n";
+}
+
 sub ok_bytes {
     use bytes;
     my ($got,$expect) = @_;
@@ -27,6 +33,12 @@ sub ok_bytes {
     print "ok $test\n";
 }
 
+sub nok_bytes {
+    use bytes;
+    my ($got,$expect) = @_;
+    print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
+    print "ok $test\n";
+}
 
 {
     use utf8;
@@ -295,3 +307,221 @@ sub ok_bytes {
     ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
     $test++;                # 66
 }
+
+{
+    use utf8;
+    my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
+    ok "@a", "1234 123 2345";
+    $test++;                # 67
+}
+
+{
+    use utf8;
+    my $x = chr(123);
+    my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
+    ok "@a", "1234 2345";
+    $test++;                # 68
+}
+
+{
+    # bug id 20001009.001
+
+    my($a,$b);
+    { use bytes; $a = "\xc3\xa4"; }  
+    { use utf8;  $b = "\xe4"; }
+    { use bytes; ok_bytes $a, $b; $test++; } # 69
+    { use utf8;  nok      $a, $b; $test++; } # 70
+}
+
+{
+    # bug id 20001008.001
+
+    my @x = ("stra\337e 138","stra\337e 138");
+    for (@x) {
+       s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
+       my($latin) = /^(.+)(?:\s+\d)/;
+       print $latin eq "stra\337e" ? "ok $test\n" :
+           "#latin[$latin]\nnot ok $test\n";
+       $test++;
+       $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
+       use utf8;
+       $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
+    }
+}
+
+{
+    # bug id 20000819.004 
+
+    $_ = $dx = "\x{10f2}";
+    s/($dx)/$dx$1/;
+    {
+       use bytes;
+       print "not " unless $_ eq "$dx$dx";
+       print "ok $test\n";
+       $test++;
+    }
+
+    $_ = $dx = "\x{10f2}";
+    s/($dx)/$1$dx/;
+    {
+       use bytes;
+       print "not " unless $_ eq "$dx$dx";
+       print "ok $test\n";
+       $test++;
+    }
+
+    $dx = "\x{10f2}";
+    $_  = "\x{10f2}\x{10f2}";
+    s/($dx)($dx)/$1$2/;
+    {
+       use bytes;
+       print "not " unless $_ eq "$dx$dx";
+       print "ok $test\n";
+       $test++;
+    }
+}
+
+{
+    # bug id 20000323.056
+
+    use utf8;
+
+    print "not " unless "\x{41}" eq +v65;
+    print "ok $test\n";
+    $test++;
+
+    print "not " unless "\x41" eq +v65;
+    print "ok $test\n";
+    $test++;
+
+    print "not " unless "\x{c8}" eq +v200;
+    print "ok $test\n";
+    $test++;
+
+    print "not " unless "\xc8" eq +v200;
+    print "ok $test\n";
+    $test++;
+
+    print "not " unless "\x{221b}" eq v8731;
+    print "ok $test\n";
+    $test++;
+}
+
+{
+    # bug id 20000427.003 
+
+    use utf8;
+    use warnings;
+    use strict;
+
+    my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
+
+    my @charlist = split //, $sushi;
+    my $r = '';
+    foreach my $ch (@charlist) {
+       $r = $r . " " . sprintf "U+%04X", ord($ch);
+    }
+
+    print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
+    print "ok $test\n";
+    $test++;
+}
+
+{
+    # bug id 20000901.092
+    # test that undef left and right of utf8 results in a valid string
+
+    my $a;
+    $a .= "\x{1ff}";
+    print "not " unless $a eq "\x{1ff}";
+    print "ok $test\n";
+    $test++;
+}
+
+{
+    # bug id 20000426.003
+
+    use utf8;
+
+    my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
+
+    my ($a, $b, $c) = split(/\x40/, $s);
+    print "not "
+       unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
+    print "ok $test\n";
+    $test++;
+
+    my ($a, $b) = split(/\x{100}/, $s);
+    print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
+    print "ok $test\n";
+    $test++;
+
+    my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
+    print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
+    print "ok $test\n";
+    $test++;
+
+    my ($a, $b) = split(/\x40\x{80}/, $s);
+    print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
+    print "ok $test\n";
+    $test++;
+
+    my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
+    print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
+    print "ok $test\n";
+    $test++;
+}
+
+{
+    # bug id 20000730.004
+
+    use utf8;
+
+    my $smiley = "\x{263a}";
+
+    for my $s ("\x{263a}",                     #  1
+              $smiley,                        #  2
+               
+              "" . $smiley,                   #  3
+              "" . "\x{263a}",                #  4
+
+              $smiley    . "",                #  5
+              "\x{263a}" . "",                #  6
+              ) {
+       my $length_chars = length($s);
+       my $length_bytes;
+       { use bytes; $length_bytes = length($s) }
+       my @regex_chars = $s =~ m/(.)/g;
+       my $regex_chars = @regex_chars;
+       my @split_chars = split //, $s;
+       my $split_chars = @split_chars;
+       print "not "
+           unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+                  "1/1/1/3";
+       print "ok $test\n";
+       $test++;
+    }
+
+    for my $s ("\x{263a}" . "\x{263a}",        #  7
+              $smiley    . $smiley,           #  8
+
+              "\x{263a}\x{263a}",             #  9
+              "$smiley$smiley",               # 10
+              
+              "\x{263a}" x 2,                 # 11
+              $smiley    x 2,                 # 12
+              ) {
+       my $length_chars = length($s);
+       my $length_bytes;
+       { use bytes; $length_bytes = length($s) }
+       my @regex_chars = $s =~ m/(.)/g;
+       my $regex_chars = @regex_chars;
+       my @split_chars = split //, $s;
+       my $split_chars = @split_chars;
+       print "not "
+           unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+                  "2/2/2/6";
+       print "ok $test\n";
+       $test++;
+    }
+}