Polymorphic regexps.
[p5sagit/p5-mst-13.2.git] / t / pragma / utf8.t
index 6986720..89416dc 100755 (executable)
@@ -10,7 +10,7 @@ BEGIN {
     }
 }
 
-print "1..90\n";
+print "1..104\n";
 
 my $test = 1;
 
@@ -42,6 +42,7 @@ sub nok_bytes {
 
 {
     use utf8;
+
     $_ = ">\x{263A}<"; 
     s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; 
     ok $_, '>&#9786;<';
@@ -106,212 +107,191 @@ sub nok_bytes {
 }
 
 {
-    use utf8;
-
-    $_ = "\x{263A}>\x{263A}\x{263A}"; 
-
-    ok length, 4;
-    $test++;                           # 13
-
-    ok length((m/>(.)/)[0]), 1;
-    $test++;                           # 14
-
-    ok length($&), 2;
-    $test++;                           # 15
+    # no use utf8 needed
+    $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
+    
+    ok length($_), 6;                  # 13
+    $test++;
 
-    ok length($'), 1;
-    $test++;                           # 16
+    ($a) = m/x(.)/;
 
-    ok length($`), 1;
-    $test++;                           # 17
+    ok length($a), 1;                  # 14
+    $test++;
 
-    ok length($1), 1;
-    $test++;                           # 18
+    ok length($`), 2;                  # 15
+    $test++;
+    ok length($&), 2;                  # 16
+    $test++;
+    ok length($'), 2;                  # 17
+    $test++;
 
-    ok length($tmp=$&), 2;
-    $test++;                           # 19
+    ok length($1), 1;                  # 18
+    $test++;
 
-    ok length($tmp=$'), 1;
-    $test++;                           # 20
+    ok length($b=$`), 2;               # 19
+    $test++;
 
-    ok length($tmp=$`), 1;
-    $test++;                           # 21
+    ok length($b=$&), 2;               # 20
+    $test++;
 
-    ok length($tmp=$1), 1;
-    $test++;                           # 22
+    ok length($b=$'), 2;               # 21
+    $test++;
 
-    {
-       use bytes;
+    ok length($b=$1), 1;               # 22
+    $test++;
 
-       my $tmp = $&;
-       ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
-       $test++;                                # 23
+    ok $a, "\x{263A}";                 # 23
+    $test++;
 
-       $tmp = $';
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 24
+    ok $`, "\x{263A}\x{263A}";         # 24
+    $test++;
 
-       $tmp = $`;
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 25
+    ok $&, "x\x{263A}";                        # 25
+    $test++;
 
-       $tmp = $1;
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 26
-    }
+    ok $', "y\x{263A}";                        # 26
+    $test++;
 
-    ok_bytes $&, pack("C*", ord(">"), 0342, 0230, 0272);
-    $test++;                           # 27
+    ok $1, "\x{263A}";                 # 27
+    $test++;
 
-    ok_bytes $', pack("C*", 0342, 0230, 0272);
-    $test++;                           # 28
+    ok_bytes $a, "\342\230\272";       # 28
+    $test++;
 
-    ok_bytes $`, pack("C*", 0342, 0230, 0272);
-    $test++;                           # 29
+    ok_bytes $1, "\342\230\272";       # 29
+    $test++;
 
-    ok_bytes $1, pack("C*", 0342, 0230, 0272);
-    $test++;                           # 30
+    ok_bytes $&, "x\342\230\272";      # 30
+    $test++;
 
     {
-       use bytes;
-       no utf8;
-
-       ok length, 10;
-       $test++;                                # 31
+       use utf8; # required
+       $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
+    }
 
-       ok length((m/>(.)/)[0]), 1;
-       $test++;                                # 32
+    ok length($_), 6;                  # 31
+    $test++;
 
-       ok length($&), 2;
-       $test++;                                # 33
+    ($a) = m/x(.)/;
 
-       ok length($'), 5;
-       $test++;                                # 34
+    ok length($a), 1;                  # 32
+    $test++;
 
-       ok length($`), 3;
-       $test++;                                # 35
+    ok length($`), 2;                  # 33
+    $test++;
 
-       ok length($1), 1;
-       $test++;                                # 36
+    ok length($&), 2;                  # 34
+    $test++;
 
-       ok $&, pack("C*", ord(">"), 0342);
-       $test++;                                # 37
+    ok length($'), 2;                  # 35
+    $test++;
 
-       ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
-       $test++;                                # 38
+    ok length($1), 1;                  # 36
+    $test++;
 
-       ok $`, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 39
+    ok length($b=$`), 2;               # 37
+    $test++;
 
-       ok $1, pack("C*", 0342);
-       $test++;                                # 40
-    }
+    ok length($b=$&), 2;               # 38
+    $test++;
 
-    {
-       no utf8;
-       $_="\342\230\272>\342\230\272\342\230\272";
-    }
+    ok length($b=$'), 2;               # 39
+    $test++;
 
-    ok length, 10;
-    $test++;                           # 41
+    ok length($b=$1), 1;               # 40
+    $test++;
 
-    ok length((m/>(.)/)[0]), 1;
-    $test++;                           # 42
+    ok $a, "\x{263A}";                 # 41
+    $test++;
 
-    ok length($&), 2;
-    $test++;                           # 43
+    ok $`, "\x{263A}\x{263A}";         # 42
+    $test++;
 
-    ok length($'), 1;
-    $test++;                           # 44
+    ok $&, "x\x{263A}";                        # 43
+    $test++;
 
-    ok length($`), 1;
-    $test++;                           # 45
+    ok $', "y\x{263A}";                        # 44
+    $test++;
 
-    ok length($1), 1;
-    $test++;                           # 46
+    ok $1, "\x{263A}";                 # 45
+    $test++;
 
-    ok length($tmp=$&), 2;
-    $test++;                           # 47
+    ok_bytes $a, "\342\230\272";       # 46
+    $test++;
 
-    ok length($tmp=$'), 1;
-    $test++;                           # 48
+    ok_bytes $1, "\342\230\272";       # 47
+    $test++;
 
-    ok length($tmp=$`), 1;
-    $test++;                           # 49
+    ok_bytes $&, "x\342\230\272";      # 48
+    $test++;
 
-    ok length($tmp=$1), 1;
-    $test++;                           # 50
+    $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
 
-    {
-       use bytes;
+    ok length($_), 14;                 # 49
+    $test++;
 
-        my $tmp = $&;
-       ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
-       $test++;                                # 51
+    ($a) = m/x(.)/;
 
-        $tmp = $';
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 52
+    ok length($a), 1;                  # 50
+    $test++;
 
-        $tmp = $`;
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 53
+    ok length($`), 6;                  # 51
+    $test++;
 
-        $tmp = $1;
-       ok $tmp, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 54
-    }
+    ok length($&), 2;                  # 52
+    $test++;
 
-    {
-       use bytes;
-       no utf8;
+    ok length($'), 6;                  # 53
+    $test++;
 
-       ok length, 10;
-       $test++;                                # 55
+    ok length($1), 1;                  # 54
+    $test++;
 
-       ok length((m/>(.)/)[0]), 1;
-       $test++;                                # 56
+    ok length($b=$`), 6;               # 55
+    $test++;
 
-       ok length($&), 2;
-       $test++;                                # 57
+    ok length($b=$&), 2;               # 56
+    $test++;
 
-       ok length($'), 5;
-       $test++;                                # 58
+    ok length($b=$'), 6;               # 57
+    $test++;
 
-       ok length($`), 3;
-       $test++;                                # 59
+    ok length($b=$1), 1;               # 58
+    $test++;
 
-       ok length($1), 1;
-       $test++;                                # 60
+    ok $a, "\342";                     # 59
+    $test++;
 
-       ok $&, pack("C*", ord(">"), 0342);
-       $test++;                                # 61
+    ok $`, "\342\230\272\342\230\272"; # 60
+    $test++;
 
-       ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
-       $test++;                                # 62
+    ok $&, "x\342";                    # 61
+    $test++;
 
-       ok $`, pack("C*", 0342, 0230, 0272);
-       $test++;                                # 63
+    ok $', "\230\272y\342\230\272";    # 62
+    $test++;
 
-       ok $1, pack("C*", 0342);
-       $test++;                                # 64
-    }
+    ok $1, "\342";                     # 63
+    $test++;
+}
 
+{
+    use utf8;
     ok "\x{ab}" =~ /^\x{ab}$/, 1;
-    $test++;                                   # 65
+    $test++;                           # 64
 }
 
 {
     use utf8;
     ok_bytes chr(0xe2), pack("C*", 0xc3, 0xa2);
-    $test++;                # 66
+    $test++;                # 65
 }
 
 {
     use utf8;
     my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
     ok "@a", "1234 123 2345";
-    $test++;                # 67
+    $test++;                # 66
 }
 
 {
@@ -319,7 +299,7 @@ sub nok_bytes {
     my $x = chr(123);
     my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
     ok "@a", "1234 2345";
-    $test++;                # 68
+    $test++;                # 67
 }
 
 {
@@ -331,10 +311,10 @@ sub nok_bytes {
     { use utf8;  $b = "\xe4"     } # \xXX must not produce UTF-8
 
     print "not " if $a eq $b;
-    print "ok $test\n"; $test++;
+    print "ok $test\n"; $test++;       # 68
 
     { use utf8; print "not " if $a eq $b; }
-    print "ok $test\n"; $test++;
+    print "ok $test\n"; $test++;       # 69
 }
 
 {
@@ -344,7 +324,7 @@ sub nok_bytes {
     for (@x) {
        s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
        my($latin) = /^(.+)(?:\s+\d)/;
-       print $latin eq "stra\337e" ? "ok $test\n" :
+       print $latin eq "stra\337e" ? "ok $test\n" :    # 70, 71
            "#latin[$latin]\nnot ok $test\n";
        $test++;
        $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
@@ -369,7 +349,7 @@ sub nok_bytes {
     }
 
     print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
-    print "ok $test\n";
+    print "ok $test\n";                        # 72
     $test++;
 }
 
@@ -384,27 +364,27 @@ sub nok_bytes {
     print "not "
        unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
     print "ok $test\n";
-    $test++;
+    $test++;                           # 73
 
     my ($a, $b) = split(/\x{100}/, $s);
     print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
     print "ok $test\n";
-    $test++;
+    $test++;                           # 74
 
     my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
     print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
     print "ok $test\n";
-    $test++;
+    $test++;                           # 75
 
     my ($a, $b) = split(/\x40\x{80}/, $s);
     print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
     print "ok $test\n";
-    $test++;
+    $test++;                           # 76
 
     my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
     print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
     print "ok $test\n";
-    $test++;
+    $test++;                           # 77
 }
 
 {
@@ -414,14 +394,14 @@ sub nok_bytes {
 
     my $smiley = "\x{263a}";
 
-    for my $s ("\x{263a}",                     #  1
-              $smiley,                        #  2
+    for my $s ("\x{263a}",                     # 78
+              $smiley,                        # 79
                
-              "" . $smiley,                   #  3
-              "" . "\x{263a}",                #  4
+              "" . $smiley,                   # 80
+              "" . "\x{263a}",                # 81
 
-              $smiley    . "",                #  5
-              "\x{263a}" . "",                #  6
+              $smiley    . "",                # 82
+              "\x{263a}" . "",                # 83
               ) {
        my $length_chars = length($s);
        my $length_bytes;
@@ -437,14 +417,14 @@ sub nok_bytes {
        $test++;
     }
 
-    for my $s ("\x{263a}" . "\x{263a}",        #  7
-              $smiley    . $smiley,           #  8
+    for my $s ("\x{263a}" . "\x{263a}",        # 84
+              $smiley    . $smiley,           # 85
 
-              "\x{263a}\x{263a}",             #  9
-              "$smiley$smiley",               # 10
+              "\x{263a}\x{263a}",             # 86
+              "$smiley$smiley",               # 87
               
-              "\x{263a}" x 2,                 # 11
-              $smiley    x 2,                 # 12
+              "\x{263a}" x 2,                 # 88
+              $smiley    x 2,                 # 89
               ) {
        my $length_chars = length($s);
        my $length_bytes;
@@ -460,3 +440,106 @@ sub nok_bytes {
        $test++;
     }
 }
+
+{
+    use utf8;
+
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok $test\n";
+    $test++;                                   # 90
+
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok $test\n";
+    $test++;                                   # 91
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok $test\n";
+    $test++;                                   # 92
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok $test\n";
+    $test++;                                   # 93
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok $test\n";
+    $test++;                                   # 94
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok $test\n";
+    $test++;                                   # 95
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok $test\n";
+    $test++;                                   # 96
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok $test\n";
+    $test++;                                   # 97
+}
+
+{
+    # the first half of 20001028.003
+
+    my $X = chr(1448);
+    my ($Y) = $X =~ /(.*)/;
+    print "not " unless length $Y == 1;
+    print "ok $test\n";
+    $test++;                                   # 98
+}
+
+{
+    # 20001108.001
+
+    use utf8;
+    my $X = "Szab\x{f3},Bal\x{e1}zs";
+    my $Y = $X;
+    $Y =~ s/(B)/$1/ for 0..3;
+    print "not " unless $Y eq $X;
+    print "ok $test\n";
+    $test++;                                   # 99
+}
+
+{
+    # 20001114.001     
+
+    use utf8;
+    use charnames ':full';
+    my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
+    print "not " unless ord($text) == 0xc4;
+    print "ok $test\n";
+    $test++;                                    # 100
+}
+
+{
+    # 20001205.014
+
+    use utf8;
+
+    my $a = "ABC\x{263A}";
+
+    my @b = split( //, $a );
+
+    print "not " unless @b == 4;
+    print "ok $test\n";
+    $test++;                                    # 101
+
+    print "not " unless length($b[3]) == 1;
+    print "ok $test\n";
+    $test++;                                    # 102
+
+    $a =~ s/^A/Z/;
+    print "not " unless length($a) == 4;
+    print "ok $test\n";
+    $test++;                                    # 103
+}
+
+{
+    # the second half of 20001028.003
+
+    use utf8;
+    $X =~ s/^/chr(1488)/e;
+    print "not " unless length $X == 1;
+    print "ok $test\n";
+    $test++;                                   # 104
+}
+