From: Karl Williamson Date: Fri, 20 Nov 2009 18:02:01 +0000 (-0700) Subject: Make unicode semantics the default X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=61fc5122f0d8a509834282b8ecb3252d2e4c9f5d;p=p5sagit%2Fp5-mst-13.2.git Make unicode semantics the default --- diff --git a/lib/legacy.pm b/lib/legacy.pm index 66ddc00..67f287f 100755 --- a/lib/legacy.pm +++ b/lib/legacy.pm @@ -2,7 +2,7 @@ package legacy; our $VERSION = '1.00'; -$unicode8bit::hint_uni8bit = 0x00000800; +$unicode8bit::hint_not_uni8bit = 0x00000800; my %legacy_bundle = ( "5.10" => [qw(unicode8bit)], @@ -156,7 +156,7 @@ sub import { if (!exists $legacy{$name}) { unknown_legacy($name); } - $^H &= ~$unicode8bit::hint_uni8bit; # The only valid thing as of yet + $^H |= $unicode8bit::hint_not_uni8bit; # The only valid thing as of yet } } @@ -179,7 +179,7 @@ sub unimport { unknown_legacy($name); } else { - $^H |= $unicode8bit::hint_uni8bit; # The only valid thing as of yet + $^H &= ~ $unicode8bit::hint_not_uni8bit; # The only valid thing now } } } diff --git a/lib/legacy.t b/lib/legacy.t index 1d332b7..1f0cce9 100644 --- a/lib/legacy.t +++ b/lib/legacy.t @@ -7,10 +7,9 @@ BEGIN { require './test.pl'; } -#use Test::More; +plan(13312); # Determined by experimentation -#plan("no_plan"); -plan(13312); +# Test the upper/lower/title case mappings for all characters 0-255. # First compute the case mappings without resorting to the functions we're # testing. @@ -28,7 +27,7 @@ my @posix_to_lower = @posix_to_upper; # Override the elements in the to_lower arrays that have different lower case -# mappings with those mappings. +# mappings for my $i (0x41 .. 0x5A) { $posix_to_lower[$i] = chr(ord($posix_to_lower[$i]) + 32); $latin1_to_lower[$i] = chr(ord($latin1_to_lower[$i]) + 32); @@ -82,12 +81,12 @@ $empty{'lc'} = $empty{'uc'} = ""; for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) { for my $suffix (\%empty, \%posix, \%cyrillic, \%latin1) { for my $i (0 .. 255) { # For each possible posix or latin1 character - my $cp = sprintf "%02X", $i; + my $cp = sprintf "U+%04X", $i; # First try using latin1 (Unicode) semantics. no legacy "unicode8bit"; - my $phrase = 'with unicode'; + my $phrase = 'with uni8bit'; my $char = chr($i); my $pre_lc = $prefix->{'lc'}; my $pre_uc = $prefix->{'uc'}; @@ -99,25 +98,22 @@ for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) { my $expected_lower = $pre_lc . $latin1_to_lower[$i] . $post_lc; is (uc($to_upper), $expected_upper, - - # The names are commented out for now to avoid 'wide character - # in print' messages. - ); #"$cp: $phrase: uc('$to_upper') eq '$expected_upper'"); + display("$cp: $phrase: uc($to_upper) eq $expected_upper")); is (lc($to_lower), $expected_lower, - ); #"$cp: $phrase: lc('$to_lower') eq '$expected_lower'"); + display("$cp: $phrase: lc($to_lower) eq $expected_lower")); if ($pre_uc eq "") { # Title case if null prefix. my $expected_title = $latin1_to_title[$i] . $post_lc; is (ucfirst($to_upper), $expected_title, - ); #"$cp: $phrase: ucfirst('$to_upper') eq '$expected_title'"); + display("$cp: $phrase: ucfirst($to_upper) eq $expected_title")); my $expected_lcfirst = $latin1_to_lower[$i] . $post_uc; is (lcfirst($to_lower), $expected_lcfirst, - ); #"$cp: $phrase: lcfirst('$to_lower') eq '$expected_lcfirst'"); + display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst")); } # Then try with posix semantics. use legacy "unicode8bit"; - $phrase = 'no unicode'; + $phrase = 'no uni8bit'; # These don't contribute anything in this case. next if $suffix == \%cyrillic; @@ -129,17 +125,17 @@ for my $prefix (\%empty, \%posix, \%cyrillic, \%latin1) { $expected_lower = $pre_lc . $posix_to_lower[$i] . $post_lc; is (uc($to_upper), $expected_upper, - ); #"$cp: $phrase: uc('$to_upper') eq '$expected_upper'"); + display("$cp: $phrase: uc($to_upper) eq $expected_upper")); is (lc($to_lower), $expected_lower, - ); #"$cp: $phrase: lc('$to_lower') eq '$expected_lower'"); + display("$cp: $phrase: lc($to_lower) eq $expected_lower")); if ($pre_uc eq "") { my $expected_title = $posix_to_title[$i] . $post_lc; is (ucfirst($to_upper), $expected_title, - ); #"$cp: $phrase: ucfirst('$to_upper') eq '$expected_title'"); + display("$cp: $phrase: ucfirst($to_upper) eq $expected_title")); my $expected_lcfirst = $posix_to_lower[$i] . $post_uc; is (lcfirst($to_lower), $expected_lcfirst, - ); #"$cp: $phrase: lcfirst('$to_lower') eq '$expected_lcfirst'"); + display("$cp: $phrase: lcfirst($to_lower) eq $expected_lcfirst")); } } } diff --git a/perl.h b/perl.h index 38c9664..bf49279 100644 --- a/perl.h +++ b/perl.h @@ -4755,7 +4755,7 @@ enum { /* pass one of these to get_vtbl */ #define HINT_BLOCK_SCOPE 0x00000100 #define HINT_STRICT_SUBS 0x00000200 /* strict pragma */ #define HINT_STRICT_VARS 0x00000400 /* strict pragma */ -#define HINT_UNI_8_BIT 0x00000800 /* unicode8bit pragma */ +#define HINT_NOT_UNI_8_BIT 0x00000800 /* unicode8bit pragma */ /* The HINT_NEW_* constants are used by the overload pragma */ #define HINT_NEW_INTEGER 0x00001000 diff --git a/t/uni/overload.t b/t/uni/overload.t index e20a3ab..da9b07b 100644 --- a/t/uni/overload.t +++ b/t/uni/overload.t @@ -33,6 +33,10 @@ sub stringify { package main; +# These tests are based on characters 128-255 not having latin1, and hence +# Unicode, semantics +use legacy 'unicode8bit'; + # Bug 34297 foreach my $t ("ASCII", "B\366se") { my $length = length $t; diff --git a/utf8.h b/utf8.h index 7c205d1..19f2174 100644 --- a/utf8.h +++ b/utf8.h @@ -207,7 +207,8 @@ encoded character. #define IN_BYTES (CopHINTS_get(PL_curcop) & HINT_BYTES) #define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES) -#define IN_UNI_8_BIT (CopHINTS_get(PL_curcop) & HINT_UNI_8_BIT && ! IN_LOCALE_RUNTIME && ! IN_BYTES) +#define IN_UNI_8_BIT ( (! (CopHINTS_get(PL_curcop) & HINT_NOT_UNI_8_BIT)) \ + && ! IN_LOCALE_RUNTIME && ! IN_BYTES) #define UTF8_ALLOW_EMPTY 0x0001 #define UTF8_ALLOW_CONTINUATION 0x0002