From: Karl Williamson Date: Fri, 16 Apr 2010 04:12:32 +0000 (-0600) Subject: PATCH [perl #72624] charnames::viacode(0) returns undef X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e10d7780a27dcfeb9c50ab28b66f2df8763d8016;p=p5sagit%2Fp5-mst-13.2.git PATCH [perl #72624] charnames::viacode(0) returns undef The viacode() function contained the code from the _getcode() function from Unicode::UCD, unchanged. However, the rest of viacode() requires that the result be specially formatted to do a string match with leading zeros inserted to bring the length up to 4 if less than that. The original function only needs to get the number right, as a numerical comparison is done, so it doesn't do this. This showed up with calling viacode with 0, but the bug also affected any input that looked like a hex number, or a U+ number, such as 'BEE' or 'U+EF'. These need to be massaged into '0BEE' and '00EF' for the pattern match later in the routine to succeed. The patch also adds a test case to Unicode::UCD to verify that it really does work ok on 0. --- diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index c6ee8e0..dfe1254 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -242,7 +242,7 @@ you will need also the L, and L functions. =cut -# NB: This function is duplicated in charnames.pm +# NB: This function is nearly duplicated in charnames.pm sub _getcode { my $arg = shift; diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index a2f972e..732e0ef 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -24,6 +24,26 @@ use Unicode::UCD 'charinfo'; my $charinfo; +$charinfo = charinfo(0); # Null is often problematic, so test it. + +is($charinfo->{code}, '0000', ''); +is($charinfo->{name}, ''); +is($charinfo->{category}, 'Cc'); +is($charinfo->{combining}, '0'); +is($charinfo->{bidi}, 'BN'); +is($charinfo->{decomposition}, ''); +is($charinfo->{decimal}, ''); +is($charinfo->{digit}, ''); +is($charinfo->{numeric}, ''); +is($charinfo->{mirrored}, 'N'); +is($charinfo->{unicode10}, 'NULL'); +is($charinfo->{comment}, ''); +is($charinfo->{upper}, ''); +is($charinfo->{lower}, ''); +is($charinfo->{title}, ''); +is($charinfo->{block}, 'Basic Latin'); +is($charinfo->{script}, 'Common'); + $charinfo = charinfo(0x41); is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A'); diff --git a/lib/charnames.pm b/lib/charnames.pm index ce0938b..8c148fd 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -256,13 +256,16 @@ sub viacode my $arg = shift; - # this comes actually from Unicode::UCD, where it is the named - # function _getcode (), but it avoids the overhead of loading it + # this is derived from Unicode::UCD, where it is nearly the same as the + # function _getcode(), but it makes sure that even a hex argument has the + # proper number of leading zeros, which is critical in matching against $txt + # below my $hex; if ($arg =~ /^[1-9]\d*$/) { $hex = sprintf "%04X", $arg; } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) { - $hex = $1; + # Below is the line that differs from the _getcode() source + $hex = sprintf "%04X", hex $arg; } else { carp("unexpected arg \"$arg\" to charnames::viacode()"); return; diff --git a/lib/charnames.t b/lib/charnames.t index 50c23f3..144c826 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -15,7 +15,7 @@ require File::Spec; $| = 1; -print "1..80\n"; +print "1..81\n"; use charnames ':full'; @@ -254,6 +254,9 @@ print "ok 45\n"; print "not " if grep { /you asked for U+110000/ } @WARN; print "ok 46\n"; +print "not " unless "NULL" eq charnames::viacode(0); +print "ok 47\n"; + # ---- Alias extensions @@ -265,7 +268,7 @@ my @prgs; @prgs = split "\n########\n", ; } -my $i = 46; +my $i = 47; for (@prgs) { my ($code, $exp) = ((split m/\nEXPECT\n/), '$'); my ($prog, $fil) = ((split m/\nFILE\n/, $code), ""); @@ -311,7 +314,7 @@ for (@prgs) { $_ = 'foobar'; eval "use charnames ':full';"; print "not " unless $_ eq 'foobar'; -print "ok 74\n"; +print "ok 75\n"; # Unicode slowdown noted by Phil Pennock, traced to a bug fix in index # SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both @@ -320,27 +323,27 @@ print "ok 74\n"; # (or at least should be). So assert that that it's true here. my $names = do "unicore/Name.pl"; -print defined $names ? "ok 75\n" : "not ok 75\n"; +print defined $names ? "ok 76\n" : "not ok 76\n"; if (ord('A') == 65) { # as on ASCII or UTF-8 machines my $non_ascii = $names =~ tr/\0-\177//c; - print $non_ascii ? "not ok 76 # $non_ascii\n" : "ok 76\n"; + print $non_ascii ? "not ok 77 # $non_ascii\n" : "ok 77\n"; } else { - print "ok 76\n"; + print "ok 77\n"; } # Verify that charnames propagate to eval("") my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ]; if ($@) { - print "# $@not ok 77\nnot ok 78\n"; + print "# $@not ok 78\nnot ok 79\n"; } else { - print "ok 77\n"; - print "not " unless $evaltry eq "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}"; print "ok 78\n"; + print "not " unless $evaltry eq "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}"; + print "ok 79\n"; } # Verify that db includes the normative NameAliases.txt names print "not " unless "\N{U+1D0C5}" eq "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}"; -print "ok 79\n"; +print "ok 80\n"; # [perl #73174] use of \N{FOO} used to reset %^H @@ -356,7 +359,7 @@ print "ok 79\n"; $res .= '-2' if ":" =~ /\N{COLON}/; $res .= '-3' if ":" =~ /\N{COLON}/i; print $res eq "foo-foo-1--2-3" ? "" : "not ", - "ok 80 - \$^H{foo} correct after /\\N{bar}/i (res=$res)\n"; + "ok 81 - \$^H{foo} correct after /\\N{bar}/i (res=$res)\n"; } __END__