From: Karl Williamson <khw@khw-desktop.(none)>
Date: Fri, 16 Apr 2010 04:12:32 +0000 (-0600)
Subject: PATCH [perl #72624] charnames::viacode(0) returns undef
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e10d7780a27dcfeb9c50ab28b66f2df8763d8016;p=p5sagit%2Fp5-mst-13.2.git

PATCH [perl #72624] charnames::viacode(0) returns undef

The viacode() function contained the code from the _getcode() function from
Unicode::UCD, unchanged.  However, the rest of viacode() requires that
the result be specially formatted to do a string match with leading
zeros inserted to bring the length up to 4 if less than that.  The
original function only needs to get the number right, as a numerical
comparison is done, so it doesn't do this.  This showed up with calling
viacode with 0, but the bug also affected any input that looked like a
hex number, or a U+ number, such as 'BEE' or 'U+EF'.  These need to be
massaged into '0BEE' and '00EF' for the pattern match later in the
routine to succeed.

The patch also adds a test case to Unicode::UCD to verify that it really
does work ok on 0.
---

diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index c6ee8e0..dfe1254 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -242,7 +242,7 @@ you will need also the L</compexcl()>, and L</casespec()> functions.
 
 =cut
 
-# NB: This function is duplicated in charnames.pm
+# NB: This function is nearly duplicated in charnames.pm
 sub _getcode {
     my $arg = shift;
 
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index a2f972e..732e0ef 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -24,6 +24,26 @@ use Unicode::UCD 'charinfo';
 
 my $charinfo;
 
+$charinfo = charinfo(0);    # Null is often problematic, so test it.
+
+is($charinfo->{code},           '0000', '<control>');
+is($charinfo->{name},           '<control>');
+is($charinfo->{category},       'Cc');
+is($charinfo->{combining},      '0');
+is($charinfo->{bidi},           'BN');
+is($charinfo->{decomposition},  '');
+is($charinfo->{decimal},        '');
+is($charinfo->{digit},          '');
+is($charinfo->{numeric},        '');
+is($charinfo->{mirrored},       'N');
+is($charinfo->{unicode10},      'NULL');
+is($charinfo->{comment},        '');
+is($charinfo->{upper},          '');
+is($charinfo->{lower},          '');
+is($charinfo->{title},          '');
+is($charinfo->{block},          'Basic Latin');
+is($charinfo->{script},         'Common');
+
 $charinfo = charinfo(0x41);
 
 is($charinfo->{code},           '0041', 'LATIN CAPITAL LETTER A');
diff --git a/lib/charnames.pm b/lib/charnames.pm
index ce0938b..8c148fd 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -256,13 +256,16 @@ sub viacode
 
   my $arg = shift;
 
-  # this comes actually from Unicode::UCD, where it is the named
-  # function _getcode (), but it avoids the overhead of loading it
+  # this is derived from Unicode::UCD, where it is nearly the same as the
+  # function _getcode(), but it makes sure that even a hex argument has the
+  # proper number of leading zeros, which is critical in matching against $txt
+  # below
   my $hex;
   if ($arg =~ /^[1-9]\d*$/) {
     $hex = sprintf "%04X", $arg;
   } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) {
-    $hex = $1;
+    # Below is the line that differs from the _getcode() source
+    $hex = sprintf "%04X", hex $arg;
   } else {
     carp("unexpected arg \"$arg\" to charnames::viacode()");
     return;
diff --git a/lib/charnames.t b/lib/charnames.t
index 50c23f3..144c826 100644
--- a/lib/charnames.t
+++ b/lib/charnames.t
@@ -15,7 +15,7 @@ require File::Spec;
 
 $| = 1;
 
-print "1..80\n";
+print "1..81\n";
 
 use charnames ':full';
 
@@ -254,6 +254,9 @@ print "ok 45\n";
 print "not " if grep { /you asked for U+110000/ } @WARN;
 print "ok 46\n";
 
+print "not " unless "NULL" eq charnames::viacode(0);
+print "ok 47\n";
+
 
 # ---- Alias extensions
 
@@ -265,7 +268,7 @@ my @prgs;
     @prgs = split "\n########\n", <DATA>;
     }
 
-my $i = 46;
+my $i = 47;
 for (@prgs) {
     my ($code, $exp) = ((split m/\nEXPECT\n/), '$');
     my ($prog, $fil) = ((split m/\nFILE\n/, $code), "");
@@ -311,7 +314,7 @@ for (@prgs) {
 $_ = 'foobar';
 eval "use charnames ':full';";
 print "not " unless $_ eq 'foobar';
-print "ok 74\n";
+print "ok 75\n";
 
 # Unicode slowdown noted by Phil Pennock, traced to a bug fix in index
 # SADAHIRO Tomoyuki's suggestion is to ensure that the UTF-8ness of both
@@ -320,27 +323,27 @@ print "ok 74\n";
 # (or at least should be). So assert that that it's true here.
 
 my $names = do "unicore/Name.pl";
-print defined $names ? "ok 75\n" : "not ok 75\n";
+print defined $names ? "ok 76\n" : "not ok 76\n";
 if (ord('A') == 65) { # as on ASCII or UTF-8 machines
   my $non_ascii = $names =~ tr/\0-\177//c;
-  print $non_ascii ? "not ok 76 # $non_ascii\n" : "ok 76\n";
+  print $non_ascii ? "not ok 77 # $non_ascii\n" : "ok 77\n";
 } else {
-  print "ok 76\n";
+  print "ok 77\n";
 }
 
 # Verify that charnames propagate to eval("")
 my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ];
 if ($@) {
-    print "# $@not ok 77\nnot ok 78\n";
+    print "# $@not ok 78\nnot ok 79\n";
 } else {
-    print "ok 77\n";
-    print "not " unless $evaltry eq "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}";
     print "ok 78\n";
+    print "not " unless $evaltry eq "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}";
+    print "ok 79\n";
 }
 
 # Verify that db includes the normative NameAliases.txt names
 print "not " unless "\N{U+1D0C5}" eq "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}";
-print "ok 79\n";
+print "ok 80\n";
 
 # [perl #73174] use of \N{FOO} used to reset %^H
 
@@ -356,7 +359,7 @@ print "ok 79\n";
     $res .= '-2' if ":" =~ /\N{COLON}/;
     $res .= '-3' if ":" =~ /\N{COLON}/i;
     print $res eq "foo-foo-1--2-3" ? "" : "not ",
-	"ok 80 - \$^H{foo} correct after /\\N{bar}/i (res=$res)\n";
+	"ok 81 - \$^H{foo} correct after /\\N{bar}/i (res=$res)\n";
 }
 
 __END__