From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Wed, 14 Nov 2001 14:59:32 +0000 (+0000)
Subject: The First, Last ranges in the Unicode data weren't
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e904f99525ffc0cd5f09346758a1931019c2f0b0;p=p5sagit%2Fp5-mst-13.2.git

The First, Last ranges in the Unicode data weren't
getting their general categories added properly;
noticed by Jeffrey Friedl.

p4raw-id: //depot/perl@12994
---

diff --git a/lib/unicore/Category.pl b/lib/unicore/Category.pl
index 6f0979d..e8f676c 100644
--- a/lib/unicore/Category.pl
+++ b/lib/unicore/Category.pl
@@ -1381,12 +1381,17 @@ return <<'END';
 3300	3376	So
 337B	33DD	So
 33E0	33FE	So
+3400	4DB5	Lo
+4E00	9FA5	Lo
 A000	A48C	Lo
 A490	A4A1	So
 A4A4	A4B3	So
 A4B5	A4C0	So
 A4C2	A4C4	So
 A4C6		So
+AC00	D7A3	Lo
+D800	DFFF	Cs
+E000	F8FF	Co
 F900	FA2D	Lo
 FB00	FB06	Ll
 FB13	FB17	Ll
@@ -1587,7 +1592,10 @@ FFFC	FFFD	So
 1D7C3		Sm
 1D7C4	1D7C9	Ll
 1D7CE	1D7FF	Nd
+20000	2A6D6	Lo
 2F800	2FA1D	Lo
 E0001		Cf
 E0020	E007F	Cf
+F0000	FFFFD	Co
+100000	10FFFD	Co
 END
diff --git a/lib/unicore/In/0.pl b/lib/unicore/In/0.pl
index 6b95de3..db52684f 100644
--- a/lib/unicore/In/0.pl
+++ b/lib/unicore/In/0.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-3400	4DB5	CJK Ideograph Extension A
+3400	4DB5	Lo
 END
diff --git a/lib/unicore/In/1.pl b/lib/unicore/In/1.pl
index 3ef3166..e1894b8 100644
--- a/lib/unicore/In/1.pl
+++ b/lib/unicore/In/1.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-4E00	9FA5	CJK Ideograph
+4E00	9FA5	Lo
 END
diff --git a/lib/unicore/In/164.pl b/lib/unicore/In/164.pl
index afa40c3..6a1e2c3 100644
--- a/lib/unicore/In/164.pl
+++ b/lib/unicore/In/164.pl
@@ -309,13 +309,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
-3400		
-4DB5		
-4E00		
-9FA5		
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
-AC00		
-D7A3		
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -378,7 +375,6 @@ FFDA	FFDC
 1D78A	1D7A8	
 1D7AA	1D7C2	
 1D7C4	1D7C9	
-20000		
-2A6D6		
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/In/169.pl b/lib/unicore/In/169.pl
index 570636e..b41f21d 100644
--- a/lib/unicore/In/169.pl
+++ b/lib/unicore/In/169.pl
@@ -935,13 +935,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
-3400		
-4DB5		
-4E00		
-9FA5		
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
-AC00		
-D7A3		
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -1034,7 +1031,6 @@ FFDA	FFDC
 1D790	1D7A8	
 1D7AA	1D7C2	
 1D7C4	1D7C9	
-20000		
-2A6D6		
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/In/170.pl b/lib/unicore/In/170.pl
index a97c18f..30cbfe9 100644
--- a/lib/unicore/In/170.pl
+++ b/lib/unicore/In/170.pl
@@ -1099,13 +1099,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
-3400		
-4DB5		
-4E00		
-9FA5		
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
-AC00		
-D7A3		
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -1212,7 +1209,6 @@ FFDA	FFDC
 1D7AA	1D7C2	
 1D7C4	1D7C9	
 1D7CE	1D7FF	
-20000		
-2A6D6		
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/In/2.pl b/lib/unicore/In/2.pl
index eec928f..c16f7d1 100644
--- a/lib/unicore/In/2.pl
+++ b/lib/unicore/In/2.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-AC00	D7A3	Hangul Syllable
+AC00	D7A3	Lo
 END
diff --git a/lib/unicore/In/3.pl b/lib/unicore/In/3.pl
index 5df4d54..2ca13f4 100644
--- a/lib/unicore/In/3.pl
+++ b/lib/unicore/In/3.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-D800	DB7F	Non Private Use High Surrogate
+D800	DB7F	Cs
 END
diff --git a/lib/unicore/In/4.pl b/lib/unicore/In/4.pl
index f33e5c3..acf09cc 100644
--- a/lib/unicore/In/4.pl
+++ b/lib/unicore/In/4.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-DB80	DBFF	Private Use High Surrogate
+DB80	DBFF	Cs
 END
diff --git a/lib/unicore/In/5.pl b/lib/unicore/In/5.pl
index fd896ff..15c3f92 100644
--- a/lib/unicore/In/5.pl
+++ b/lib/unicore/In/5.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-DC00	DFFF	Low Surrogate
+DC00	DFFF	Cs
 END
diff --git a/lib/unicore/In/6.pl b/lib/unicore/In/6.pl
index 1404dba..fc31fb8 100644
--- a/lib/unicore/In/6.pl
+++ b/lib/unicore/In/6.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-E000	F8FF	Private Use
+E000	F8FF	Co
 END
diff --git a/lib/unicore/In/7.pl b/lib/unicore/In/7.pl
index f5481cc..8eb0eee 100644
--- a/lib/unicore/In/7.pl
+++ b/lib/unicore/In/7.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-20000	2A6D6	CJK Ideograph Extension B
+20000	2A6D6	Lo
 END
diff --git a/lib/unicore/In/8.pl b/lib/unicore/In/8.pl
index be01ceb..5c82bcd 100644
--- a/lib/unicore/In/8.pl
+++ b/lib/unicore/In/8.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-F0000	FFFFD	Plane 15 Private Use
+F0000	FFFFD	Co
 END
diff --git a/lib/unicore/In/9.pl b/lib/unicore/In/9.pl
index 8eb12d1..ec7132a 100644
--- a/lib/unicore/In/9.pl
+++ b/lib/unicore/In/9.pl
@@ -2,5 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-100000	10FFFD	Plane 16 Private Use
+100000	10FFFD	Co
 END
diff --git a/lib/unicore/Is/Alnum.pl b/lib/unicore/Is/Alnum.pl
index eb97eb8..97858ab 100644
--- a/lib/unicore/Is/Alnum.pl
+++ b/lib/unicore/Is/Alnum.pl
@@ -325,7 +325,10 @@ return <<'END';
 31A0	31B7	
 3220	3229	
 3280	3289	
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -394,5 +397,6 @@ FFDA	FFDC
 1D7AA	1D7C2	
 1D7C4	1D7C9	
 1D7CE	1D7FF	
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/Is/Alpha.pl b/lib/unicore/Is/Alpha.pl
index cbd65d0..b8dc6c4 100644
--- a/lib/unicore/Is/Alpha.pl
+++ b/lib/unicore/Is/Alpha.pl
@@ -295,7 +295,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -361,5 +364,6 @@ FFDA	FFDC
 1D78A	1D7A8	
 1D7AA	1D7C2	
 1D7C4	1D7C9	
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/Is/C.pl b/lib/unicore/Is/C.pl
index 199094f..b58d48d 100644
--- a/lib/unicore/Is/C.pl
+++ b/lib/unicore/Is/C.pl
@@ -9,18 +9,12 @@ return <<'END';
 200C	200F	
 202A	202E	
 206A	206F	
-D800		
-DB7F	DB80	
-DBFF	DC00	
-DFFF	E000	
-F8FF		
+D800	F8FF	
 FEFF		
 FFF9	FFFB	
 1D173	1D17A	
 E0001		
 E0020	E007F	
-F0000		
-FFFFD		
-100000		
-10FFFD		
+F0000	FFFFD	
+100000	10FFFD	
 END
diff --git a/lib/unicore/Is/Cntrl.pl b/lib/unicore/Is/Cntrl.pl
index 818cbc0..b58d48d 100644
--- a/lib/unicore/Is/Cntrl.pl
+++ b/lib/unicore/Is/Cntrl.pl
@@ -9,9 +9,12 @@ return <<'END';
 200C	200F	
 202A	202E	
 206A	206F	
+D800	F8FF	
 FEFF		
 FFF9	FFFB	
 1D173	1D17A	
 E0001		
 E0020	E007F	
+F0000	FFFFD	
+100000	10FFFD	
 END
diff --git a/lib/unicore/Is/Co.pl b/lib/unicore/Is/Co.pl
index b7ee129..04f3129 100644
--- a/lib/unicore/Is/Co.pl
+++ b/lib/unicore/Is/Co.pl
@@ -2,10 +2,7 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-E000		
-F8FF		
-F0000		
-FFFFD		
-100000		
-10FFFD		
+E000	F8FF	
+F0000	FFFFD	
+100000	10FFFD	
 END
diff --git a/lib/unicore/Is/Cs.pl b/lib/unicore/Is/Cs.pl
index 79facec..bd71bd1 100644
--- a/lib/unicore/Is/Cs.pl
+++ b/lib/unicore/Is/Cs.pl
@@ -2,8 +2,5 @@
 # This file is built by mktables from e.g. Unicode.txt.
 # Any changes made here will be lost!
 return <<'END';
-D800		
-DB7F	DB80	
-DBFF	DC00	
-DFFF		
+D800	DFFF	
 END
diff --git a/lib/unicore/Is/Graph.pl b/lib/unicore/Is/Graph.pl
index 15c9f1f..5c13624 100644
--- a/lib/unicore/Is/Graph.pl
+++ b/lib/unicore/Is/Graph.pl
@@ -319,13 +319,16 @@ return <<'END';
 3300	3376	
 337B	33DD	
 33E0	33FE	
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
 A490	A4A1	
 A4A4	A4B3	
 A4B5	A4C0	
 A4C2	A4C4	
 A4C6		
-F900	FA2D	
+AC00	D7A3	
+E000	FA2D	
 FB00	FB06	
 FB13	FB17	
 FB1D	FB36	
@@ -386,5 +389,8 @@ FFFC	FFFD
 1D552	1D6A3	
 1D6A8	1D7C9	
 1D7CE	1D7FF	
+20000	2A6D6	
 2F800	2FA1D	
+F0000	FFFFD	
+100000	10FFFD	
 END
diff --git a/lib/unicore/Is/L.pl b/lib/unicore/Is/L.pl
index bb34126..811603b 100644
--- a/lib/unicore/Is/L.pl
+++ b/lib/unicore/Is/L.pl
@@ -228,13 +228,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
-3400		
-4DB5		
-4E00		
-9FA5		
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
-AC00		
-D7A3		
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -295,7 +292,6 @@ FFDA	FFDC
 1D78A	1D7A8	
 1D7AA	1D7C2	
 1D7C4	1D7C9	
-20000		
-2A6D6		
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/Is/Lo.pl b/lib/unicore/Is/Lo.pl
index ff84f2b..726bbf7 100644
--- a/lib/unicore/Is/Lo.pl
+++ b/lib/unicore/Is/Lo.pl
@@ -161,13 +161,10 @@ return <<'END';
 3105	312C	
 3131	318E	
 31A0	31B7	
-3400		
-4DB5		
-4E00		
-9FA5		
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
-AC00		
-D7A3		
+AC00	D7A3	
 F900	FA2D	
 FB1D		
 FB1F	FB28	
@@ -193,7 +190,6 @@ FFD2	FFD7
 FFDA	FFDC	
 10300	1031E	
 10330	10349	
-20000		
-2A6D6		
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/Is/Print.pl b/lib/unicore/Is/Print.pl
index 27eb056..0b94722 100644
--- a/lib/unicore/Is/Print.pl
+++ b/lib/unicore/Is/Print.pl
@@ -320,13 +320,16 @@ return <<'END';
 3300	3376	
 337B	33DD	
 33E0	33FE	
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
 A490	A4A1	
 A4A4	A4B3	
 A4B5	A4C0	
 A4C2	A4C4	
 A4C6		
-F900	FA2D	
+AC00	D7A3	
+E000	FA2D	
 FB00	FB06	
 FB13	FB17	
 FB1D	FB36	
@@ -387,5 +390,8 @@ FFFC	FFFD
 1D552	1D6A3	
 1D6A8	1D7C9	
 1D7CE	1D7FF	
+20000	2A6D6	
 2F800	2FA1D	
+F0000	FFFFD	
+100000	10FFFD	
 END
diff --git a/lib/unicore/Is/Word.pl b/lib/unicore/Is/Word.pl
index 437c067..baba914 100644
--- a/lib/unicore/Is/Word.pl
+++ b/lib/unicore/Is/Word.pl
@@ -326,7 +326,10 @@ return <<'END';
 31A0	31B7	
 3220	3229	
 3280	3289	
+3400	4DB5	
+4E00	9FA5	
 A000	A48C	
+AC00	D7A3	
 F900	FA2D	
 FB00	FB06	
 FB13	FB17	
@@ -395,5 +398,6 @@ FFDA	FFDC
 1D7AA	1D7C2	
 1D7C4	1D7C9	
 1D7CE	1D7FF	
+20000	2A6D6	
 2F800	2FA1D	
 END
diff --git a/lib/unicore/Name.pl b/lib/unicore/Name.pl
index de76f40..860f087 100644
--- a/lib/unicore/Name.pl
+++ b/lib/unicore/Name.pl
@@ -7950,6 +7950,8 @@ return <<'END';
 33FC		IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWENTY-NINE
 33FD		IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY
 33FE		IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
+3400	4DB5	CJK Ideograph Extension A
+4E00	9FA5	CJK Ideograph
 A000		YI SYLLABLE IT
 A001		YI SYLLABLE IX
 A002		YI SYLLABLE I
@@ -9165,6 +9167,11 @@ A4C2		YI RADICAL SHOP
 A4C3		YI RADICAL CHE
 A4C4		YI RADICAL ZZIET
 A4C6		YI RADICAL KE
+AC00	D7A3	Hangul Syllable
+D800	DB7F	Non Private Use High Surrogate
+DB80	DBFF	Private Use High Surrogate
+DC00	DFFF	Low Surrogate
+E000	F8FF	Private Use
 F900		CJK COMPATIBILITY IDEOGRAPH-F900
 F901		CJK COMPATIBILITY IDEOGRAPH-F901
 F902		CJK COMPATIBILITY IDEOGRAPH-F902
@@ -12138,6 +12145,7 @@ FFFD		REPLACEMENT CHARACTER
 1D7FD		MATHEMATICAL MONOSPACE DIGIT SEVEN
 1D7FE		MATHEMATICAL MONOSPACE DIGIT EIGHT
 1D7FF		MATHEMATICAL MONOSPACE DIGIT NINE
+20000	2A6D6	CJK Ideograph Extension B
 2F800		CJK COMPATIBILITY IDEOGRAPH-2F800
 2F801		CJK COMPATIBILITY IDEOGRAPH-2F801
 2F802		CJK COMPATIBILITY IDEOGRAPH-2F802
@@ -12777,4 +12785,6 @@ E007C		TAG VERTICAL LINE
 E007D		TAG RIGHT CURLY BRACKET
 E007E		TAG TILDE
 E007F		CANCEL TAG
+F0000	FFFFD	Plane 15 Private Use
+100000	10FFFD	Plane 16 Private Use
 END
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 676e189..5615aee 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -114,6 +114,55 @@ my %Cat;
 my %General;
 my @General;
 
+sub gencat {	# File one code point (or one end of a First/Last range) into every table it belongs to.
+    my ($Name, $GeneralH, $GeneralA, $Cat,	# callers pass \@Name, \%General, \@General, \%Cat
+	$name, $cat, $code, $op) = @_;	# entry's name, general category, code; $op is a code ref
+    # Callers pass \&append for ordinary entries and range starts, \&extend for range ends.
+    $op->($Name,                     $code, $name);
+    $op->($GeneralA,                 $code, $cat);
+    # Per-name list, created on first use.
+    $op->($GeneralH->{$name} ||= [], $code, $name);
+    # The full category and the class named by its first letter.
+    $op->($Cat->{$cat}       ||= [], $code);
+    $op->($Cat->{substr($cat, 0, 1)}
+      	                    ||= [],  $code);
+    # 005F: SPACING UNDERSCORE (counts as Word but not as Alnum)
+    $op->($Cat->{Word}       ||= [], $code)
+	if $cat =~ /^[LMN]/ or $code eq "005F";
+    $op->($Cat->{Alnum}      ||= [], $code)
+	if $cat =~ /^[LMN]/;
+    $op->($Cat->{Alpha}      ||= [], $code)
+	if $cat =~ /^[LM]/;
+    # 0009: HORIZONTAL TABULATION
+    # 000A: LINE FEED
+    # 000B: VERTICAL TABULATION
+    # 000C: FORM FEED
+    # 000D: CARRIAGE RETURN
+    # 0020: SPACE (absent from the Space/SpacePerl code lists; presumably matched by /^Z/ -- TODO confirm)
+    $op->($Cat->{Space}      ||= [], $code)
+	if $cat  =~ /^Z/ ||
+	    $code =~ /^(0009|000A|000B|000C|000D)$/;
+    $op->($Cat->{SpacePerl}  ||= [], $code)	# same test as Space, minus 000B
+	if $cat  =~ /^Z/ ||
+	    $code =~ /^(0009|000A|000C|000D)$/;
+    $op->($Cat->{Blank}      ||= [], $code)
+	if $code =~ /^(0020|0009)$/ ||
+	    $cat  =~ /^Z[^lp]$/;	# any Z category other than Zl and Zp
+    $op->($Cat->{Digit}      ||= [], $code) if $cat eq "Nd";
+    $op->($Cat->{Upper}      ||= [], $code) if $cat eq "Lu";
+    $op->($Cat->{Lower}      ||= [], $code) if $cat eq "Ll";
+    $op->($Cat->{Title}      ||= [], $code) if $cat eq "Lt";
+    $op->($Cat->{ASCII}      ||= [], $code) if $code le "007F";	# string compare; presumably relies on zero-padded hex codes -- TODO confirm
+    $op->($Cat->{Cntrl}      ||= [], $code) if $cat =~ /^C/;
+    $op->($Cat->{Graph}      ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
+    $op->($Cat->{Print}      ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;	# Graph's test plus Zs
+    $op->($Cat->{Punct}      ||= [], $code) if $cat =~ /^P/;
+    # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
+    $op->($Cat->{XDigit}     ||= [], $code)
+	if $code =~ /^00(3[0-9]|[46][1-6])$/;
+
+}
+
 if (open(my $Unicode, "Unicode.txt")) {
     my @Name;
     my @Bidi;
@@ -136,61 +185,18 @@ if (open(my $Unicode, "Unicode.txt")) {
 
 	if ($name =~ /^<(.+), (First|Last)>$/) {
 	    $name = $1;
-	    if ($2 eq 'First') {
-		append($General{$name} ||= [], $code, $name);
-	    } else {
-		extend($General{$name}       , $code);
-	    }
+	    gencat(\@Name, \%General, \@General, \%Cat,
+		   $name, $cat, $code,
+		   $2 eq 'First' ? \&append : \&extend);
 	    unless (defined $In{$name}) {
 		$In{$name}   = $InId++;
 		$InIn{$name} = $General{$name};
 	    }
-	    append($Cat{$cat}       ||= [], $code);
-	    append($Cat{substr($cat, 0, 1)}
-		                    ||= [], $code);
 	} else {
-	    append(\@Name,                  $code, $name);
-
-	    append(\@General,               $code, $cat);
-
-	    append($Cat{$cat}       ||= [], $code);
-	    append($Cat{substr($cat, 0, 1)}
-		                    ||= [], $code);
-	    # 005F: SPACING UNDERSCORE
-	    append($Cat{Word}       ||= [], $code)
-		if $cat =~ /^[LMN]/ or $code eq "005F";
-	    append($Cat{Alnum}      ||= [], $code)
-		if $cat =~ /^[LMN]/;
-	    append($Cat{Alpha}      ||= [], $code)
-		if $cat =~ /^[LM]/;
-	    # 0009: HORIZONTAL TABULATION
-	    # 000A: LINE FEED
-	    # 000B: VERTICAL TABULATION
-	    # 000C: FORM FEED
-	    # 000D: CARRIAGE RETURN
-	    # 0020: SPACE
-	    append($Cat{Space}      ||= [], $code)
-		if $cat  =~ /^Z/ ||
-		    $code =~ /^(0009|000A|000B|000C|000D)$/;
-	    append($Cat{SpacePerl}  ||= [], $code)
-		if $cat  =~ /^Z/ ||
-		    $code =~ /^(0009|000A|000C|000D)$/;
-	    append($Cat{Blank}      ||= [], $code)
-		if $code =~ /^(0020|0009)$/ ||
-		    $cat  =~ /^Z[^lp]$/;
-	    append($Cat{Digit}      ||= [], $code) if $cat eq "Nd";
-	    append($Cat{Upper}      ||= [], $code) if $cat eq "Lu";
-	    append($Cat{Lower}      ||= [], $code) if $cat eq "Ll";
-	    append($Cat{Title}      ||= [], $code) if $cat eq "Lt";
-	    append($Cat{ASCII}      ||= [], $code) if $code le "007F";
-	    append($Cat{Cntrl}      ||= [], $code) if $cat =~ /^C/;
-	    append($Cat{Graph}      ||= [], $code) if $cat =~ /^([LMNPS]|Co)/;
-	    append($Cat{Print}      ||= [], $code) if $cat =~ /^([LMNPS]|Co|Zs)/;
-	    append($Cat{Punct}      ||= [], $code) if $cat =~ /^P/;
-	    # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f
-	    append($Cat{XDigit}     ||= [], $code)
-		if $code =~ /^00(3[0-9]|[46][1-6])$/;
-	    
+
+	    gencat(\@Name, \%General, \@General, \%Cat,
+		   $name, $cat, $code, \&append);
+
 	    append($To{Upper}       ||= [], $code, $upper)   if $upper;
 	    append($To{Lower}       ||= [], $code, $lower)   if $lower;
 	    append($To{Title}       ||= [], $code, $title)   if $title;
@@ -653,7 +659,7 @@ foreach my $in (sort { $In{$a} <=> $In{$b} } keys %In) {
 #
 # The mapping from General Category long forms to short forms is
 # currently hardwired here since no simple data file in the UCD
-# seems to do that.
+# seems to do that.  Unicode 3.2 will presumably correct this.
 #
 
 my %Is = (