From: Jarkko Hietaniemi Date: Sun, 29 Jul 2001 18:55:04 +0000 (+0000) Subject: Small UnicodeCD tweaks. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=74f8133ec9a4ba92f24946c916bc0abc74e42021;p=p5sagit%2Fp5-mst-13.2.git Small UnicodeCD tweaks. p4raw-id: //depot/perl@11482 --- diff --git a/lib/UnicodeCD.pm b/lib/UnicodeCD.pm index 4f4c19d..bde511c 100644 --- a/lib/UnicodeCD.pm +++ b/lib/UnicodeCD.pm @@ -3,11 +3,12 @@ package UnicodeCD; use strict; use warnings; -our $VERSION = '0.1'; +our $VERSION = '0.2'; require Exporter; our @ISA = qw(Exporter); + our @EXPORT_OK = qw(charinfo charblock charscript charblocks charscripts @@ -137,7 +138,7 @@ sub _getcode { sub han_charname { my $arg = shift; my $code = _getcode($arg); - croak __PACKAGE__, "::charinfo: unknown code '$arg'" + croak __PACKAGE__, "::han_charname: unknown code '$arg'" unless defined $code; croak __PACKAGE__, "::han_charname: outside CJK Unified Ideographs '$arg'" unless 0x3400 <= $code && $code <= 0x4DB5 @@ -179,7 +180,7 @@ my %HangulConst = ( sub hangul_charname { my $arg = shift; my $code = _getcode($arg); - croak __PACKAGE__, "::charinfo: unknown code '$arg'" + croak __PACKAGE__, "::hangul_charname: unknown code '$arg'" unless defined $code; croak __PACKAGE__, "::hangul_charname: outside Hangul Syllables '$arg'" unless $HangulConst{SBase} <= $code && $code <= $HangulConst{Final}; @@ -198,7 +199,7 @@ sub hangul_charname { sub hangul_decomp { my $arg = shift; my $code = _getcode($arg); - croak __PACKAGE__, "::charinfo: unknown code '$arg'" + croak __PACKAGE__, "::hangul_decomp: unknown code '$arg'" unless defined $code; croak __PACKAGE__, "::hangul_decomp: outside Hangul Syllables '$arg'" unless $HangulConst{SBase} <= $code && $code <= $HangulConst{Final}; @@ -240,6 +241,18 @@ my @CharinfoRanges = ( [ 0x100000, 0x10FFFD, undef, undef ], ); +sub TIEHANDLE { + my $class = shift; + bless { @_ }, $class; +} + +sub READLINE { + warn "READLINE @_\n"; + my $self = shift; + my $fh = $self->{FH}; + "00 ". <$fh>; +} + sub charinfo { my $arg = shift; my $code = _getcode($arg); @@ -248,20 +261,22 @@ sub charinfo { my $hexk = sprintf("%04X", $code); my($rcode,$rname,$rdec); foreach my $range (@CharinfoRanges){ - if($range->[0] <= $code && $code <= $range->[1]){ + if ($range->[0] <= $code && $code <= $range->[1]) { $rcode = $hexk; $rname = $range->[2] ? $range->[2]->($code) : ''; $rdec = $range->[3] ? $range->[3]->($code) : ''; - $hexk = sprintf("%04X",$range->[0]); # replace by the first + $hexk = sprintf("%04X", $range->[0]); # replace by the first last; } } - openunicode(\$UNICODEFH, "Unicode.sort"); # sorted + openunicode(\$UNICODEFH, "Unicode.txt"); if (defined $UNICODEFH) { use Search::Dict; + tie *UNICODEFH, __PACKAGE__, FH => *UNICODEFH unless tied *UNICODEFH; if (look($UNICODEFH, "$hexk;") >= 0) { my $line = <$UNICODEFH>; chomp $line; + $line =~ s/^0+(\w{4};)/$1/; my %prop; @prop{qw( code name category @@ -555,6 +570,8 @@ sub _compexcl { sub compexcl { my $arg = shift; my $code = _getcode($arg); + croak __PACKAGE__, "::compexcl: unknown code '$arg'" + unless defined $code; _compexcl() unless %COMPEXCL; @@ -625,6 +642,8 @@ sub _casefold { sub casefold { my $arg = shift; my $code = _getcode($arg); + croak __PACKAGE__, "::casefold: unknown code '$arg'" + unless defined $code; _casefold() unless %CASEFOLD; @@ -700,6 +719,8 @@ sub _casespec { sub casespec { my $arg = shift; my $code = _getcode($arg); + croak __PACKAGE__, "::casespec: unknown code '$arg'" + unless defined $code; _casespec() unless %CASESPEC; diff --git a/lib/UnicodeCD.t b/lib/UnicodeCD.t index 6e92284..746ebcb 100644 --- a/lib/UnicodeCD.t +++ b/lib/UnicodeCD.t @@ -3,7 +3,7 @@ use UnicodeCD; use Test; use strict; -BEGIN { plan tests => 111 + 17 * 3}; +BEGIN { plan tests => 162 }; use UnicodeCD 'charinfo'; @@ -93,7 +93,7 @@ ok($charinfo->{title}, ''); ok($charinfo->{block}, 'Hebrew'); ok($charinfo->{script}, 'Hebrew'); -# an open syllable in Hangul +# An open syllable in Hangul. $charinfo = charinfo(0xAC00); @@ -115,7 +115,7 @@ ok($charinfo->{title}, ''); ok($charinfo->{block}, 'Hangul Syllables'); ok($charinfo->{script}, 'Hangul'); -# a close syllable in Hangul +# A closed syllable in Hangul. $charinfo = charinfo(0xAE00);