X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.pm;h=96dee9a8164c03ecfbc0b8f3b4c98b2e9f36ab80;hb=cd340a5d1de2dbc931e802b4aaed98e9e4d75f51;hp=6c20d4066d8526cc63c0f4d40330f3cf62fb5c73;hpb=2796c109dc2c56e2241410992d78bd8e0cccd71f;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 6c20d40..96dee9a 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -3,12 +3,18 @@ package Unicode::UCD; use strict; use warnings; -our $VERSION = '3.1.0'; +our $VERSION = '0.2'; require Exporter; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(charinfo charblock charscript); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec); use Carp; @@ -18,11 +24,8 @@ Unicode::UCD - Unicode character database =head1 SYNOPSIS - use Unicode::UCD 3.1.0; - # requires that level of the Unicode character database - use Unicode::UCD 'charinfo'; - my %charinfo = charinfo($codepoint); + my $charinfo = charinfo($codepoint); use Unicode::UCD 'charblock'; my $charblock = charblock($codepoint); @@ -30,16 +33,35 @@ Unicode::UCD - Unicode character database use Unicode::UCD 'charscript'; my $charscript = charblock($codepoint); + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my %charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); + =head1 DESCRIPTION -The Unicode module offers a simple interface to the Unicode Character -Database. +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=cut -my $UNICODE; -my $BLOCKS; -my $SCRIPTS; +my $UNICODEFH; +my $BLOCKSFH; +my $SCRIPTSFH; +my $VERSIONFH; +my $COMPEXCLFH; +my $CASEFOLDFH; +my $CASESPECFH; sub openunicode { my ($rfh, @path) = @_; @@ -47,7 +69,7 @@ sub openunicode { unless (defined $$rfh) { for my $d (@INC) { use File::Spec; - $f = File::Spec->catfile($d, "unicode", @path); + $f = File::Spec->catfile($d, "unicore", @path); last if open($$rfh, $f); undef $f; } @@ -62,10 +84,10 @@ sub openunicode { use Unicode::UCD 'charinfo'; - my %charinfo = charinfo(0x41); + my $charinfo = charinfo(0x41); -charinfo() returns a hash that has the following fields as defined -by the Unicode standard: +charinfo() returns a reference to a hash that has the following fields +as defined by the Unicode standard: key @@ -86,32 +108,107 @@ by the Unicode standard: title titlecase equivalent mapping block block the character belongs to (used in \p{In...}) - script script the character belongs to + script script the character belongs to -If no match is found, an empty hash is returned. +If no match is found, a reference to an empty hash is returned. -The C<block> property is the same as as returned by charinfo(). It is +The C<block> property is the same as returned by charinfo(). It is not defined in the Unicode Character Database proper (Chapter 4 of the -Unicode 3.0 Standard) but instead in an auxiliary database (Chapter 14 -of TUS3). Similarly for the C