X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.pm;h=96dee9a8164c03ecfbc0b8f3b4c98b2e9f36ab80;hb=cd340a5d1de2dbc931e802b4aaed98e9e4d75f51;hp=ff819cde1a14084fca86903307fcdf29b21f9881;hpb=10a6ecd25e80ad20ebf67b311125411d51e78bc0;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index ff819cd..96dee9a 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -3,15 +3,18 @@ package Unicode::UCD; use strict; use warnings; -our $VERSION = '0.1'; +our $VERSION = '0.2'; require Exporter; our @ISA = qw(Exporter); + our @EXPORT_OK = qw(charinfo charblock charscript charblocks charscripts - charinrange); + charinrange + compexcl + casefold casespec); use Carp; @@ -22,7 +25,7 @@ Unicode::UCD - Unicode character database =head1 SYNOPSIS use Unicode::UCD 'charinfo'; - my %charinfo = charinfo($codepoint); + my $charinfo = charinfo($codepoint); use Unicode::UCD 'charblock'; my $charblock = charblock($codepoint); @@ -30,10 +33,25 @@ Unicode::UCD - Unicode character database use Unicode::UCD 'charscript'; my $charscript = charblock($codepoint); + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my %charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); + =head1 DESCRIPTION -The Unicode module offers a simple interface to the Unicode Character -Database. +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=cut @@ -41,6 +59,9 @@ my $UNICODEFH; my $BLOCKSFH; my $SCRIPTSFH; my $VERSIONFH; +my $COMPEXCLFH; +my $CASEFOLDFH; +my $CASESPECFH; sub openunicode { my ($rfh, @path) = @_; @@ -48,7 +69,7 @@ sub openunicode { unless (defined $$rfh) { for my $d (@INC) { use File::Spec; - $f = File::Spec->catfile($d, "unicode", @path); + $f = File::Spec->catfile($d, "unicore", @path); last if open($$rfh, $f); undef $f; } @@ -63,10 +84,10 @@ sub openunicode { use Unicode::UCD 'charinfo'; - my %charinfo = charinfo(0x41); + my $charinfo = charinfo(0x41); -charinfo() returns a hash that has the following fields as defined -by the Unicode standard: +charinfo() returns a reference to a hash that has the following fields +as defined by the Unicode standard: key @@ -87,20 +108,18 @@ by the Unicode standard: title titlecase equivalent mapping block block the character belongs to (used in \p{In...}) - script script the character belongs to + script script the character belongs to -If no match is found, an empty hash is returned. +If no match is found, a reference to an empty hash is returned. -The C<block> property is the same as as returned by charinfo(). It is +The C<block> property is the same as returned by charinfo(). It is not defined in the Unicode Character Database proper (Chapter 4 of the -Unicode 3.0 Standard) but instead in an auxiliary database (Chapter 14 -of TUS3). Similarly for the C