X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.pm;h=dfdd2dcb51938a515e79c4b4babbadbc81460675;hb=98641f606c65e71cca89f9a694e2796b5a21cbd8;hp=81a9aed3487da154aa8f252fb88f45bace172e2e;hpb=a196fbfd2938a6fe215a07e9209acfe497d87208;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 81a9aed..dfdd2dc 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -3,12 +3,20 @@ package Unicode::UCD; use strict; use warnings; -our $VERSION = '3.1.0'; +our $VERSION = '0.22'; + +use Storable qw(dclone); require Exporter; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(charinfo charblock charscript); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec); use Carp; @@ -18,28 +26,44 @@ Unicode::UCD - Unicode character database =head1 SYNOPSIS - use Unicode::UCD 3.1.0; - # requires that level of the Unicode character database - use Unicode::UCD 'charinfo'; - my %charinfo = charinfo($codepoint); + my $charinfo = charinfo($codepoint); use Unicode::UCD 'charblock'; my $charblock = charblock($codepoint); use Unicode::UCD 'charscript'; - my $charscript = charblock($codepoint); + my $charscript = charscript($codepoint); + + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my %charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); =head1 DESCRIPTION -The Unicode module offers a simple interface to the Unicode Character -Database. +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=cut -my $UNICODE; -my $BLOCKS; -my $SCRIPTS; +my $UNICODEFH; +my $BLOCKSFH; +my $SCRIPTSFH; +my $VERSIONFH; +my $COMPEXCLFH; +my $CASEFOLDFH; +my $CASESPECFH; sub openunicode { my ($rfh, @path) = @_; @@ -47,7 +71,7 @@ sub openunicode { unless (defined $$rfh) { for my $d (@INC) { use File::Spec; - $f = File::Spec->catfile($d, "unicode", @path); + $f = File::Spec->catfile($d, "unicore", @path); last if open($$rfh, $f); undef $f; } @@ -62,10 +86,10 @@ sub openunicode { use Unicode::UCD 'charinfo'; - my %charinfo = charinfo(0x41); + my $charinfo = charinfo(0x41); -charinfo() returns a hash that has the following fields as defined -by the Unicode standard: +charinfo() returns a reference to a hash that has the following fields +as defined by the Unicode standard: key @@ -86,32 +110,109 @@ by the Unicode standard: title titlecase equivalent mapping block block the character belongs to (used in \p{In...}) - script script the character belongs to + script script the character belongs to -If no match is found, an empty hash is returned. +If no match is found, a reference to an empty hash is returned. -The C<block> property is the same as as returned by charinfo(). It is +The C<block> property is the same as returned by charinfo(). It is not defined in the Unicode Character Database proper (Chapter 4 of the -Unicode 3.0 Standard) but instead in an auxiliary database (Chapter 14 -of TUS3). Similarly for the C