X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.pm;h=96dee9a8164c03ecfbc0b8f3b4c98b2e9f36ab80;hb=cd340a5d1de2dbc931e802b4aaed98e9e4d75f51;hp=ce657a1ed4f2ee8e06a49be2315490b95efdd780;hpb=00f2772c079193695a7679493dd74f0c369a0102;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index ce657a1..96dee9a 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -3,12 +3,18 @@ package Unicode::UCD; use strict; use warnings; -our $VERSION = '3.1.0'; +our $VERSION = '0.2'; require Exporter; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(charinfo charblock); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec); use Carp; @@ -18,24 +24,44 @@ Unicode::UCD - Unicode character database =head1 SYNOPSIS - use Unicode::UCD 3.1.0; - # requires that level of the Unicode character database - use Unicode::UCD 'charinfo'; - my %charinfo = charinfo($codepoint); + my $charinfo = charinfo($codepoint); use Unicode::UCD 'charblock'; - my $charblock = charblock($codepoint); + my $charblock = charblock($codepoint); + + use Unicode::UCD 'charscript'; + my $charscript = charscript($codepoint); + + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my %charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); =head1 DESCRIPTION -The Unicode module offers a simple interface to the Unicode Character -Database. +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=cut -my $UNICODE; -my $BLOCKS; +my $UNICODEFH; +my $BLOCKSFH; +my $SCRIPTSFH; +my $VERSIONFH; +my $COMPEXCLFH; +my $CASEFOLDFH; +my $CASESPECFH; sub openunicode { my ($rfh, @path) = @_; @@ -43,15 +69,13 @@ sub openunicode { unless (defined $$rfh) { for my $d (@INC) { use File::Spec; - $f = File::Spec->catfile($d, "unicode", @path); - if (open($$rfh, $f)) { - last; - } else { - croak __PACKAGE__, ": open '$f' failed: $!\n"; - } + $f = File::Spec->catfile($d, "unicore", @path); + last if open($$rfh, $f); + undef $f; } - croak __PACKAGE__, ": failed to find ",join("/",@path)," in @INC\n" - unless defined $rfh; + croak __PACKAGE__, ": failed to find ", + File::Spec->catfile(@path), " in @INC" + unless defined $f; } return $f; } @@ -60,10 +84,10 @@ sub openunicode { use Unicode::UCD 'charinfo'; - my %charinfo = charinfo(0x41); + my $charinfo = charinfo(0x41); -charinfo() returns a hash that has the following fields as defined -by the Unicode standard: +charinfo() returns a reference to a hash that has the following fields +as defined by the Unicode standard: key @@ -82,25 +106,109 @@ by the Unicode standard: upper uppercase equivalent mapping lower lowercase equivalent mapping title titlecase equivalent mapping + block block the character belongs to (used in \p{In...}) + script script the character belongs to + +If no match is found, a reference to an empty hash is returned. -If no match is found, an empty hash is returned. +The C<block> property is the same as returned by charinfo(). It is +not defined in the Unicode Character Database proper (Chapter 4 of the +Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database +(Chapter 14 of TUS3). Similarly for the C