X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FUnicode%2FUCD.pm;h=1496663c5495a73f0f119efc9ccf8799267e8a1d;hb=06c0cc96ebd866767a6d107ed78967600f7e0395;hp=449229a7ef204c95a56f1ace920f31f972d6ad4a;hpb=14f14a1020aa168014cccb2f21519e9d6b74be72;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 449229a..1496663 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -3,12 +3,21 @@ package Unicode::UCD; use strict; use warnings; -our $VERSION = '3.1.0'; +our $VERSION = '0.23'; + +use Storable qw(dclone); require Exporter; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(charinfo charblock); + +our @EXPORT_OK = qw(charinfo + charblock charscript + charblocks charscripts + charinrange + compexcl + casefold casespec + namedseq); use Carp; @@ -18,24 +27,48 @@ Unicode::UCD - Unicode character database =head1 SYNOPSIS - use Unicode::UCD 3.1.0; - # requires that level of the Unicode character database - use Unicode::UCD 'charinfo'; - my %charinfo = charinfo($codepoint); + my $charinfo = charinfo($codepoint); use Unicode::UCD 'charblock'; - my $charblock = charblock($codepoint); + my $charblock = charblock($codepoint); + + use Unicode::UCD 'charscript'; + my $charscript = charscript($codepoint); + + use Unicode::UCD 'charblocks'; + my $charblocks = charblocks(); + + use Unicode::UCD 'charscripts'; + my %charscripts = charscripts(); + + use Unicode::UCD qw(charscript charinrange); + my $range = charscript($script); + print "looks like $script\n" if charinrange($range, $codepoint); + + use Unicode::UCD 'compexcl'; + my $compexcl = compexcl($codepoint); + + use Unicode::UCD 'namedseq'; + my $namedseq = namedseq($named_sequence_name); + + my $unicode_version = Unicode::UCD::UnicodeVersion(); =head1 DESCRIPTION -The Unicode module offers a simple interface to the Unicode Character -Database. +The Unicode::UCD module offers a simple interface to the Unicode +Character Database. 
=cut -my $UNICODE; -my $BLOCKS; +my $UNICODEFH; +my $BLOCKSFH; +my $SCRIPTSFH; +my $VERSIONFH; +my $COMPEXCLFH; +my $CASEFOLDFH; +my $CASESPECFH; +my $NAMEDSEQFH; sub openunicode { my ($rfh, @path) = @_; @@ -43,16 +76,13 @@ sub openunicode { unless (defined $$rfh) { for my $d (@INC) { use File::Spec; - $f = File::Spec->catfile($d, "unicode", @path); - next unless -f $f; - if (open($$rfh, $f)) { - last; - } else { - croak __PACKAGE__, ": open '$f' failed: $!\n"; - } + $f = File::Spec->catfile($d, "unicore", @path); + last if open($$rfh, $f); + undef $f; } - croak __PACKAGE__, ": failed to find ",join("/",@path)," in @INC\n" - unless defined $rfh; + croak __PACKAGE__, ": failed to find ", + File::Spec->catfile(@path), " in @INC" + unless defined $f; } return $f; } @@ -61,10 +91,10 @@ sub openunicode { use Unicode::UCD 'charinfo'; - my %charinfo = charinfo(0x41); + my $charinfo = charinfo(0x41); -charinfo() returns a hash that has the following fields as defined -by the Unicode standard: +charinfo() returns a reference to a hash that has the following fields +as defined by the Unicode standard: key @@ -83,25 +113,111 @@ by the Unicode standard: upper uppercase equivalent mapping lower lowercase equivalent mapping title titlecase equivalent mapping + block block the character belongs to (used in \p{In...}) + script script the character belongs to + +If no match is found, a reference to an empty hash is returned. -If no match is found, an empty hash is returned. +The C<block> property is the same as returned by charinfo(). It is +not defined in the Unicode Character Database proper (Chapter 4 of the +Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database +(Chapter 14 of TUS3). Similarly for the C