X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FLocale%2FScript.pm;h=f5fdbab05c978bdb840b566d2b6d37cb128ae2bd;hb=6a9befb105d93024902eb178dab77655333f1829;hp=a7168fe9297c8b2b658d156c985710d618dd3e91;hpb=6b6e008c33e4c468fb6c1356921c16cdf5c73b26;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/Locale/Script.pm b/lib/Locale/Script.pm index a7168fe..f5fdbab 100644 --- a/lib/Locale/Script.pm +++ b/lib/Locale/Script.pm @@ -1,111 +1,13 @@ -#----------------------------------------------------------------------- - -=head1 NAME - -Locale::Script - ISO codes for script identification (ISO 15924) - -=head1 SYNOPSIS - - use Locale::Script; - use Locale::Constants; - - $script = code2script('ph'); # 'Phoenician' - $code = script2code('Tibetan'); # 'bo' - $code3 = script2code('Tibetan', - LOCALE_CODE_ALPHA_3); # 'bod' - $codeN = script2code('Tibetan', - LOCALE_CODE_ALPHA_NUMERIC); # 330 - - @codes = all_script_codes(); - @scripts = all_script_names(); - -=cut - -#----------------------------------------------------------------------- +# +# Locale::Script - ISO codes for script identification (ISO 15924) +# +# $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $ +# package Locale::Script; use strict; require 5.002; -#----------------------------------------------------------------------- - -=head1 DESCRIPTION - -The C<Locale::Script> module provides access to the ISO -codes for identifying scripts, as defined in ISO 15924. -For example, Egyptian hieroglyphs are denoted by the two-letter -code 'eg', the three-letter code 'egy', and the numeric code 050. - -You can either access the codes via the conversion routines -(described below), or with the two functions which return lists -of all script codes or all script names. - -There are three different code sets you can use for identifying -scripts: - -=over 4 - -=item B<alpha-2> - -Two letter codes, such as 'bo' for Tibetan. -This code set is identified with the symbol C<LOCALE_CODE_ALPHA_2>. - -=item B<alpha-3> - -Three letter codes, such as 'ell' for Greek.
-This code set is identified with the symbol C<LOCALE_CODE_ALPHA_3>. - -=item B<numeric> - -Numeric codes, such as 410 for Hiragana. -This code set is identified with the symbol C<LOCALE_CODE_NUMERIC>. - -=back - -All of the routines take an optional additional argument -which specifies the code set to use. -If not specified, it defaults to the two-letter codes. -This is partly for backwards compatibility (previous versions -of Locale modules only supported the alpha-2 codes), and -partly because they are the most widely used codes. - -The alpha-2 and alpha-3 codes are not case-dependent, -so you can use 'BO', 'Bo', 'bO' or 'bo' for Tibetan. -When a code is returned by one of the functions in -this module, it will always be lower-case. - -=head2 SPECIAL CODES - -The standard defines various special codes. - -=over 4 - -=item * - -The standard reserves codes in the ranges B<qa> - B<qt>, -B<qaa> - B<qat>, and B<900> - B<919>, for private use. - -=item * - -B<zx>, B<zxx>, and B<997>, are the codes for unwritten languages. - -=item * - -B<zy>, B<zyy>, and B<998>, are the codes for an undetermined script. - -=item * - -B<zz>, B<zzz>, and B<999>, are the codes for an uncoded script. - -=back - -The private codes are not recognised by Locale::Script, -but the others are. - -=cut - -#----------------------------------------------------------------------- - require Exporter; use Carp; use Locale::Constants; @@ -115,7 +17,7 @@ use Locale::Constants; # Public Global Variables #----------------------------------------------------------------------- use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); -$VERSION = sprintf("%d.%02d", q$Revision: 2.0 $ =~ /(\d+)\.(\d+)/); +$VERSION = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/); @ISA = qw(Exporter); @EXPORT = qw(code2script script2code all_script_codes all_script_names @@ -130,54 +32,9 @@ my $COUNTRIES = []; #======================================================================= - -=head1 CONVERSION ROUTINES - -There are three conversion routines: C<code2script()>, C<script2code()>, -and C<script_code2code()>.
- -=over 8 - -=item code2script( CODE, [ CODESET ] ) - -This function takes a script code and returns a string -which contains the name of the script identified. -If the code is not a valid script code, as defined by ISO 15924, -then C<undef> will be returned: - - $script = code2script('cy'); # Cyrillic - -=item script2code( STRING, [ CODESET ] ) - -This function takes a script name and returns the corresponding -script code, if such exists. -If the argument could not be identified as a script name, -then C<undef> will be returned: - - $code = script2code('Gothic', LOCALE_CODE_ALPHA_3); - # $code will now be 'gth' - -The case of the script name is not important. -See the section L<KNOWN BUGS AND LIMITATIONS> below. - -=item script_code2code( CODE, CODESET, CODESET ) - -This function takes a script code from one code set, -and returns the corresponding code from another code set. - - $alpha2 = script_code2code('jwi', - LOCALE_CODE_ALPHA_3 => LOCALE_CODE_ALPHA_2); - # $alpha2 will now be 'jw' (Javanese) - -If the code passed is not a valid script code in -the first code set, or if there isn't a code for the -corresponding script in the second code set, -then C<undef> will be returned.
- -=back - -=cut - +# +# code2script ( CODE [, CODESET ] ) +# #======================================================================= sub code2script { @@ -216,6 +73,12 @@ sub code2script } } + +#======================================================================= +# +# script2code ( SCRIPT [, CODESET ] ) +# +#======================================================================= sub script2code { my $script = shift; @@ -237,6 +100,12 @@ sub script2code } } + +#======================================================================= +# +# script_code2code ( CODE, IN-CODESET, OUT-CODESET ) +# +#======================================================================= sub script_code2code { (@_ == 3) or croak "script_code2code() takes 3 arguments!"; @@ -244,7 +113,7 @@ sub script_code2code my $code = shift; my $inset = shift; my $outset = shift; - my $outcode = shift; + my $outcode; my $script; @@ -255,32 +124,12 @@ sub script_code2code return $outcode; } -#======================================================================= - -=head1 QUERY ROUTINES - -There are two function which can be used to obtain a list of all codes, -or all script names: - -=over 8 - -=item C<all_script_codes()> - -Returns a list of all two-letter script codes. -The codes are guaranteed to be all lower-case, -and not in any particular order. - -=item C<all_script_names()> - -Returns a list of all script names for which there is a corresponding -script code in the specified code set. -The names are capitalised, and not returned in any particular order. - -=back - -=cut #======================================================================= +# +# all_script_codes() +# +#======================================================================= sub all_script_codes { my $codeset = @_ > 0 ?
shift : LOCALE_CODE_DEFAULT; @@ -288,6 +137,12 @@ sub all_script_codes return keys %{ $CODES->[$codeset] }; } + +#======================================================================= +# +# all_script_names() +# +#======================================================================= sub all_script_names { my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; @@ -296,106 +151,15 @@ sub all_script_names } -#----------------------------------------------------------------------- - -=head1 EXAMPLES - -The following example illustrates use of the C<code2script()> function. -The user is prompted for a script code, and then told the corresponding -script name: - - $| = 1; # turn off buffering - - print "Enter script code: "; - chop($code = <STDIN>); - $script = code2script($code, LOCALE_CODE_ALPHA_2); - if (defined $script) - { - print "$code = $script\n"; - } - else - { - print "'$code' is not a valid script code!\n"; - } - - -=head1 KNOWN BUGS AND LIMITATIONS - -=over 4 - -=item * - -When using C<script2code()>, the script name must currently appear -exactly as it does in the source of the module. For example, - - script2code('Egyptian hieroglyphs') - -will return B<eg>, as expected. But the following will all return C<undef>: - - script2code('hieroglyphs') - script2code('Egyptian Hieroglypics') - -If there's need for it, a future version could have variants -for script names. - -=item * - -In the current implementation, all data is read in when the -module is loaded, and then held in memory. -A lazy implementation would be more memory friendly. - -=back - -=head1 SEE ALSO - -=over 4 - -=item Locale::Language - -ISO two letter codes for identification of language (ISO 639). - -=item Locale::Currency - -ISO three letter codes for identification of currencies -and funds (ISO 4217). - -=item Locale::Country - -ISO three letter codes for identification of countries (ISO 3166) - -=item ISO 15924 - -The ISO standard which defines these codes.
- -=item http://www.evertype.com/standards/iso15924/ - -Home page for ISO 15924. - - -=back - - -=head1 AUTHOR - -Neil Bowers E<lt>neil@bowers.comE<gt> - -=head1 COPYRIGHT - -Copyright (c) 2002 Neil Bowers. - -This module is free software; you can redistribute it and/or -modify it under the same terms as Perl itself. - -=cut - -#----------------------------------------------------------------------- - #======================================================================= +# # initialisation code - stuff the DATA into the ALPHA2 hash +# #======================================================================= { - my ($alpha2, $alpha3, $numeric); - my $script; + my ($alpha2, $alpha3, $numeric); + my $script; + local $_; while () @@ -420,6 +184,8 @@ modify it under the same terms as Perl itself. } } + + close(DATA); } 1;