lib/Locale/Maketext.pod Locale::Maketext documentation
lib/Locale/Maketext/ChangeLog Locale::Maketext
lib/Locale/Maketext/README Locale::Maketext
-lib/Locale/Maketext/test.pl See if Locale::Maketext works
lib/Locale/Maketext/TPJ13.pod Locale::Maketext documentation article
+lib/Locale/Maketext/t/00about.t See if Locale::Maketext works
+lib/Locale/Maketext/t/01make.t See if Locale::Maketext works
+lib/Locale/Maketext/t/02get.t See if Locale::Maketext works
+lib/Locale/Maketext/t/03http.t See if Locale::Maketext works
lib/Locale/Script.pm Locale::Codes
lib/Locale/Script.pod Locale::Codes documentation
lib/look.pl A "look" equivalent
-# Time-stamp: "2001-06-21 23:09:33 MDT"
+# Time-stamp: "2003-04-02 11:04:55 AHST"
require 5;
package Locale::Maketext;
BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
# define the constant 'DEBUG' at compile-time
-$VERSION = "1.03";
+$VERSION = "1.04";
@ISA = ();
$MATCH_SUPERS = 1;
unless(@languages) { # Calling with no args is magical! wooo, magic!
if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI
- my $in = $ENV{'HTTP_ACCEPT_LANGUAGE'} || '';
- # supposedly that works under mod_perl, too.
- $in =~ s<\([\)]*\)><>g; # Kill parens'd things -- just a hack.
- @languages = &I18N::LangTags::extract_language_tags($in) if length $in;
- # ...which untaints, incidentally.
+ @languages = $base_class->_http_accept_langs;
+ # it's off in its own routine because it's complicated
} else { # Not running as a CGI: try to puzzle out from the environment
if(length( $ENV{'LANG'} || '' )) {
#
###########################################################################
+sub _http_accept_langs {
+ # Deal with HTTP "Accept-Language:" stuff. Hassle.
+ # This code is more lenient than RFC 3282, which you must read.
+ # Hm. Should I just move this into I18N::LangTags at some point?
+ no integer;
+
+ my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
+ # (always ends up untainting)
+
+ return() unless defined $in and length $in;
+
+ $in =~ s/\([^\)]*\)//g; # nix just about any comment
+
+ if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
+ # Very common case: just one language tag
+ return lc $1;
+ } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
+ # Common case these days: just "foo, bar, baz"
+ return map lc($_), $in =~ m/([a-zA-Z][-a-zA-Z]+)/g;
+ }
+
+ # Else it's complicated...
+
+ $in =~ s/\s+//g; # Yes, we can just do without the WS!
+ my @in = $in =~ m/([^,]+)/g;
+ my %pref;
+
+ my $q;
+ foreach my $tag (@in) {
+ next unless $tag =~
+ m/^([a-zA-Z][-a-zA-Z]+)
+ (?:
+ ;q=
+ (
+ \d* # a bit too broad of a RE, but so what.
+ (?:
+ \.\d+
+ )?
+ )
+ )?
+ $
+ /sx
+ ;
+ $q = (defined $2 and length $2) ? $2 : 1;
+ #print "$1 with q=$q\n";
+ push @{ $pref{$q} }, lc $1;
+ }
+
+ return # Read off %pref, in descending key order...
+ map @{$pref{$_}},
+ sort {$b <=> $a}
+ keys %pref;
+}
+
+###########################################################################
+
sub _compile {
# This big scarp routine compiles an entry.
# It returns either a coderef if there's brackety bits in this, or
###########################################################################
1;
+__END__
+
+HEY YOU! You need some FOOD!
+
+
+ ~~ Tangy Moroccan Carrot Salad ~~
+
+* 6 to 8 medium carrots, peeled and then sliced in 1/4-inch rounds
+* 1/4 teaspoon chile powder (cayenne, chipotle, ancho, or the like)
+* 1 tablespoon ground cumin
+* 1 tablespoon honey
+* The juice of about a half a big lemon, or of a whole smaller one
+* 1/3 cup olive oil
+* 1 tablespoon of fresh dill, washed and chopped fine
+* Pinch of salt, maybe a pinch of pepper
+
+Cook the carrots in a pot of boiling water until just tender -- roughly
+six minutes. (Just don't let them get mushy!) Drain the carrots.
+
+In a largish bowl, combine the lemon juice, the cumin, the chile
+powder, and the honey. Mix well.
+Add the olive oil and whisk it together well. Add the dill and stir.
+
+Add the warm carrots to the bowl and toss it all to coat the carrots
+well. Season with salt and pepper, to taste.
+
+Serve warm or at room temperature.
+
+The measurements here are very approximate, and you should feel free to
+improvise and experiment. It's a very forgiving recipe. For example,
+you could easily halve or double the amount of cumin, or use chopped mint
+leaves instead of dill, or lime juice instead of lemon, et cetera.
+
+[end]
-# Time-stamp: "2001-06-21 23:12:39 MDT"
+# Time-stamp: "2003-04-02 11:10:32 AHST"
=head1 NAME
-Locale::Maketext -- framework for localization
+Locale::Maketext - framework for localization
=head1 SYNOPSIS
=over
-=item *
+=item *
$lh = YourProjClass->get_handle( ...langtags... ) || die "lg-handle?";
Otherwise (i.e., if not a CGI), this tries various OS-specific ways
to get the language-tags for the current locale/language, and then
-pretends that those were the value(s) passed to C<cet_handle>.
+pretends that those were the value(s) passed to C<get_handle>.
Currently this OS-specific stuff consists of looking in the environment
variables "LANG" and "LANGUAGE"; and on MSWin machines (where those
"Your search matched [quant,_1,document]!"
It's for I<quantifying> a noun (i.e., saying how much of it there is,
-while giving the currect form of it). The behavior of this method is
+while giving the correct form of it). The behavior of this method is
handy for English and a few other Western European languages, and you
should override it for languages where it's not suitable. You can feel
free to read the source, but the current implementation is basically
C<"...[quant,_1,file]..."> is fine (for 0 it returns "0 files",
for 1 it returns "1 file", and for more it returns "2 files", etc.)
-But for "directory", you'd want C<"[quant,_1,direcory,directories]">
+But for "directory", you'd want C<"[quant,_1,directory,directories]">
so that our elementary C<quant> method doesn't think that the
plural of "directory" is "directorys". And you might find that the
output may sound better if you specify a negative form, as in:
While the key must be a string value (since that's a basic
restriction that Perl places on hash keys), the value in
-the lexicon can currenly be of several types:
+the lexicon can currently be of several types:
a defined scalar, scalarref, or coderef. The use of these is
explained above, in the section 'The "maketext" Method', and
Bracket Notation for strings is discussed in the next section.
I often just use a key "_USAGE_MESSAGE". At that point I then go
and immediately to define that lexicon entry in the
ProjectClass::L10N::en lexicon (since English is always my "project
-lanuage"):
+language"):
'_USAGE_MESSAGE' => <<'EOSTUFF',
...long long message...
),
Examples: "[_1]" and "[,_1]", which are synonymous; and
-"[,ID-(,_4,-,_2,)]", which compiles as
+"C<[,ID-(,_4,-,_2,)]>", which compiles as
C<join "", "ID-(", $_[4], "-", $_[2], ")">.
=item *
Currently, an unescaped "~" before a character
other than a bracket or a comma is taken to mean just a "~" and that
-charecter. I.e., "~X" means the same as "~~X" -- i.e., one literal tilde,
+character. I.e., "~X" means the same as "~~X" -- i.e., one literal tilde,
and then one literal "X". However, by using "~X", you are assuming that
no future version of Maketext will use "~X" as a magic escape sequence.
In practice this is not a great problem, since first off you can just
my $lh = ThisProject::I18N->get_handle();
# For the moment, assume that things are set up so
# that we load class ThisProject::I18N::en
- # and that's the class that $lh belongs to.
+ # and that that's the class that $lh belongs to.
...
if(-e $filename) {
go_process_file($filename)
S<eval { }>. However, I want programmers to reserve the right (via
the "fail" attribute) to treat lookup failure as something other than
an exception of the same level of severity as a config file being
-unreadable, or some essential resource being inaccessable.
+unreadable, or some essential resource being inaccessible.
One possibly useful value for the "fail" attribute is the method name
"failure_handler_auto". This is a method defined in class
language, so that you can override the C<numf> method as
appropriate. Typical variables in number formatting are: what to
use as a decimal point (comma? period?); what to use as a thousands
-separator (space? nonbreakinng space? comma? period? small
+separator (space? nonbreaking space? comma? period? small
middot? prime? apostrophe?); and even whether the so-called "thousands
separator" is actually for every third digit -- I've heard reports of
-two hundred thousand being expressable as "2,00,000" for some Indian
+two hundred thousand being expressible as "2,00,000" for some Indian
(Subcontinental) languages, besides the less surprising "S<200 000>",
"200.000", "200,000", and "200'000". Also, using a set of numeral
glyphs other than the usual ASCII "0"-"9" might be appreciated, as via
message catalogs that are just databases of sprintf formats.
L<File::Findgrep|File::Findgrep> is a sample application/module
-that uses Locale::Maketext to localize its messages.
+that uses Locale::Maketext to localize its messages. For a larger
+internationalized system, see also L<Apache::MP3>.
L<I18N::LangTags|I18N::LangTags>.
=head1 COPYRIGHT AND DISCLAIMER
-Copyright (c) 1999-2001 Sean M. Burke. All rights reserved.
+Copyright (c) 1999-2003 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
Sean M. Burke C<sburke@cpan.org>
=cut
-
-# Zing!
Revision history for Perl suite Locale::Maketext
- Time-stamp: "2001-06-21 23:18:31 MDT"
+ Time-stamp: "2003-04-02 10:37:42 AHST"
+2003-04-02 Sean M. Burke sburke@cpan.org
+ * Release 1.04: Implementing proper HTTP "tag;q=rank" parsing for
+ get_handle. This should make all the difference for users/victims
+ of the current version of Safari, which uses that syntax as well
+ as inserts random languages with low q numbers.
+ Thanks to Jesse Vincent and the whole RT junta for finding this.
+
+ * Added more tests, now in t/
+
+ * Lots of typo fixes to Maketext.pm. Thanks to Evan A. Zacks for
+ patient help in finding them all.
+
2001-06-21 Sean M. Burke sburke@cpan.org
* Release 1.03: basically cosmetic tweaks to the docs and the
test.pl.
README for Locale::Maketext
- Time-stamp: "2001-05-25 08:15:55 MDT"
+ Time-stamp: "2003-04-02 11:06:17 AHST"
Locale::Maketext
This suite requires Perl 5. It also requires a recent version
of I18N::LangTags. MSWin users should also get Win32::Locale.
-File::Findgrep is also useful example code.
+File::Findgrep is also useful example code, as is the rather
+larger Apache::MP3 source (even if you don't run Apache).
INSTALLATION
The latest version of Locale::Maketext is available from the
Comprehensive Perl Archive Network (CPAN). Visit
-<http://www.cpan.org/> to find a CPAN site near you.
+<http://www.perl.com/CPAN/> to find a CPAN site near you.
COPYRIGHT
-Copyright 1999-2001, Sean M. Burke <sburke@cpan.org>, all rights
+Copyright 1999-2003, Sean M. Burke <sburke@cpan.org>, all rights
reserved. This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
--- /dev/null
+
+require 5;
+use Test;
+BEGIN { plan tests => 1; }
+use Locale::Maketext 1.01;
+
+print "#\n#\n",
+ "# Locale::Maketext v$Locale::Maketext::VERSION\n",
+ "# I18N::LangTags v", $I18N::LangTags::VERSION || "?", "\n",
+ "#\n#\n",
+;
+
+print "# Running under perl version $] for $^O",
+ (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n";
+
+print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n"
+ if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber();
+
+print "# MacPerl verison $MacPerl::Version\n"
+ if defined $MacPerl::Version;
+
+printf
+ "# Current time local: %s\n# Current time GMT: %s\n",
+ scalar( gmtime($^T)), scalar(localtime($^T));
+
+print "# Using Test.pm v", $Test::VERSION || "?", "\n";
+
+ok 1;
+
--- /dev/null
+
+require 5;
+use Test;
+BEGIN { plan tests => 6; }
+use Locale::Maketext 1.01;
+print "# Hi there...\n";
+ok 1;
+
+# declare some classes...
+{
+ package Woozle;
+ @ISA = ('Locale::Maketext');
+ sub dubbil { return $_[1] * 2 }
+ sub numerate { return $_[2] . 'en' }
+}
+{
+ package Woozle::elx;
+ @ISA = ('Woozle');
+ %Lexicon = (
+ 'd2' => 'hum [dubbil,_1]',
+ 'd3' => 'hoo [quant,_1,zaz]',
+ 'd4' => 'hoo [*,_1,zaz]',
+ );
+ keys %Lexicon; # dodges the 'used only once' warning
+}
+
+ok defined( $lh = Woozle->get_handle('elx') ) && ref($lh);
+ok $lh && $lh->maketext('d2', 7), "hum 14" ;
+ok $lh && $lh->maketext('d3', 7), "hoo 7 zazen" ;
+ok $lh && $lh->maketext('d4', 7), "hoo 7 zazen" ;
+
+print "# Byebye!\n";
+ok 1;
+
--- /dev/null
+
+require 5;
+use Test;
+BEGIN { plan tests => 11; }
+use Locale::Maketext 1.01;
+print "# Hi there...\n";
+ok 1;
+
+print "# --- Making sure that get_handle works ---\n";
+
+# declare some classes...
+{
+ package Woozle;
+ @ISA = ('Locale::Maketext');
+ sub dubbil { return $_[1] * 2 }
+ sub numerate { return $_[2] . 'en' }
+}
+{
+ package Woozle::eu_mt;
+ @ISA = ('Woozle');
+ %Lexicon = (
+ 'd2' => 'hum [dubbil,_1]',
+ 'd3' => 'hoo [quant,_1,zaz]',
+ 'd4' => 'hoo [*,_1,zaz]',
+ );
+ keys %Lexicon; # dodges the 'used only once' warning
+}
+
+my $lh;
+print "# Basic sanity:\n";
+ok defined( $lh = Woozle->get_handle('eu-mt') ) && ref($lh);
+ok $lh && $lh->maketext('d2', 7), "hum 14" ;
+
+
+
+print "# Make sure we can assign to ENV entries\n",
+ "# (Otherwise we can't run the subsequent tests)...\n";
+$ENV{'MYORP'} = 'Zing';
+ok $ENV{'MYORP'}, 'Zing';
+$ENV{'SWUZ'} = 'KLORTHO HOOBOY';
+ok $ENV{'SWUZ'}, 'KLORTHO HOOBOY';
+
+delete $ENV{'MYORP'};
+delete $ENV{'SWUZ'};
+
+print "# Test LANG...\n";
+$ENV{'REQUEST_METHOD'} = '';
+$ENV{'LANG'} = 'Eu_MT';
+$ENV{'LANGUAGE'} = '';
+ok defined( $lh = Woozle->get_handle() ) && ref($lh);
+
+print "# Test LANGUAGE...\n";
+$ENV{'LANG'} = '';
+$ENV{'LANGUAGE'} = 'Eu-MT';
+ok defined( $lh = Woozle->get_handle() ) && ref($lh);
+
+print "# Test HTTP_ACCEPT_LANGUAGE...\n";
+$ENV{'REQUEST_METHOD'} = 'GET';
+$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'eu-MT';
+ok defined( $lh = Woozle->get_handle() ) && ref($lh);
+$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'x-plorp, zaz, eu-MT, i-klung';
+ok defined( $lh = Woozle->get_handle() ) && ref($lh);
+$ENV{'HTTP_ACCEPT_LANGUAGE'} = 'x-plorp, zaz, eU-Mt, i-klung';
+ok defined( $lh = Woozle->get_handle() ) && ref($lh);
+
+
+print "# Byebye!\n";
+ok 1;
+
--- /dev/null
+
+use Locale::Maketext;
+
+use Test;
+BEGIN { plan tests => 87 };
+
+my @in = grep m/\S/, split /\n/, q{
+
+[ sv ] sv
+[ en ] en
+[ en fi ] en, fi
+[ en-us ] en-us
+[ en-us ] en-US
+[ en-us ] EN-US
+
+[ en-au en i-klingon en-gb en-us mt-mt mt ja ] EN-au, JA;q=0.14, i-klingon;q=0.83, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86
+[ en-au en i-klingon en-gb en-us mt-mt mt tli ja ] EN-au, tli;q=0.201, JA;q=0.14, i-klingon;q=0.83, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86
+[ en-au en en-gb en-us ja ] en-au, ja;q=0.20, en-gb;q=0.60, en-us;q=0.40, en;q=0.80
+
+[ en-au en en-gb en-us mt-mt mt ja ] EN-au, JA;q=0.14, en-gb;q=0.71, en-us;q=0.57, mt-mt;q=0.43, mt;q=0.29, en;q=0.86
+[ en-au en en-gb en-us ja ] en-au, ja;q=0.20, en-gb;q=0.60, en-us;q=0.40, en;q=0.80
+[ en fr ] en;q=1,fr;q=.5
+[ en fr ] en;q=1,fr;q=.99
+[ en ru ko ] en, ru;q=0.7, ko;q=0.3
+[ en ru ko ] en, ru;q=0.7, KO;q=0.3
+[ en-us en ] en-us, en;q=0.50
+[ en fr ] fr ; q = 0.9, en
+[ en fr ] en,fr;q=.90
+[ ru en-uk en fr ] ru, en-UK;q=0.5, en;q=0.3, fr;q=0.1
+[ en-us fr es-mx ] en-us,fr;q=0.7,es-mx;q=0.3
+[ en-us en ] en-us, en;q=0.50
+
+[ da en-gb en ] da, en-gb;q=0.8, en;q=0.7
+[ da en-gb en ] da, en;q=0.7, en-gb;q=0.8
+[ da en-gb en ] da, en-gb;q=0.8, en;q=0.7
+[ da en-gb en ] da,en;q=0.7,en-gb;q=0.8
+[ da en-gb en ] da, en-gb ; q=0.8, en ; q=0.7
+[ da en-gb en ] da , en-gb ; q = 0.8 , en ; q =0.7
+[ da en-gb en ] da (yup, Danish) , en-gb ; q = 0.8 , en ; q =0.7
+
+[ no dk en-uk en-us ] en-UK;q=0.7, en-US;q=0.6, no;q=1.0, dk;q=0.8
+[ no dk en-uk en-us ] en-US;q=0.6, en-UK;q=0.7, no;q=1.0, dk;q=0.8
+[ no dk en-uk en-us ] en-UK;q=0.7, no;q=1.0, en-US;q=0.6, dk;q=0.8
+[ no dk en-uk en-us ] en-UK;q=0.7, no;q=1.0, dk;q=0.8, en-US;q=0.6
+
+[ fi en ] fi;q=1, en;q=0.2
+[ de-de de en en-us en-gb ] de-DE, de;q=0.80, en;q=0.60, en-US;q=0.40, en-GB;q=0.20
+[ ru ] ru; q=1, *; q=0.1
+[ ru en ] ru, en; q=0.1
+[ ja en ] ja,en;q=0.5
+[ en ] en; q=1.0
+[ ja ] ja; q=1.0
+[ ja ] ja; q=1.0
+[ en ja ] en; q=0.5, ja; q=0.5
+[ fr-ca fr en ] fr-ca, fr;q=0.8, en;q=0.7
+[ NIX ] NIX
+};
+
+foreach my $in (@in) {
+ $in =~ s/^\s*\[([^\]]+)\]\s*//s or die "Bad input: $in";
+ my @should = do { my $x = $1; $x =~ m/(\S+)/g };
+
+ if($in eq 'NIX') { $in = ''; @should = (); }
+
+ local $ENV{'HTTP_ACCEPT_LANGUAGE'};
+
+ foreach my $modus (
+ sub {
+ print "# Testing with arg...\n";
+ $ENV{'HTTP_ACCEPT_LANGUAGE'} = 'PLORK';
+ return $_[0];
+ },
+ sub {
+ print "# Testing wath HTTP_ACCEPT_LANGUAGE...\n";
+ $ENV{'HTTP_ACCEPT_LANGUAGE'} = $_[0];
+ return();
+ },
+ ) {
+ my @args = &$modus($in);
+
+ # ////////////////////////////////////////////////////
+ my @out = Locale::Maketext->_http_accept_langs(@args);
+ # \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
+
+ if(
+ @out == @should
+ and lc( join "\e", @out ) eq lc( join "\e", @should )
+ ) {
+ print "# Happily got [@out] from [$in]\n";
+ ok 1;
+ } else {
+ ok 0;
+ print "#Got: [@out]\n",
+ "# but wanted: [@should]\n",
+ "# < \"$in\"\n#\n";
+ }
+ }
+}
+
+print "#\n#\n# Bye-bye!\n";
+ok 1;
+
+++ /dev/null
-# Before `make install' is performed this script should be runnable with
-# `make test'. After `make install' it should work as `perl test.pl'
-# Time-stamp: "2001-06-20 02:12:53 MDT"
-######################### We start with some black magic to print on failure.
-
-# (It may become useful if the test is moved to ./t subdirectory.)
-
-BEGIN { $| = 1; print "1..5\n"; }
-END {print "fail 1\n" unless $loaded;}
-use Locale::Maketext 1.01;
-print "# Perl v$], Locale::Maketext v$Locale::Maketext::VERSION\n";
-$loaded = 1;
-print "ok 1\n";
-{
- package Woozle;
- @ISA = ('Locale::Maketext');
- sub dubbil { return $_[1] * 2 }
- sub numerate { return $_[2] . 'en' }
-}
-{
- package Woozle::elx;
- @ISA = ('Woozle');
- %Lexicon = (
- 'd2' => 'hum [dubbil,_1]',
- 'd3' => 'hoo [quant,_1,zaz]',
- 'd4' => 'hoo [*,_1,zaz]',
- );
-}
-
-$lh = Woozle->get_handle('elx');
-if($lh) {
- print "ok 2\n";
-
- my $x;
-
- $x = $lh->maketext('d2', 7);
- if($x eq "hum 14") {
- print "ok 3\n";
- } else {
- print "fail 3 # (got \"$x\")\n";
- }
-
- $x = $lh->maketext('d3', 7);
- if($x eq "hoo 7 zazen") {
- print "ok 4\n";
- } else {
- print "fail 4 # (got \"$x\")\n";
- }
-
- $x = $lh->maketext('d4', 7);
- if($x eq "hoo 7 zazen") {
- print "ok 5\n";
- } else {
- print "fail 5 # (got \"$x\")\n";
- }
-
-
-} else {
- print "fail 2\n";
-}
-#Shazam!