-# Time-stamp: "2001-06-21 23:09:33 MDT"
+# Time-stamp: "2004-01-11 19:02:37 AST"
require 5;
package Locale::Maketext;
use strict;
use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
- $USE_LITERALS);
+ $USE_LITERALS $MATCH_SUPERS_TIGHTLY);
use Carp ();
use I18N::LangTags 0.21 ();
BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
# define the constant 'DEBUG' at compile-time
-$VERSION = "1.03";
+$VERSION = "1.07";
@ISA = ();
$MATCH_SUPERS = 1;
-$USING_LANGUAGE_TAGS = 1;
+# Only consulted by _add_supers when $MATCH_SUPERS is true. When this is
+# true, the super-languages of each tag are inserted right after that tag;
+# when false, the super-languages of all tags are appended to the end of
+# the list.
+$MATCH_SUPERS_TIGHTLY = 1;
+$USING_LANGUAGE_TAGS = 1;
# Turning this off is somewhat of a security risk in that little or no
# checking will be done on the legality of tokens passed to the
# eval("use $module_name") in _try_use. If you turn this off, you have
# to do your own taint checking.
sub get_handle { # This is a constructor and, yes, it CAN FAIL.
# Its class argument has to be the base class for the current
# application's l10n files.
+ #
+ # Arguments: the base class (or an object of it), then an optional list
+ # of language preferences. When no preferences are given, they are taken
+ # from _ambient_langprefs(). In either case the list is then expanded by
+ # _langtag_munging().
+ #
+ # Returns a new object of the first "<base_class>::<language>" class for
+ # which _try_use() returns true, or undef when no candidate can be used.
+
my($base_class, @languages) = @_;
$base_class = ref($base_class) || $base_class;
# Complain if they use __PACKAGE__ as a project base class?
- unless(@languages) { # Calling with no args is magical! wooo, magic!
- if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI
- my $in = $ENV{'HTTP_ACCEPT_LANGUAGE'} || '';
- # supposedly that works under mod_perl, too.
- $in =~ s<\([\)]*\)><>g; # Kill parens'd things -- just a hack.
- @languages = &I18N::LangTags::extract_language_tags($in) if length $in;
- # ...which untaints, incidentally.
-
- } else { # Not running as a CGI: try to puzzle out from the environment
- if(length( $ENV{'LANG'} || '' )) {
- push @languages, split m/[,:]/, $ENV{'LANG'};
- # LANG can be only /one/ locale as far as I know, but what the hey.
- }
- if(length( $ENV{'LANGUAGE'} || '' )) {
- push @languages, split m/[,:]/, $ENV{'LANGUAGE'};
- }
- print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG;
- # Those are really locale IDs, but they get xlated a few lines down.
-
- if(&_try_use('Win32::Locale')) {
- # If we have that module installed...
- push @languages, Win32::Locale::get_language()
- if defined &Win32::Locale::get_language;
- }
- }
+ @languages = $base_class->_ambient_langprefs() unless @languages;
+ @languages = $base_class->_langtag_munging(@languages);
+
+ # %seen counts every candidate class already visited, so that a class
+ # name occurring more than once in the list is only tried the first time.
+ my %seen;
+ foreach my $module_name ( map { $base_class . "::" . $_ } @languages ) {
+ next unless length $module_name; # sanity
+ next if $seen{$module_name}++ # Already been here, and it was no-go
+ || !&_try_use($module_name); # Try to use() it, but can't it.
+ return($module_name->new); # Make it!
}
- #------------------------------------------------------------------------
- print "Lgs1: ", map("<$_>", @languages), "\n" if DEBUG;
+ return undef; # Fail!
+}
+
+###########################################################################
+
+sub _langtag_munging {
+ my($base_class, @languages) = @_;
+
+ # Turns a list of language preferences into the ordered list of
+ # class-name suffixes that get_handle() appends to "<base_class>::".
+ #
+ # With $USING_LANGUAGE_TAGS true, each entry is converted with
+ # locale2language_tag, alternate tags and (if available) panic languages
+ # are added, super-languages are added by _add_supers, and the class's
+ # fallback_languages are appended. Every entry is then lowercased, with
+ # "-" turned into "_" and anything outside a-z, 0-9 and "_" removed.
+ # With $USING_LANGUAGE_TAGS false, the entries are passed through as is.
+ # In both cases fallback_language_classes is appended last.
+ #
+ # Returns the resulting list.
+
+ DEBUG and print "Lgs1: ", map("<$_>", @languages), "\n";
if($USING_LANGUAGE_TAGS) {
@languages = map &I18N::LangTags::locale2language_tag($_), @languages;
# if it's a locale ID, try converting to a lg tag (untainted),
# otherwise nix it.
- push @languages, map I18N::LangTags::super_languages($_), @languages
- if $MATCH_SUPERS;
-
- @languages = map { $_, I18N::LangTags::alternate_language_tags($_) }
+ @languages = map {; $_, I18N::LangTags::alternate_language_tags($_) }
@languages; # catch alternation
+ DEBUG and print "Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
- push @languages, I18N::LangTags::panic_languages(@languages)
- if defined &I18N::LangTags::panic_languages;
+ if( defined &I18N::LangTags::panic_languages ) {
+ push @languages, I18N::LangTags::panic_languages(@languages);
+ DEBUG and print "After adding panic languages:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
+ }
+
+ @languages = $base_class->_add_supers( @languages );
push @languages, $base_class->fallback_languages;
# You are free to override fallback_languages to return empty-list!
+ DEBUG and print "Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
@languages = # final bit of processing:
map {
my $it = $_; # copy
$it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
$it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
$it;
} @languages
;
+ DEBUG and print "Nearing end of munging:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
+ } else {
+ DEBUG and print "Bypassing language-tags.\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
}
- print "Lgs2: ", map("<$_>", @languages), "\n" if DEBUG > 1;
+
+ DEBUG and print "Before adding fallback classes:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
push @languages, $base_class->fallback_language_classes;
# You are free to override that to return whatever.
+ DEBUG and print "Finally:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
- my %seen = ();
- foreach my $module_name ( map { $base_class . "::" . $_ } @languages )
- {
- next unless length $module_name; # sanity
- next if $seen{$module_name}++ # Already been here, and it was no-go
- || !&_try_use($module_name); # Try to use() it, but can't it.
- return($module_name->new); # Make it!
- }
-
- return undef; # Fail!
+ return @languages;
}
###########################################################################
-#
-# This is where most people should stop reading.
-#
-###########################################################################
-sub _compile {
- # This big scarp routine compiles an entry.
- # It returns either a coderef if there's brackety bits in this, or
- # otherwise a ref to a scalar.
-
- my $target = ref($_[0]) || $_[0];
+sub _ambient_langprefs {
+ my $base_class = $_[0];
+ # Works out language preferences from the process environment; used by
+ # get_handle() when it is called without any.
+ #
+ # When $ENV{REQUEST_METHOD} is non-empty (taken to mean "running as a
+ # CGI"), the result of _http_accept_langs() is returned. Otherwise the
+ # entries of $ENV{LANG} and $ENV{LANGUAGE}, each split on "," or ":", are
+ # collected, followed by Win32::Locale::get_language() if that module can
+ # be loaded and defines the function. Returns the list, possibly empty.
- my(@code);
- my(@c) = (''); # "chunks" -- scratch.
- my $call_count = 0;
- my $big_pile = '';
- {
- my $in_group = 0; # start out outside a group
- my($m, @params); # scratch
-
- while($_[1] =~ # Iterate over chunks.
- m<\G(
- [^\~\[\]]+ # non-~[] stuff
- |
- ~. # ~[, ~], ~~, ~other
- |
- \[ # [ presumably opening a group
- |
- \] # ] presumably closing a group
- |
- ~ # terminal ~ ?
- |
- $
- )>xgs
- ) {
- print " \"$1\"\n" if DEBUG > 2;
-
- if($1 eq '[' or $1 eq '') { # "[" or end
- # Whether this is "[" or end, force processing of any
- # preceding literal.
- if($in_group) {
- if($1 eq '') {
- $target->_die_pointing($_[1], "Unterminated bracket group");
- } else {
- $target->_die_pointing($_[1], "You can't nest bracket groups");
- }
- } else {
- if($1 eq '') {
- print " [end-string]\n" if DEBUG > 2;
- } else {
- $in_group = 1;
- }
- die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
- if(length $c[-1]) {
- # Now actually processing the preceding literal
- $big_pile .= $c[-1];
- if($USE_LITERALS and (
- (ord('A') == 65)
- ? $c[-1] !~ m<[^\x20-\x7E]>s
- # ASCII very safe chars
- : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
- # EBCDIC very safe chars
- )) {
- # normal case -- all very safe chars
- $c[-1] =~ s/'/\\'/g;
- push @code, q{ '} . $c[-1] . "',\n";
- $c[-1] = ''; # reuse this slot
- } else {
- push @code, ' $c[' . $#c . "],\n";
- push @c, ''; # new chunk
- }
- }
- # else just ignore the empty string.
- }
-
- } elsif($1 eq ']') { # "]"
- # close group -- go back in-band
- if($in_group) {
- $in_group = 0;
-
- print " --Closing group [$c[-1]]\n" if DEBUG > 2;
-
- # And now process the group...
-
- if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
- DEBUG > 2 and print " -- (Ignoring)\n";
- $c[-1] = ''; # reset out chink
- next;
- }
-
- #$c[-1] =~ s/^\s+//s;
- #$c[-1] =~ s/\s+$//s;
- ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/
-
- # A bit of a hack -- we've turned "~,"'s into DELs, so turn
- # 'em into real commas here.
- if (ord('A') == 65) { # ASCII, etc
- foreach($m, @params) { tr/\x7F/,/ }
- } else { # EBCDIC (1047, 0037, POSIX-BC)
- # Thanks to Peter Prymmer for the EBCDIC handling
- foreach($m, @params) { tr/\x07/,/ }
- }
-
- # Special-case handling of some method names:
- if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
- # Treat [_1,...] as [,_1,...], etc.
- unshift @params, $m;
- $m = '';
- } elsif($m eq '*') {
- $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
- } elsif($m eq '#') {
- $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
- }
-
- # Most common case: a simple, legal-looking method name
- if($m eq '') {
- # 0-length method name means to just interpolate:
- push @code, ' (';
- } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
- and $m !~ m<(?:^|\:)\d>s
- # exclude starting a (sub)package or symbol with a digit
- ) {
- # Yes, it even supports the demented (and undocumented?)
- # $obj->Foo::bar(...) syntax.
- $target->_die_pointing(
- $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
- 2 + length($c[-1])
- )
- if $m =~ m/^SUPER::/s;
- # Because for SUPER:: to work, we'd have to compile this into
- # the right package, and that seems just not worth the bother,
- # unless someone convinces me otherwise.
-
- push @code, ' $_[0]->' . $m . '(';
- } else {
- # TODO: implement something? or just too icky to consider?
- $target->_die_pointing(
- $_[1],
- "Can't use \"$m\" as a method name in bracket group",
- 2 + length($c[-1])
- );
- }
-
- pop @c; # we don't need that chunk anymore
- ++$call_count;
-
- foreach my $p (@params) {
- if($p eq '_*') {
- # Meaning: all parameters except $_[0]
- $code[-1] .= ' @_[1 .. $#_], ';
- # and yes, that does the right thing for all @_ < 3
- } elsif($p =~ m<^_(-?\d+)$>s) {
- # _3 meaning $_[3]
- $code[-1] .= '$_[' . (0 + $1) . '], ';
- } elsif($USE_LITERALS and (
- (ord('A') == 65)
- ? $p !~ m<[^\x20-\x7E]>s
- # ASCII very safe chars
- : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
- # EBCDIC very safe chars
- )) {
- # Normal case: a literal containing only safe characters
- $p =~ s/'/\\'/g;
- $code[-1] .= q{'} . $p . q{', };
- } else {
- # Stow it on the chunk-stack, and just refer to that.
- push @c, $p;
- push @code, ' $c[' . $#c . "], ";
- }
- }
- $code[-1] .= "),\n";
-
- push @c, '';
- } else {
- $target->_die_pointing($_[1], "Unbalanced ']'");
- }
-
- } elsif(substr($1,0,1) ne '~') {
- # it's stuff not containing "~" or "[" or "]"
- # i.e., a literal blob
- $c[-1] .= $1;
-
- } elsif($1 eq '~~') { # "~~"
- $c[-1] .= '~';
-
- } elsif($1 eq '~[') { # "~["
- $c[-1] .= '[';
-
- } elsif($1 eq '~]') { # "~]"
- $c[-1] .= ']';
-
- } elsif($1 eq '~,') { # "~,"
- if($in_group) {
- # This is a hack, based on the assumption that no-one will actually
- # want a DEL inside a bracket group. Let's hope that's it's true.
- if (ord('A') == 65) { # ASCII etc
- $c[-1] .= "\x7F";
- } else { # EBCDIC (cp 1047, 0037, POSIX-BC)
- $c[-1] .= "\x07";
- }
- } else {
- $c[-1] .= '~,';
- }
-
- } elsif($1 eq '~') { # possible only at string-end, it seems.
- $c[-1] .= '~';
-
- } else {
- # It's a "~X" where X is not a special character.
- # Consider it a literal ~ and X.
- $c[-1] .= $1;
- }
- }
+ return $base_class->_http_accept_langs
+ if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI
+ # it's off in its own routine because it's complicated
+
+ # Not running as a CGI: try to puzzle out from the environment
+ my @languages;
+
+ if(length( $ENV{'LANG'} || '' )) {
+ push @languages, split m/[,:]/, $ENV{'LANG'};
+ # LANG can be only /one/ locale as far as I know, but what the hey.
}
- if($call_count) {
- undef $big_pile; # Well, nevermind that.
- } else {
- # It's all literals! Ahwell, that can happen.
- # So don't bother with the eval. Return a SCALAR reference.
- return \$big_pile;
+ if(length( $ENV{'LANGUAGE'} || '' )) {
+ push @languages, split m/[,:]/, $ENV{'LANGUAGE'};
}
- die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
- print scalar(@c), " chunks under closure\n" if DEBUG;
- if(@code == 0) { # not possible?
- print "Empty code\n" if DEBUG;
- return \'';
- } elsif(@code > 1) { # most cases, presumably!
- unshift @code, "join '',\n";
+ print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG;
+ # Those are really locale IDs, but they get xlated a few lines down.
+
+ if(&_try_use('Win32::Locale')) {
+ # If we have that module installed...
+ # The "|| ''" pushes an empty string, rather than a false or undefined
+ # value, when get_language() has nothing to report.
+ push @languages, Win32::Locale::get_language() || ''
+ if defined &Win32::Locale::get_language;
}
- unshift @code, "use strict; sub {\n";
- push @code, "}\n";
- print @code if DEBUG;
- my $sub = eval(join '', @code);
- die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
- return $sub;
+ return @languages;
}
-# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+###########################################################################
-sub _die_pointing {
- # This is used by _compile to throw a fatal error
- my $target = shift; # class name
- # ...leaving $_[0] the error-causing text, and $_[1] the error message
-
- my $i = index($_[0], "\n");
-
- my $pointy;
- my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
- if($pos < 1) {
- $pointy = "^=== near there\n";
- } else { # we need to space over
- my $first_tab = index($_[0], "\t");
- if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
- # No tabs, or the first tab is harmlessly after where we will point to,
- # AND we're far enough from the margin that we can draw a proper arrow.
- $pointy = ('=' x $pos) . "^ near there\n";
- } else {
- # tabs screw everything up!
- $pointy = substr($_[0],0,$pos);
- $pointy =~ tr/\t //cd;
- # make everything into whitespace, but preseving tabs
- $pointy .= "^=== near there\n";
+sub _add_supers {
+ my($base_class, @languages) = @_;
+
+ # Adds super-languages, as given by I18N::LangTags::super_languages, to
+ # the list of language tags and returns the resulting list. The mode is
+ # chosen by two package variables:
+ #  - $MATCH_SUPERS false: the list is returned unchanged.
+ #  - $MATCH_SUPERS_TIGHTLY true: each tag is followed immediately by its
+ #    super-languages, stopping at the first one whose encoded form
+ #    (encode_language_tag) is already present in the incoming list.
+ #  - otherwise: the super-languages of every tag are appended after the
+ #    whole incoming list.
+
+ if(!$MATCH_SUPERS) {
+ # Nothing
+ DEBUG and print "Bypassing any super-matching.\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
+
+ } elsif( $MATCH_SUPERS_TIGHTLY ) {
+ DEBUG and print "Before adding new supers tightly:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
+
+ # First pass: record the encoded form of every tag already in the list.
+ my %seen_encoded;
+ foreach my $lang (@languages) {
+ $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1
+ }
+
+ # Second pass: emit each tag followed by those of its supers that come
+ # before the first already-listed one.
+ my(@output_languages);
+ foreach my $lang (@languages) {
+ push @output_languages, $lang;
+ foreach my $s ( I18N::LangTags::super_languages($lang) ) {
+ # Note that super_languages returns the longest first.
+ last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) };
+ push @output_languages, $s;
+ }
}
+ @languages = @output_languages;
+
+ DEBUG and print "After adding new supers tightly:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
+
+ } else {
+
+ push @languages, map I18N::LangTags::super_languages($_), @languages;
+ DEBUG and print "After adding supers to end:\n",
+ " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n";
}
- my $errmsg = "$_[1], in\:\n$_[0]";
+ return @languages;
+}
+
+###########################################################################
+#
+# This is where most people should stop reading.
+#
+###########################################################################
+
+use Locale::Maketext::GutsLoader;
+
+sub _http_accept_langs {
+ # Deal with HTTP "Accept-Language:" stuff. Hassle.
+ # This code is more lenient than RFC 3282, which you must read.
+ # Hm. Should I just move this into I18N::LangTags at some point?
+ #
+ # Called as a method. An optional second argument is used as the header
+ # value instead of $ENV{HTTP_ACCEPT_LANGUAGE}.
+ #
+ # Returns the language tags found, lowercased, in descending order of
+ # q-value; a tag without a q-value counts as q=1, and tags with equal
+ # q-values keep the order they had in the header. Entries that do not
+ # look like "tag" or "tag;q=number" are skipped. Returns an empty list
+ # when the header value is undefined or empty.
+ no integer;
+
+ my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
+ # (always ends up untainting)
+
+ return() unless defined $in and length $in;
+
+ $in =~ s/\([^\)]*\)//g; # nix just about any comment
- if($i == -1) {
- # No newline.
- $errmsg .= "\n" . $pointy;
- } elsif($i == (length($_[0]) - 1) ) {
- # Already has a newline at end.
- $errmsg .= $pointy;
- } else {
- # don't bother with the pointy bit, I guess.
+ if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
+ # Very common case: just one language tag
+ return lc $1;
+ } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
+ # Common case these days: just "foo, bar, baz"
+ return map lc($_), $in =~ m/([a-zA-Z][-a-zA-Z]+)/g;
+ }
+
+ # Else it's complicated...
+
+ $in =~ s/\s+//g; # Yes, we can just do without the WS!
+ my @in = $in =~ m/([^,]+)/g;
+ my %pref;
+
+ my $q;
+ foreach my $tag (@in) {
+ next unless $tag =~
+ m/^([a-zA-Z][-a-zA-Z]+)
+ (?:
+ ;q=
+ (
+ \d* # a bit too broad of a RE, but so what.
+ (?:
+ \.\d+
+ )?
+ )
+ )?
+ $
+ /sx
+ ;
+ # Key %pref by the numeric value of q, not by its spelling, so that
+ # "1", "1.0" and "1.00" share one bucket. Otherwise they would be
+ # separate keys that compare equal in the sort below, and the order of
+ # equally preferred tags would depend on hash ordering.
+ $q = (defined $2 and length $2) ? 0 + $2 : 1;
+ #print "$1 with q=$q\n";
+ push @{ $pref{$q} }, lc $1;
}
- Carp::croak( "$errmsg via $target, as used" );
+
+ return # Read off %pref, in descending key order...
+ map @{$pref{$_}},
+ sort {$b <=> $a}
+ keys %pref;
}
###########################################################################
###########################################################################
1;
+__END__
+
+HEY YOU! You need some FOOD!
+
+
+ ~~ Tangy Moroccan Carrot Salad ~~
+
+* 6 to 8 medium carrots, peeled and then sliced in 1/4-inch rounds
+* 1/4 teaspoon chile powder (cayenne, chipotle, ancho, or the like)
+* 1 tablespoon ground cumin
+* 1 tablespoon honey
+* The juice of about a half a big lemon, or of a whole smaller one
+* 1/3 cup olive oil
+* 1 tablespoon of fresh dill, washed and chopped fine
+* Pinch of salt, maybe a pinch of pepper
+
+Cook the carrots in a pot of boiling water until just tender -- roughly
+six minutes. (Just don't let them get mushy!) Drain the carrots.
+
+In a largish bowl, combine the lemon juice, the cumin, the chile
+powder, and the honey. Mix well.
+Add the olive oil and whisk it together well. Add the dill and stir.
+
+Add the warm carrots to the bowl and toss it all to coat the carrots
+well. Season with salt and pepper, to taste.
+
+Serve warm or at room temperature.
+
+The measurements here are very approximate, and you should feel free to
+improvise and experiment. It's a very forgiving recipe. For example,
+you could easily halve or double the amount of cumin, or use chopped mint
+leaves instead of dill, or lime juice instead of lemon, et cetera.
+
+[end]