2 # Time-stamp: "2003-04-02 11:04:55 AHST"
5 package Locale::Maketext;
7 use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
10 use I18N::LangTags 0.21 ();
12 #--------------------------------------------------------------------------
14 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
15 # define the constant 'DEBUG' at compile-time
21 $USING_LANGUAGE_TAGS = 1;
22 # Turning this off is somewhat of a security risk in that little or no
23 # checking will be done on the legality of tokens passed to the
24 # eval("use $module_name") in _try_use. If you turn this off, you have
25 # to do your own taint checking.
27 $USE_LITERALS = 1 unless defined $USE_LITERALS;
28 # a hint for compiling bracket-notation things.
32 ###########################################################################
35 my($handle, $num, @forms) = @_;
37 return $num if @forms == 0; # what should this mean?
38 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
41 # Note that the formatting of $num is preserved.
42 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
43 # Most human languages put the number phrase before the qualified phrase.
48 # return this lexical item in a form appropriate to this number
49 my($handle, $num, @forms) = @_;
52 return '' unless @forms;
53 if(@forms == 1) { # only the headword form specified
54 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
55 } else { # sing and plural were specified
56 return $s ? $forms[0] : $forms[1];
60 #--------------------------------------------------------------------------
63 my($handle, $num) = @_[0,1];
64 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
65 $num += 0; # Just use normal integer stringification.
66 # Specifically, don't let %G turn ten million into 1E+007
68 $num = CORE::sprintf("%G", $num);
69 # "CORE::" is there to avoid confusion with the above sub sprintf.
71 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5
72 # The initial \d+ gobbles as many digits as it can, and then we
73 # backtrack so it un-eats the rightmost three, and then we
74 # insert the comma there.
76 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
77 # This is just a lame hack instead of using Number::Format
83 my($handle, $format, @params) = @_;
84 return CORE::sprintf($format, @params);
85 # "CORE::" is there to avoid confusion with myself!
88 #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
90 use integer; # vroom vroom... applies to the whole rest of the module
93 my $it = ref($_[0]) || $_[0];
94 return undef unless $it =~ m/([^':]+)(?:::)?$/s;
103 (ref($it) && $it->{'encoding'})
104 || "iso-8859-1" # Latin-1
108 #--------------------------------------------------------------------------
110 sub fallback_languages { return('i-default', 'en', 'en-US') }
112 sub fallback_language_classes { return () }
114 #--------------------------------------------------------------------------
116 sub fail_with { # an actual attribute method!
117 my($handle, @params) = @_;
118 return unless ref($handle);
119 $handle->{'fail'} = $params[0] if @params;
120 return $handle->{'fail'};
123 #--------------------------------------------------------------------------
125 sub failure_handler_auto {
126 # Meant to be used like:
127 # $handle->fail_with('failure_handler_auto')
129 my($handle, $phrase, @params) = @_;
130 $handle->{'failure_lex'} ||= {};
131 my $lex = $handle->{'failure_lex'};
134 $lex->{$phrase} ||= ($value = $handle->_compile($phrase));
136 # Dumbly copied from sub maketext:
138 local $SIG{'__DIE__'};
139 eval { $value = &$value($handle, @_) };
141 # If we make it here, there was an exception thrown in the
142 # call to $value, and so scream:
145 # pretty up the error message
146 $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?>
147 <\n in bracket code [compiled line $1],>s;
148 #$err =~ s/\n?$/\n/s;
149 Carp::croak "Error in maketexting \"$phrase\":\n$err as used";
150 # Rather unexpected, but suppose that the sub tried calling
151 # a method that didn't exist.
157 #==========================================================================
161 my $class = ref($_[0]) || $_[0];
162 my $handle = bless {}, $class;
167 sub init { return } # no-op
169 ###########################################################################
172 # Remember, this can fail. Failure is controllable many ways.
173 Carp::croak "maketext requires at least one parameter" unless @_ > 1;
175 my($handle, $phrase) = splice(@_,0,2);
181 @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs }
183 print "* Looking up \"$phrase\" in $h_r\n" if DEBUG;
184 if(exists $h_r->{$phrase}) {
185 print " Found \"$phrase\" in $h_r\n" if DEBUG;
186 unless(ref($value = $h_r->{$phrase})) {
187 # Nonref means it's not yet compiled. Compile and replace.
188 $value = $h_r->{$phrase} = $handle->_compile($value);
191 } elsif($phrase !~ m/^_/s and $h_r->{'_AUTO'}) {
192 # it's an auto lex, and this is an autoable key!
193 print " Automaking \"$phrase\" into $h_r\n" if DEBUG;
195 $value = $h_r->{$phrase} = $handle->_compile($phrase);
198 print " Not found in $h_r, nor automakable\n" if DEBUG > 1;
202 unless(defined($value)) {
203 print "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle,
204 " fails.\n" if DEBUG;
205 if(ref($handle) and $handle->{'fail'}) {
206 print "WARNING0: maketext fails looking for <$phrase>\n" if DEBUG;
208 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
209 return &{$fail}($handle, $phrase, @_);
210 # If it ever returns, it should return a good value.
211 } else { # It's a method name
212 return $handle->$fail($phrase, @_);
213 # If it ever returns, it should return a good value.
216 # All we know how to do is this;
217 Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
221 return $$value if ref($value) eq 'SCALAR';
222 return $value unless ref($value) eq 'CODE';
225 local $SIG{'__DIE__'};
226 eval { $value = &$value($handle, @_) };
228 # If we make it here, there was an exception thrown in the
229 # call to $value, and so scream:
232 # pretty up the error message
233 $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?>
234 <\n in bracket code [compiled line $1],>s;
235 #$err =~ s/\n?$/\n/s;
236 Carp::croak "Error in maketexting \"$phrase\":\n$err as used";
237 # Rather unexpected, but suppose that the sub tried calling
238 # a method that didn't exist.
244 ###########################################################################
246 sub get_handle { # This is a constructor and, yes, it CAN FAIL.
247 # Its class argument has to be the base class for the current
248 # application's l10n files.
249 my($base_class, @languages) = @_;
250 $base_class = ref($base_class) || $base_class;
251 # Complain if they use __PACKAGE__ as a project base class?
253 unless(@languages) { # Calling with no args is magical! wooo, magic!
254 if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI
255 @languages = $base_class->_http_accept_langs;
256 # it's off in its own routine because it's complicated
258 } else { # Not running as a CGI: try to puzzle out from the environment
259 if(length( $ENV{'LANG'} || '' )) {
260 push @languages, split m/[,:]/, $ENV{'LANG'};
261 # LANG can be only /one/ locale as far as I know, but what the hey.
263 if(length( $ENV{'LANGUAGE'} || '' )) {
264 push @languages, split m/[,:]/, $ENV{'LANGUAGE'};
266 print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG;
267 # Those are really locale IDs, but they get xlated a few lines down.
269 if(&_try_use('Win32::Locale')) {
270 # If we have that module installed...
271 push @languages, Win32::Locale::get_language()
272 if defined &Win32::Locale::get_language;
277 #------------------------------------------------------------------------
278 print "Lgs1: ", map("<$_>", @languages), "\n" if DEBUG;
280 if($USING_LANGUAGE_TAGS) {
281 @languages = map &I18N::LangTags::locale2language_tag($_), @languages;
282 # if it's a lg tag, fine, pass thru (untainted)
283 # if it's a locale ID, try converting to a lg tag (untainted),
286 push @languages, map I18N::LangTags::super_languages($_), @languages
289 @languages = map { $_, I18N::LangTags::alternate_language_tags($_) }
290 @languages; # catch alternation
292 push @languages, I18N::LangTags::panic_languages(@languages)
293 if defined &I18N::LangTags::panic_languages;
295 push @languages, $base_class->fallback_languages;
296 # You are free to override fallback_languages to return empty-list!
298 @languages = # final bit of processing:
301 $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
302 $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
307 print "Lgs2: ", map("<$_>", @languages), "\n" if DEBUG > 1;
309 push @languages, $base_class->fallback_language_classes;
310 # You are free to override that to return whatever.
314 foreach my $module_name ( map { $base_class . "::" . $_ } @languages )
316 next unless length $module_name; # sanity
317 next if $seen{$module_name}++ # Already been here, and it was no-go
318 || !&_try_use($module_name); # Try to use() it, but can't it.
319 return($module_name->new); # Make it!
322 return undef; # Fail!
325 ###########################################################################
327 # This is where most people should stop reading.
329 ###########################################################################
331 sub _http_accept_langs {
332 # Deal with HTTP "Accept-Language:" stuff. Hassle.
333 # This code is more lenient than RFC 3282, which you must read.
334 # Hm. Should I just move this into I18N::LangTags at some point?
337 my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'};
338 # (always ends up untainting)
340 return() unless defined $in and length $in;
342 $in =~ s/\([^\)]*\)//g; # nix just about any comment
344 if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) {
345 # Very common case: just one language tag
347 } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) {
348 # Common case these days: just "foo, bar, baz"
349 return map lc($_), $in =~ m/([a-zA-Z][-a-zA-Z]+)/g;
352 # Else it's complicated...
354 $in =~ s/\s+//g; # Yes, we can just do without the WS!
355 my @in = $in =~ m/([^,]+)/g;
359 foreach my $tag (@in) {
361 m/^([a-zA-Z][-a-zA-Z]+)
365 \d* # a bit too broad of a RE, but so what.
374 $q = (defined $2 and length $2) ? $2 : 1;
375 #print "$1 with q=$q\n";
376 push @{ $pref{$q} }, lc $1;
379 return # Read off %pref, in descending key order...
385 ###########################################################################
388 # This big scarp routine compiles an entry.
389 # It returns either a coderef if there's brackety bits in this, or
390 # otherwise a ref to a scalar.
392 my $target = ref($_[0]) || $_[0];
395 my(@c) = (''); # "chunks" -- scratch.
399 my $in_group = 0; # start out outside a group
400 my($m, @params); # scratch
402 while($_[1] =~ # Iterate over chunks.
404 [^\~\[\]]+ # non-~[] stuff
406 ~. # ~[, ~], ~~, ~other
408 \[ # [ presumably opening a group
410 \] # ] presumably closing a group
417 print " \"$1\"\n" if DEBUG > 2;
419 if($1 eq '[' or $1 eq '') { # "[" or end
420 # Whether this is "[" or end, force processing of any
424 $target->_die_pointing($_[1], "Unterminated bracket group");
426 $target->_die_pointing($_[1], "You can't nest bracket groups");
430 print " [end-string]\n" if DEBUG > 2;
434 die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
436 # Now actually processing the preceding literal
438 if($USE_LITERALS and (
440 ? $c[-1] !~ m<[^\x20-\x7E]>s
441 # ASCII very safe chars
442 : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
443 # EBCDIC very safe chars
445 # normal case -- all very safe chars
447 push @code, q{ '} . $c[-1] . "',\n";
448 $c[-1] = ''; # reuse this slot
450 push @code, ' $c[' . $#c . "],\n";
451 push @c, ''; # new chunk
454 # else just ignore the empty string.
457 } elsif($1 eq ']') { # "]"
458 # close group -- go back in-band
462 print " --Closing group [$c[-1]]\n" if DEBUG > 2;
464 # And now process the group...
466 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
467 DEBUG > 2 and print " -- (Ignoring)\n";
468 $c[-1] = ''; # reset out chink
472 #$c[-1] =~ s/^\s+//s;
473 #$c[-1] =~ s/\s+$//s;
474 ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/
476 # A bit of a hack -- we've turned "~,"'s into DELs, so turn
477 # 'em into real commas here.
478 if (ord('A') == 65) { # ASCII, etc
479 foreach($m, @params) { tr/\x7F/,/ }
480 } else { # EBCDIC (1047, 0037, POSIX-BC)
481 # Thanks to Peter Prymmer for the EBCDIC handling
482 foreach($m, @params) { tr/\x07/,/ }
485 # Special-case handling of some method names:
486 if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
487 # Treat [_1,...] as [,_1,...], etc.
491 $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
493 $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
496 # Most common case: a simple, legal-looking method name
498 # 0-length method name means to just interpolate:
500 } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
501 and $m !~ m<(?:^|\:)\d>s
502 # exclude starting a (sub)package or symbol with a digit
504 # Yes, it even supports the demented (and undocumented?)
505 # $obj->Foo::bar(...) syntax.
506 $target->_die_pointing(
507 $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
510 if $m =~ m/^SUPER::/s;
511 # Because for SUPER:: to work, we'd have to compile this into
512 # the right package, and that seems just not worth the bother,
513 # unless someone convinces me otherwise.
515 push @code, ' $_[0]->' . $m . '(';
517 # TODO: implement something? or just too icky to consider?
518 $target->_die_pointing(
520 "Can't use \"$m\" as a method name in bracket group",
525 pop @c; # we don't need that chunk anymore
528 foreach my $p (@params) {
530 # Meaning: all parameters except $_[0]
531 $code[-1] .= ' @_[1 .. $#_], ';
532 # and yes, that does the right thing for all @_ < 3
533 } elsif($p =~ m<^_(-?\d+)$>s) {
535 $code[-1] .= '$_[' . (0 + $1) . '], ';
536 } elsif($USE_LITERALS and (
538 ? $p !~ m<[^\x20-\x7E]>s
539 # ASCII very safe chars
540 : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
541 # EBCDIC very safe chars
543 # Normal case: a literal containing only safe characters
545 $code[-1] .= q{'} . $p . q{', };
547 # Stow it on the chunk-stack, and just refer to that.
549 push @code, ' $c[' . $#c . "], ";
556 $target->_die_pointing($_[1], "Unbalanced ']'");
559 } elsif(substr($1,0,1) ne '~') {
560 # it's stuff not containing "~" or "[" or "]"
561 # i.e., a literal blob
564 } elsif($1 eq '~~') { # "~~"
567 } elsif($1 eq '~[') { # "~["
570 } elsif($1 eq '~]') { # "~]"
573 } elsif($1 eq '~,') { # "~,"
575 # This is a hack, based on the assumption that no-one will actually
576 # want a DEL inside a bracket group. Let's hope that's it's true.
577 if (ord('A') == 65) { # ASCII etc
579 } else { # EBCDIC (cp 1047, 0037, POSIX-BC)
586 } elsif($1 eq '~') { # possible only at string-end, it seems.
590 # It's a "~X" where X is not a special character.
591 # Consider it a literal ~ and X.
598 undef $big_pile; # Well, nevermind that.
600 # It's all literals! Ahwell, that can happen.
601 # So don't bother with the eval. Return a SCALAR reference.
605 die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
606 print scalar(@c), " chunks under closure\n" if DEBUG;
607 if(@code == 0) { # not possible?
608 print "Empty code\n" if DEBUG;
610 } elsif(@code > 1) { # most cases, presumably!
611 unshift @code, "join '',\n";
613 unshift @code, "use strict; sub {\n";
616 print @code if DEBUG;
617 my $sub = eval(join '', @code);
618 die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
622 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
625 # This is used by _compile to throw a fatal error
626 my $target = shift; # class name
627 # ...leaving $_[0] the error-causing text, and $_[1] the error message
629 my $i = index($_[0], "\n");
632 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
634 $pointy = "^=== near there\n";
635 } else { # we need to space over
636 my $first_tab = index($_[0], "\t");
637 if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
638 # No tabs, or the first tab is harmlessly after where we will point to,
639 # AND we're far enough from the margin that we can draw a proper arrow.
640 $pointy = ('=' x $pos) . "^ near there\n";
642 # tabs screw everything up!
643 $pointy = substr($_[0],0,$pos);
644 $pointy =~ tr/\t //cd;
645 # make everything into whitespace, but preseving tabs
646 $pointy .= "^=== near there\n";
650 my $errmsg = "$_[1], in\:\n$_[0]";
654 $errmsg .= "\n" . $pointy;
655 } elsif($i == (length($_[0]) - 1) ) {
656 # Already has a newline at end.
659 # don't bother with the pointy bit, I guess.
661 Carp::croak( "$errmsg via $target, as used" );
664 ###########################################################################
667 # memoization of whether we've used this module, or found it unusable.
669 sub _try_use { # Basically a wrapper around "require Modulename"
670 # "Many men have tried..." "They tried and failed?" "They tried and died."
671 return $tried{$_[0]} if exists $tried{$_[0]}; # memoization
673 my $module = $_[0]; # ASSUME sane module name!
675 return($tried{$module} = 1)
676 if defined(%{$module . "::Lexicon"}) or defined(@{$module . "::ISA"});
677 # weird case: we never use'd it, but there it is!
680 print " About to use $module ...\n" if DEBUG;
682 local $SIG{'__DIE__'};
683 eval "require $module"; # used to be "use $module", but no point in that.
686 print "Error using $module \: $@\n" if DEBUG > 1;
687 return $tried{$module} = 0;
689 print " OK, $module is used\n" if DEBUG;
690 return $tried{$module} = 1;
694 #--------------------------------------------------------------------------
696 sub _lex_refs { # report the lexicon references for this handle's class
697 # returns an arrayREF!
699 my $class = ref($_[0]) || $_[0];
700 print "Lex refs lookup on $class\n" if DEBUG > 1;
701 return $isa_scan{$class} if exists $isa_scan{$class}; # memoization!
704 my $seen_r = ref($_[1]) ? $_[1] : {};
706 if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
707 push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
708 print "%" . $class . "::Lexicon contains ",
709 scalar(keys %{$class . '::Lexicon'}), " entries\n" if DEBUG;
712 # Implements depth(height?)-first recursive searching of superclasses.
713 # In hindsight, I suppose I could have just used Class::ISA!
714 foreach my $superclass (@{$class . "::ISA"}) {
715 print " Super-class search into $superclass\n" if DEBUG;
716 next if $seen_r->{$superclass}++;
717 push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself
720 $isa_scan{$class} = \@lex_refs; # save for next time
724 sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
726 ###########################################################################
731 HEY YOU! You need some FOOD!
734 ~~ Tangy Moroccan Carrot Salad ~~
736 * 6 to 8 medium carrots, peeled and then sliced in 1/4-inch rounds
737 * 1/4 teaspoon chile powder (cayenne, chipotle, ancho, or the like)
738 * 1 tablespoon ground cumin
740 * The juice of about a half a big lemon, or of a whole smaller one
742 * 1 tablespoon of fresh dill, washed and chopped fine
743 * Pinch of salt, maybe a pinch of pepper
745 Cook the carrots in a pot of boiling water until just tender -- roughly
746 six minutes. (Just don't let them get mushy!) Drain the carrots.
748 In a largish bowl, combine the lemon juice, the cumin, the chile
749 powder, and the honey. Mix well.
750 Add the olive oil and whisk it together well. Add the dill and stir.
752 Add the warm carrots to the bowl and toss it all to coat the carrots
753 well. Season with salt and pepper, to taste.
755 Serve warm or at room temperature.
757 The measurements here are very approximate, and you should feel free to
758 improvise and experiment. It's a very forgiving recipe. For example,
759 you could easily halve or double the amount of cumin, or use chopped mint
760 leaves instead of dill, or lime juice instead of lemon, et cetera.