Commit | Line | Data |
9378c581 |
1 | |
cb0af213 |
2 | # Time-stamp: "2004-01-19 15:11:14 AST" |
9378c581 |
3 | |
4 | require 5; |
5 | package Locale::Maketext; |
6 | use strict; |
7 | use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS |
14be35aa |
8 | $USE_LITERALS $MATCH_SUPERS_TIGHTLY); |
9378c581 |
9 | use Carp (); |
10 | use I18N::LangTags 0.21 (); |
11 | |
12 | #-------------------------------------------------------------------------- |
13 | |
14 | BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } |
15 | # define the constant 'DEBUG' at compile-time |
16 | |
cb0af213 |
17 | $VERSION = "1.08"; |
9378c581 |
18 | @ISA = (); |
19 | |
20 | $MATCH_SUPERS = 1; |
14be35aa |
21 | $MATCH_SUPERS_TIGHTLY = 1; |
22 | $USING_LANGUAGE_TAGS = 1; |
9378c581 |
23 | # Turning this off is somewhat of a security risk in that little or no |
24 | # checking will be done on the legality of tokens passed to the |
25 | # eval("use $module_name") in _try_use. If you turn this off, you have |
26 | # to do your own taint checking. |
27 | |
28 | $USE_LITERALS = 1 unless defined $USE_LITERALS; |
29 | # a hint for compiling bracket-notation things. |
30 | |
31 | my %isa_scan = (); |
32 | |
33 | ########################################################################### |
34 | |
35 | sub quant { |
36 | my($handle, $num, @forms) = @_; |
37 | |
38 | return $num if @forms == 0; # what should this mean? |
39 | return $forms[2] if @forms > 2 and $num == 0; # special zeroth case |
40 | |
41 | # Normal case: |
42 | # Note that the formatting of $num is preserved. |
43 | return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) ); |
44 | # Most human languages put the number phrase before the qualified phrase. |
45 | } |
46 | |
47 | |
48 | sub numerate { |
49 | # return this lexical item in a form appropriate to this number |
50 | my($handle, $num, @forms) = @_; |
51 | my $s = ($num == 1); |
52 | |
53 | return '' unless @forms; |
54 | if(@forms == 1) { # only the headword form specified |
55 | return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack. |
56 | } else { # sing and plural were specified |
57 | return $s ? $forms[0] : $forms[1]; |
58 | } |
59 | } |
60 | |
61 | #-------------------------------------------------------------------------- |
62 | |
63 | sub numf { |
64 | my($handle, $num) = @_[0,1]; |
65 | if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) { |
66 | $num += 0; # Just use normal integer stringification. |
67 | # Specifically, don't let %G turn ten million into 1E+007 |
68 | } else { |
69 | $num = CORE::sprintf("%G", $num); |
70 | # "CORE::" is there to avoid confusion with the above sub sprintf. |
71 | } |
72 | while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5 |
73 | # The initial \d+ gobbles as many digits as it can, and then we |
74 | # backtrack so it un-eats the rightmost three, and then we |
75 | # insert the comma there. |
76 | |
77 | $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'}; |
78 | # This is just a lame hack instead of using Number::Format |
79 | return $num; |
80 | } |
81 | |
82 | sub sprintf { |
83 | no integer; |
84 | my($handle, $format, @params) = @_; |
85 | return CORE::sprintf($format, @params); |
86 | # "CORE::" is there to avoid confusion with myself! |
87 | } |
88 | |
89 | #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=# |
90 | |
91 | use integer; # vroom vroom... applies to the whole rest of the module |
92 | |
93 | sub language_tag { |
94 | my $it = ref($_[0]) || $_[0]; |
95 | return undef unless $it =~ m/([^':]+)(?:::)?$/s; |
96 | $it = lc($1); |
97 | $it =~ tr<_><->; |
98 | return $it; |
99 | } |
100 | |
101 | sub encoding { |
102 | my $it = $_[0]; |
103 | return( |
104 | (ref($it) && $it->{'encoding'}) |
105 | || "iso-8859-1" # Latin-1 |
106 | ); |
107 | } |
108 | |
109 | #-------------------------------------------------------------------------- |
110 | |
111 | sub fallback_languages { return('i-default', 'en', 'en-US') } |
112 | |
113 | sub fallback_language_classes { return () } |
114 | |
115 | #-------------------------------------------------------------------------- |
116 | |
117 | sub fail_with { # an actual attribute method! |
118 | my($handle, @params) = @_; |
119 | return unless ref($handle); |
120 | $handle->{'fail'} = $params[0] if @params; |
121 | return $handle->{'fail'}; |
122 | } |
123 | |
124 | #-------------------------------------------------------------------------- |
125 | |
126 | sub failure_handler_auto { |
127 | # Meant to be used like: |
128 | # $handle->fail_with('failure_handler_auto') |
129 | |
130 | my($handle, $phrase, @params) = @_; |
131 | $handle->{'failure_lex'} ||= {}; |
132 | my $lex = $handle->{'failure_lex'}; |
133 | |
134 | my $value; |
135 | $lex->{$phrase} ||= ($value = $handle->_compile($phrase)); |
136 | |
137 | # Dumbly copied from sub maketext: |
138 | { |
139 | local $SIG{'__DIE__'}; |
140 | eval { $value = &$value($handle, @_) }; |
141 | } |
142 | # If we make it here, there was an exception thrown in the |
143 | # call to $value, and so scream: |
144 | if($@) { |
145 | my $err = $@; |
146 | # pretty up the error message |
147 | $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> |
148 | <\n in bracket code [compiled line $1],>s; |
149 | #$err =~ s/\n?$/\n/s; |
150 | Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; |
151 | # Rather unexpected, but suppose that the sub tried calling |
152 | # a method that didn't exist. |
153 | } else { |
154 | return $value; |
155 | } |
156 | } |
157 | |
158 | #========================================================================== |
159 | |
160 | sub new { |
161 | # Nothing fancy! |
162 | my $class = ref($_[0]) || $_[0]; |
163 | my $handle = bless {}, $class; |
164 | $handle->init; |
165 | return $handle; |
166 | } |
167 | |
168 | sub init { return } # no-op |
169 | |
170 | ########################################################################### |
171 | |
172 | sub maketext { |
173 | # Remember, this can fail. Failure is controllable many ways. |
174 | Carp::croak "maketext requires at least one parameter" unless @_ > 1; |
175 | |
176 | my($handle, $phrase) = splice(@_,0,2); |
177 | |
178 | # Look up the value: |
179 | |
180 | my $value; |
181 | foreach my $h_r ( |
182 | @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs } |
183 | ) { |
184 | print "* Looking up \"$phrase\" in $h_r\n" if DEBUG; |
185 | if(exists $h_r->{$phrase}) { |
186 | print " Found \"$phrase\" in $h_r\n" if DEBUG; |
187 | unless(ref($value = $h_r->{$phrase})) { |
188 | # Nonref means it's not yet compiled. Compile and replace. |
189 | $value = $h_r->{$phrase} = $handle->_compile($value); |
190 | } |
191 | last; |
192 | } elsif($phrase !~ m/^_/s and $h_r->{'_AUTO'}) { |
193 | # it's an auto lex, and this is an autoable key! |
194 | print " Automaking \"$phrase\" into $h_r\n" if DEBUG; |
195 | |
196 | $value = $h_r->{$phrase} = $handle->_compile($phrase); |
197 | last; |
198 | } |
199 | print " Not found in $h_r, nor automakable\n" if DEBUG > 1; |
200 | # else keep looking |
201 | } |
202 | |
203 | unless(defined($value)) { |
204 | print "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, |
205 | " fails.\n" if DEBUG; |
206 | if(ref($handle) and $handle->{'fail'}) { |
207 | print "WARNING0: maketext fails looking for <$phrase>\n" if DEBUG; |
208 | my $fail; |
209 | if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference |
210 | return &{$fail}($handle, $phrase, @_); |
211 | # If it ever returns, it should return a good value. |
212 | } else { # It's a method name |
213 | return $handle->$fail($phrase, @_); |
214 | # If it ever returns, it should return a good value. |
215 | } |
216 | } else { |
217 | # All we know how to do is this; |
218 | Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed"); |
219 | } |
220 | } |
221 | |
222 | return $$value if ref($value) eq 'SCALAR'; |
223 | return $value unless ref($value) eq 'CODE'; |
224 | |
225 | { |
226 | local $SIG{'__DIE__'}; |
227 | eval { $value = &$value($handle, @_) }; |
228 | } |
229 | # If we make it here, there was an exception thrown in the |
230 | # call to $value, and so scream: |
231 | if($@) { |
232 | my $err = $@; |
233 | # pretty up the error message |
234 | $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> |
235 | <\n in bracket code [compiled line $1],>s; |
236 | #$err =~ s/\n?$/\n/s; |
237 | Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; |
238 | # Rather unexpected, but suppose that the sub tried calling |
239 | # a method that didn't exist. |
240 | } else { |
241 | return $value; |
242 | } |
243 | } |
244 | |
245 | ########################################################################### |
246 | |
247 | sub get_handle { # This is a constructor and, yes, it CAN FAIL. |
248 | # Its class argument has to be the base class for the current |
249 | # application's l10n files. |
14be35aa |
250 | |
9378c581 |
251 | my($base_class, @languages) = @_; |
252 | $base_class = ref($base_class) || $base_class; |
253 | # Complain if they use __PACKAGE__ as a project base class? |
254 | |
14be35aa |
255 | @languages = $base_class->_ambient_langprefs() unless @languages; |
256 | @languages = $base_class->_langtag_munging(@languages); |
257 | |
258 | my %seen; |
259 | foreach my $module_name ( map { $base_class . "::" . $_ } @languages ) { |
260 | next unless length $module_name; # sanity |
261 | next if $seen{$module_name}++ # Already been here, and it was no-go |
262 | || !&_try_use($module_name); # Try to use() it, but can't it. |
263 | return($module_name->new); # Make it! |
9378c581 |
264 | } |
265 | |
14be35aa |
266 | return undef; # Fail! |
267 | } |
268 | |
269 | ########################################################################### |
270 | |
271 | sub _langtag_munging { |
272 | my($base_class, @languages) = @_; |
273 | |
274 | DEBUG and print "Lgs1: ", map("<$_>", @languages), "\n"; |
9378c581 |
275 | |
276 | if($USING_LANGUAGE_TAGS) { |
277 | @languages = map &I18N::LangTags::locale2language_tag($_), @languages; |
278 | # if it's a lg tag, fine, pass thru (untainted) |
279 | # if it's a locale ID, try converting to a lg tag (untainted), |
280 | # otherwise nix it. |
281 | |
14be35aa |
282 | @languages = map {; $_, I18N::LangTags::alternate_language_tags($_) } |
9378c581 |
283 | @languages; # catch alternation |
14be35aa |
284 | DEBUG and print "Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
285 | |
cb0af213 |
286 | @languages = $base_class->_add_supers( @languages ); |
287 | |
14be35aa |
288 | if( defined &I18N::LangTags::panic_languages ) { |
289 | push @languages, I18N::LangTags::panic_languages(@languages); |
290 | DEBUG and print "After adding panic languages:\n", |
291 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
292 | } |
9378c581 |
293 | |
294 | push @languages, $base_class->fallback_languages; |
295 | # You are free to override fallback_languages to return empty-list! |
14be35aa |
296 | DEBUG and print "Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
9378c581 |
297 | |
298 | @languages = # final bit of processing: |
299 | map { |
300 | my $it = $_; # copy |
301 | $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _ |
302 | $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_ |
303 | $it; |
304 | } @languages |
305 | ; |
14be35aa |
306 | DEBUG and print "Nearing end of munging:\n", |
307 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
308 | } else { |
309 | DEBUG and print "Bypassing language-tags.\n", |
310 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
9378c581 |
311 | } |
14be35aa |
312 | |
313 | DEBUG and print "Before adding fallback classes:\n", |
314 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
9378c581 |
315 | |
316 | push @languages, $base_class->fallback_language_classes; |
317 | # You are free to override that to return whatever. |
318 | |
14be35aa |
319 | DEBUG and print "Finally:\n", |
320 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
9378c581 |
321 | |
14be35aa |
322 | return @languages; |
323 | } |
324 | |
325 | ########################################################################### |
326 | |
327 | sub _ambient_langprefs { |
328 | my $base_class = $_[0]; |
329 | |
330 | return $base_class->_http_accept_langs |
331 | if length( $ENV{'REQUEST_METHOD'} || '' ); # I'm a CGI |
332 | # it's off in its own routine because it's complicated |
333 | |
334 | # Not running as a CGI: try to puzzle out from the environment |
335 | my @languages; |
336 | |
337 | if(length( $ENV{'LANG'} || '' )) { |
338 | push @languages, split m/[,:]/, $ENV{'LANG'}; |
339 | # LANG can be only /one/ locale as far as I know, but what the hey. |
9378c581 |
340 | } |
341 | |
14be35aa |
342 | if(length( $ENV{'LANGUAGE'} || '' )) { |
343 | push @languages, split m/[,:]/, $ENV{'LANGUAGE'}; |
344 | } |
345 | |
346 | print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG; |
347 | # Those are really locale IDs, but they get xlated a few lines down. |
348 | |
349 | if(&_try_use('Win32::Locale')) { |
350 | # If we have that module installed... |
351 | push @languages, Win32::Locale::get_language() || '' |
352 | if defined &Win32::Locale::get_language; |
353 | } |
354 | |
355 | return @languages; |
356 | } |
357 | |
358 | ########################################################################### |
359 | |
360 | sub _add_supers { |
361 | my($base_class, @languages) = @_; |
362 | |
363 | if(!$MATCH_SUPERS) { |
364 | # Nothing |
365 | DEBUG and print "Bypassing any super-matching.\n", |
366 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
367 | |
368 | } elsif( $MATCH_SUPERS_TIGHTLY ) { |
369 | DEBUG and print "Before adding new supers tightly:\n", |
370 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
371 | |
372 | my %seen_encoded; |
373 | foreach my $lang (@languages) { |
374 | $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1 |
375 | } |
376 | |
377 | my(@output_languages); |
378 | foreach my $lang (@languages) { |
379 | push @output_languages, $lang; |
380 | foreach my $s ( I18N::LangTags::super_languages($lang) ) { |
381 | # Note that super_languages returns the longest first. |
382 | last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) }; |
383 | push @output_languages, $s; |
384 | } |
385 | } |
386 | @languages = @output_languages; |
387 | |
388 | DEBUG and print "After adding new supers tightly:\n", |
389 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
390 | |
391 | } else { |
392 | |
393 | push @languages, map I18N::LangTags::super_languages($_), @languages; |
394 | DEBUG and print "After adding supers to end:\n", |
395 | " Lgs\@", __LINE__, ": ", map("<$_>", @languages), "\n"; |
396 | } |
397 | |
398 | return @languages; |
9378c581 |
399 | } |
400 | |
401 | ########################################################################### |
402 | # |
403 | # This is where most people should stop reading. |
404 | # |
405 | ########################################################################### |
406 | |
f600d105 |
407 | use Locale::Maketext::GutsLoader; |
408 | |
f918d677 |
409 | sub _http_accept_langs { |
410 | # Deal with HTTP "Accept-Language:" stuff. Hassle. |
411 | # This code is more lenient than RFC 3282, which you must read. |
412 | # Hm. Should I just move this into I18N::LangTags at some point? |
413 | no integer; |
414 | |
415 | my $in = (@_ > 1) ? $_[1] : $ENV{'HTTP_ACCEPT_LANGUAGE'}; |
416 | # (always ends up untainting) |
417 | |
418 | return() unless defined $in and length $in; |
419 | |
420 | $in =~ s/\([^\)]*\)//g; # nix just about any comment |
421 | |
422 | if( $in =~ m/^\s*([a-zA-Z][-a-zA-Z]+)\s*$/s ) { |
423 | # Very common case: just one language tag |
424 | return lc $1; |
425 | } elsif( $in =~ m/^\s*[a-zA-Z][-a-zA-Z]+(?:\s*,\s*[a-zA-Z][-a-zA-Z]+)*\s*$/s ) { |
426 | # Common case these days: just "foo, bar, baz" |
427 | return map lc($_), $in =~ m/([a-zA-Z][-a-zA-Z]+)/g; |
428 | } |
429 | |
430 | # Else it's complicated... |
431 | |
432 | $in =~ s/\s+//g; # Yes, we can just do without the WS! |
433 | my @in = $in =~ m/([^,]+)/g; |
434 | my %pref; |
435 | |
436 | my $q; |
437 | foreach my $tag (@in) { |
438 | next unless $tag =~ |
439 | m/^([a-zA-Z][-a-zA-Z]+) |
440 | (?: |
441 | ;q= |
442 | ( |
443 | \d* # a bit too broad of a RE, but so what. |
444 | (?: |
445 | \.\d+ |
446 | )? |
447 | ) |
448 | )? |
449 | $ |
450 | /sx |
451 | ; |
452 | $q = (defined $2 and length $2) ? $2 : 1; |
453 | #print "$1 with q=$q\n"; |
454 | push @{ $pref{$q} }, lc $1; |
455 | } |
456 | |
457 | return # Read off %pref, in descending key order... |
458 | map @{$pref{$_}}, |
459 | sort {$b <=> $a} |
460 | keys %pref; |
461 | } |
462 | |
463 | ########################################################################### |
464 | |
9378c581 |
465 | my %tried = (); |
466 | # memoization of whether we've used this module, or found it unusable. |
467 | |
468 | sub _try_use { # Basically a wrapper around "require Modulename" |
469 | # "Many men have tried..." "They tried and failed?" "They tried and died." |
470 | return $tried{$_[0]} if exists $tried{$_[0]}; # memoization |
471 | |
472 | my $module = $_[0]; # ASSUME sane module name! |
473 | { no strict 'refs'; |
474 | return($tried{$module} = 1) |
475 | if defined(%{$module . "::Lexicon"}) or defined(@{$module . "::ISA"}); |
476 | # weird case: we never use'd it, but there it is! |
477 | } |
478 | |
479 | print " About to use $module ...\n" if DEBUG; |
480 | { |
481 | local $SIG{'__DIE__'}; |
482 | eval "require $module"; # used to be "use $module", but no point in that. |
483 | } |
484 | if($@) { |
485 | print "Error using $module \: $@\n" if DEBUG > 1; |
486 | return $tried{$module} = 0; |
487 | } else { |
488 | print " OK, $module is used\n" if DEBUG; |
489 | return $tried{$module} = 1; |
490 | } |
491 | } |
492 | |
493 | #-------------------------------------------------------------------------- |
494 | |
495 | sub _lex_refs { # report the lexicon references for this handle's class |
496 | # returns an arrayREF! |
497 | no strict 'refs'; |
498 | my $class = ref($_[0]) || $_[0]; |
499 | print "Lex refs lookup on $class\n" if DEBUG > 1; |
500 | return $isa_scan{$class} if exists $isa_scan{$class}; # memoization! |
501 | |
502 | my @lex_refs; |
503 | my $seen_r = ref($_[1]) ? $_[1] : {}; |
504 | |
505 | if( defined( *{$class . '::Lexicon'}{'HASH'} )) { |
506 | push @lex_refs, *{$class . '::Lexicon'}{'HASH'}; |
507 | print "%" . $class . "::Lexicon contains ", |
508 | scalar(keys %{$class . '::Lexicon'}), " entries\n" if DEBUG; |
509 | } |
510 | |
ff5ad48a |
511 | # Implements depth(height?)-first recursive searching of superclasses. |
512 | # In hindsight, I suppose I could have just used Class::ISA! |
9378c581 |
513 | foreach my $superclass (@{$class . "::ISA"}) { |
514 | print " Super-class search into $superclass\n" if DEBUG; |
515 | next if $seen_r->{$superclass}++; |
516 | push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself |
517 | } |
518 | |
519 | $isa_scan{$class} = \@lex_refs; # save for next time |
520 | return \@lex_refs; |
521 | } |
522 | |
523 | sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity! |
524 | |
525 | ########################################################################### |
526 | 1; |
527 | |
f918d677 |
528 | __END__ |
529 | |
530 | HEY YOU! You need some FOOD! |
531 | |
532 | |
533 | ~~ Tangy Moroccan Carrot Salad ~~ |
534 | |
535 | * 6 to 8 medium carrots, peeled and then sliced in 1/4-inch rounds |
536 | * 1/4 teaspoon chile powder (cayenne, chipotle, ancho, or the like) |
537 | * 1 tablespoon ground cumin |
538 | * 1 tablespoon honey |
539 | * The juice of about a half a big lemon, or of a whole smaller one |
540 | * 1/3 cup olive oil |
541 | * 1 tablespoon of fresh dill, washed and chopped fine |
542 | * Pinch of salt, maybe a pinch of pepper |
543 | |
544 | Cook the carrots in a pot of boiling water until just tender -- roughly |
545 | six minutes. (Just don't let them get mushy!) Drain the carrots. |
546 | |
547 | In a largish bowl, combine the lemon juice, the cumin, the chile |
548 | powder, and the honey. Mix well. |
549 | Add the olive oil and whisk it together well. Add the dill and stir. |
550 | |
551 | Add the warm carrots to the bowl and toss it all to coat the carrots |
552 | well. Season with salt and pepper, to taste. |
553 | |
554 | Serve warm or at room temperature. |
555 | |
556 | The measurements here are very approximate, and you should feel free to |
557 | improvise and experiment. It's a very forgiving recipe. For example, |
558 | you could easily halve or double the amount of cumin, or use chopped mint |
559 | leaves instead of dill, or lime juice instead of lemon, et cetera. |
560 | |
561 | [end] |
cb0af213 |
562 | |