X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FI18N%2FLangTags.pm;h=f141ab4bd216bd094ae386dbdbef9e47037f7a5b;hb=8000a3fa7bb45bbd1016a26c76a82389badfc8ce;hp=8a8cb181c5aa9cd7cfbb2564558930b62b2a591e;hpb=5629c67553f30a19f4842982925a3d994d46684c;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm index 8a8cb18..f141ab4 100644 --- a/lib/I18N/LangTags.pm +++ b/lib/I18N/LangTags.pm @@ -1,5 +1,5 @@ -# Time-stamp: "2001-06-20 14:59:28 MDT" +# Time-stamp: "2004-03-30 18:21:55 AST" # Sean M. Burke require 5.000; @@ -14,10 +14,15 @@ require Exporter; similarity_language_tag is_dialect_of locale2language_tag alternate_language_tags encode_language_tag panic_languages + implicate_supers + implicate_supers_strictly ); %EXPORT_TAGS = ('ALL' => \@EXPORT_OK); -$VERSION = "0.25"; +$VERSION = "0.30"; + +sub uniq { my %seen; return grep(!($seen{$_}++), @_); } # a util function + =head1 NAME @@ -25,16 +30,15 @@ I18N::LangTags - functions for dealing with RFC3066-style language tags =head1 SYNOPSIS - use I18N::LangTags qw(is_language_tag same_language_tag - extract_language_tags super_languages - similarity_language_tag is_dialect_of - locale2language_tag alternate_language_tags - encode_language_tag panic_languages - ); + use I18N::LangTags(); + +...or specify whichever of those functions you want to import, like so: -...or whatever of those functions you want to import. Those are -all the exportable functions -- you're free to import only some, -or none at all. By default, none are imported. If you say: + use I18N::LangTags qw(implicate_supers similarity_language_tag); + +All the exportable functions are listed below -- you're free to import +only some, or none at all. By default, none are imported. If you +say: use I18N::LangTags qw(:ALL) @@ -529,9 +533,16 @@ sub encode_language_tag { # we can just handle them here with regexps. $tag =~ s/^iw\b/he/i; # Hebrew $tag =~ s/^in\b/id/i; # Indonesian + $tag =~ s/^cre\b/cr/i; # Cree + $tag =~ s/^jw\b/jv/i; # Javanese $tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger $tag =~ s/^[ix]-navajo\b/nv/i; # Navajo $tag =~ s/^ji\b/yi/i; # Yiddish + # SMB 2003 -- Hm. There's a bunch of new XXX->YY variances now, + # but maybe they're all so obscure I can ignore them. "Obscure" + # meaning either that the language is obscure, and/or that the + # XXX form was extant so briefly that it's unlikely it was ever + # used. I hope. # # These go FROM the simplex to complex form, to get # similarity-comparison right. And that's okay, since @@ -730,6 +741,84 @@ sub panic_languages { return grep !$seen{$_}++, @out, 'en'; } +#--------------------------------------------------------------------------- +#--------------------------------------------------------------------------- + +=item * the function implicate_supers( ...languages... ) + +This takes a list of strings (which are presumed to be language-tags; +strings that aren't, are ignored); and after each one, this function +inserts super-ordinate forms that don't already appear in the list. +The original list, plus these insertions, is returned. + +In other words, it takes this: + + pt-br de-DE en-US fr pt-br-janeiro + +and returns this: + + pt-br pt de-DE de en-US en fr pt-br-janeiro + +This function is most useful in the idiom + + implicate_supers( I18N::LangTags::Detect::detect() ); + +(See L.) + + +=item * the function implicate_supers_strictly( ...languages... ) + +This works like C except that the implicated +forms are added to the end of the return list. + +In other words, implicate_supers_strictly takes a list of strings +(which are presumed to be language-tags; strings that aren't, are +ignored) and after the whole given list, it inserts the super-ordinate forms +of all given tags, minus any tags that already appear in the input list. + +In other words, it takes this: + + pt-br de-DE en-US fr pt-br-janeiro + +and returns this: + + pt-br de-DE en-US fr pt-br-janeiro pt de en + +The reason this function has "_strictly" in its name is that when +you're processing an Accept-Language list according to the RFCs, if +you interpret the RFCs quite strictly, then you would use +implicate_supers_strictly, but for normal use (i.e., common-sense use, +as far as I'm concerned) you'd use implicate_supers. + +=cut + +sub implicate_supers { + my @languages = grep is_language_tag($_), @_; + my %seen_encoded; + foreach my $lang (@languages) { + $seen_encoded{ I18N::LangTags::encode_language_tag($lang) } = 1 + } + + my(@output_languages); + foreach my $lang (@languages) { + push @output_languages, $lang; + foreach my $s ( I18N::LangTags::super_languages($lang) ) { + # Note that super_languages returns the longest first. + last if $seen_encoded{ I18N::LangTags::encode_language_tag($s) }; + push @output_languages, $s; + } + } + return uniq( @output_languages ); + +} + +sub implicate_supers_strictly { + my @tags = grep is_language_tag($_), @_; + return uniq( @_, map super_languages($_), @_ ); +} + + + ########################################################################### 1; __END__ @@ -770,19 +859,16 @@ Continuations". * Locale::Codes, in C -* ISO 639, "Code for the representation of names of languages", -C - * ISO 639-2, "Codes for the representation of names of languages", -including three-letter codes, -C +including two-letter and three-letter codes, +C * The IANA list of registered languages (hopefully up-to-date), -C +C =head1 COPYRIGHT -Copyright (c) 1998-2001 Sean M. Burke. All rights reserved. +Copyright (c) 1998-2004 Sean M. Burke. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.