-# Time-stamp: "2002-02-02 20:43:03 MST"
+# Time-stamp: "2003-07-20 07:44:42 ADT"
# Sean M. Burke <sburke@cpan.org>
require 5.000;
);
%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
-$VERSION = "0.27";
+$VERSION = "0.28";
=head1 NAME
# we can just handle them here with regexps.
$tag =~ s/^iw\b/he/i; # Hebrew
$tag =~ s/^in\b/id/i; # Indonesian
+ $tag =~ s/^cre\b/cr/i; # Cree
+ $tag =~ s/^jw\b/jv/i; # Javanese
$tag =~ s/^[ix]-lux\b/lb/i; # Luxemburger
$tag =~ s/^[ix]-navajo\b/nv/i; # Navajo
$tag =~ s/^ji\b/yi/i; # Yiddish
+ # SMB 2003 -- Hm. There's a bunch of new XXX->YY variances now,
+ # but maybe they're all so obscure I can ignore them. "Obscure"
+ # meaning either that the language is obscure, and/or that the
+ # XXX form was extant so briefly that it's unlikely it was ever
+ # used. I hope.
#
# These go FROM the simplex to complex form, to get
# similarity-comparison right. And that's okay, since
* Locale::Codes, in
C<http://www.perl.com/CPAN/modules/by-module/Locale/>
-* ISO 639, "Code for the representation of names of languages",
-C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html>
-
* ISO 639-2, "Codes for the representation of names of languages",
-including three-letter codes,
-C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html>
+including two-letter and three-letter codes,
+C<http://www.loc.gov/standards/iso639-2/langcodes.html>
* The IANA list of registered languages (hopefully up-to-date),
-C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/>
+C<http://www.iana.org/assignments/language-tags>
=head1 COPYRIGHT
-Copyright (c) 1998-2001 Sean M. Burke. All rights reserved.
+Copyright (c) 1998-2003 Sean M. Burke. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
require 5;
package I18N::LangTags::List;
-# Time-stamp: "2002-02-02 20:13:58 MST"
+# Time-stamp: "2003-07-20 07:31:08 ADT"
use strict;
use vars qw(%Name $Debug $VERSION);
-$VERSION = '0.25';
+$VERSION = '0.26';
# POD at the end.
#----------------------------------------------------------------------
# Fill %Name by scanning the I18N::LangTags::List::DATA filehandle.
# Lines are ignored until one containing "=for woohah" has been seen.
# After that, a line containing "{tag}", an optional colon, and a name
# with no square brackets in it defines one tag => name entry.
my $seeking = 1;
my $count = 0;
my($tag,$name);
+ my $last_name = '';
while(<I18N::LangTags::List::DATA>) {
if($seeking) {
$seeking = 0 if m/=for woohah/;
- } else {
- next unless ($tag, $name) =
- m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;
+ } elsif( ($tag, $name) =
+ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
+ ) {
# Trim trailing whitespace, semicolons and periods from the name;
# an entry whose name is then empty (false) is not recorded.
$name =~ s/\s*[;\.]*\s*$//g;
next unless $name;
++$count;
print "<$tag> <$name>\n" if $Debug;
- $Name{$tag} = $name;
+ $last_name = $Name{$tag} = $name;
# A line mentioning: Formerly "xxx" registers xxx as an old tag for
# the most recently recorded name, with " (old tag)" appended.
+ } elsif (m/Formerly \"([-a-z0-9]+)\"/) {
+ $Name{$1} = "$last_name (old tag)" if $last_name;
}
}
# $count is only incremented when an entry is recorded, so zero means
# that no tag definitions were found at all.
die "No tags read??" unless $count;
=item {ada} : Adangme
+=item {ady} : Adyghe
+
+eq Adygei
+
=item {aa} : Afar
=item {afh} : Afrihili
=item [{afa} : Afro-Asiatic (Other)]
-=item {aka} : Akan
+=item {ak} : Akan
+
+(Formerly "aka".)
=item {akk} : Akkadian
=item {hy} : Armenian
+=item {an} : Aragonese
+
=item [{art} : Artificial (Other)]
+=item {ast} : Asturian
+
+eq Bable.
+
=item {as} : Assamese
=item [{ath} : Athapascan languages]
=item [{map} : Austronesian (Other)]
-=item {ava} : Avaric
+=item {av} : Avaric
+
+(Formerly "ava".)
=item {ae} : Avestan
eq Azeri
+Notable forms:
+{az-Arab} Azerbaijani in Arabic script;
+{az-Cyrl} Azerbaijani in Cyrillic script;
+{az-Latn} Azerbaijani in Latin script.
+
=item {ban} : Balinese
=item [{bat} : Baltic (Other)]
=item {bal} : Baluchi
-=item {bam} : Bambara
+=item {bm} : Bambara
+
+(Formerly "bam".)
=item [{bai} : Bamileke languages]
=item {zh} : Chinese
Many forms are mutually unintelligible in spoken media.
-Notable subforms:
+Notable forms:
+{zh-Hans} Chinese, in simplified script;
+{zh-Hant} Chinese, in traditional script;
+{zh-tw} Taiwan Chinese;
{zh-cn} PRC Chinese;
-{zh-hk} Hong Kong Chinese;
-{zh-mo} Macau Chinese;
{zh-sg} Singapore Chinese;
-{zh-tw} Taiwan Chinese;
+{zh-mo} Macau Chinese;
+{zh-hk} Hong Kong Chinese;
{zh-guoyu} Mandarin [Putonghua/Guoyu];
-{zh-hakka} Hakka [formerly i-hakka];
+{zh-hakka} Hakka [formerly "i-hakka"];
{zh-min} Hokkien;
{zh-min-nan} Southern Hokkien;
{zh-wuu} Shanghaiese;
eq Corse.
-=item {cre} : Cree
+=item {cr} : Cree
-NOT Creek!
+NOT Creek! (Formerly "cre".)
=item {mus} : Creek
=item {da} : Danish
+=item {dar} : Dargwa
+
=item {day} : Dayak
=item {i-default} : Default (Fallthru) Language
=item {din} : Dinka
-=item {div} : Divehi
+=item {dv} : Divehi
+
+eq Maldivian. (Formerly "div".)
=item {doi} : Dogri
eq Anglo-Saxon. (Historical)
+=item {i-enochian} : Enochian (Artificial)
+
+=item {myv} : Erzya
+
=item {eo} : Esperanto
(Artificial)
=item {et} : Estonian
-=item {ewe} : Ewe
+=item {ee} : Ewe
+
+(Formerly "ewe".)
=item {ewo} : Ewondo
=item {fur} : Friulian
-=item {ful} : Fulah
+=item {ff} : Fulah
+
+(Formerly "ful".)
=item {gaa} : Ga
eq Galician
-=item {lug} : Ganda
+=item {lg} : Ganda
+
+(Formerly "lug".)
=item {gay} : Gayo
=item {hai} : Haida
+=item {ht} : Haitian
+
+eq Haitian Creole
+
=item {ha} : Hausa
=item {haw} : Hawaiian
=item {is} : Icelandic
-=item {ibo} : Igbo
+=item {io} : Ido
+
+(Artificial)
+
+=item {ig} : Igbo
+
+(Formerly "ibo".)
=item {ijo} : Ijo
=for etc
{in} Indonesian (old tag)
+=item {inh} : Ingush
+
=item {ia} : Interlingua (International Auxiliary Language Association)
(Artificial) NOT Interlingue!
(NOT "jp"!)
-=item {jw} : Javanese
+=item {jv} : Javanese
+
+(Formerly "jw" because of a typo.)
=item {jrb} : Judeo-Arabic
=item {jpr} : Judeo-Persian
+=item {kbd} : Kabardian
+
=item {kab} : Kabyle
=item {kac} : Kachin
eq Greenlandic "Eskimo"
+=item {xal} : Kalmyk
+
=item {kam} : Kamba
=item {kn} : Kannada
eq Kanarese. NOT Canadian!
-=item {kau} : Kanuri
+=item {kr} : Kanuri
+
+(Formerly "kau".)
+
+=item {krc} : Karachay-Balkar
=item {kaa} : Kara-Kalpak
=item {ks} : Kashmiri
+=item {csb} : Kashubian
+
+eq Kashub
+
=item {kaw} : Kawi
=item {kk} : Kazakh
=item {kv} : Komi
-=item {kon} : Kongo
+=item {kg} : Kongo
+
+(Formerly "kon".)
=item {kok} : Konkani
=item {lb} : Letzeburgesch
-eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
+eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".)
=for etc
{i-lux} Letzeburgesch (old tag)
=item {lez} : Lezghian
+=item {li} : Limburgish
+
+eq Limburger, eq Limburgan. NOT Letzeburgesch!
+
=item {ln} : Lingala
=item {lt} : Lithuanian
eq Low Saxon. eq Low German.
+=item {art-lojban} : Lojban (Artificial)
+
=item {loz} : Lozi
-=item {lub} : Luba-Katanga
+=item {lu} : Luba-Katanga
+
+(Formerly "lub".)
=item {lua} : Luba-Lulua
=item {moh} : Mohawk
+=item {mdf} : Moksha
+
=item {mo} : Moldavian
eq Moldovan.
=item {nah} : Nahuatl
+=item {nap} : Neapolitan
+
=item {na} : Nauru
=item {nv} : Navajo
-eq Navaho. (Formerly i-navajo.)
+eq Navaho. (Formerly "i-navajo".)
=for etc
{i-navajo} Navajo (old tag)
=item {niu} : Niuean
+=item {nog} : Nogai
+
=item {non} : Old Norse
(Historical)
Do not use this.
-=item {se} : Northern Sami
-
-eq Lappish. eq Lapp. eq (Northern) Saami.
-
=item {no} : Norwegian
Note the two following forms:
=item {nb} : Norwegian Bokmal
-eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
+eq BokmE<aring>l. (A form of Norwegian.) (Formerly "no-bok".)
=for etc
{no-bok} Norwegian Bokmal (old tag)
=item {nn} : Norwegian Nynorsk
-(A form of Norwegian.) (Formerly no-nyn.)
+(A form of Norwegian.) (Formerly "no-nyn".)
=for etc
{no-nyn} Norwegian Nynorsk (old tag)
eq ProvenE<ccedil>al, eq Provencal
-=item {oji} : Ojibwa
+=item {oj} : Ojibwa
-eq Ojibwe.
+eq Ojibwe. (Formerly "oji".)
=item {or} : Oriya
NOT Aramaic!
+=item {se} : Northern Sami
+
+eq Lappish. eq Lapp. eq (Northern) Saami.
+
+=item {sma} : Southern Sami
+
+=item {smn} : Inari Sami
+
+=item {smj} : Lule Sami
+
+=item {sms} : Skolt Sami
+
=item [{smi} : Sami languages (Other)]
=item {sm} : Samoan
eq Serb. NOT Sorbian.
+Notable forms:
+{sr-Cyrl} Serbian in Cyrillic script;
+{sr-Latn} Serbian in Latin script.
+
=item {srr} : Serer
=item {shn} : Shan
{sgn-ni} Nicaraguan Sign Language (ISN);
{sgn-us} American Sign Language (ASL).
+(And so on with other country codes as the subtag.)
+
=item {bla} : Siksika
eq Blackfoot. eq Pikanii.
=item {tum} : Tumbuka
+=item [{tup} : Tupi languages]
+
=item {tr} : Turkish
(Typically in Roman script)
(Typically in Arabic script) (Historical)
+=item {crh} : Crimean Turkish
+
+eq Crimean Tatar
+
=item {tk} : Turkmen
eq Turkmeni.
=item {tw} : Twi
+=item {udm} : Udmurt
+
=item {uga} : Ugaritic
NOT Ugric!
eq E<Ouml>zbek
+Notable forms:
+{uz-Cyrl} Uzbek in Cyrillic script;
+{uz-Latn} Uzbek in Latin script.
+
=item {vai} : Vai
-=item {ven} : Venda
+=item {ve} : Venda
-NOT Wendish! NOT Wend! NOT Avestan!
+NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".)
=item {vi} : Vietnamese
=item [{wak} : Wakashan languages]
+=item {wa} : Walloon
+
=item {wal} : Walamo
eq Wolaytta.
eq Yap
+=item {ii} : Sichuan Yi
+
=item {yi} : Yiddish
-Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
+Formerly "ji". Usually in Hebrew script.
-=for etc
-{ji} Yiddish (old tag)
+Notable forms:
+{yi-latn} Yiddish in Latin script.
=item {yo} : Yoruba
=head1 COPYRIGHT AND DISCLAIMER
-Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
+Copyright (c) 2001,2002,2003 Sean M. Burke. All rights reserved.
You can redistribute and/or
modify this document under the same terms as Perl itself.