3 package I18N::LangTags::List;
4 # Time-stamp: "2002-02-02 20:13:58 MST"
6 use vars qw(%Name $Debug $VERSION);
10 #----------------------------------------------------------------------
12 # read the table out of our own POD!
16 while(<I18N::LangTags::List::DATA>) {
18 $seeking = 0 if m/=for woohah/;
20 next unless ($tag, $name) =
21 m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;
22 $name =~ s/\s*[;\.]*\s*$//g;
25 print "<$tag> <$name>\n" if $Debug;
29 die "No tags read??" unless $count;
31 #----------------------------------------------------------------------
34 my $tag = lc($_[0] || return);
39 if($tag =~ m/^x-(.+)/) {
41 } elsif($tag =~ m/^i-(.+)/) {
49 print "Input: {$tag}\n" if $Debug;
51 last if $name = $Name{$tag};
52 last if $name = $Name{$alt};
53 if($tag =~ s/(-[a-z0-9]+)$//s) {
54 print "Shaving off: $1 leaving $tag\n" if $Debug;
55 $subform = "$1$subform";
56 # and loop around again
58 $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n";
60 # we're trying to pull a subform off a primary tag. TILT!
61 print "Aborting on: {$name}{$subform}\n" if $Debug;
65 print "Output: {$name}{$subform}\n" if $Debug;
67 return unless $name; # Failure
68 return $name unless $subform; # Exact match
71 return "$name (Subform \"$subform\")";
80 I18N::LangTags::List -- tags and names for human languages
84 use I18N::LangTags::List;
85 print "Parlez-vous... ", join(', ',
86 I18N::LangTags::List::name('elx') || 'unknown_language',
87 I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
88 I18N::LangTags::List::name('en') || 'unknown_language',
89 I18N::LangTags::List::name('en-CA') || 'unknown_language',
94 Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
98 This module provides a function
99 C<I18N::LangTags::List::name( I<langtag> ) > that takes
100 a language tag (see L<I18N::LangTags|I18N::LangTags>)
101 and returns the best attempt at an English name for it, or
102 undef if it can't make sense of the tag.
104 The function I18N::LangTags::List::name(...) is not exported.
106 The map of tags-to-names that it uses is accessible as
107 %I18N::LangTags::List::Name, and it's the same as the list
108 that follows in this documentation, which should be useful
109 to you even if you don't use this module.
111 =head1 ABOUT LANGUAGE TAGS
113 Internet language tags, as defined in RFC 3066, are a formalism
114 for denoting human languages. The two-letter ISO 639-1 language
115 codes are well known (as "en" for English), as are their forms
116 when qualified by a country code ("en-US"). Less well-known are the
117 arbitrary-length non-ISO codes (like "i-mingo"), and the
118 recently (in 2001) introduced three-letter ISO 639-2 codes.
120 Remember these important facts:
126 Language tags are not locale IDs. A locale ID is written with a "_"
127 instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
128 I<means> something different than a language tag. A language tag
129 denotes a language. A locale ID denotes a language I<as used in>
130 a particular place, in combination with non-linguistic
131 location-specific information such as what currency is used
132 there. Locales I<also> often denote character set information,
133 as in "en_US.ISO8859-1".
137 Language tags are not for computer languages.
141 "Dialect" is not a useful term, since there is no objective
142 criterion for establishing when two language-forms are
143 dialects of each other, or are separate languages.
147 Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,
148 are all the same tag, and denote the same language.
152 Not every language tag really refers to a single language. Some
153 language tags refer to conditions: i-default (system-message text
154 in English plus maybe other languages), und (undetermined
155 language). Others (notably lots of the three-letter codes) are
156 bibliographic tags that classify whole groups of languages, as
157 with cus for "Cushitic (Other)" (i.e., a
158 language that has been classed as Cushitic, but which has no more
159 specific code) or the even less linguistically coherent
160 sai for "South American Indian (Other)". Though useful in
161 bibliography, B<SUCH TAGS ARE NOT
162 FOR GENERAL USE>. For further guidance, email me.
166 Language tags are not country codes. In fact, they are often
167 distinct codes, as with language tag ja for Japanese, and
168 ISO 3166 country code C<.jp> for Japan.
172 =head1 LIST OF LANGUAGES
174 The first part of each item is the language tag, between
176 is followed by an English name for the language or language-group.
177 Language tags that I judge to be not for general use are bracketed.
179 This list is in alphabetical order by English name of the language.
182 The name in the =item line MUST NOT have E<...>'s in it!!
188 =item {ab} : Abkhazian
192 =item {ace} : Achinese
196 =item {ada} : Adangme
200 =item {afh} : Afrihili
204 =item {af} : Afrikaans
206 =item [{afa} : Afro-Asiatic (Other)]
210 =item {akk} : Akkadian
214 =item {sq} : Albanian
218 =item [{alg} : Algonquian languages]
222 =item [{tut} : Altaic (Other)]
230 eq Amis. eq 'Amis. eq Pangca.
232 =item [{apa} : Apache languages]
236 Many forms are mutually unintelligible in spoken media.
239 {ar-bh} Bahrain Arabic;
240 {ar-dz} Algerian Arabic;
241 {ar-eg} Egyptian Arabic;
242 {ar-iq} Iraqi Arabic;
243 {ar-jo} Jordanian Arabic;
244 {ar-kw} Kuwait Arabic;
245 {ar-lb} Lebanese Arabic;
246 {ar-ly} Libyan Arabic;
247 {ar-ma} Moroccan Arabic;
248 {ar-om} Omani Arabic;
249 {ar-qa} Qatari Arabic;
250 {ar-sa} Saudi Arabic;
251 {ar-sy} Syrian Arabic;
252 {ar-tn} Tunisian Arabic;
253 {ar-ye} Yemen Arabic.
255 =item {arc} : Aramaic
257 NOT Amharic! NOT Samaritan Aramaic!
259 =item {arp} : Arapaho
261 =item {arn} : Araucanian
265 =item {hy} : Armenian
267 =item [{art} : Artificial (Other)]
269 =item {as} : Assamese
271 =item [{ath} : Athapascan languages]
273 eq Athabaskan. eq Athapaskan. eq Athabascan.
275 =item [{aus} : Australian languages]
277 =item [{map} : Austronesian (Other)]
289 =item {az} : Azerbaijani
293 =item {ban} : Balinese
295 =item [{bat} : Baltic (Other)]
297 =item {bal} : Baluchi
299 =item {bam} : Bambara
301 =item [{bai} : Bamileke languages]
305 =item [{bnt} : Bantu (Other)]
313 =item {btk} : Batak (Indonesia)
317 =item {be} : Belarusian
319 eq Belarussian. eq Byelarussian.
320 eq Belorussian. eq Byelorussian.
321 eq White Russian. eq White Ruthenian.
330 =item [{ber} : Berber (Other)]
332 =item {bho} : Bhojpuri
350 =item {bug} : Buginese
352 =item {bg} : Bulgarian
354 =item {i-bnn} : Bunun
366 eq CatalE<aacute>n. eq Catalonian.
368 =item [{cau} : Caucasian (Other)]
370 =item {ceb} : Cebuano
372 =item [{cel} : Celtic (Other)]
375 {cel-gaulish} Gaulish (Historical)
377 =item [{cai} : Central American Indian (Other)]
379 =item {chg} : Chagatai
383 =item [{cmc} : Chamic languages]
385 =item {ch} : Chamorro
389 =item {chr} : Cherokee
393 =item {chy} : Cheyenne
395 =item {chb} : Chibcha
397 (Historical) NOT Chibchan (which is a language family).
399 =item {ny} : Chichewa
401 eq Nyanja. eq Chinyanja.
405 Many forms are mutually unintelligible in spoken media.
408 {zh-hk} Hong Kong Chinese;
409 {zh-mo} Macau Chinese;
410 {zh-sg} Singapore Chinese;
411 {zh-tw} Taiwan Chinese;
412 {zh-guoyu} Mandarin [Putonghua/Guoyu];
413 {zh-hakka} Hakka [formerly i-hakka];
415 {zh-min-nan} Southern Hokkien;
416 {zh-wuu} Shanghaiese;
422 {i-hakka} Hakka (old tag)
424 =item {chn} : Chinook Jargon
428 =item {chp} : Chipewyan
430 =item {cho} : Choctaw
432 =item {cu} : Church Slavic
434 eq Old Church Slavonic.
436 =item {chk} : Chuukese
438 eq Trukese. eq Chuuk. eq Truk. eq Ruk.
446 =item {co} : Corsican
458 =item [{cpe} : English-based Creoles and pidgins (Other)]
460 =item [{cpf} : French-based Creoles and pidgins (Other)]
462 =item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
464 =item [{crp} : Creoles and pidgins (Other)]
466 =item {hr} : Croatian
470 =item [{cus} : Cushitic (Other)]
476 eq Nakota. eq Lakota.
482 =item {i-default} : Default (Fallthru) Language
484 Defined in RFC 2277, this is for tagging text
485 (which must include English text, and might/should include text
486 in other appropriate languages) that is emitted in a context
487 where language-negotiation wasn't possible -- in SMTP mail failure
488 messages, for example.
490 =item {del} : Delaware
504 =item [{dra} : Dravidian (Other)]
510 eq Netherlander. Notable forms:
511 {nl-nl} Netherlands Dutch;
512 {nl-be} Belgian Dutch.
514 =item {dum} : Middle Dutch (ca.1050-1350)
520 =item {dz} : Dzongkha
524 =item {egy} : Ancient Egyptian
530 =item {elx} : Elamite
537 {en-au} Australian English;
538 {en-bz} Belize English;
539 {en-ca} Canadian English;
541 {en-ie} Irish English;
542 {en-jm} Jamaican English;
543 {en-nz} New Zealand English;
544 {en-ph} Philippine English;
545 {en-tt} Trinidad English;
547 {en-za} South African English;
548 {en-zw} Zimbabwe English.
550 =item {enm} : Middle English (1100-1500)
554 =item {ang} : Old English (ca.450-1100)
556 eq Anglo-Saxon. (Historical)
558 =item {eo} : Esperanto
562 =item {et} : Estonian
578 =item [{fiu} : Finno-Ugrian (Other)]
580 eq Finno-Ugric. NOT Ugaritic!
587 {fr-fr} France French;
588 {fr-be} Belgian French;
589 {fr-ca} Canadian French;
590 {fr-ch} Swiss French;
591 {fr-lu} Luxembourg French;
592 {fr-mc} Monaco French.
594 =item {frm} : Middle French (ca.1400-1600)
598 =item {fro} : Old French (842-ca.1400)
604 =item {fur} : Friulian
610 =item {gd} : Scots Gaelic
614 =item {gl} : Gallegan
628 =item {ka} : Georgian
633 {de-at} Austrian German;
634 {de-be} Belgian German;
635 {de-ch} Swiss German;
636 {de-de} Germany German;
637 {de-li} Liechtenstein German;
638 {de-lu} Luxembourg German.
640 =item {gmh} : Middle High German (ca.1050-1500)
644 =item {goh} : Old High German (ca.750-1050)
648 =item [{gem} : Germanic (Other)]
650 =item {gil} : Gilbertese
654 =item {gor} : Gorontalo
662 =item {grc} : Ancient Greek
664 (Historical) (Until 15th century or so.)
666 =item {el} : Modern Greek
668 (Since 15th century or so.)
674 =item {gu} : Gujarati
676 =item {gwi} : Gwich'in
684 =item {haw} : Hawaiian
693 {iw} Hebrew (old tag)
697 =item {hil} : Hiligaynon
699 =item {him} : Himachali
703 =item {ho} : Hiri Motu
705 =item {hit} : Hittite
711 =item {hu} : Hungarian
717 =item {is} : Icelandic
725 =item [{inc} : Indic (Other)]
727 =item [{ine} : Indo-European (Other)]
729 =item {id} : Indonesian
734 {in} Indonesian (old tag)
736 =item {ia} : Interlingua (International Auxiliary Language Association)
738 (Artificial) NOT Interlingue!
740 =item {ie} : Interlingue
742 (Artificial) NOT Interlingua!
744 =item {iu} : Inuktitut
746 A subform of "Eskimo".
750 A subform of "Eskimo".
752 =item [{ira} : Iranian (Other)]
756 =item {mga} : Middle Irish (900-1200)
760 =item {sga} : Old Irish (to 900)
764 =item [{iro} : Iroquoian languages]
769 {it-it} Italy Italian;
770 {it-ch} Swiss Italian.
772 =item {ja} : Japanese
776 =item {jw} : Javanese
778 =item {jrb} : Judeo-Arabic
780 =item {jpr} : Judeo-Persian
786 =item {kl} : Kalaallisut
788 eq Greenlandic "Eskimo"
794 eq Kanarese. NOT Canadian!
798 =item {kaa} : Kara-Kalpak
802 =item {ks} : Kashmiri
812 eq Cambodian. eq Kampuchean.
814 =item [{khi} : Khoisan (Other)]
816 =item {kho} : Khotanese
822 =item {kmb} : Kimbundu
824 =item {rw} : Kinyarwanda
828 =item {i-klingon} : Klingon
834 =item {kok} : Konkani
838 =item {kos} : Kosraean
844 =item {kj} : Kuanyama
852 =item {kut} : Kutenai
856 eq Judeo-Spanish. NOT Ladin (a minority language in Italy).
872 (Historical) NOT Ladin! NOT Ladino!
878 =item {lb} : Letzeburgesch
880 eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
883 {i-lux} Letzeburgesch (old tag)
885 =item {lez} : Lezghian
889 =item {lt} : Lithuanian
891 =item {nds} : Low German
893 eq Low Saxon.
897 =item {lub} : Luba-Katanga
899 =item {lua} : Luba-Lulua
901 =item {lui} : Luiseno
907 =item {luo} : Luo (Kenya and Tanzania)
911 =item {mk} : Macedonian
913 eq the modern Slavic language spoken in what was Yugoslavia.
914 NOT the form of Greek spoken in Greek Macedonia!
916 =item {mad} : Madurese
920 =item {mai} : Maithili
922 =item {mak} : Makasar
924 =item {mg} : Malagasy
930 =item {ml} : Malayalam
942 =item {man} : Mandingo
944 =item {mni} : Manipuri
948 =item [{mno} : Manobo languages]
962 =item {mh} : Marshall
966 =item {mwr} : Marwari
970 =item [{myn} : Mayan languages]
976 =item {min} : Minangkabau
978 =item {i-mingo} : Mingo
980 eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!
982 =item [{mis} : Miscellaneous languages]
988 =item {mo} : Moldavian
992 =item [{mkh} : Mon-Khmer (Other)]
996 =item {mn} : Mongolian
1002 =item [{mul} : Multiple languages]
1006 =item [{mun} : Munda languages]
1008 =item {nah} : Nahuatl
1014 eq Navaho. (Formerly i-navajo.)
1017 {i-navajo} Navajo (old tag)
1019 =item {nd} : North Ndebele
1021 =item {nr} : South Ndebele
1027 eq Nepalese. Notable forms:
1028 {ne-np} Nepal Nepali;
1029 {ne-in} India Nepali.
1031 =item {new} : Newari
1035 =item [{nic} : Niger-Kordofanian (Other)]
1037 =item [{ssa} : Nilo-Saharan (Other)]
1039 =item {niu} : Niuean
1041 =item {non} : Old Norse
1045 =item [{nai} : North American Indian]
1049 =item {se} : Northern Sami
1051 eq Lappish. eq Lapp. eq (Northern) Saami.
1053 =item {no} : Norwegian
1055 Note the two following forms:
1057 =item {nb} : Norwegian Bokmal
1059 eq BokmE<aring>l. (A form of Norwegian.) (Formerly no-bok.)
1062 {no-bok} Norwegian Bokmal (old tag)
1064 =item {nn} : Norwegian Nynorsk
1066 (A form of Norwegian.) (Formerly no-nyn.)
1069 {no-nyn} Norwegian Nynorsk (old tag)
1071 =item [{nub} : Nubian languages]
1073 =item {nym} : Nyamwezi
1075 =item {nyn} : Nyankole
1081 =item {oc} : Occitan (post 1500)
1083 eq ProvenE<ccedil>al, eq Provencal
1085 =item {oji} : Ojibwa
1095 =item {os} : Ossetian; Ossetic
1097 =item [{oto} : Otomian languages]
1099 Group of languages collectively called "OtomE<iacute>".
1101 =item {pal} : Pahlavi
1105 =item {i-pwn} : Paiwan
1109 =item {pau} : Palauan
1115 =item {pam} : Pampanga
1117 =item {pag} : Pangasinan
1119 =item {pa} : Panjabi
1123 =item {pap} : Papiamento
1127 =item [{paa} : Papuan (Other)]
1129 =item {fa} : Persian
1131 eq Farsi. eq Iranian.
1133 =item {peo} : Old Persian (ca.600-400 B.C.)
1135 =item [{phi} : Philippine (Other)]
1137 =item {phn} : Phoenician
1141 =item {pon} : Pohnpeian
1147 =item {pt} : Portuguese
1149 eq Portugese. Notable forms:
1150 {pt-pt} Portugal Portuguese;
1151 {pt-br} Brazilian Portuguese.
1153 =item [{pra} : Prakrit languages]
1155 =item {pro} : Old Provencal (to 1500)
1157 eq Old ProvenE<ccedil>al. (Historical.)
1161 eq Pashto. eq Pushtu.
1163 =item {qu} : Quechua
1167 =item {rm} : Raeto-Romance
1171 =item {raj} : Rajasthani
1173 =item {rap} : Rapanui
1175 =item {rar} : Rarotongan
1177 =item [{qaa - qtz} : Reserved for local use.]
1179 =item [{roa} : Romance (Other)]
1181 NOT Romanian! NOT Romany! NOT Romansh!
1183 =item {ro} : Romanian
1185 eq Rumanian. NOT Romany!
1187 =item {rom} : Romany
1189 eq Rom. NOT Romanian!
1193 =item {ru} : Russian
1195 NOT White Russian! NOT Rusyn!
1197 =item [{sal} : Salishan languages]
1199 Large language group.
1201 =item {sam} : Samaritan Aramaic
1205 =item [{smi} : Sami languages (Other)]
1209 =item {sad} : Sandawe
1213 =item {sa} : Sanskrit
1217 =item {sat} : Santali
1219 =item {sc} : Sardinian
1229 =item {sel} : Selkup
1231 =item [{sem} : Semitic (Other)]
1233 =item {sr} : Serbian
1235 eq Serb. NOT Sorbian.
1243 =item {sid} : Sidamo
1245 =item {sgn-...} : Sign Languages
1247 Always use with a subtag. Notable forms:
1248 {sgn-gb} British Sign Language (BSL);
1249 {sgn-ie} Irish Sign Language (ESL);
1250 {sgn-ni} Nicaraguan Sign Language (ISN);
1251 {sgn-us} American Sign Language (ASL).
1253 =item {bla} : Siksika
1255 eq Blackfoot. eq Pikanii.
1259 =item {si} : Sinhalese
1263 =item [{sit} : Sino-Tibetan (Other)]
1265 =item [{sio} : Siouan languages]
1267 =item {den} : Slave (Athapascan)
1269 ("Slavey" is a subform.)
1271 =item [{sla} : Slavic (Other)]
1277 =item {sl} : Slovenian
1281 =item {sog} : Sogdian
1285 =item {son} : Songhai
1287 =item {snk} : Soninke
1289 =item {wen} : Sorbian languages
1291 eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!
1293 =item {nso} : Northern Sotho
1295 =item {st} : Southern Sotho
1297 eq Sutu. eq Sesotho.
1299 =item [{sai} : South American Indian (Other)]
1301 =item {es} : Spanish
1304 {es-ar} Argentine Spanish;
1305 {es-bo} Bolivian Spanish;
1306 {es-cl} Chilean Spanish;
1307 {es-co} Colombian Spanish;
1308 {es-do} Dominican Spanish;
1309 {es-ec} Ecuadorian Spanish;
1310 {es-es} Spain Spanish;
1311 {es-gt} Guatemalan Spanish;
1312 {es-hn} Honduran Spanish;
1313 {es-mx} Mexican Spanish;
1314 {es-pa} Panamanian Spanish;
1315 {es-pe} Peruvian Spanish;
1316 {es-pr} Puerto Rican Spanish;
1317 {es-py} Paraguay Spanish;
1318 {es-sv} Salvadoran Spanish;
1320 {es-uy} Uruguayan Spanish;
1321 {es-ve} Venezuelan Spanish.
1323 =item {suk} : Sukuma
1325 =item {sux} : Sumerian
1329 =item {su} : Sundanese
1333 =item {sw} : Swahili
1339 =item {sv} : Swedish
1342 {sv-se} Sweden Swedish;
1343 {sv-fi} Finland Swedish.
1345 =item {syr} : Syriac
1347 =item {tl} : Tagalog
1349 =item {ty} : Tahitian
1351 =item [{tai} : Tai (Other)]
1357 =item {tmh} : Tamashek
1367 =item {i-tay} : Tayal
1369 eq Atayal. eq Atayan.
1373 =item {ter} : Tereno
1381 =item {bo} : Tibetan
1385 =item {ti} : Tigrinya
1389 eq Themne. eq Timene.
1393 =item {tli} : Tlingit
1395 =item {tpi} : Tok Pisin
1397 =item {tkl} : Tokelau
1399 =item {tog} : Tonga (Nyasa)
1403 =item {to} : Tonga (Tonga Islands)
1405 (Pronounced "Tong-a", not "Tong-ga")
1409 =item {tsi} : Tsimshian
1417 =item {i-tsu} : Tsou
1423 =item {tum} : Tumbuka
1425 =item {tr} : Turkish
1427 (Typically in Roman script)
1429 =item {ota} : Ottoman Turkish (1500-1928)
1431 (Typically in Arabic script) (Historical)
1433 =item {tk} : Turkmen
1437 =item {tvl} : Tuvalu
1439 =item {tyv} : Tuvinian
1445 =item {uga} : Ugaritic
1451 =item {uk} : Ukrainian
1453 =item {umb} : Umbundu
1455 =item {und} : Undetermined
1457 Not a tag for normal use.
1469 NOT Wendish! NOT Wend! NOT Avestan!
1471 =item {vi} : Vietnamese
1475 =item {vo} : Volapuk
1477 eq VolapE<uuml>k. (Artificial)
1483 =item [{wak} : Wakashan languages]
1485 =item {wal} : Walamo
1491 Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
1492 not the smaller Philippine language Waray Sorsogon, nor the extinct
1493 Australian language Waray.
1503 =item {x-...} : Unregistered (Semi-Private Use)
1505 "x-" is a prefix for language tags that are not registered with ISO
1506 or IANA. Example: x-double-dutch
1514 (The Yao in Malawi?)
1516 =item {yap} : Yapese
1520 =item {yi} : Yiddish
1522 Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
1525 {ji} Yiddish (old tag)
1529 =item [{ypk} : Yupik languages]
1531 Several "Eskimo" languages.
1535 =item [{zap} : Zapotec]
1537 (A group of languages.)
1539 =item {zen} : Zenaga
1557 L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
1559 =head1 COPYRIGHT AND DISCLAIMER
1561 Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
1563 You can redistribute and/or
1564 modify this document under the same terms as Perl itself.
1566 This document is provided in the hope that it will be
1567 useful, but without any warranty;
1568 without even the implied warranty of accuracy, authoritativeness,
1569 completeness, merchantability, or fitness for a particular purpose.
1571 Email any corrections or questions to me.
1575 Sean M. Burke, sburkeE<64>cpan.org
1580 # To generate a list of just the two- and three-letter codes:
1582 #!/usr/local/bin/perl -w
1584 require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
1585 # Sean M. Burke, sburke@cpan.org
1586 # This program is for generating the language_codes.txt file
1589 use HTML::TreeBuilder 3.10;
1590 my $root = HTML::TreeBuilder->new();
1591 my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
1592 $root->parse(get($url) || die "Can't get $url");
1597 foreach my $tr ($root->find_by_tag_name('tr')) {
1598 my @f = map $_->as_text(), $tr->content_list();
1599 #print map("<$_> ", @f), "\n";
1600 next unless @f == 5;
1601 pop @f; # nix the French name
1602 next if $f[-1] eq 'Language Name (English)'; # it's a header line
1603 my $xx = splice(@f, 2,1); # pull out the two-letter code
1606 if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it
1607 push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ];
1608 } else { # print the three-letter codes.
1609 if($f[0] eq $f[1]) {
1610 push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
1611 } else { # shouldn't happen
1612 push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ];
1617 print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
1618 print "[ based on $url\n at ", scalar(localtime), "]\n",
1619 "[Note: doesn't include IANA-registered codes.]\n";