From: Jarkko Hietaniemi Date: Mon, 28 May 2001 17:50:05 +0000 (+0000) Subject: Upgrade to I18N::LangTags 0.22. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=e7525a1746b960b70659004cda7d2e0da2348766;p=p5sagit%2Fp5-mst-13.2.git Upgrade to I18N::LangTags 0.22. p4raw-id: //depot/perl@10262 --- diff --git a/MANIFEST b/MANIFEST index bef35c7..a06fbbf 100644 --- a/MANIFEST +++ b/MANIFEST @@ -767,6 +767,7 @@ lib/Getopt/Long.pm Fetch command options (GetOptions) lib/Getopt/Std.pm Fetch command options (getopt, getopts) lib/I18N/Collate.pm Routines to do strxfrm-based collation lib/I18N/LangTags.pm I18N::LangTags +lib/I18N/LangTags/List.pod list of tags for human languages lib/IPC/Open2.pm Open a two-ended pipe lib/IPC/Open3.pm Open a three-ended pipe! lib/Locale/Constants.pm Locale::Codes diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm index c8a64d3..f5db282 100644 --- a/lib/I18N/LangTags.pm +++ b/lib/I18N/LangTags.pm @@ -1,5 +1,5 @@ -# Time-stamp: "2001-05-25 07:36:55 MDT" +# Time-stamp: "2001-05-27 19:53:11 MDT" # Sean M. Burke require 5.000; @@ -17,7 +17,7 @@ require Exporter; encode_language_tag ); -$VERSION = "0.21"; +$VERSION = "0.22"; =head1 NAME @@ -569,6 +569,8 @@ language tags with their ASCII characters shifted into Plane 14. =head1 SEE ALSO +* L<I18N::LangTags::List> + * RFC 3066, C, "Tags for the Identification of Languages". (Obsoletes RFC 1766) diff --git a/lib/I18N/LangTags/List.pod b/lib/I18N/LangTags/List.pod new file mode 100644 index 0000000..9bb5e07 --- /dev/null +++ b/lib/I18N/LangTags/List.pod @@ -0,0 +1,1446 @@ +=head1 NAME + +I18N::LangTags::List -- list of tags for human languages + +=head1 SYNOPSIS + + Time-stamp: "2001-05-27 19:55:19 MDT" + [This is not a module; it is documentation] + +=head1 ABOUT LANGUAGE TAGS + +Internet language tags, as defined in RFC 3066, are a formalism +for denoting human languages. 
The two-letter ISO 639-1 language +codes are well known (as "en" for English), as are their forms +when qualified by a country code ("en-US"). Less well-known are the +arbitrary-length non-ISO codes (like "i-mingo"), and the +recently (in 2001) introduced three-letter ISO-639-2 codes. + +Remember these important facts: + +=over + +=item * + +Language tags are not locale IDs. A locale ID is written with a "_" +instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and +I<means> something different than a language tag. A language tag +denotes a language. A locale ID denotes a language I<as used in> +a particular place, in combination with non-linguistic +location-specific information such as what currency is used +there. Locales I<also> often denote character set information, +as in "en_US.ISO8859-1". + +=item * + +Language tags are not for computer languages. + +=item * + +"Dialect" is not a useful term, since there is no objective +criterion for establishing when two languages are +dialects of each other, or are separate languages. + +=item * + +Language tags are not case-sensitive. en-US, en-us, En-Us, etc., +are all the same tag, and denote the same language. + +=item * + +Not every language tag really refers to a single language. Some +language tags refer to conditions: i-default (system-message text +in English plus maybe other languages), und (undetermined +language). Others (notably lots of the three-letter codes) are +bibliographic tags that classify whole groups of languages, as +with cus "Cushitic (Other)" (i.e., a +language that has been classed as Cushitic, but which has no more +specific code) or the even less linguistically coherent +sai for "South American Indian (Other)". While useful in +bibliography, B<SUCH TAGS ARE NOT FOR GENERAL USE>. For further guidance, email me. + +=item * + +Language tags are not country codes. In fact, they are often +distinct codes, as with language tag ja for Japanese, and +ISO 3166 country code C<.jp> for Japan. 
+ +=back + +=head1 LIST OF LANGUAGES + +The first part of each item is the language tag, between +{...} and in italic characters. It +is followed by an English name for the language or language-group. +Language tags that I judge to be not for general use, are bracketed. + +This list is in alphabetical order by English name of the language. + +=over + +=item I<{ab}> : Abkhazian + +eq Abkhaz + +=item I<{ace}> : Achinese + +=item I<{ach}> : Acoli + +=item I<{ada}> : Adangme + +=item I<{aa}> : Afar + +=item I<{afh}> : Afrihili + +(Artificial) + +=item I<{af}> : Afrikaans + +=item [I<{afa}> : Afro-Asiatic (Other)] + +=item I<{aka}> : Akan + +=item I<{akk}> : Akkadian + +(Historical) + +=item I<{sq}> : Albanian + +=item I<{ale}> : Aleut + +=item [I<{alg}> : Algonquian languages] + +NOT Algonquin! + +=item [I<{tut}> : Altaic (Other)] + +=item I<{am}> : Amharic + +NOT Aramaic! + +=item I<{i-ami}> : Ami + +eq Amis. eq 'Amis. eq Pangca. + +=item [I<{apa}> : Apache languages] + +=item I<{ar}> : Arabic + +Many forms are mutually un-intelligible in spoken media. +Notable forms: +ar-ae +ar-bh +ar-dz +ar-eg +ar-iq +ar-jo +ar-kw +ar-lb +ar-ly +ar-ma +ar-om +ar-qa +ar-sa +ar-sy +ar-tn +ar-ye. + +=item I<{arc}> : Aramaic + +NOT Amharic! NOT Samaritan Aramaic! + +=item I<{arp}> : Arapaho + +=item I<{arn}> : Araucanian + +=item I<{arw}> : Arawak + +=item I<{hy}> : Armenian + +=item [I<{art}> : Artificial (Other)] + +=item I<{as}> : Assamese + +=item [I<{ath}> : Athapascan languages] + +eq Athabaskan. eq Athapaskan. eq Athabascan. 
+ +=item [I<{aus}> : Australian languages] + +=item [I<{map}> : Austronesian (Other)] + +=item I<{ava}> : Avaric + +=item I<{ae}> : Avestan + +eq Zend + +=item I<{awa}> : Awadhi + +=item I<{ay}> : Aymara + +=item I<{az}> : Azerbaijani + +eq Azeri + +=item I<{ban}> : Balinese + +=item [I<{bat}> : Baltic (Other)] + +=item I<{bal}> : Baluchi + +=item I<{bam}> : Bambara + +=item [I<{bai}> : Bamileke languages] + +=item I<{bad}> : Banda + +=item [I<{bnt}> : Bantu (Other)] + +=item I<{bas}> : Basa + +=item I<{ba}> : Bashkir + +=item I<{eu}> : Basque + +=item I<{btk}> : Batak (Indonesia) + +=item I<{bej}> : Beja + +=item I<{be}> : Belarusian + +eq Belarussian. eq Byelarussian. +eq Belorussian. eq Byelorussian. +eq White Russian. eq White Ruthenian. +NOT Ruthenian! + +=item I<{bem}> : Bemba + +=item I<{bn}> : Bengali + +=item [I<{ber}> : Berber (Other)] + +=item I<{bho}> : Bhojpuri + +=item I<{bh}> : Bihari + +=item I<{bik}> : Bikol + +=item I<{bin}> : Bini + +=item I<{bi}> : Bislama + +=item I<{bs}> : Bosnian + +=item I<{bra}> : Braj + +=item I<{br}> : Breton + +=item I<{bug}> : Buginese + +=item I<{bg}> : Bulgarian + +=item I<{i-bnn}> : Bunun + +=item I<{bua}> : Buriat + +=item I<{my}> : Burmese + +=item I<{cad}> : Caddo + +=item I<{car}> : Carib + +=item I<{ca}> : Catalan + +eq CatalE<aacute>n. eq Catalonian. + +=item [I<{cau}> : Caucasian (Other)] + +=item I<{ceb}> : Cebuano + +=item [I<{cel}> : Celtic (Other)] + +Notable forms: cel-gaulish. + +=item [I<{cai}> : Central American Indian (Other)] + +=item I<{chg}> : Chagatai + +(Historical?) + +=item [I<{cmc}> : Chamic languages] + +=item I<{ch}> : Chamorro + +=item I<{ce}> : Chechen + +=item I<{chr}> : Cherokee + +eq Tsalagi + +=item I<{chy}> : Cheyenne + +=item I<{chb}> : Chibcha + +(Historical) NOT Chibchan (which is a language family). + +=item I<{ny}> : Chichewa + +eq Nyanja. eq Chinyanja. + +=item I<{zh}> : Chinese + +Many forms are mutually un-intelligible in spoken media. 
+Notable subforms: +zh-cn (PRC Chinese), +zh-hk (Hong Kong Chinese), +zh-mo (Macau Chinese), +zh-sg (Singapore Chinese), +zh-tw (Taiwan Chinese), +zh-guoyu (Putonghua/Guoyu/Mandarin), +zh-hakka (Hakka; formerly i-hakka), +zh-min (Hokkien), +zh-min-nan (Southern Hokkien), +zh-wuu (Shanghaiese), +zh-xiang (Hunanese), +zh-yue (Cantonese). + +=item I<{chn}> : Chinook Jargon + +eq Chinook Wawa. + +=item I<{chp}> : Chipewyan + +=item I<{cho}> : Choctaw + +=item I<{cu}> : Church Slavic + +eq Old Church Slavonic. + +=item I<{chk}> : Chuukese + +eq Trukese. eq Chuuk. eq Truk. eq Ruk. + +=item I<{cv}> : Chuvash + +=item I<{cop}> : Coptic + +=item I<{kw}> : Cornish + +=item I<{co}> : Corsican + +eq Corse. + +=item I<{cre}> : Cree + +NOT Creek! + +=item I<{mus}> : Creek + +NOT Cree! + +=item [I<{cpe}> : English-based Creoles and pidgins (Other)] + +=item [I<{cpf}> : French-based Creoles and pidgins (Other)] + +=item [I<{cpp}> : Portuguese-based Creoles and pidgins (Other)] + +=item [I<{crp}> : Creoles and pidgins (Other)] + +=item I<{hr}> : Croatian + +eq Croat. + +=item [I<{cus}> : Cushitic (Other)] + +=item I<{cs}> : Czech + +=item I<{dak}> : Dakota + +eq Nakota. eq Lakota. + +=item I<{da}> : Danish + +=item I<{day}> : Dayak + +=item I<{i-default}> : Default (Fallthru) Language + +Defined in RFC 2277, this is for tagging text +(which must include English text, and might/should include text +in other appropriate languages) that is emitted in a context +where language-negotiation wasn't possible -- in SMTP mail failure +messages, for example. + +=item I<{del}> : Delaware + +=item I<{din}> : Dinka + +=item I<{div}> : Divehi + +=item I<{doi}> : Dogri + +NOT Dogrib! + +=item I<{dgr}> : Dogrib + +NOT Dogri! + +=item [I<{dra}> : Dravidian (Other)] + +=item I<{dua}> : Duala + +=item I<{nl}> : Dutch + +eq Netherlander. Notable forms: nl-nl, nl-be. 
+ +=item I<{dum}> : Middle Dutch (ca.1050-1350) + +(Historical) + +=item I<{dyu}> : Dyula + +=item I<{dz}> : Dzongkha + +=item I<{efi}> : Efik + +=item I<{egy}> : Ancient Egyptian + +(Historical) + +=item I<{eka}> : Ekajuk + +=item I<{elx}> : Elamite + +(Historical) + +=item I<{en}> : English + +Notable forms: +en-au +en-bz +en-ca +en-gb +en-ie +en-jm +en-nz +en-ph +en-tt +en-us +en-za +en-zw. + +=item I<{enm}> : Middle English (1100-1500) + +(Historical) + +=item I<{ang}> : Old English (ca.450-1100) + +eq Anglo-Saxon. (Historical) + +=item I<{eo}> : Esperanto + +(Artificial) + +=item I<{et}> : Estonian + +=item I<{ewe}> : Ewe + +=item I<{ewo}> : Ewondo + +=item I<{fan}> : Fang + +=item I<{fat}> : Fanti + +=item I<{fo}> : Faroese + +=item I<{fj}> : Fijian + +=item I<{fi}> : Finnish + +=item [I<{fiu}> : Finno-Ugrian (Other)] + +eq Finno-Ugric. NOT Ugaritic! + +=item I<{fon}> : Fon + +=item I<{fr}> : French + +Notable forms: +fr-fr +fr-be +fr-ca +fr-ch +fr-lu +fr-mc. + +=item I<{frm}> : Middle French (ca.1400-1600) + +(Historical) + +=item I<{fro}> : Old French (842-ca.1400) + +(Historical) + +=item I<{fy}> : Frisian + +=item I<{fur}> : Friulian + +=item I<{ful}> : Fulah + +=item I<{gaa}> : Ga + +=item I<{gd}> : Scots Gaelic + +NOT Scots! + +=item I<{gl}> : Gallegan + +eq Galician + +=item I<{lug}> : Ganda + +=item I<{gay}> : Gayo + +=item I<{gba}> : Gbaya + +=item I<{gez}> : Geez + +eq Ge'ez + +=item I<{ka}> : Georgian + +=item I<{de}> : German + +Notable forms: de-at +de-be +de-ch +de-de +de-li +de-lu. 
+ +=item I<{gmh}> : Middle High German (ca.1050-1500) + +(Historical) + +=item I<{goh}> : Old High German (ca.750-1050) + +(Historical) + +=item [I<{gem}> : Germanic (Other)] + +=item I<{gil}> : Gilbertese + +=item I<{gon}> : Gondi + +=item I<{gor}> : Gorontalo + +=item I<{got}> : Gothic + +(Historical) + +=item I<{grb}> : Grebo + +=item I<{grc}> : Ancient Greek (to 1453) + +(Historical) + +=item I<{el}> : Modern Greek (1453-) + +=item I<{gn}> : Guarani + +GuaranE<iacute> + +=item I<{gu}> : Gujarati + +=item I<{gwi}> : Gwich'in + +eq Gwichin + +=item I<{hai}> : Haida + +=item I<{ha}> : Hausa + +=item I<{haw}> : Hawaiian + +Hawai'ian + +=item I<{he}> : Hebrew + +(Formerly "iw".) + +=item I<{hz}> : Herero + +=item I<{hil}> : Hiligaynon + +=item I<{him}> : Himachali + +=item I<{hi}> : Hindi + +=item I<{ho}> : Hiri Motu + +=item I<{hit}> : Hittite + +(Historical) + +=item I<{hmn}> : Hmong + +=item I<{hu}> : Hungarian + +=item I<{hup}> : Hupa + +=item I<{iba}> : Iban + +=item I<{is}> : Icelandic + +=item I<{ibo}> : Igbo + +=item I<{ijo}> : Ijo + +=item I<{ilo}> : Iloko + +=item [I<{inc}> : Indic (Other)] + +=item [I<{ine}> : Indo-European (Other)] + +=item I<{id}> : Indonesian + +(Formerly "in".) + +=item I<{ia}> : Interlingua (International Auxiliary Language Association) + +(Artificial) NOT Interlingue! + +=item I<{ie}> : Interlingue + +(Artificial) NOT Interlingua! + +=item I<{iu}> : Inuktitut + +A subform of "Eskimo". + +=item I<{ik}> : Inupiaq + +A subform of "Eskimo". + +=item [I<{ira}> : Iranian (Other)] + +=item I<{ga}> : Irish + +=item I<{mga}> : Middle Irish (900-1200) + +(Historical) + +=item I<{sga}> : Old Irish (to 900) + +(Historical) + +=item [I<{iro}> : Iroquoian languages] + +=item I<{it}> : Italian + +Notable forms: it-it, it-ch + +=item I<{ja}> : Japanese + +(NOT "jp"!) 
+ +=item I<{jw}> : Javanese + +=item I<{jrb}> : Judeo-Arabic + +=item I<{jpr}> : Judeo-Persian + +=item I<{kab}> : Kabyle + +=item I<{kac}> : Kachin + +=item I<{kl}> : Kalaallisut + +eq Greenlandic "Eskimo" + +=item I<{kam}> : Kamba + +=item I<{kn}> : Kannada + +NOT Canadian! + +=item I<{kau}> : Kanuri + +=item I<{kaa}> : Kara-Kalpak + +=item I<{kar}> : Karen + +=item I<{ks}> : Kashmiri + +=item I<{kaw}> : Kawi + +=item I<{kk}> : Kazakh + +=item I<{kha}> : Khasi + +=item I<{km}> : Khmer + +eq Cambodian. eq Kampuchean. + +=item [I<{khi}> : Khoisan (Other)] + +=item I<{kho}> : Khotanese + +=item I<{ki}> : Kikuyu + +eq Gikuyu. + +=item I<{kmb}> : Kimbundu + +=item I<{rw}> : Kinyarwanda + +=item I<{ky}> : Kirghiz + +=item I<{i-klingon}> : Klingon + +=item I<{kv}> : Komi + +=item I<{kon}> : Kongo + +=item I<{kok}> : Konkani + +=item I<{ko}> : Korean + +=item I<{kos}> : Kosraean + +=item I<{kpe}> : Kpelle + +=item I<{kro}> : Kru + +=item I<{kj}> : Kuanyama + +=item I<{kum}> : Kumyk + +=item I<{ku}> : Kurdish + +=item I<{kru}> : Kurukh + +=item I<{kut}> : Kutenai + +=item I<{lad}> : Ladino + +eq Judeo-Spanish. NOT Ladin (a minority language in Italy). + +=item I<{lah}> : Lahnda + +NOT Lamba! + +=item I<{lam}> : Lamba + +NOT Lahnda! + +=item I<{lo}> : Lao + +=item I<{la}> : Latin + +(Historical) NOT Ladin! NOT Ladino! + +=item I<{lv}> : Latvian + +eq Lettish. + +=item I<{lb}> : Letzeburgesch + +eq Luxemburgian, eq Luxemburger. (Formerly i-lux.) + +=item I<{lez}> : Lezghian + +=item I<{ln}> : Lingala + +=item I<{lt}> : Lithuanian + +=item I<{nds}> : Low German + +eq Low Saxon. eq Low German. eq Low Saxon. + +=item I<{loz}> : Lozi + +=item I<{lub}> : Luba-Katanga + +=item I<{lua}> : Luba-Lulua + +=item I<{lui}> : Luiseno + +eq LuiseE<ntilde>o. + +=item I<{lun}> : Lunda + +=item I<{luo}> : Luo (Kenya and Tanzania) + +=item I<{lus}> : Lushai + +=item I<{mk}> : Macedonian + +eq the modern Slavic language spoken in what was Yugoslavia. +NOT the form of Greek spoken in Greek Macedonia! 
+ +=item I<{mad}> : Madurese + +=item I<{mag}> : Magahi + +=item I<{mai}> : Maithili + +=item I<{mak}> : Makasar + +=item I<{mg}> : Malagasy + +=item I<{ms}> : Malay + +NOT Malayalam! + +=item I<{ml}> : Malayalam + +NOT Malay! + +=item I<{mt}> : Maltese + +=item I<{mnc}> : Manchu + +=item I<{mdr}> : Mandar + +NOT Mandarin! + +=item I<{man}> : Mandingo + +=item I<{mni}> : Manipuri + +=item [I<{mno}> : Manobo languages] + +=item I<{gv}> : Manx + +=item I<{mi}> : Maori + +NOT Mari! + +=item I<{mr}> : Marathi + +=item I<{chm}> : Mari + +NOT Maori! + +=item I<{mh}> : Marshall + +eq Marshallese. + +=item I<{mwr}> : Marwari + +=item I<{mas}> : Masai + +=item [I<{myn}> : Mayan languages] + +=item I<{men}> : Mende + +=item I<{mic}> : Micmac + +=item I<{min}> : Minangkabau + +=item I<{i-mingo}> : Mingo + +eq the Iroquoian language West Virginia Seneca. NOT New York Seneca! + +=item [I<{mis}> : Miscellaneous languages] + +Don't use this. + +=item I<{moh}> : Mohawk + +=item I<{mo}> : Moldavian + +eq Moldovan. + +=item [I<{mkh}> : Mon-Khmer (Other)] + +=item I<{lol}> : Mongo + +=item I<{mn}> : Mongolian + +eq Mongol. + +=item I<{mos}> : Mossi + +=item [I<{mul}> : Multiple languages] + +Not for normal use. + +=item [I<{mun}> : Munda languages] + +=item I<{nah}> : Nahuatl + +=item I<{na}> : Nauru + +=item I<{nv}> : Navajo + +eq Navaho. (Formerly i-navajo.) + +=item I<{nd}> : North Ndebele + +=item I<{nr}> : South Ndebele + +=item I<{ng}> : Ndonga + +=item I<{ne}> : Nepali + +eq Nepalese. Notable forms: ne-np ne-in. + +=item I<{new}> : Newari + +=item I<{nia}> : Nias + +=item [I<{nic}> : Niger-Kordofanian (Other)] + +=item [I<{ssa}> : Nilo-Saharan (Other)] + +=item I<{niu}> : Niuean + +=item I<{non}> : Old Norse + +(Historical) + +=item [I<{nai}> : North American Indian] + +Do not use this. + +=item I<{se}> : Northern Sami + +eq Lappish. eq Lapp. eq (Northern) Saami. 
+ +=item I<{no}> : Norwegian + +Note the two following forms: + +=item I<{nb}> : Norwegian BokmE<aring>l + +(A form of Norwegian.) (Formerly no-bok.) + +=item I<{nn}> : Norwegian Nynorsk + +(A form of Norwegian.) (Formerly no-nyn.) + +=item [I<{nub}> : Nubian languages] + +=item I<{nym}> : Nyamwezi + +=item I<{nyn}> : Nyankole + +=item I<{nyo}> : Nyoro + +=item I<{nzi}> : Nzima + +=item I<{oc}> : Occitan (post 1500) + +eq ProvenE<ccedil>al, eq Provencal + +=item I<{oji}> : Ojibwa + +eq Ojibwe. + +=item I<{or}> : Oriya + +=item I<{om}> : Oromo + +=item I<{osa}> : Osage + +=item I<{os}> : Ossetian; Ossetic + +=item [I<{oto}> : Otomian languages] + +Group of languages collectively called "OtomE<iacute>". + +=item I<{pal}> : Pahlavi + +eq Pahlevi + +=item I<{i-pwn}> : Paiwan + +eq Pariwan + +=item I<{pau}> : Palauan + +=item I<{pi}> : Pali + +(Historical?) + +=item I<{pam}> : Pampanga + +=item I<{pag}> : Pangasinan + +=item I<{pa}> : Panjabi + +eq Punjabi + +=item I<{pap}> : Papiamento + +eq Papiamentu. + +=item [I<{paa}> : Papuan (Other)] + +=item I<{fa}> : Persian + +eq Farsi. + +=item I<{peo}> : Old Persian (ca.600-400 B.C.) + +=item [I<{phi}> : Philippine (Other)] + +=item I<{phn}> : Phoenician + +(Historical) + +=item I<{pon}> : Pohnpeian + +=item I<{pl}> : Polish + +=item I<{pt}> : Portuguese + +eq Portugese. Notable forms: pt-pt pt-br. + +=item [I<{pra}> : Prakrit languages] + +=item I<{pro}> : Old ProvenE<ccedil>al (to 1500) + +eq Old Provencal. (Historical.) + +=item I<{ps}> : Pushto + +eq Pashto. eq Pushtu. + +=item I<{qu}> : Quechua + +eq Quecha. + +=item I<{rm}> : Raeto-Romance + +eq Romansh. + +=item I<{raj}> : Rajasthani + +=item I<{rap}> : Rapanui + +=item I<{rar}> : Rarotongan + +=item [I<{qaa}>-I<{qtz}> : Reserved for local use.] + +=item [I<{roa}> : Romance (Other)] + +NOT Romanian! NOT Romany! NOT Romansh! + +=item I<{ro}> : Romanian + +eq Rumanian. NOT Romany! + +=item I<{rom}> : Romany + +eq Rom. NOT Romanian! + +=item I<{rn}> : Rundi + +=item I<{ru}> : Russian + +NOT White Russian! 
NOT Rusyn! + +=item [I<{sal}> : Salishan languages] + +Large language group. + +=item I<{sam}> : Samaritan Aramaic + +NOT Aramaic! + +=item [I<{smi}> : Sami languages (Other)] + +=item I<{sm}> : Samoan + +=item I<{sad}> : Sandawe + +=item I<{sg}> : Sango + +=item I<{sa}> : Sanskrit + +(Historical) + +=item I<{sat}> : Santali + +=item I<{sc}> : Sardinian + +eq Sard. + +=item I<{sas}> : Sasak + +=item I<{sco}> : Scots + +NOT Scots Gaelic! + +=item I<{sel}> : Selkup + +=item [I<{sem}> : Semitic (Other)] + +=item I<{sr}> : Serbian + +eq Serb. NOT Sorbian. + +=item I<{srr}> : Serer + +=item I<{shn}> : Shan + +=item I<{sn}> : Shona + +=item I<{sid}> : Sidamo + +=item I<{sgn-...}> : Sign Languages + +Always use with a subtag. Notable forms: sgn-gb sgn-ie sgn-ni sgn-us. + +=item I<{bla}> : Siksika + +eq Blackfoot. eq Pikanii. + +=item I<{sd}> : Sindhi + +=item I<{si}> : Sinhalese + +eq Sinhala. + +=item [I<{sit}> : Sino-Tibetan (Other)] + +=item [I<{sio}> : Siouan languages] + +=item I<{den}> : Slave (Athapascan) + +("Slavey" is a subform.) + +=item [I<{sla}> : Slavic (Other)] + +=item I<{sk}> : Slovak + +eq Slovakian. + +=item I<{sl}> : Slovenian + +eq Slovene. + +=item I<{sog}> : Sogdian + +=item I<{so}> : Somali + +=item I<{son}> : Songhai + +=item I<{snk}> : Soninke + +=item I<{wen}> : Sorbian languages + +eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! + +=item I<{nso}> : Northern Sotho + +=item I<{st}> : Southern Sotho + +eq Sutu. eq Sesotho. + +=item [I<{sai}> : South American Indian (Other)] + +=item I<{es}> : Spanish + +Notable forms: +es-ar es-bo es-cl es-co es-do es-ec es-es es-gt +es-hn es-mx es-pa es-pe es-pr es-py es-sv es-us +es-uy es-ve + +=item I<{suk}> : Sukuma + +=item I<{sux}> : Sumerian + +(Historical) + +=item I<{su}> : Sundanese + +=item I<{sus}> : Susu + +=item I<{sw}> : Swahili + +eq Kiswahili + +=item I<{ss}> : Swati + +=item I<{sv}> : Swedish + +Notable forms: sv-se sv-fi. 
+ +=item I<{syr}> : Syriac + +=item I<{tl}> : Tagalog + +=item I<{ty}> : Tahitian + +=item [I<{tai}> : Tai (Other)] + +NOT Thai! + +=item I<{tg}> : Tajik + +=item I<{tmh}> : Tamashek + +=item I<{ta}> : Tamil + +=item I<{i-tao}> : Tao + +eq Yami. + +=item I<{tt}> : Tatar + +=item I<{i-tay}> : Tayal + +eq Atayal. eq Atayan. + +=item I<{te}> : Telugu + +=item I<{ter}> : Tereno + +=item I<{tet}> : Tetum + +=item I<{th}> : Thai + +NOT Tai! + +=item I<{bo}> : Tibetan + +=item I<{tig}> : Tigre + +=item I<{ti}> : Tigrinya + +=item I<{tem}> : Timne + +eq Themne. eq Timene. + +=item I<{tiv}> : Tiv + +=item I<{tli}> : Tlingit + +=item I<{tpi}> : Tok Pisin + +=item I<{tkl}> : Tokelau + +=item I<{tog}> : Tonga (Nyasa) + +NOT Tsonga! + +=item I<{to}> : Tonga (Tonga Islands) + +(Pronounced "Tong-a", not "Tong-ga") + +NOT Tsonga! + +=item I<{tsi}> : Tsimshian + +eq Sm'algyax + +=item I<{ts}> : Tsonga + +NOT Tonga! + +=item I<{i-tsu}> : Tsou + +=item I<{tn}> : Tswana + +Same as Setswana. + +=item I<{tum}> : Tumbuka + +=item I<{tr}> : Turkish + +(Typically in Roman script) + +=item I<{ota}> : Ottoman Turkish (1500-1928) + +(Typically in Arabic script) (Historical) + +=item I<{tk}> : Turkmen + +eq Turkmeni. + +=item I<{tvl}> : Tuvalu + +=item I<{tyv}> : Tuvinian + +eq Tuvan. eq Tuvin. + +=item I<{tw}> : Twi + +=item I<{uga}> : Ugaritic + +NOT Ugric! + +=item I<{ug}> : Uighur + +=item I<{uk}> : Ukrainian + +=item I<{umb}> : Umbundu + +=item I<{und}> : Undetermined + +Not a tag for normal use. + +=item I<{ur}> : Urdu + +=item I<{uz}> : Uzbek + +eq E<Ouml>zbek + +=item I<{vai}> : Vai + +=item I<{ven}> : Venda + +NOT Wendish! NOT Wend! NOT Avestan! + +=item I<{vi}> : Vietnamese + +eq Viet. + +=item I<{vo}> : VolapE<uuml>k + +eq Volapuk. (Artificial) + +=item I<{vot}> : Votic + +eq Votian. eq Vod. + +=item [I<{wak}> : Wakashan languages] + +=item I<{wal}> : Walamo + +eq Wolaytta. 
+ +=item I<{war}> : Waray + +Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), +not the smaller Philippine language Waray Sorsogon, nor the extinct +Australian language Waray. + +=item I<{was}> : Washo + +eq Washoe + +=item I<{cy}> : Welsh + +=item I<{wo}> : Wolof + +=item I<{x-...}> : Unregistered (Private Use) + +"x-" is a prefix for language tags that are not registered with ISO +or IANA. Example, x-double-dutch + +=item I<{xh}> : Xhosa + +=item I<{sah}> : Yakut + +=item I<{yao}> : Yao + +(The Yao in Malawi?) + +=item I<{yap}> : Yapese + +eq Yap + +=item I<{yi}> : Yiddish + +Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script. + +=item I<{yo}> : Yoruba + +=item [I<{ypk}> : Yupik languages] + +Several "Eskimo" languages. + +=item I<{znd}> : Zande + +=item [I<{zap}> : Zapotec] + +(A group of languages.) + +=item I<{zen}> : Zenaga + +NOT Zend. + +=item I<{za}> : Zhuang + +=item I<{zu}> : Zulu + +=item I<{zun}> : Zuni + +eq ZuE<ntilde>i + +=back + +=head1 SEE ALSO + +L<I18N::LangTags> + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2001 Sean M. Burke. All rights reserved. + +You can redistribute and/or +modify this document under the same terms as Perl itself. + +This document is provided in the hope that it will be +useful, but without any warranty; +without even the implied warranty of accuracy, authoritativeness, +completeness, merchantability, or fitness for a particular purpose. + +Email any corrections or questions to me. + +=head1 AUTHOR + +Sean M. Burke, sburkeE<64>cpan.org + +=cut + + +# To generate a list of just the two and three-letter codes: + +#!/usr/local/bin/perl -w + +require 5; # Time-stamp: "2001-03-13 21:53:39 MST" + # Sean M. 
Burke, sburke@cpan.org + # This program generates the language_codes.txt file: it fetches the HTML code table at $url, turns each five-cell table row into one "code<TAB>English name" line, and prints those lines sorted by lowercased English name +use strict; +use LWP::Simple; +use HTML::TreeBuilder 3.10; +my $root = HTML::TreeBuilder->new(); +my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; +$root->parse(get($url) || die "Can't get $url"); +$root->eof(); + +my @codes; + +foreach my $tr ($root->find_by_tag_name('tr')) { + my @f = map $_->as_text(), $tr->content_list(); + #print map("<$_> ", @f), "\n"; + next unless @f == 5; + pop @f; # drop the last cell (the French name), leaving four cells + next if $f[-1] eq 'Language Name (English)'; # it's a header line, not a language + my $xx = splice(@f, 2,1); # remove the third cell (the two-letter code) from @f and keep it in $xx + $f[-1] =~ s/^\s+//; + $f[-1] =~ s/\s+$//; + if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it: list the language under that code + push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; + } else { # no two-letter code: list the language under its three-letter code + if($f[0] eq $f[1]) { + push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; + } else { # shouldn't happen: the first two cells disagree, so emit the whole row flagged with "!!!!!!!!!!" + push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; + } + } +} + +print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; +print "[ based on $url\n at ", scalar(localtime), "]\n", + "[Note: doesn't include IANA-registered codes.]\n"; +exit; +__END__ +