Turn on UTF-8 flag only if the $str is valid utf8
[p5sagit/p5-mst-13.2.git] / lib / I18N / LangTags / List.pm
CommitLineData
21aeefd5 1
require 5;
package I18N::LangTags::List;
# Time-stamp: "2002-02-02 20:13:58 MST"
use strict;
# Package globals:
#   %Name    - map from language tag to English name, filled in at load
#              time from the table in this file's DATA section
#   $Debug   - when true, the loader and name() print trace output
#   $VERSION - module version
use vars qw(%Name $Debug $VERSION);
$VERSION = '0.25';
# POD at the end.
9
#----------------------------------------------------------------------
{
  # Read the tag-to-name table out of our own POD (the DATA section of
  # this file) and store it in %Name.  Runs once, when the module loads.

  my $seeking = 1;  # true until the "=for woohah" start marker is seen
  my $count   = 0;  # number of tag/name pairs stored
  my($tag, $name);

  # Read into a lexical instead of $_: while(<FH>) does not localise $_,
  # so using it here would overwrite the caller's $_ at load time.
  while(defined(my $line = <I18N::LangTags::List::DATA>)) {
    if($seeking) {
      $seeking = 0 if $line =~ m/=for woohah/;
      next;
    }

    # The table is closed by an "=for woohah END" marker; nothing after
    # it (trailing POD, the code-list generator script) is table data.
    last if $line =~ m/=for woohah END/;

    # A table entry looks like "{tag} : Name" or "{tag} Name"; the name
    # runs up to the first square bracket or the end of the line.
    next unless ($tag, $name) =
      $line =~ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;

    # Drop trailing whitespace and any trailing ";" or "." punctuation.
    $name =~ s/\s*[;\.]*\s*$//g;
    next unless $name;

    ++$count;
    print "<$tag> <$name>\n" if $Debug;
    $Name{$tag} = $name;
  }
  die "No tags read??" unless $count;
}
#----------------------------------------------------------------------
32
# name( $langtag )
#
# Returns an English name for the given language tag, looked up in %Name.
# The tag is lowercased and stripped of surrounding whitespace first.
# If the whole tag is not in the table, trailing "-subtag" parts are
# shaved off one at a time until a known tag is found; the shaved-off
# parts are then reported as:   Name (Subform "parts")
# Returns nothing (a bare return) if the argument is false or if no
# name can be found.
sub name {
  my $tag = lc($_[0] || return);
  $tag =~ s/^\s+//s;
  $tag =~ s/\s+$//s;

  # "x-foo" and "i-foo" are tried interchangeably: whichever prefix the
  # caller used, the same tag with the other prefix is looked up too.
  my $alt = '';
  if($tag =~ m/^x-(.+)/) {
    $alt = "i-$1";
  } elsif($tag =~ m/^i-(.+)/) {
    $alt = "x-$1";
  }

  my $subform = '';
  my $name = '';
  print "Input: {$tag}\n" if $Debug;
  while(length $tag) {
    # Keep $name defined (empty string) on a miss, so that the debug
    # prints below never interpolate undef.
    $name = $Name{$tag} || $Name{$alt} || '';
    last if $name;

    if($tag =~ s/(-[a-z0-9]+)$//s) {
      print "Shaving off: $1 leaving $tag\n" if $Debug;
      $subform = "$1$subform";
      # and loop around again

      # Shave the alternate form in step.  This must come after the
      # uses of $1 above, since a successful match here resets $1.
      $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n";
    } else {
      # we're trying to pull a subform off a primary tag. TILT!
      print "Aborting on: {$name}{$subform}\n" if $Debug;
      last;
    }
  }
  print "Output: {$name}{$subform}\n" if $Debug;

  return unless $name;           # Failure
  return $name unless $subform;  # Exact match
  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return "$name (Subform \"$subform\")";
}
73
741;
75
76__DATA__
77
78=head1 NAME
79
80I18N::LangTags::List -- tags and names for human languages
81
82=head1 SYNOPSIS
83
84 use I18N::LangTags::List;
85 print "Parlez-vous... ", join(', ',
86 I18N::LangTags::List::name('elx') || 'unknown_language',
87 I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
88 I18N::LangTags::List::name('en') || 'unknown_language',
89 I18N::LangTags::List::name('en-CA') || 'unknown_language',
90 ), "?\n";
91
92prints:
93
94 Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
95
96=head1 DESCRIPTION
97
98This module provides a function
99C<I18N::LangTags::List::name( I<langtag> ) > that takes
100a language tag (see L<I18N::LangTags|I18N::LangTags>)
101and returns the best attempt at an English name for it, or
102undef if it can't make sense of the tag.
103
104The function I18N::LangTags::List::name(...) is not exported.
105
The map of tags-to-names that it uses is accessible as
107%I18N::LangTags::List::Name, and it's the same as the list
108that follows in this documentation, which should be useful
109to you even if you don't use this module.
110
111=head1 ABOUT LANGUAGE TAGS
112
113Internet language tags, as defined in RFC 3066, are a formalism
114for denoting human languages. The two-letter ISO 639-1 language
115codes are well known (as "en" for English), as are their forms
116when qualified by a country code ("en-US"). Less well-known are the
117arbitrary-length non-ISO codes (like "i-mingo"), and the
118recently (in 2001) introduced three-letter ISO-639-2 codes.
119
483dd220 120Remember these important facts:
21aeefd5 121
122=over
123
124=item *
125
126Language tags are not locale IDs. A locale ID is written with a "_"
127instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
128I<means> something different than a language tag. A language tag
129denotes a language. A locale ID denotes a language I<as used in>
130a particular place, in combination with non-linguistic
483dd220 131location-specific information such as what currency is used
21aeefd5 132there. Locales I<also> often denote character set information,
133as in "en_US.ISO8859-1".
134
135=item *
136
137Language tags are not for computer languages.
138
139=item *
140
141"Dialect" is not a useful term, since there is no objective
483dd220 142criterion for establishing when two language-forms are
dialects of each other, or are separate languages.
144
145=item *
146
147Language tags are not case-sensitive. en-US, en-us, En-Us, etc.,
148are all the same tag, and denote the same language.
149
150=item *
151
152Not every language tag really refers to a single language. Some
153language tags refer to conditions: i-default (system-message text
154in English plus maybe other languages), und (undetermined
155language). Others (notably lots of the three-letter codes) are
156bibliographic tags that classify whole groups of languages, as
157with cus "Cushitic (Other)" (i.e., a
language that has been classed as Cushitic, but which has no more
159specific code) or the even less linguistically coherent
483dd220 160sai for "South American Indian (Other)". Though useful in
21aeefd5 161bibliography, B<SUCH TAGS ARE NOT
162FOR GENERAL USE>. For further guidance, email me.
163
164=item *
165
166Language tags are not country codes. In fact, they are often
167distinct codes, as with language tag ja for Japanese, and
168ISO 3166 country code C<.jp> for Japan.
169
170=back
171
172=head1 LIST OF LANGUAGES
173
174The first part of each item is the language tag, between
175{...}. It
176is followed by an English name for the language or language-group.
177Language tags that I judge to be not for general use, are bracketed.
178
179This list is in alphabetical order by English name of the language.
180
181=for reminder
182 The name in the =item line MUST NOT have E<...>'s in it!!
183
184=for woohah START
185
186=over
187
188=item {ab} : Abkhazian
189
190eq Abkhaz
191
192=item {ace} : Achinese
193
194=item {ach} : Acoli
195
196=item {ada} : Adangme
197
198=item {aa} : Afar
199
200=item {afh} : Afrihili
201
202(Artificial)
203
204=item {af} : Afrikaans
205
206=item [{afa} : Afro-Asiatic (Other)]
207
208=item {aka} : Akan
209
210=item {akk} : Akkadian
211
212(Historical)
213
214=item {sq} : Albanian
215
216=item {ale} : Aleut
217
218=item [{alg} : Algonquian languages]
219
220NOT Algonquin!
221
222=item [{tut} : Altaic (Other)]
223
224=item {am} : Amharic
225
226NOT Aramaic!
227
228=item {i-ami} : Ami
229
230eq Amis. eq 'Amis. eq Pangca.
231
232=item [{apa} : Apache languages]
233
234=item {ar} : Arabic
235
236Many forms are mutually un-intelligible in spoken media.
237Notable forms:
238{ar-ae} UAE Arabic;
239{ar-bh} Bahrain Arabic;
240{ar-dz} Algerian Arabic;
241{ar-eg} Egyptian Arabic;
242{ar-iq} Iraqi Arabic;
243{ar-jo} Jordanian Arabic;
244{ar-kw} Kuwait Arabic;
245{ar-lb} Lebanese Arabic;
246{ar-ly} Libyan Arabic;
247{ar-ma} Moroccan Arabic;
248{ar-om} Omani Arabic;
249{ar-qa} Qatari Arabic;
{ar-sa} Saudi Arabic;
251{ar-sy} Syrian Arabic;
252{ar-tn} Tunisian Arabic;
253{ar-ye} Yemen Arabic.
254
255=item {arc} : Aramaic
256
257NOT Amharic! NOT Samaritan Aramaic!
258
259=item {arp} : Arapaho
260
261=item {arn} : Araucanian
262
263=item {arw} : Arawak
264
265=item {hy} : Armenian
266
267=item [{art} : Artificial (Other)]
268
269=item {as} : Assamese
270
271=item [{ath} : Athapascan languages]
272
273eq Athabaskan. eq Athapaskan. eq Athabascan.
274
275=item [{aus} : Australian languages]
276
277=item [{map} : Austronesian (Other)]
278
279=item {ava} : Avaric
280
281=item {ae} : Avestan
282
283eq Zend
284
285=item {awa} : Awadhi
286
287=item {ay} : Aymara
288
289=item {az} : Azerbaijani
290
291eq Azeri
292
293=item {ban} : Balinese
294
295=item [{bat} : Baltic (Other)]
296
297=item {bal} : Baluchi
298
299=item {bam} : Bambara
300
301=item [{bai} : Bamileke languages]
302
303=item {bad} : Banda
304
305=item [{bnt} : Bantu (Other)]
306
307=item {bas} : Basa
308
309=item {ba} : Bashkir
310
311=item {eu} : Basque
312
313=item {btk} : Batak (Indonesia)
314
315=item {bej} : Beja
316
317=item {be} : Belarusian
318
319eq Belarussian. eq Byelarussian.
320eq Belorussian. eq Byelorussian.
321eq White Russian. eq White Ruthenian.
322NOT Ruthenian!
323
324=item {bem} : Bemba
325
326=item {bn} : Bengali
327
328eq Bangla.
329
330=item [{ber} : Berber (Other)]
331
332=item {bho} : Bhojpuri
333
334=item {bh} : Bihari
335
336=item {bik} : Bikol
337
338=item {bin} : Bini
339
340=item {bi} : Bislama
341
342eq Bichelamar.
343
344=item {bs} : Bosnian
345
346=item {bra} : Braj
347
348=item {br} : Breton
349
350=item {bug} : Buginese
351
352=item {bg} : Bulgarian
353
354=item {i-bnn} : Bunun
355
356=item {bua} : Buriat
357
358=item {my} : Burmese
359
360=item {cad} : Caddo
361
362=item {car} : Carib
363
364=item {ca} : Catalan
365
366eq CatalE<aacute>n. eq Catalonian.
367
368=item [{cau} : Caucasian (Other)]
369
370=item {ceb} : Cebuano
371
372=item [{cel} : Celtic (Other)]
373
374Notable forms:
375{cel-gaulish} Gaulish (Historical)
376
377=item [{cai} : Central American Indian (Other)]
378
379=item {chg} : Chagatai
380
381(Historical?)
382
383=item [{cmc} : Chamic languages]
384
385=item {ch} : Chamorro
386
387=item {ce} : Chechen
388
389=item {chr} : Cherokee
390
391eq Tsalagi
392
393=item {chy} : Cheyenne
394
395=item {chb} : Chibcha
396
397(Historical) NOT Chibchan (which is a language family).
398
399=item {ny} : Chichewa
400
401eq Nyanja. eq Chinyanja.
402
403=item {zh} : Chinese
404
405Many forms are mutually un-intelligible in spoken media.
406Notable subforms:
407{zh-cn} PRC Chinese;
408{zh-hk} Hong Kong Chinese;
409{zh-mo} Macau Chinese;
410{zh-sg} Singapore Chinese;
411{zh-tw} Taiwan Chinese;
412{zh-guoyu} Mandarin [Putonghua/Guoyu];
413{zh-hakka} Hakka [formerly i-hakka];
414{zh-min} Hokkien;
415{zh-min-nan} Southern Hokkien;
416{zh-wuu} Shanghaiese;
417{zh-xiang} Hunanese;
418{zh-gan} Gan;
419{zh-yue} Cantonese.
420
421=for etc
422{i-hakka} Hakka (old tag)
423
424=item {chn} : Chinook Jargon
425
426eq Chinook Wawa.
427
428=item {chp} : Chipewyan
429
430=item {cho} : Choctaw
431
432=item {cu} : Church Slavic
433
434eq Old Church Slavonic.
435
436=item {chk} : Chuukese
437
438eq Trukese. eq Chuuk. eq Truk. eq Ruk.
439
440=item {cv} : Chuvash
441
442=item {cop} : Coptic
443
444=item {kw} : Cornish
445
446=item {co} : Corsican
447
448eq Corse.
449
450=item {cre} : Cree
451
452NOT Creek!
453
454=item {mus} : Creek
455
456NOT Cree!
457
458=item [{cpe} : English-based Creoles and pidgins (Other)]
459
460=item [{cpf} : French-based Creoles and pidgins (Other)]
461
462=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
463
464=item [{crp} : Creoles and pidgins (Other)]
465
466=item {hr} : Croatian
467
468eq Croat.
469
470=item [{cus} : Cushitic (Other)]
471
472=item {cs} : Czech
473
474=item {dak} : Dakota
475
eq Nakota. eq Lakota.
477
478=item {da} : Danish
479
480=item {day} : Dayak
481
482=item {i-default} : Default (Fallthru) Language
483
484Defined in RFC 2277, this is for tagging text
485(which must include English text, and might/should include text
486in other appropriate languages) that is emitted in a context
487where language-negotiation wasn't possible -- in SMTP mail failure
488messages, for example.
489
490=item {del} : Delaware
491
492=item {din} : Dinka
493
494=item {div} : Divehi
495
496=item {doi} : Dogri
497
498NOT Dogrib!
499
500=item {dgr} : Dogrib
501
502NOT Dogri!
503
504=item [{dra} : Dravidian (Other)]
505
506=item {dua} : Duala
507
508=item {nl} : Dutch
509
510eq Netherlander. Notable forms:
511{nl-nl} Netherlands Dutch;
512{nl-be} Belgian Dutch.
513
514=item {dum} : Middle Dutch (ca.1050-1350)
515
516(Historical)
517
518=item {dyu} : Dyula
519
520=item {dz} : Dzongkha
521
522=item {efi} : Efik
523
524=item {egy} : Ancient Egyptian
525
526(Historical)
527
528=item {eka} : Ekajuk
529
530=item {elx} : Elamite
531
532(Historical)
533
534=item {en} : English
535
536Notable forms:
537{en-au} Australian English;
538{en-bz} Belize English;
539{en-ca} Canadian English;
540{en-gb} UK English;
541{en-ie} Irish English;
542{en-jm} Jamaican English;
543{en-nz} New Zealand English;
544{en-ph} Philippine English;
545{en-tt} Trinidad English;
546{en-us} US English;
547{en-za} South African English;
548{en-zw} Zimbabwe English.
549
=item {enm} : Middle English (1100-1500)
551
552(Historical)
553
554=item {ang} : Old English (ca.450-1100)
555
556eq Anglo-Saxon. (Historical)
557
558=item {eo} : Esperanto
559
560(Artificial)
561
562=item {et} : Estonian
563
564=item {ewe} : Ewe
565
566=item {ewo} : Ewondo
567
568=item {fan} : Fang
569
570=item {fat} : Fanti
571
572=item {fo} : Faroese
573
574=item {fj} : Fijian
575
576=item {fi} : Finnish
577
578=item [{fiu} : Finno-Ugrian (Other)]
579
580eq Finno-Ugric. NOT Ugaritic!
581
582=item {fon} : Fon
583
584=item {fr} : French
585
586Notable forms:
587{fr-fr} France French;
588{fr-be} Belgian French;
589{fr-ca} Canadian French;
590{fr-ch} Swiss French;
591{fr-lu} Luxembourg French;
592{fr-mc} Monaco French.
593
594=item {frm} : Middle French (ca.1400-1600)
595
596(Historical)
597
598=item {fro} : Old French (842-ca.1400)
599
600(Historical)
601
602=item {fy} : Frisian
603
604=item {fur} : Friulian
605
606=item {ful} : Fulah
607
608=item {gaa} : Ga
609
610=item {gd} : Scots Gaelic
611
612NOT Scots!
613
614=item {gl} : Gallegan
615
616eq Galician
617
618=item {lug} : Ganda
619
620=item {gay} : Gayo
621
622=item {gba} : Gbaya
623
624=item {gez} : Geez
625
626eq Ge'ez
627
628=item {ka} : Georgian
629
630=item {de} : German
631
632Notable forms:
633{de-at} Austrian German;
634{de-be} Belgian German;
635{de-ch} Swiss German;
636{de-de} Germany German;
637{de-li} Liechtenstein German;
638{de-lu} Luxembourg German.
639
640=item {gmh} : Middle High German (ca.1050-1500)
641
642(Historical)
643
644=item {goh} : Old High German (ca.750-1050)
645
646(Historical)
647
648=item [{gem} : Germanic (Other)]
649
650=item {gil} : Gilbertese
651
652=item {gon} : Gondi
653
654=item {gor} : Gorontalo
655
656=item {got} : Gothic
657
658(Historical)
659
660=item {grb} : Grebo
661
4cf5bee0 662=item {grc} : Ancient Greek
21aeefd5 663
4cf5bee0 664(Historical) (Until 15th century or so.)
665
666=item {el} : Modern Greek
21aeefd5 667
4cf5bee0 668(Since 15th century or so.)
21aeefd5 669
670=item {gn} : Guarani
671
672GuaranE<iacute>
673
674=item {gu} : Gujarati
675
676=item {gwi} : Gwich'in
677
678eq Gwichin
679
680=item {hai} : Haida
681
682=item {ha} : Hausa
683
684=item {haw} : Hawaiian
685
686Hawai'ian
687
688=item {he} : Hebrew
689
690(Formerly "iw".)
691
692=for etc
693{iw} Hebrew (old tag)
694
695=item {hz} : Herero
696
697=item {hil} : Hiligaynon
698
699=item {him} : Himachali
700
701=item {hi} : Hindi
702
703=item {ho} : Hiri Motu
704
705=item {hit} : Hittite
706
707(Historical)
708
709=item {hmn} : Hmong
710
711=item {hu} : Hungarian
712
713=item {hup} : Hupa
714
715=item {iba} : Iban
716
717=item {is} : Icelandic
718
719=item {ibo} : Igbo
720
721=item {ijo} : Ijo
722
723=item {ilo} : Iloko
724
725=item [{inc} : Indic (Other)]
726
727=item [{ine} : Indo-European (Other)]
728
729=item {id} : Indonesian
730
731(Formerly "in".)
732
733=for etc
734{in} Indonesian (old tag)
735
736=item {ia} : Interlingua (International Auxiliary Language Association)
737
738(Artificial) NOT Interlingue!
739
740=item {ie} : Interlingue
741
742(Artificial) NOT Interlingua!
743
744=item {iu} : Inuktitut
745
746A subform of "Eskimo".
747
748=item {ik} : Inupiaq
749
750A subform of "Eskimo".
751
752=item [{ira} : Iranian (Other)]
753
754=item {ga} : Irish
755
756=item {mga} : Middle Irish (900-1200)
757
758(Historical)
759
760=item {sga} : Old Irish (to 900)
761
762(Historical)
763
764=item [{iro} : Iroquoian languages]
765
766=item {it} : Italian
767
768Notable forms:
769{it-it} Italy Italian;
770{it-ch} Swiss Italian.
771
772=item {ja} : Japanese
773
774(NOT "jp"!)
775
776=item {jw} : Javanese
777
778=item {jrb} : Judeo-Arabic
779
780=item {jpr} : Judeo-Persian
781
782=item {kab} : Kabyle
783
784=item {kac} : Kachin
785
786=item {kl} : Kalaallisut
787
788eq Greenlandic "Eskimo"
789
790=item {kam} : Kamba
791
792=item {kn} : Kannada
793
794eq Kanarese. NOT Canadian!
795
796=item {kau} : Kanuri
797
798=item {kaa} : Kara-Kalpak
799
800=item {kar} : Karen
801
802=item {ks} : Kashmiri
803
804=item {kaw} : Kawi
805
806=item {kk} : Kazakh
807
808=item {kha} : Khasi
809
810=item {km} : Khmer
811
812eq Cambodian. eq Kampuchean.
813
814=item [{khi} : Khoisan (Other)]
815
816=item {kho} : Khotanese
817
818=item {ki} : Kikuyu
819
820eq Gikuyu.
821
822=item {kmb} : Kimbundu
823
824=item {rw} : Kinyarwanda
825
826=item {ky} : Kirghiz
827
828=item {i-klingon} : Klingon
829
830=item {kv} : Komi
831
832=item {kon} : Kongo
833
834=item {kok} : Konkani
835
836=item {ko} : Korean
837
838=item {kos} : Kosraean
839
840=item {kpe} : Kpelle
841
842=item {kro} : Kru
843
844=item {kj} : Kuanyama
845
846=item {kum} : Kumyk
847
848=item {ku} : Kurdish
849
850=item {kru} : Kurukh
851
852=item {kut} : Kutenai
853
854=item {lad} : Ladino
855
856eq Judeo-Spanish. NOT Ladin (a minority language in Italy).
857
858=item {lah} : Lahnda
859
860NOT Lamba!
861
862=item {lam} : Lamba
863
864NOT Lahnda!
865
866=item {lo} : Lao
867
868eq Laotian.
869
870=item {la} : Latin
871
872(Historical) NOT Ladin! NOT Ladino!
873
874=item {lv} : Latvian
875
876eq Lettish.
877
878=item {lb} : Letzeburgesch
879
880eq Luxemburgian, eq Luxemburger. (Formerly i-lux.)
881
882=for etc
883{i-lux} Letzeburgesch (old tag)
884
885=item {lez} : Lezghian
886
887=item {ln} : Lingala
888
889=item {lt} : Lithuanian
890
891=item {nds} : Low German
892
893eq Low Saxon. eq Low German. eq Low Saxon.
894
895=item {loz} : Lozi
896
897=item {lub} : Luba-Katanga
898
899=item {lua} : Luba-Lulua
900
901=item {lui} : Luiseno
902
903eq LuiseE<ntilde>o.
904
905=item {lun} : Lunda
906
907=item {luo} : Luo (Kenya and Tanzania)
908
909=item {lus} : Lushai
910
911=item {mk} : Macedonian
912
913eq the modern Slavic language spoken in what was Yugoslavia.
914NOT the form of Greek spoken in Greek Macedonia!
915
916=item {mad} : Madurese
917
918=item {mag} : Magahi
919
920=item {mai} : Maithili
921
922=item {mak} : Makasar
923
924=item {mg} : Malagasy
925
926=item {ms} : Malay
927
928NOT Malayalam!
929
930=item {ml} : Malayalam
931
932NOT Malay!
933
934=item {mt} : Maltese
935
936=item {mnc} : Manchu
937
938=item {mdr} : Mandar
939
940NOT Mandarin!
941
942=item {man} : Mandingo
943
944=item {mni} : Manipuri
945
946eq Meithei.
947
948=item [{mno} : Manobo languages]
949
950=item {gv} : Manx
951
952=item {mi} : Maori
953
954NOT Mari!
955
956=item {mr} : Marathi
957
958=item {chm} : Mari
959
960NOT Maori!
961
962=item {mh} : Marshall
963
964eq Marshallese.
965
966=item {mwr} : Marwari
967
968=item {mas} : Masai
969
970=item [{myn} : Mayan languages]
971
972=item {men} : Mende
973
974=item {mic} : Micmac
975
976=item {min} : Minangkabau
977
978=item {i-mingo} : Mingo
979
eq the Iroquoian language West Virginia Seneca. NOT New York Seneca!
981
982=item [{mis} : Miscellaneous languages]
983
984Don't use this.
985
986=item {moh} : Mohawk
987
988=item {mo} : Moldavian
989
990eq Moldovan.
991
992=item [{mkh} : Mon-Khmer (Other)]
993
994=item {lol} : Mongo
995
996=item {mn} : Mongolian
997
998eq Mongol.
999
1000=item {mos} : Mossi
1001
1002=item [{mul} : Multiple languages]
1003
1004Not for normal use.
1005
1006=item [{mun} : Munda languages]
1007
1008=item {nah} : Nahuatl
1009
1010=item {na} : Nauru
1011
1012=item {nv} : Navajo
1013
1014eq Navaho. (Formerly i-navajo.)
1015
1016=for etc
1017{i-navajo} Navajo (old tag)
1018
1019=item {nd} : North Ndebele
1020
1021=item {nr} : South Ndebele
1022
1023=item {ng} : Ndonga
1024
1025=item {ne} : Nepali
1026
1027eq Nepalese. Notable forms:
1028{ne-np} Nepal Nepali;
1029{ne-in} India Nepali.
1030
1031=item {new} : Newari
1032
1033=item {nia} : Nias
1034
1035=item [{nic} : Niger-Kordofanian (Other)]
1036
1037=item [{ssa} : Nilo-Saharan (Other)]
1038
1039=item {niu} : Niuean
1040
1041=item {non} : Old Norse
1042
1043(Historical)
1044
1045=item [{nai} : North American Indian]
1046
1047Do not use this.
1048
1049=item {se} : Northern Sami
1050
1051eq Lappish. eq Lapp. eq (Northern) Saami.
1052
1053=item {no} : Norwegian
1054
1055Note the two following forms:
1056
1057=item {nb} : Norwegian Bokmal
1058
1059eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.)
1060
1061=for etc
1062{no-bok} Norwegian Bokmal (old tag)
1063
1064=item {nn} : Norwegian Nynorsk
1065
1066(A form of Norwegian.) (Formerly no-nyn.)
1067
1068=for etc
1069{no-nyn} Norwegian Nynorsk (old tag)
1070
1071=item [{nub} : Nubian languages]
1072
1073=item {nym} : Nyamwezi
1074
1075=item {nyn} : Nyankole
1076
1077=item {nyo} : Nyoro
1078
1079=item {nzi} : Nzima
1080
1081=item {oc} : Occitan (post 1500)
1082
1083eq ProvenE<ccedil>al, eq Provencal
1084
1085=item {oji} : Ojibwa
1086
1087eq Ojibwe.
1088
1089=item {or} : Oriya
1090
1091=item {om} : Oromo
1092
1093=item {osa} : Osage
1094
1095=item {os} : Ossetian; Ossetic
1096
1097=item [{oto} : Otomian languages]
1098
1099Group of languages collectively called "OtomE<iacute>".
1100
1101=item {pal} : Pahlavi
1102
1103eq Pahlevi
1104
1105=item {i-pwn} : Paiwan
1106
1107eq Pariwan
1108
1109=item {pau} : Palauan
1110
1111=item {pi} : Pali
1112
1113(Historical?)
1114
1115=item {pam} : Pampanga
1116
1117=item {pag} : Pangasinan
1118
1119=item {pa} : Panjabi
1120
1121eq Punjabi
1122
1123=item {pap} : Papiamento
1124
1125eq Papiamentu.
1126
1127=item [{paa} : Papuan (Other)]
1128
1129=item {fa} : Persian
1130
1131eq Farsi. eq Iranian.
1132
1133=item {peo} : Old Persian (ca.600-400 B.C.)
1134
1135=item [{phi} : Philippine (Other)]
1136
1137=item {phn} : Phoenician
1138
1139(Historical)
1140
1141=item {pon} : Pohnpeian
1142
1143NOT Pompeiian!
1144
1145=item {pl} : Polish
1146
1147=item {pt} : Portuguese
1148
1149eq Portugese. Notable forms:
1150{pt-pt} Portugal Portuguese;
1151{pt-br} Brazilian Portuguese.
1152
1153=item [{pra} : Prakrit languages]
1154
1155=item {pro} : Old Provencal (to 1500)
1156
1157eq Old ProvenE<ccedil>al. (Historical.)
1158
1159=item {ps} : Pushto
1160
1161eq Pashto. eq Pushtu.
1162
1163=item {qu} : Quechua
1164
1165eq Quecha.
1166
1167=item {rm} : Raeto-Romance
1168
1169eq Romansh.
1170
1171=item {raj} : Rajasthani
1172
1173=item {rap} : Rapanui
1174
1175=item {rar} : Rarotongan
1176
1177=item [{qaa - qtz} : Reserved for local use.]
1178
1179=item [{roa} : Romance (Other)]
1180
1181NOT Romanian! NOT Romany! NOT Romansh!
1182
1183=item {ro} : Romanian
1184
1185eq Rumanian. NOT Romany!
1186
1187=item {rom} : Romany
1188
1189eq Rom. NOT Romanian!
1190
1191=item {rn} : Rundi
1192
1193=item {ru} : Russian
1194
1195NOT White Russian! NOT Rusyn!
1196
1197=item [{sal} : Salishan languages]
1198
1199Large language group.
1200
1201=item {sam} : Samaritan Aramaic
1202
1203NOT Aramaic!
1204
1205=item [{smi} : Sami languages (Other)]
1206
1207=item {sm} : Samoan
1208
1209=item {sad} : Sandawe
1210
1211=item {sg} : Sango
1212
1213=item {sa} : Sanskrit
1214
1215(Historical)
1216
1217=item {sat} : Santali
1218
1219=item {sc} : Sardinian
1220
1221eq Sard.
1222
1223=item {sas} : Sasak
1224
1225=item {sco} : Scots
1226
1227NOT Scots Gaelic!
1228
1229=item {sel} : Selkup
1230
1231=item [{sem} : Semitic (Other)]
1232
1233=item {sr} : Serbian
1234
1235eq Serb. NOT Sorbian.
1236
1237=item {srr} : Serer
1238
1239=item {shn} : Shan
1240
1241=item {sn} : Shona
1242
1243=item {sid} : Sidamo
1244
1245=item {sgn-...} : Sign Languages
1246
1247Always use with a subtag. Notable forms:
1248{sgn-gb} British Sign Language (BSL);
1249{sgn-ie} Irish Sign Language (ESL);
1250{sgn-ni} Nicaraguan Sign Language (ISN);
1251{sgn-us} American Sign Language (ASL).
1252
1253=item {bla} : Siksika
1254
1255eq Blackfoot. eq Pikanii.
1256
1257=item {sd} : Sindhi
1258
1259=item {si} : Sinhalese
1260
1261eq Sinhala.
1262
1263=item [{sit} : Sino-Tibetan (Other)]
1264
1265=item [{sio} : Siouan languages]
1266
1267=item {den} : Slave (Athapascan)
1268
1269("Slavey" is a subform.)
1270
1271=item [{sla} : Slavic (Other)]
1272
1273=item {sk} : Slovak
1274
1275eq Slovakian.
1276
1277=item {sl} : Slovenian
1278
1279eq Slovene.
1280
1281=item {sog} : Sogdian
1282
1283=item {so} : Somali
1284
1285=item {son} : Songhai
1286
1287=item {snk} : Soninke
1288
1289=item {wen} : Sorbian languages
1290
1291eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian!
1292
1293=item {nso} : Northern Sotho
1294
1295=item {st} : Southern Sotho
1296
1297eq Sutu. eq Sesotho.
1298
1299=item [{sai} : South American Indian (Other)]
1300
1301=item {es} : Spanish
1302
1303Notable forms:
1304{es-ar} Argentine Spanish;
1305{es-bo} Bolivian Spanish;
1306{es-cl} Chilean Spanish;
1307{es-co} Colombian Spanish;
1308{es-do} Dominican Spanish;
1309{es-ec} Ecuadorian Spanish;
1310{es-es} Spain Spanish;
1311{es-gt} Guatemalan Spanish;
1312{es-hn} Honduran Spanish;
1313{es-mx} Mexican Spanish;
1314{es-pa} Panamanian Spanish;
1315{es-pe} Peruvian Spanish;
1316{es-pr} Puerto Rican Spanish;
1317{es-py} Paraguay Spanish;
1318{es-sv} Salvadoran Spanish;
1319{es-us} US Spanish;
1320{es-uy} Uruguayan Spanish;
1321{es-ve} Venezuelan Spanish.
1322
1323=item {suk} : Sukuma
1324
1325=item {sux} : Sumerian
1326
1327(Historical)
1328
1329=item {su} : Sundanese
1330
1331=item {sus} : Susu
1332
1333=item {sw} : Swahili
1334
1335eq Kiswahili
1336
1337=item {ss} : Swati
1338
1339=item {sv} : Swedish
1340
1341Notable forms:
483dd220 1342{sv-se} Sweden Swedish;
1343{sv-fi} Finland Swedish.
21aeefd5 1344
1345=item {syr} : Syriac
1346
1347=item {tl} : Tagalog
1348
1349=item {ty} : Tahitian
1350
1351=item [{tai} : Tai (Other)]
1352
1353NOT Thai!
1354
1355=item {tg} : Tajik
1356
1357=item {tmh} : Tamashek
1358
1359=item {ta} : Tamil
1360
1361=item {i-tao} : Tao
1362
1363eq Yami.
1364
1365=item {tt} : Tatar
1366
1367=item {i-tay} : Tayal
1368
1369eq Atayal. eq Atayan.
1370
1371=item {te} : Telugu
1372
1373=item {ter} : Tereno
1374
1375=item {tet} : Tetum
1376
1377=item {th} : Thai
1378
1379NOT Tai!
1380
1381=item {bo} : Tibetan
1382
1383=item {tig} : Tigre
1384
1385=item {ti} : Tigrinya
1386
1387=item {tem} : Timne
1388
1389eq Themne. eq Timene.
1390
1391=item {tiv} : Tiv
1392
1393=item {tli} : Tlingit
1394
1395=item {tpi} : Tok Pisin
1396
1397=item {tkl} : Tokelau
1398
1399=item {tog} : Tonga (Nyasa)
1400
1401NOT Tsonga!
1402
1403=item {to} : Tonga (Tonga Islands)
1404
1405(Pronounced "Tong-a", not "Tong-ga")
1406
1407NOT Tsonga!
1408
1409=item {tsi} : Tsimshian
1410
1411eq Sm'algyax
1412
1413=item {ts} : Tsonga
1414
1415NOT Tonga!
1416
1417=item {i-tsu} : Tsou
1418
1419=item {tn} : Tswana
1420
1421Same as Setswana.
1422
1423=item {tum} : Tumbuka
1424
1425=item {tr} : Turkish
1426
1427(Typically in Roman script)
1428
1429=item {ota} : Ottoman Turkish (1500-1928)
1430
1431(Typically in Arabic script) (Historical)
1432
1433=item {tk} : Turkmen
1434
1435eq Turkmeni.
1436
1437=item {tvl} : Tuvalu
1438
1439=item {tyv} : Tuvinian
1440
1441eq Tuvan. eq Tuvin.
1442
1443=item {tw} : Twi
1444
1445=item {uga} : Ugaritic
1446
1447NOT Ugric!
1448
1449=item {ug} : Uighur
1450
1451=item {uk} : Ukrainian
1452
1453=item {umb} : Umbundu
1454
1455=item {und} : Undetermined
1456
1457Not a tag for normal use.
1458
1459=item {ur} : Urdu
1460
1461=item {uz} : Uzbek
1462
1463eq E<Ouml>zbek
1464
1465=item {vai} : Vai
1466
1467=item {ven} : Venda
1468
1469NOT Wendish! NOT Wend! NOT Avestan!
1470
1471=item {vi} : Vietnamese
1472
1473eq Viet.
1474
1475=item {vo} : Volapuk
1476
1477eq VolapE<uuml>k. (Artificial)
1478
1479=item {vot} : Votic
1480
1481eq Votian. eq Vod.
1482
1483=item [{wak} : Wakashan languages]
1484
1485=item {wal} : Walamo
1486
1487eq Wolaytta.
1488
1489=item {war} : Waray
1490
1491Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
1492not the smaller Philippine language Waray Sorsogon, nor the extinct
1493Australian language Waray.
1494
1495=item {was} : Washo
1496
1497eq Washoe
1498
1499=item {cy} : Welsh
1500
1501=item {wo} : Wolof
1502
1503=item {x-...} : Unregistered (Semi-Private Use)
1504
1505"x-" is a prefix for language tags that are not registered with ISO
1506or IANA. Example, x-double-dutch
1507
1508=item {xh} : Xhosa
1509
1510=item {sah} : Yakut
1511
1512=item {yao} : Yao
1513
1514(The Yao in Malawi?)
1515
1516=item {yap} : Yapese
1517
1518eq Yap
1519
1520=item {yi} : Yiddish
1521
1522Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script.
1523
1524=for etc
1525{ji} Yiddish (old tag)
1526
1527=item {yo} : Yoruba
1528
1529=item [{ypk} : Yupik languages]
1530
1531Several "Eskimo" languages.
1532
1533=item {znd} : Zande
1534
1535=item [{zap} : Zapotec]
1536
1537(A group of languages.)
1538
1539=item {zen} : Zenaga
1540
1541NOT Zend.
1542
1543=item {za} : Zhuang
1544
1545=item {zu} : Zulu
1546
1547=item {zun} : Zuni
1548
1549eq ZuE<ntilde>i
1550
1551=back
1552
1553=for woohah END
1554
1555=head1 SEE ALSO
1556
1557L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
1558
1559=head1 COPYRIGHT AND DISCLAIMER
1560
483dd220 1561Copyright (c) 2001,2002 Sean M. Burke. All rights reserved.
21aeefd5 1562
1563You can redistribute and/or
1564modify this document under the same terms as Perl itself.
1565
d1be9408 1566This document is provided in the hope that it will be
21aeefd5 1567useful, but without any warranty;
1568without even the implied warranty of accuracy, authoritativeness,
1569completeness, merchantability, or fitness for a particular purpose.
1570
1571Email any corrections or questions to me.
1572
1573=head1 AUTHOR
1574
1575Sean M. Burke, sburkeE<64>cpan.org
1576
1577=cut
1578
1579
1580# To generate a list of just the two and three-letter codes:
1581
#!/usr/local/bin/perl -w

require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
           # Sean M. Burke, sburke@cpan.org
           # This program is for generating the language_codes.txt file
#
# Fetches the code table from the URL below, and prints one
# "code<TAB>English name" line per language, sorted by lowercased
# English name, followed by a note on where and when the list was made.
#
# NOTE: this script sits in the module's DATA section, which the module
# scans for table entries (a tag in curly braces followed by a name).
# Keep text of that shape out of this script.
use strict;
use LWP::Simple;
use HTML::TreeBuilder 3.10;
my $root = HTML::TreeBuilder->new();
my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
$root->parse(get($url) || die "Can't get $url");
$root->eof();

my @codes;  # list of [ sort key, output line ]

foreach my $tr ($root->find_by_tag_name('tr')) {
  # A row's content list may hold plain text strings as well as element
  # objects; only the objects have an as_text method.
  my @f = map { ref $_ ? $_->as_text() : $_ } $tr->content_list();
  #print map("<$_> ", @f), "\n";
  next unless @f == 5;
  pop @f; # nix the French name
  next if $f[-1] eq 'Language Name (English)'; # it's a header line
  my $xx = splice(@f, 2,1); # pull out the two-letter code
  $f[-1] =~ s/^\s+//;
  $f[-1] =~ s/\s+$//;
  if($xx =~ m/[a-zA-Z]/) {   # there's a two-letter code for it
    push @codes, [ lc($f[-1]),   "$xx\t$f[-1]\n" ];
  } else { # print the three-letter codes.
    if($f[0] eq $f[1]) {
      push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
    } else { # shouldn't happen
      push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ];
    }
  }
}

print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
print "[ based on $url\n at ", scalar(localtime), "]\n",
  "[Note: doesn't include IANA-registered codes.]\n";
exit;
1621__END__
1622