Commit | Line | Data |
21aeefd5 |
1 | |
2 | require 5; |
3 | package I18N::LangTags::List; |
77b20956 |
4 | # Time-stamp: "2004-10-06 23:26:21 ADT" |
21aeefd5 |
5 | use strict; |
537c2f98 |
6 | use vars qw(%Name %Is_Disrec $Debug $VERSION); |
77b20956 |
7 | $VERSION = '0.35'; |
21aeefd5 |
8 | # POD at the end. |
9 | |
10 | #---------------------------------------------------------------------- |
{
  # Populate %Name and %Is_Disrec at load time by reading the language table
  # out of this module's own POD, which follows the __DATA__ marker.
  #
  # Lines are ignored until the "=for woohah" marker has been seen.  After
  # that:
  #   * a line containing "{tag} : Name" (optionally preceded by "[" to mark
  #     the tag as not for general use) sets $Name{tag}, and also sets
  #     $Is_Disrec{tag} when the opening bracket is present;
  #   * a line containing 'Formerly "tag"' registers the old tag as a
  #     disrecommended alias of the most recently read name.
  # Dies if no entry at all could be read.
  my $seeking   = 1;   # true until the start-of-table marker is seen
  my $count     = 0;   # number of {tag} entries accepted
  my $last_name = '';  # most recent name, used by 'Formerly "..."' lines

  # Read each line into a lexical rather than $_: while(<FH>) assigns to the
  # global $_ without localizing it, so loading this module from inside a
  # foreach/map/grep would overwrite the caller's current element.
  while(defined(my $line = <I18N::LangTags::List::DATA>)) {
    if($seeking) {
      $seeking = 0 if $line =~ m/=for woohah/;
      next;
    }

    if( my($disrec, $tag, $name) =
          $line =~ m/(\[?)\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/
    ) {
      $name =~ s/\s*[;\.]*\s*$//g;  # strip trailing whitespace, ";" and "."
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      $last_name = $Name{$tag} = $name;
      $Is_Disrec{$tag} = 1 if $disrec;

      # name() and is_decent() lowercase their argument before looking it
      # up, so a tag written with capitals in the table (e.g. {zh-Hans})
      # must also be reachable under its lowercase spelling.  The original
      # spelling is kept as well, so existing users of %Name still find it.
      my $lc_tag = lc $tag;
      if($lc_tag ne $tag) {
        $Name{$lc_tag} = $name unless exists $Name{$lc_tag};
        $Is_Disrec{$lc_tag} = 1 if $disrec;
      }
    } elsif($line =~ m/[Ff]ormerly \"([-a-z0-9]+)\"/) {
      $Name{$1} = "$last_name (old tag)" if $last_name;
      $Is_Disrec{$1} = 1;
    }
  }
  die "No tags read??" unless $count;
}
36 | #---------------------------------------------------------------------- |
37 | |
sub name {
  # name( $langtag )
  #
  # Returns an English name for the given language tag, looked up in %Name.
  # The tag is lowercased and stripped of surrounding whitespace first.
  # If the full tag is unknown, trailing "-subtag" pieces are removed one
  # at a time until a known tag is found; the removed pieces are reported
  # as 'Name (Subform "...")'.  A tag starting with "x-" is also tried with
  # "i-" in its place, and vice versa.
  #
  # Returns nothing (undef in scalar context, the empty list in list
  # context) when the argument is false or no name can be found.
  # Prints a trace of its progress when $Debug is true.
  my $tag = lc($_[0] || return);
  $tag =~ s/^\s+//s;
  $tag =~ s/\s+$//s;

  # The same tag with its "x-" / "i-" prefix swapped, or '' if it has neither.
  my $alt = $tag =~ m/^x-(.+)/ ? "i-$1"
          : $tag =~ m/^i-(.+)/ ? "x-$1"
          :                      '';

  my($name, $subform) = ('', '');
  print "Input: {$tag}\n" if $Debug;

  while(length $tag) {
    $name = $Name{$tag};
    last if $name;
    $name = $Name{$alt};
    last if $name;

    if($tag =~ s/(-[a-z0-9]+)$//s) {
      # Move the last subtag from the tag onto the front of the subform,
      # shorten the alternate spelling in step, and try again.
      my $shaved = $1;
      print "Shaving off: $shaved leaving $tag\n" if $Debug;
      $subform = $shaved . $subform;
      print " alt -> $alt\n" if $alt =~ s/(-[a-z0-9]+)$//s and $Debug;
      next;
    }

    # Nothing left to remove: this is an unknown primary tag.
    print "Aborting on: {$name}{$subform}\n" if $Debug;
    last;
  }
  print "Output: {$name}{$subform}\n" if $Debug;

  return unless $name;            # no name found
  return $name unless $subform;   # the tag itself was known

  $subform =~ s/^-//s;
  $subform =~ s/-$//s;
  return "$name (Subform \"$subform\")";
}
78 | |
537c2f98 |
79 | #-------------------------------------------------------------------------- |
80 | |
sub is_decent {
  # is_decent( $langtag )
  #
  # Classifies a language tag (compared case-insensitively):
  #   0 - the argument is false, is not syntactically valid, consists only
  #       of "i", "x" or "sgn", or the tag or one of its prefixes is
  #       flagged in %Is_Disrec before a known name is found;
  #   2 - the tag, or one of its prefixes, has an entry in %Name;
  #   1 - syntactically valid, but neither flagged nor known.
  my $tag = lc($_[0] || return 0);

  return 0 unless
    $tag =~
    /^(?:  # First subtag
         [xi] | [a-z]{2,3}
      )
      (?:  # Subtags thereafter
         -           # separator
         [a-z0-9]{1,8}  # subtag
      )*
    $/xs;

  # Build every prefix of the tag: "a-b-c" gives ("a", "a-b", "a-b-c").
  # The tag is non-empty here (it passed the check above), so there is
  # always at least one prefix.
  my @supers = ();
  foreach my $bit (split('-', $tag)) {
    push @supers,
      scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit;
  }

  # A bare "i", "x" or "sgn" is not looked up on its own.
  shift @supers if $supers[0] =~ m<^(i|x|sgn)$>s;
  return 0 unless @supers;

  # The full tag is tested first, so that decent subforms of indecent
  # tags are decent; after that the prefixes go from shortest to longest.
  # Because $tag itself is covered by this loop, no separate check of
  # $Name{$tag} is needed afterwards.
  foreach my $f ($tag, @supers) {
    return 0 if $Is_Disrec{$f};
    return 2 if $Name{$f};
  }
  return 1;
}
113 | |
114 | #-------------------------------------------------------------------------- |
21aeefd5 |
115 | 1; |
116 | |
117 | __DATA__ |
118 | |
119 | =head1 NAME |
120 | |
121 | I18N::LangTags::List -- tags and names for human languages |
122 | |
123 | =head1 SYNOPSIS |
124 | |
125 | use I18N::LangTags::List; |
126 | print "Parlez-vous... ", join(', ', |
127 | I18N::LangTags::List::name('elx') || 'unknown_language', |
128 | I18N::LangTags::List::name('ar-Kw') || 'unknown_language', |
129 | I18N::LangTags::List::name('en') || 'unknown_language', |
130 | I18N::LangTags::List::name('en-CA') || 'unknown_language', |
131 | ), "?\n"; |
132 | |
133 | prints: |
134 | |
135 | Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English? |
136 | |
137 | =head1 DESCRIPTION |
138 | |
8000a3fa |
139 | This module provides a function |
21aeefd5 |
140 | C<I18N::LangTags::List::name( I<langtag> ) > that takes |
141 | a language tag (see L<I18N::LangTags|I18N::LangTags>) |
142 | and returns the best attempt at an English name for it, or |
143 | undef if it can't make sense of the tag. |
144 | |
145 | The function I18N::LangTags::List::name(...) is not exported. |
146 | |
537c2f98 |
147 | This module also provides a function |
148 | C<I18N::LangTags::List::is_decent( I<langtag> )> that returns true iff |
149 | the language tag is syntactically valid and is for general use (like |
150 | "fr" or "fr-ca", below). That is, it returns false for tags that are |
151 | syntactically invalid and for tags, like "aus", that are listed in |
152 | brackets below. This function is not exported. |
153 | |
3c4b39be |
154 | The map of tags-to-names that it uses is accessible as |
21aeefd5 |
155 | %I18N::LangTags::List::Name, and it's the same as the list |
156 | that follows in this documentation, which should be useful |
157 | to you even if you don't use this module. |
158 | |
159 | =head1 ABOUT LANGUAGE TAGS |
160 | |
161 | Internet language tags, as defined in RFC 3066, are a formalism |
162 | for denoting human languages. The two-letter ISO 639-1 language |
163 | codes are well known (as "en" for English), as are their forms |
164 | when qualified by a country code ("en-US"). Less well-known are the |
8000a3fa |
165 | arbitrary-length non-ISO codes (like "i-mingo"), and the |
21aeefd5 |
166 | recently (in 2001) introduced three-letter ISO-639-2 codes. |
167 | |
483dd220 |
168 | Remember these important facts: |
21aeefd5 |
169 | |
170 | =over |
171 | |
172 | =item * |
173 | |
174 | Language tags are not locale IDs. A locale ID is written with a "_" |
175 | instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and |
176 | I<means> something different than a language tag. A language tag |
177 | denotes a language. A locale ID denotes a language I<as used in> |
178 | a particular place, in combination with non-linguistic |
483dd220 |
179 | location-specific information such as what currency is used |
21aeefd5 |
180 | there. Locales I<also> often denote character set information, |
181 | as in "en_US.ISO8859-1". |
182 | |
183 | =item * |
184 | |
185 | Language tags are not for computer languages. |
186 | |
187 | =item * |
188 | |
189 | "Dialect" is not a useful term, since there is no objective |
483dd220 |
190 | criterion for establishing when two language-forms are |
21aeefd5 |
191 | dialects of each other, or are separate languages. |
192 | |
193 | =item * |
194 | |
195 | Language tags are not case-sensitive. en-US, en-us, En-Us, etc., |
196 | are all the same tag, and denote the same language. |
197 | |
198 | =item * |
199 | |
200 | Not every language tag really refers to a single language. Some |
201 | language tags refer to conditions: i-default (system-message text |
202 | in English plus maybe other languages), und (undetermined |
203 | language). Others (notably lots of the three-letter codes) are |
204 | bibliographic tags that classify whole groups of languages, as |
205 | with cus "Cushitic (Other)" (i.e., a |
206 | language that has been classed as Cushitic, but which has no more |
207 | specific code) or the even less linguistically coherent |
483dd220 |
208 | sai for "South American Indian (Other)". Though useful in |
21aeefd5 |
209 | bibliography, B<SUCH TAGS ARE NOT |
210 | FOR GENERAL USE>. For further guidance, email me. |
211 | |
212 | =item * |
213 | |
214 | Language tags are not country codes. In fact, they are often |
215 | distinct codes, as with language tag ja for Japanese, and |
216 | ISO 3166 country code C<.jp> for Japan. |
217 | |
218 | =back |
219 | |
220 | =head1 LIST OF LANGUAGES |
221 | |
222 | The first part of each item is the language tag, between |
223 | {...}. It |
224 | is followed by an English name for the language or language-group. |
225 | Language tags that I judge to be not for general use, are bracketed. |
226 | |
227 | This list is in alphabetical order by English name of the language. |
228 | |
229 | =for reminder |
230 | The name in the =item line MUST NOT have E<...>'s in it!! |
231 | |
232 | =for woohah START |
233 | |
234 | =over |
235 | |
236 | =item {ab} : Abkhazian |
237 | |
238 | eq Abkhaz |
239 | |
240 | =item {ace} : Achinese |
241 | |
242 | =item {ach} : Acoli |
243 | |
244 | =item {ada} : Adangme |
245 | |
aaf52a42 |
246 | =item {ady} : Adyghe |
247 | |
248 | eq Adygei |
249 | |
21aeefd5 |
250 | =item {aa} : Afar |
251 | |
252 | =item {afh} : Afrihili |
253 | |
254 | (Artificial) |
255 | |
256 | =item {af} : Afrikaans |
257 | |
258 | =item [{afa} : Afro-Asiatic (Other)] |
259 | |
aaf52a42 |
260 | =item {ak} : Akan |
261 | |
262 | (Formerly "aka".) |
21aeefd5 |
263 | |
264 | =item {akk} : Akkadian |
265 | |
266 | (Historical) |
267 | |
268 | =item {sq} : Albanian |
269 | |
270 | =item {ale} : Aleut |
271 | |
272 | =item [{alg} : Algonquian languages] |
273 | |
274 | NOT Algonquin! |
275 | |
276 | =item [{tut} : Altaic (Other)] |
277 | |
278 | =item {am} : Amharic |
279 | |
280 | NOT Aramaic! |
281 | |
282 | =item {i-ami} : Ami |
283 | |
284 | eq Amis. eq 'Amis. eq Pangca. |
285 | |
286 | =item [{apa} : Apache languages] |
287 | |
288 | =item {ar} : Arabic |
289 | |
290 | Many forms are mutually un-intelligible in spoken media. |
291 | Notable forms: |
292 | {ar-ae} UAE Arabic; |
293 | {ar-bh} Bahrain Arabic; |
294 | {ar-dz} Algerian Arabic; |
295 | {ar-eg} Egyptian Arabic; |
296 | {ar-iq} Iraqi Arabic; |
297 | {ar-jo} Jordanian Arabic; |
298 | {ar-kw} Kuwait Arabic; |
299 | {ar-lb} Lebanese Arabic; |
300 | {ar-ly} Libyan Arabic; |
301 | {ar-ma} Moroccan Arabic; |
302 | {ar-om} Omani Arabic; |
303 | {ar-qa} Qatari Arabic; |
304 | {ar-sa} Sauda Arabic; |
305 | {ar-sy} Syrian Arabic; |
306 | {ar-tn} Tunisian Arabic; |
307 | {ar-ye} Yemen Arabic. |
308 | |
309 | =item {arc} : Aramaic |
310 | |
311 | NOT Amharic! NOT Samaritan Aramaic! |
312 | |
313 | =item {arp} : Arapaho |
314 | |
315 | =item {arn} : Araucanian |
316 | |
317 | =item {arw} : Arawak |
318 | |
319 | =item {hy} : Armenian |
320 | |
aaf52a42 |
321 | =item {an} : Aragonese |
322 | |
21aeefd5 |
323 | =item [{art} : Artificial (Other)] |
324 | |
aaf52a42 |
325 | =item {ast} : Asturian |
326 | |
327 | eq Bable. |
328 | |
21aeefd5 |
329 | =item {as} : Assamese |
330 | |
331 | =item [{ath} : Athapascan languages] |
332 | |
333 | eq Athabaskan. eq Athapaskan. eq Athabascan. |
334 | |
335 | =item [{aus} : Australian languages] |
336 | |
337 | =item [{map} : Austronesian (Other)] |
338 | |
aaf52a42 |
339 | =item {av} : Avaric |
340 | |
341 | (Formerly "ava".) |
21aeefd5 |
342 | |
343 | =item {ae} : Avestan |
344 | |
345 | eq Zend |
346 | |
347 | =item {awa} : Awadhi |
348 | |
349 | =item {ay} : Aymara |
350 | |
351 | =item {az} : Azerbaijani |
352 | |
353 | eq Azeri |
354 | |
aaf52a42 |
355 | Notable forms: |
356 | {az-Arab} Azerbaijani in Arabic script; |
357 | {az-Cyrl} Azerbaijani in Cyrillic script; |
358 | {az-Latn} Azerbaijani in Latin script. |
359 | |
21aeefd5 |
360 | =item {ban} : Balinese |
361 | |
362 | =item [{bat} : Baltic (Other)] |
363 | |
364 | =item {bal} : Baluchi |
365 | |
aaf52a42 |
366 | =item {bm} : Bambara |
367 | |
368 | (Formerly "bam".) |
21aeefd5 |
369 | |
370 | =item [{bai} : Bamileke languages] |
371 | |
372 | =item {bad} : Banda |
373 | |
374 | =item [{bnt} : Bantu (Other)] |
375 | |
376 | =item {bas} : Basa |
377 | |
378 | =item {ba} : Bashkir |
379 | |
380 | =item {eu} : Basque |
381 | |
382 | =item {btk} : Batak (Indonesia) |
383 | |
384 | =item {bej} : Beja |
385 | |
386 | =item {be} : Belarusian |
387 | |
388 | eq Belarussian. eq Byelarussian. |
389 | eq Belorussian. eq Byelorussian. |
390 | eq White Russian. eq White Ruthenian. |
391 | NOT Ruthenian! |
392 | |
393 | =item {bem} : Bemba |
394 | |
395 | =item {bn} : Bengali |
396 | |
397 | eq Bangla. |
398 | |
399 | =item [{ber} : Berber (Other)] |
400 | |
401 | =item {bho} : Bhojpuri |
402 | |
403 | =item {bh} : Bihari |
404 | |
405 | =item {bik} : Bikol |
406 | |
407 | =item {bin} : Bini |
408 | |
409 | =item {bi} : Bislama |
410 | |
411 | eq Bichelamar. |
412 | |
413 | =item {bs} : Bosnian |
414 | |
415 | =item {bra} : Braj |
416 | |
417 | =item {br} : Breton |
418 | |
419 | =item {bug} : Buginese |
420 | |
421 | =item {bg} : Bulgarian |
422 | |
423 | =item {i-bnn} : Bunun |
424 | |
425 | =item {bua} : Buriat |
426 | |
427 | =item {my} : Burmese |
428 | |
429 | =item {cad} : Caddo |
430 | |
431 | =item {car} : Carib |
432 | |
433 | =item {ca} : Catalan |
434 | |
435 | eq CatalE<aacute>n. eq Catalonian. |
436 | |
437 | =item [{cau} : Caucasian (Other)] |
438 | |
439 | =item {ceb} : Cebuano |
440 | |
441 | =item [{cel} : Celtic (Other)] |
442 | |
443 | Notable forms: |
444 | {cel-gaulish} Gaulish (Historical) |
445 | |
446 | =item [{cai} : Central American Indian (Other)] |
447 | |
448 | =item {chg} : Chagatai |
449 | |
450 | (Historical?) |
451 | |
452 | =item [{cmc} : Chamic languages] |
453 | |
454 | =item {ch} : Chamorro |
455 | |
456 | =item {ce} : Chechen |
457 | |
458 | =item {chr} : Cherokee |
459 | |
460 | eq Tsalagi |
461 | |
462 | =item {chy} : Cheyenne |
463 | |
464 | =item {chb} : Chibcha |
465 | |
466 | (Historical) NOT Chibchan (which is a language family). |
467 | |
468 | =item {ny} : Chichewa |
469 | |
470 | eq Nyanja. eq Chinyanja. |
471 | |
472 | =item {zh} : Chinese |
473 | |
474 | Many forms are mutually un-intelligible in spoken media. |
aaf52a42 |
475 | Notable forms: |
476 | {zh-Hans} Chinese, in simplified script; |
477 | {zh-Hant} Chinese, in traditional script; |
478 | {zh-tw} Taiwan Chinese; |
21aeefd5 |
479 | {zh-cn} PRC Chinese; |
21aeefd5 |
480 | {zh-sg} Singapore Chinese; |
aaf52a42 |
481 | {zh-mo} Macau Chinese; |
482 | {zh-hk} Hong Kong Chinese; |
21aeefd5 |
483 | {zh-guoyu} Mandarin [Putonghua/Guoyu]; |
aaf52a42 |
484 | {zh-hakka} Hakka [formerly "i-hakka"]; |
21aeefd5 |
485 | {zh-min} Hokkien; |
486 | {zh-min-nan} Southern Hokkien; |
487 | {zh-wuu} Shanghaiese; |
488 | {zh-xiang} Hunanese; |
489 | {zh-gan} Gan; |
490 | {zh-yue} Cantonese. |
491 | |
492 | =for etc |
493 | {i-hakka} Hakka (old tag) |
494 | |
495 | =item {chn} : Chinook Jargon |
496 | |
497 | eq Chinook Wawa. |
498 | |
499 | =item {chp} : Chipewyan |
500 | |
501 | =item {cho} : Choctaw |
502 | |
503 | =item {cu} : Church Slavic |
504 | |
505 | eq Old Church Slavonic. |
506 | |
507 | =item {chk} : Chuukese |
508 | |
509 | eq Trukese. eq Chuuk. eq Truk. eq Ruk. |
510 | |
511 | =item {cv} : Chuvash |
512 | |
513 | =item {cop} : Coptic |
514 | |
515 | =item {kw} : Cornish |
516 | |
517 | =item {co} : Corsican |
518 | |
519 | eq Corse. |
520 | |
aaf52a42 |
521 | =item {cr} : Cree |
21aeefd5 |
522 | |
aaf52a42 |
523 | NOT Creek! (Formerly "cre".) |
21aeefd5 |
524 | |
525 | =item {mus} : Creek |
526 | |
527 | NOT Cree! |
528 | |
529 | =item [{cpe} : English-based Creoles and pidgins (Other)] |
530 | |
531 | =item [{cpf} : French-based Creoles and pidgins (Other)] |
532 | |
533 | =item [{cpp} : Portuguese-based Creoles and pidgins (Other)] |
534 | |
535 | =item [{crp} : Creoles and pidgins (Other)] |
536 | |
537 | =item {hr} : Croatian |
538 | |
539 | eq Croat. |
540 | |
541 | =item [{cus} : Cushitic (Other)] |
542 | |
543 | =item {cs} : Czech |
544 | |
545 | =item {dak} : Dakota |
546 | |
547 | eq Nakota. eq Lakota. |
548 | |
549 | =item {da} : Danish |
550 | |
aaf52a42 |
551 | =item {dar} : Dargwa |
552 | |
21aeefd5 |
553 | =item {day} : Dayak |
554 | |
555 | =item {i-default} : Default (Fallthru) Language |
556 | |
557 | Defined in RFC 2277, this is for tagging text |
558 | (which must include English text, and might/should include text |
559 | in other appropriate languages) that is emitted in a context |
560 | where language-negotiation wasn't possible -- in SMTP mail failure |
561 | messages, for example. |
562 | |
563 | =item {del} : Delaware |
564 | |
565 | =item {din} : Dinka |
566 | |
aaf52a42 |
567 | =item {dv} : Divehi |
568 | |
569 | eq Maldivian. (Formerly "div".) |
21aeefd5 |
570 | |
571 | =item {doi} : Dogri |
572 | |
573 | NOT Dogrib! |
574 | |
575 | =item {dgr} : Dogrib |
576 | |
577 | NOT Dogri! |
578 | |
579 | =item [{dra} : Dravidian (Other)] |
580 | |
581 | =item {dua} : Duala |
582 | |
583 | =item {nl} : Dutch |
584 | |
585 | eq Netherlander. Notable forms: |
586 | {nl-nl} Netherlands Dutch; |
587 | {nl-be} Belgian Dutch. |
588 | |
589 | =item {dum} : Middle Dutch (ca.1050-1350) |
590 | |
591 | (Historical) |
592 | |
593 | =item {dyu} : Dyula |
594 | |
595 | =item {dz} : Dzongkha |
596 | |
597 | =item {efi} : Efik |
598 | |
599 | =item {egy} : Ancient Egyptian |
600 | |
601 | (Historical) |
602 | |
603 | =item {eka} : Ekajuk |
604 | |
605 | =item {elx} : Elamite |
606 | |
607 | (Historical) |
608 | |
609 | =item {en} : English |
610 | |
611 | Notable forms: |
612 | {en-au} Australian English; |
613 | {en-bz} Belize English; |
614 | {en-ca} Canadian English; |
615 | {en-gb} UK English; |
616 | {en-ie} Irish English; |
617 | {en-jm} Jamaican English; |
618 | {en-nz} New Zealand English; |
619 | {en-ph} Philippine English; |
620 | {en-tt} Trinidad English; |
621 | {en-us} US English; |
622 | {en-za} South African English; |
623 | {en-zw} Zimbabwe English. |
624 | |
625 | =item {enm} : Old English (1100-1500) |
626 | |
627 | (Historical) |
628 | |
629 | =item {ang} : Old English (ca.450-1100) |
630 | |
631 | eq Anglo-Saxon. (Historical) |
632 | |
aaf52a42 |
633 | =item {i-enochian} : Enochian (Artificial) |
634 | |
635 | =item {myv} : Erzya |
636 | |
21aeefd5 |
637 | =item {eo} : Esperanto |
638 | |
639 | (Artificial) |
640 | |
641 | =item {et} : Estonian |
642 | |
aaf52a42 |
643 | =item {ee} : Ewe |
644 | |
645 | (Formerly "ewe".) |
21aeefd5 |
646 | |
647 | =item {ewo} : Ewondo |
648 | |
649 | =item {fan} : Fang |
650 | |
651 | =item {fat} : Fanti |
652 | |
653 | =item {fo} : Faroese |
654 | |
655 | =item {fj} : Fijian |
656 | |
657 | =item {fi} : Finnish |
658 | |
659 | =item [{fiu} : Finno-Ugrian (Other)] |
660 | |
661 | eq Finno-Ugric. NOT Ugaritic! |
662 | |
663 | =item {fon} : Fon |
664 | |
665 | =item {fr} : French |
666 | |
667 | Notable forms: |
668 | {fr-fr} France French; |
669 | {fr-be} Belgian French; |
670 | {fr-ca} Canadian French; |
671 | {fr-ch} Swiss French; |
672 | {fr-lu} Luxembourg French; |
673 | {fr-mc} Monaco French. |
674 | |
675 | =item {frm} : Middle French (ca.1400-1600) |
676 | |
677 | (Historical) |
678 | |
679 | =item {fro} : Old French (842-ca.1400) |
680 | |
681 | (Historical) |
682 | |
683 | =item {fy} : Frisian |
684 | |
685 | =item {fur} : Friulian |
686 | |
aaf52a42 |
687 | =item {ff} : Fulah |
688 | |
689 | (Formerly "ful".) |
21aeefd5 |
690 | |
691 | =item {gaa} : Ga |
692 | |
693 | =item {gd} : Scots Gaelic |
694 | |
695 | NOT Scots! |
696 | |
697 | =item {gl} : Gallegan |
698 | |
699 | eq Galician |
700 | |
aaf52a42 |
701 | =item {lg} : Ganda |
702 | |
703 | (Formerly "lug".) |
21aeefd5 |
704 | |
705 | =item {gay} : Gayo |
706 | |
707 | =item {gba} : Gbaya |
708 | |
709 | =item {gez} : Geez |
710 | |
711 | eq Ge'ez |
712 | |
713 | =item {ka} : Georgian |
714 | |
715 | =item {de} : German |
716 | |
717 | Notable forms: |
718 | {de-at} Austrian German; |
719 | {de-be} Belgian German; |
720 | {de-ch} Swiss German; |
721 | {de-de} Germany German; |
722 | {de-li} Liechtenstein German; |
723 | {de-lu} Luxembourg German. |
724 | |
725 | =item {gmh} : Middle High German (ca.1050-1500) |
726 | |
727 | (Historical) |
728 | |
729 | =item {goh} : Old High German (ca.750-1050) |
730 | |
731 | (Historical) |
732 | |
733 | =item [{gem} : Germanic (Other)] |
734 | |
735 | =item {gil} : Gilbertese |
736 | |
737 | =item {gon} : Gondi |
738 | |
739 | =item {gor} : Gorontalo |
740 | |
741 | =item {got} : Gothic |
742 | |
743 | (Historical) |
744 | |
745 | =item {grb} : Grebo |
746 | |
4cf5bee0 |
747 | =item {grc} : Ancient Greek |
21aeefd5 |
748 | |
4cf5bee0 |
749 | (Historical) (Until 15th century or so.) |
750 | |
751 | =item {el} : Modern Greek |
21aeefd5 |
752 | |
4cf5bee0 |
753 | (Since 15th century or so.) |
21aeefd5 |
754 | |
755 | =item {gn} : Guarani |
756 | |
757 | GuaranE<iacute> |
758 | |
759 | =item {gu} : Gujarati |
760 | |
761 | =item {gwi} : Gwich'in |
762 | |
763 | eq Gwichin |
764 | |
765 | =item {hai} : Haida |
766 | |
aaf52a42 |
767 | =item {ht} : Haitian |
768 | |
769 | eq Haitian Creole |
770 | |
21aeefd5 |
771 | =item {ha} : Hausa |
772 | |
773 | =item {haw} : Hawaiian |
774 | |
775 | Hawai'ian |
776 | |
777 | =item {he} : Hebrew |
778 | |
779 | (Formerly "iw".) |
780 | |
781 | =for etc |
782 | {iw} Hebrew (old tag) |
783 | |
784 | =item {hz} : Herero |
785 | |
786 | =item {hil} : Hiligaynon |
787 | |
788 | =item {him} : Himachali |
789 | |
790 | =item {hi} : Hindi |
791 | |
792 | =item {ho} : Hiri Motu |
793 | |
794 | =item {hit} : Hittite |
795 | |
796 | (Historical) |
797 | |
798 | =item {hmn} : Hmong |
799 | |
800 | =item {hu} : Hungarian |
801 | |
802 | =item {hup} : Hupa |
803 | |
804 | =item {iba} : Iban |
805 | |
806 | =item {is} : Icelandic |
807 | |
aaf52a42 |
808 | =item {io} : Ido |
809 | |
810 | (Artificial) |
811 | |
812 | =item {ig} : Igbo |
813 | |
814 | (Formerly "ibo".) |
21aeefd5 |
815 | |
816 | =item {ijo} : Ijo |
817 | |
818 | =item {ilo} : Iloko |
819 | |
820 | =item [{inc} : Indic (Other)] |
821 | |
822 | =item [{ine} : Indo-European (Other)] |
823 | |
824 | =item {id} : Indonesian |
825 | |
826 | (Formerly "in".) |
827 | |
828 | =for etc |
829 | {in} Indonesian (old tag) |
830 | |
aaf52a42 |
831 | =item {inh} : Ingush |
832 | |
21aeefd5 |
833 | =item {ia} : Interlingua (International Auxiliary Language Association) |
834 | |
835 | (Artificial) NOT Interlingue! |
836 | |
837 | =item {ie} : Interlingue |
838 | |
839 | (Artificial) NOT Interlingua! |
840 | |
841 | =item {iu} : Inuktitut |
842 | |
843 | A subform of "Eskimo". |
844 | |
845 | =item {ik} : Inupiaq |
846 | |
847 | A subform of "Eskimo". |
848 | |
849 | =item [{ira} : Iranian (Other)] |
850 | |
851 | =item {ga} : Irish |
852 | |
853 | =item {mga} : Middle Irish (900-1200) |
854 | |
855 | (Historical) |
856 | |
857 | =item {sga} : Old Irish (to 900) |
858 | |
859 | (Historical) |
860 | |
861 | =item [{iro} : Iroquoian languages] |
862 | |
863 | =item {it} : Italian |
864 | |
865 | Notable forms: |
866 | {it-it} Italy Italian; |
867 | {it-ch} Swiss Italian. |
868 | |
869 | =item {ja} : Japanese |
870 | |
871 | (NOT "jp"!) |
872 | |
aaf52a42 |
873 | =item {jv} : Javanese |
874 | |
875 | (Formerly "jw" because of a typo.) |
21aeefd5 |
876 | |
877 | =item {jrb} : Judeo-Arabic |
878 | |
879 | =item {jpr} : Judeo-Persian |
880 | |
aaf52a42 |
881 | =item {kbd} : Kabardian |
882 | |
21aeefd5 |
883 | =item {kab} : Kabyle |
884 | |
885 | =item {kac} : Kachin |
886 | |
887 | =item {kl} : Kalaallisut |
888 | |
889 | eq Greenlandic "Eskimo" |
890 | |
aaf52a42 |
891 | =item {xal} : Kalmyk |
892 | |
21aeefd5 |
893 | =item {kam} : Kamba |
894 | |
895 | =item {kn} : Kannada |
896 | |
897 | eq Kanarese. NOT Canadian! |
898 | |
aaf52a42 |
899 | =item {kr} : Kanuri |
900 | |
901 | (Formerly "kau".) |
902 | |
903 | =item {krc} : Karachay-Balkar |
21aeefd5 |
904 | |
905 | =item {kaa} : Kara-Kalpak |
906 | |
907 | =item {kar} : Karen |
908 | |
909 | =item {ks} : Kashmiri |
910 | |
aaf52a42 |
911 | =item {csb} : Kashubian |
912 | |
913 | eq Kashub |
914 | |
21aeefd5 |
915 | =item {kaw} : Kawi |
916 | |
917 | =item {kk} : Kazakh |
918 | |
919 | =item {kha} : Khasi |
920 | |
921 | =item {km} : Khmer |
922 | |
923 | eq Cambodian. eq Kampuchean. |
924 | |
925 | =item [{khi} : Khoisan (Other)] |
926 | |
927 | =item {kho} : Khotanese |
928 | |
929 | =item {ki} : Kikuyu |
930 | |
931 | eq Gikuyu. |
932 | |
933 | =item {kmb} : Kimbundu |
934 | |
935 | =item {rw} : Kinyarwanda |
936 | |
937 | =item {ky} : Kirghiz |
938 | |
939 | =item {i-klingon} : Klingon |
940 | |
941 | =item {kv} : Komi |
942 | |
aaf52a42 |
943 | =item {kg} : Kongo |
944 | |
945 | (Formerly "kon".) |
21aeefd5 |
946 | |
947 | =item {kok} : Konkani |
948 | |
949 | =item {ko} : Korean |
950 | |
951 | =item {kos} : Kosraean |
952 | |
953 | =item {kpe} : Kpelle |
954 | |
955 | =item {kro} : Kru |
956 | |
957 | =item {kj} : Kuanyama |
958 | |
959 | =item {kum} : Kumyk |
960 | |
961 | =item {ku} : Kurdish |
962 | |
963 | =item {kru} : Kurukh |
964 | |
965 | =item {kut} : Kutenai |
966 | |
967 | =item {lad} : Ladino |
968 | |
969 | eq Judeo-Spanish. NOT Ladin (a minority language in Italy). |
970 | |
971 | =item {lah} : Lahnda |
972 | |
973 | NOT Lamba! |
974 | |
975 | =item {lam} : Lamba |
976 | |
977 | NOT Lahnda! |
978 | |
979 | =item {lo} : Lao |
980 | |
981 | eq Laotian. |
982 | |
983 | =item {la} : Latin |
984 | |
985 | (Historical) NOT Ladin! NOT Ladino! |
986 | |
987 | =item {lv} : Latvian |
988 | |
989 | eq Lettish. |
990 | |
991 | =item {lb} : Letzeburgesch |
992 | |
aaf52a42 |
993 | eq Luxemburgian, eq Luxemburger. (Formerly "i-lux".) |
21aeefd5 |
994 | |
995 | =for etc |
996 | {i-lux} Letzeburgesch (old tag) |
997 | |
998 | =item {lez} : Lezghian |
999 | |
aaf52a42 |
1000 | =item {li} : Limburgish |
1001 | |
1002 | eq Limburger, eq Limburgan. NOT Letzeburgesch! |
1003 | |
21aeefd5 |
1004 | =item {ln} : Lingala |
1005 | |
1006 | =item {lt} : Lithuanian |
1007 | |
1008 | =item {nds} : Low German |
1009 | |
1010 | eq Low Saxon. eq Low German. eq Low Saxon. |
1011 | |
aaf52a42 |
1012 | =item {art-lojban} : Lojban (Artificial) |
1013 | |
21aeefd5 |
1014 | =item {loz} : Lozi |
1015 | |
aaf52a42 |
1016 | =item {lu} : Luba-Katanga |
1017 | |
1018 | (Formerly "lub".) |
21aeefd5 |
1019 | |
1020 | =item {lua} : Luba-Lulua |
1021 | |
1022 | =item {lui} : Luiseno |
1023 | |
1024 | eq LuiseE<ntilde>o. |
1025 | |
1026 | =item {lun} : Lunda |
1027 | |
1028 | =item {luo} : Luo (Kenya and Tanzania) |
1029 | |
1030 | =item {lus} : Lushai |
1031 | |
1032 | =item {mk} : Macedonian |
1033 | |
1034 | eq the modern Slavic language spoken in what was Yugoslavia. |
1035 | NOT the form of Greek spoken in Greek Macedonia! |
1036 | |
1037 | =item {mad} : Madurese |
1038 | |
1039 | =item {mag} : Magahi |
1040 | |
1041 | =item {mai} : Maithili |
1042 | |
1043 | =item {mak} : Makasar |
1044 | |
1045 | =item {mg} : Malagasy |
1046 | |
1047 | =item {ms} : Malay |
1048 | |
1049 | NOT Malayalam! |
1050 | |
1051 | =item {ml} : Malayalam |
1052 | |
1053 | NOT Malay! |
1054 | |
1055 | =item {mt} : Maltese |
1056 | |
1057 | =item {mnc} : Manchu |
1058 | |
1059 | =item {mdr} : Mandar |
1060 | |
1061 | NOT Mandarin! |
1062 | |
1063 | =item {man} : Mandingo |
1064 | |
1065 | =item {mni} : Manipuri |
1066 | |
1067 | eq Meithei. |
1068 | |
1069 | =item [{mno} : Manobo languages] |
1070 | |
1071 | =item {gv} : Manx |
1072 | |
1073 | =item {mi} : Maori |
1074 | |
1075 | NOT Mari! |
1076 | |
1077 | =item {mr} : Marathi |
1078 | |
1079 | =item {chm} : Mari |
1080 | |
1081 | NOT Maori! |
1082 | |
1083 | =item {mh} : Marshall |
1084 | |
1085 | eq Marshallese. |
1086 | |
1087 | =item {mwr} : Marwari |
1088 | |
1089 | =item {mas} : Masai |
1090 | |
1091 | =item [{myn} : Mayan languages] |
1092 | |
1093 | =item {men} : Mende |
1094 | |
1095 | =item {mic} : Micmac |
1096 | |
1097 | =item {min} : Minangkabau |
1098 | |
1099 | =item {i-mingo} : Mingo |
1100 | |
1101 | eq the Iroquoian language West Virginia Seneca. NOT New York Seneca! |
1102 | |
1103 | =item [{mis} : Miscellaneous languages] |
1104 | |
1105 | Don't use this. |
1106 | |
1107 | =item {moh} : Mohawk |
1108 | |
aaf52a42 |
1109 | =item {mdf} : Moksha |
1110 | |
21aeefd5 |
1111 | =item {mo} : Moldavian |
1112 | |
1113 | eq Moldovan. |
1114 | |
1115 | =item [{mkh} : Mon-Khmer (Other)] |
1116 | |
1117 | =item {lol} : Mongo |
1118 | |
1119 | =item {mn} : Mongolian |
1120 | |
1121 | eq Mongol. |
1122 | |
1123 | =item {mos} : Mossi |
1124 | |
1125 | =item [{mul} : Multiple languages] |
1126 | |
1127 | Not for normal use. |
1128 | |
1129 | =item [{mun} : Munda languages] |
1130 | |
1131 | =item {nah} : Nahuatl |
1132 | |
aaf52a42 |
1133 | =item {nap} : Neapolitan |
1134 | |
21aeefd5 |
1135 | =item {na} : Nauru |
1136 | |
1137 | =item {nv} : Navajo |
1138 | |
aaf52a42 |
1139 | eq Navaho. (Formerly "i-navajo".) |
21aeefd5 |
1140 | |
1141 | =for etc |
1142 | {i-navajo} Navajo (old tag) |
1143 | |
1144 | =item {nd} : North Ndebele |
1145 | |
1146 | =item {nr} : South Ndebele |
1147 | |
1148 | =item {ng} : Ndonga |
1149 | |
1150 | =item {ne} : Nepali |
1151 | |
1152 | eq Nepalese. Notable forms: |
1153 | {ne-np} Nepal Nepali; |
1154 | {ne-in} India Nepali. |
1155 | |
1156 | =item {new} : Newari |
1157 | |
1158 | =item {nia} : Nias |
1159 | |
1160 | =item [{nic} : Niger-Kordofanian (Other)] |
1161 | |
1162 | =item [{ssa} : Nilo-Saharan (Other)] |
1163 | |
1164 | =item {niu} : Niuean |
1165 | |
aaf52a42 |
1166 | =item {nog} : Nogai |
1167 | |
21aeefd5 |
1168 | =item {non} : Old Norse |
1169 | |
1170 | (Historical) |
1171 | |
1172 | =item [{nai} : North American Indian] |
1173 | |
1174 | Do not use this. |
1175 | |
21aeefd5 |
1176 | =item {no} : Norwegian |
1177 | |
1178 | Note the two following forms: |
1179 | |
1180 | =item {nb} : Norwegian Bokmal |
1181 | |
aaf52a42 |
1182 | eq BokmE<aring>l, (A form of Norwegian.) (Formerly "no-bok".) |
21aeefd5 |
1183 | |
1184 | =for etc |
1185 | {no-bok} Norwegian Bokmal (old tag) |
1186 | |
1187 | =item {nn} : Norwegian Nynorsk |
1188 | |
aaf52a42 |
1189 | (A form of Norwegian.) (Formerly "no-nyn".) |
21aeefd5 |
1190 | |
1191 | =for etc |
1192 | {no-nyn} Norwegian Nynorsk (old tag) |
1193 | |
1194 | =item [{nub} : Nubian languages] |
1195 | |
1196 | =item {nym} : Nyamwezi |
1197 | |
1198 | =item {nyn} : Nyankole |
1199 | |
1200 | =item {nyo} : Nyoro |
1201 | |
1202 | =item {nzi} : Nzima |
1203 | |
1204 | =item {oc} : Occitan (post 1500) |
1205 | |
1206 | eq ProvenE<ccedil>al, eq Provencal |
1207 | |
aaf52a42 |
1208 | =item {oj} : Ojibwa |
21aeefd5 |
1209 | |
aaf52a42 |
1210 | eq Ojibwe. (Formerly "oji".) |
21aeefd5 |
1211 | |
1212 | =item {or} : Oriya |
1213 | |
1214 | =item {om} : Oromo |
1215 | |
1216 | =item {osa} : Osage |
1217 | |
1218 | =item {os} : Ossetian; Ossetic |
1219 | |
1220 | =item [{oto} : Otomian languages] |
1221 | |
1222 | Group of languages collectively called "OtomE<iacute>". |
1223 | |
1224 | =item {pal} : Pahlavi |
1225 | |
1226 | eq Pahlevi |
1227 | |
1228 | =item {i-pwn} : Paiwan |
1229 | |
1230 | eq Pariwan |
1231 | |
1232 | =item {pau} : Palauan |
1233 | |
1234 | =item {pi} : Pali |
1235 | |
1236 | (Historical?) |
1237 | |
1238 | =item {pam} : Pampanga |
1239 | |
1240 | =item {pag} : Pangasinan |
1241 | |
1242 | =item {pa} : Panjabi |
1243 | |
1244 | eq Punjabi |
1245 | |
1246 | =item {pap} : Papiamento |
1247 | |
1248 | eq Papiamentu. |
1249 | |
1250 | =item [{paa} : Papuan (Other)] |
1251 | |
1252 | =item {fa} : Persian |
1253 | |
1254 | eq Farsi. eq Iranian. |
1255 | |
1256 | =item {peo} : Old Persian (ca.600-400 B.C.) |
1257 | |
1258 | =item [{phi} : Philippine (Other)] |
1259 | |
1260 | =item {phn} : Phoenician |
1261 | |
1262 | (Historical) |
1263 | |
1264 | =item {pon} : Pohnpeian |
1265 | |
1266 | NOT Pompeiian! |
1267 | |
1268 | =item {pl} : Polish |
1269 | |
1270 | =item {pt} : Portuguese |
1271 | |
1272 | eq Portugese. Notable forms: |
1273 | {pt-pt} Portugal Portuguese; |
1274 | {pt-br} Brazilian Portuguese. |
1275 | |
1276 | =item [{pra} : Prakrit languages] |
1277 | |
1278 | =item {pro} : Old Provencal (to 1500) |
1279 | |
1280 | eq Old ProvenE<ccedil>al. (Historical.) |
1281 | |
1282 | =item {ps} : Pushto |
1283 | |
1284 | eq Pashto. eq Pushtu. |
1285 | |
1286 | =item {qu} : Quechua |
1287 | |
1288 | eq Quecha. |
1289 | |
1290 | =item {rm} : Raeto-Romance |
1291 | |
1292 | eq Romansh. |
1293 | |
1294 | =item {raj} : Rajasthani |
1295 | |
1296 | =item {rap} : Rapanui |
1297 | |
1298 | =item {rar} : Rarotongan |
1299 | |
1300 | =item [{qaa - qtz} : Reserved for local use.] |
1301 | |
1302 | =item [{roa} : Romance (Other)] |
1303 | |
1304 | NOT Romanian! NOT Romany! NOT Romansh! |
1305 | |
1306 | =item {ro} : Romanian |
1307 | |
1308 | eq Rumanian. NOT Romany! |
1309 | |
1310 | =item {rom} : Romany |
1311 | |
1312 | eq Rom. NOT Romanian! |
1313 | |
1314 | =item {rn} : Rundi |
1315 | |
1316 | =item {ru} : Russian |
1317 | |
1318 | NOT White Russian! NOT Rusyn! |
1319 | |
1320 | =item [{sal} : Salishan languages] |
1321 | |
1322 | Large language group. |
1323 | |
1324 | =item {sam} : Samaritan Aramaic |
1325 | |
1326 | NOT Aramaic! |
1327 | |
aaf52a42 |
1328 | =item {se} : Northern Sami |
1329 | |
1330 | eq Lappish. eq Lapp. eq (Northern) Saami. |
1331 | |
1332 | =item {sma} : Southern Sami |
1333 | |
1334 | =item {smn} : Inari Sami |
1335 | |
1336 | =item {smj} : Lule Sami |
1337 | |
1338 | =item {sms} : Skolt Sami |
1339 | |
21aeefd5 |
1340 | =item [{smi} : Sami languages (Other)] |
1341 | |
1342 | =item {sm} : Samoan |
1343 | |
1344 | =item {sad} : Sandawe |
1345 | |
1346 | =item {sg} : Sango |
1347 | |
1348 | =item {sa} : Sanskrit |
1349 | |
1350 | (Historical) |
1351 | |
1352 | =item {sat} : Santali |
1353 | |
1354 | =item {sc} : Sardinian |
1355 | |
1356 | eq Sard. |
1357 | |
1358 | =item {sas} : Sasak |
1359 | |
1360 | =item {sco} : Scots |
1361 | |
1362 | NOT Scots Gaelic! |
1363 | |
1364 | =item {sel} : Selkup |
1365 | |
1366 | =item [{sem} : Semitic (Other)] |
1367 | |
1368 | =item {sr} : Serbian |
1369 | |
1370 | eq Serb. NOT Sorbian. |
1371 | |
aaf52a42 |
1372 | Notable forms: |
1373 | {sr-Cyrl} : Serbian in Cyrillic script; |
1374 | {sr-Latn} : Serbian in Latin script. |
1375 | |
21aeefd5 |
1376 | =item {srr} : Serer |
1377 | |
1378 | =item {shn} : Shan |
1379 | |
1380 | =item {sn} : Shona |
1381 | |
1382 | =item {sid} : Sidamo |
1383 | |
1384 | =item {sgn-...} : Sign Languages |
1385 | |
1386 | Always use with a subtag. Notable forms: |
1387 | {sgn-gb} British Sign Language (BSL); |
1388 | {sgn-ie} Irish Sign Language (ESL); |
1389 | {sgn-ni} Nicaraguan Sign Language (ISN); |
1390 | {sgn-us} American Sign Language (ASL). |
1391 | |
aaf52a42 |
1392 | (And so on with other country codes as the subtag.) |
1393 | |
21aeefd5 |
1394 | =item {bla} : Siksika |
1395 | |
1396 | eq Blackfoot. eq Pikanii. |
1397 | |
1398 | =item {sd} : Sindhi |
1399 | |
1400 | =item {si} : Sinhalese |
1401 | |
1402 | eq Sinhala. |
1403 | |
1404 | =item [{sit} : Sino-Tibetan (Other)] |
1405 | |
1406 | =item [{sio} : Siouan languages] |
1407 | |
1408 | =item {den} : Slave (Athapascan) |
1409 | |
1410 | ("Slavey" is a subform.) |
1411 | |
1412 | =item [{sla} : Slavic (Other)] |
1413 | |
1414 | =item {sk} : Slovak |
1415 | |
1416 | eq Slovakian. |
1417 | |
1418 | =item {sl} : Slovenian |
1419 | |
1420 | eq Slovene. |
1421 | |
1422 | =item {sog} : Sogdian |
1423 | |
1424 | =item {so} : Somali |
1425 | |
1426 | =item {son} : Songhai |
1427 | |
1428 | =item {snk} : Soninke |
1429 | |
1430 | =item {wen} : Sorbian languages |
1431 | |
1432 | eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! |
1433 | |
1434 | =item {nso} : Northern Sotho |
1435 | |
1436 | =item {st} : Southern Sotho |
1437 | |
1438 | eq Sutu. eq Sesotho. |
1439 | |
1440 | =item [{sai} : South American Indian (Other)] |
1441 | |
1442 | =item {es} : Spanish |
1443 | |
1444 | Notable forms: |
1445 | {es-ar} Argentine Spanish; |
1446 | {es-bo} Bolivian Spanish; |
1447 | {es-cl} Chilean Spanish; |
1448 | {es-co} Colombian Spanish; |
1449 | {es-do} Dominican Spanish; |
1450 | {es-ec} Ecuadorian Spanish; |
1451 | {es-es} Spain Spanish; |
1452 | {es-gt} Guatemalan Spanish; |
1453 | {es-hn} Honduran Spanish; |
1454 | {es-mx} Mexican Spanish; |
1455 | {es-pa} Panamanian Spanish; |
1456 | {es-pe} Peruvian Spanish; |
1457 | {es-pr} Puerto Rican Spanish; |
1458 | {es-py} Paraguay Spanish; |
1459 | {es-sv} Salvadoran Spanish; |
1460 | {es-us} US Spanish; |
1461 | {es-uy} Uruguayan Spanish; |
1462 | {es-ve} Venezuelan Spanish. |
1463 | |
1464 | =item {suk} : Sukuma |
1465 | |
1466 | =item {sux} : Sumerian |
1467 | |
1468 | (Historical) |
1469 | |
1470 | =item {su} : Sundanese |
1471 | |
1472 | =item {sus} : Susu |
1473 | |
1474 | =item {sw} : Swahili |
1475 | |
1476 | eq Kiswahili |
1477 | |
1478 | =item {ss} : Swati |
1479 | |
1480 | =item {sv} : Swedish |
1481 | |
1482 | Notable forms: |
483dd220 |
1483 | {sv-se} Sweden Swedish; |
1484 | {sv-fi} Finland Swedish. |
21aeefd5 |
1485 | |
1486 | =item {syr} : Syriac |
1487 | |
1488 | =item {tl} : Tagalog |
1489 | |
1490 | =item {ty} : Tahitian |
1491 | |
1492 | =item [{tai} : Tai (Other)] |
1493 | |
1494 | NOT Thai! |
1495 | |
1496 | =item {tg} : Tajik |
1497 | |
1498 | =item {tmh} : Tamashek |
1499 | |
1500 | =item {ta} : Tamil |
1501 | |
1502 | =item {i-tao} : Tao |
1503 | |
1504 | eq Yami. |
1505 | |
1506 | =item {tt} : Tatar |
1507 | |
1508 | =item {i-tay} : Tayal |
1509 | |
1510 | eq Atayal. eq Atayan. |
1511 | |
1512 | =item {te} : Telugu |
1513 | |
1514 | =item {ter} : Tereno |
1515 | |
1516 | =item {tet} : Tetum |
1517 | |
1518 | =item {th} : Thai |
1519 | |
1520 | NOT Tai! |
1521 | |
1522 | =item {bo} : Tibetan |
1523 | |
1524 | =item {tig} : Tigre |
1525 | |
1526 | =item {ti} : Tigrinya |
1527 | |
1528 | =item {tem} : Timne |
1529 | |
1530 | eq Themne. eq Timene. |
1531 | |
1532 | =item {tiv} : Tiv |
1533 | |
1534 | =item {tli} : Tlingit |
1535 | |
1536 | =item {tpi} : Tok Pisin |
1537 | |
1538 | =item {tkl} : Tokelau |
1539 | |
1540 | =item {tog} : Tonga (Nyasa) |
1541 | |
1542 | NOT Tsonga! |
1543 | |
1544 | =item {to} : Tonga (Tonga Islands) |
1545 | |
1546 | (Pronounced "Tong-a", not "Tong-ga") |
1547 | |
1548 | NOT Tsonga! |
1549 | |
1550 | =item {tsi} : Tsimshian |
1551 | |
1552 | eq Sm'algyax |
1553 | |
1554 | =item {ts} : Tsonga |
1555 | |
1556 | NOT Tonga! |
1557 | |
1558 | =item {i-tsu} : Tsou |
1559 | |
1560 | =item {tn} : Tswana |
1561 | |
1562 | eq Setswana.
1563 | |
1564 | =item {tum} : Tumbuka |
1565 | |
aaf52a42 |
1566 | =item [{tup} : Tupi languages] |
1567 | |
21aeefd5 |
1568 | =item {tr} : Turkish |
1569 | |
1570 | (Typically in Roman script) |
1571 | |
1572 | =item {ota} : Ottoman Turkish (1500-1928) |
1573 | |
1574 | (Typically in Arabic script) (Historical) |
1575 | |
aaf52a42 |
1576 | =item {crh} : Crimean Turkish |
1577 | |
1578 | eq Crimean Tatar |
1579 | |
21aeefd5 |
1580 | =item {tk} : Turkmen |
1581 | |
1582 | eq Turkmeni. |
1583 | |
1584 | =item {tvl} : Tuvalu |
1585 | |
1586 | =item {tyv} : Tuvinian |
1587 | |
1588 | eq Tuvan. eq Tuvin. |
1589 | |
1590 | =item {tw} : Twi |
1591 | |
aaf52a42 |
1592 | =item {udm} : Udmurt |
1593 | |
21aeefd5 |
1594 | =item {uga} : Ugaritic |
1595 | |
1596 | NOT Ugric! |
1597 | |
1598 | =item {ug} : Uighur |
1599 | |
1600 | =item {uk} : Ukrainian |
1601 | |
1602 | =item {umb} : Umbundu |
1603 | |
1604 | =item {und} : Undetermined |
1605 | |
1606 | Not a tag for normal use. |
1607 | |
1608 | =item {ur} : Urdu |
1609 | |
1610 | =item {uz} : Uzbek |
1611 | |
1612 | eq E<Ouml>zbek |
1613 | |
aaf52a42 |
1614 | Notable forms: |
1615 | {uz-Cyrl} Uzbek in Cyrillic script; |
1616 | {uz-Latn} Uzbek in Latin script. |
1617 | |
21aeefd5 |
1618 | =item {vai} : Vai |
1619 | |
aaf52a42 |
1620 | =item {ve} : Venda |
21aeefd5 |
1621 | |
aaf52a42 |
1622 | NOT Wendish! NOT Wend! NOT Avestan! (Formerly "ven".) |
21aeefd5 |
1623 | |
1624 | =item {vi} : Vietnamese |
1625 | |
1626 | eq Viet. |
1627 | |
1628 | =item {vo} : Volapuk |
1629 | |
1630 | eq VolapE<uuml>k. (Artificial) |
1631 | |
1632 | =item {vot} : Votic |
1633 | |
1634 | eq Votian. eq Vod. |
1635 | |
1636 | =item [{wak} : Wakashan languages] |
1637 | |
aaf52a42 |
1638 | =item {wa} : Walloon |
1639 | |
21aeefd5 |
1640 | =item {wal} : Walamo |
1641 | |
1642 | eq Wolaytta. |
1643 | |
1644 | =item {war} : Waray |
1645 | |
1646 | Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), |
1647 | not the smaller Philippine language Waray Sorsogon, nor the extinct |
1648 | Australian language Waray. |
1649 | |
1650 | =item {was} : Washo |
1651 | |
1652 | eq Washoe |
1653 | |
1654 | =item {cy} : Welsh |
1655 | |
1656 | =item {wo} : Wolof |
1657 | |
1658 | =item {x-...} : Unregistered (Semi-Private Use) |
1659 | |
1660 | "x-" is a prefix for language tags that are not registered with ISO |
1661 | or IANA. For example: x-double-dutch.
1662 | |
1663 | =item {xh} : Xhosa |
1664 | |
1665 | =item {sah} : Yakut |
1666 | |
1667 | =item {yao} : Yao |
1668 | |
1669 | (The Yao in Malawi?) |
1670 | |
1671 | =item {yap} : Yapese |
1672 | |
1673 | eq Yap |
1674 | |
aaf52a42 |
1675 | =item {ii} : Sichuan Yi |
1676 | |
21aeefd5 |
1677 | =item {yi} : Yiddish |
1678 | |
aaf52a42 |
1679 | Formerly "ji". Usually in Hebrew script. |
21aeefd5 |
1680 | |
aaf52a42 |
1681 | Notable forms: |
1682 | {yi-latn} Yiddish in Latin script.
21aeefd5 |
1683 | |
1684 | =item {yo} : Yoruba |
1685 | |
1686 | =item [{ypk} : Yupik languages] |
1687 | |
1688 | Several "Eskimo" languages. |
1689 | |
1690 | =item {znd} : Zande |
1691 | |
1692 | =item [{zap} : Zapotec] |
1693 | |
1694 | (A group of languages.) |
1695 | |
1696 | =item {zen} : Zenaga |
1697 | |
1698 | NOT Zend. |
1699 | |
1700 | =item {za} : Zhuang |
1701 | |
1702 | =item {zu} : Zulu |
1703 | |
1704 | =item {zun} : Zuni |
1705 | |
1706 | eq ZuE<ntilde>i |
1707 | |
1708 | =back |
1709 | |
1710 | =for woohah END |
1711 | |
1712 | =head1 SEE ALSO |
1713 | |
1714 | L<I18N::LangTags|I18N::LangTags> and its "See Also" section. |
1715 | |
1716 | =head1 COPYRIGHT AND DISCLAIMER |
1717 | |
77b20956 |
1718 | Copyright (c) 2001+ Sean M. Burke. All rights reserved. |
21aeefd5 |
1719 | |
1720 | You can redistribute and/or |
1721 | modify this document under the same terms as Perl itself. |
1722 | |
d1be9408 |
1723 | This document is provided in the hope that it will be |
21aeefd5 |
1724 | useful, but without any warranty; |
1725 | without even the implied warranty of accuracy, authoritativeness, |
1726 | completeness, merchantability, or fitness for a particular purpose. |
1727 | |
1728 | Email any corrections or questions to me. |
1729 | |
1730 | =head1 AUTHOR |
1731 | |
1732 | Sean M. Burke, sburkeE<64>cpan.org |
1733 | |
1734 | =cut |
1735 | |
1736 | |
1737 | # To generate a list of just the two- and three-letter codes:
1738 | |
1739 | #!/usr/local/bin/perl -w
1740 | # Standalone helper: fetches the code table at $url, then prints a name-sorted, tab-separated code list.
1741 | require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
1742 | # Sean M. Burke, sburke@cpan.org
1743 | # This program is for generating the language_codes.txt file (the list itself is written with plain print)
1744 | use strict;
1745 | use LWP::Simple;
1746 | use HTML::TreeBuilder 3.10;
1747 | my $root = HTML::TreeBuilder->new();
1748 | my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
1749 | $root->parse(get($url) || die "Can't get $url"); # die when get() hands back a false value
1750 | $root->eof();
1751 | # NOTE(review): this text presumably sits in the DATA stream that the module's tag loader scans; avoid brace-wrapped words here - confirm
1752 | my @codes; # each entry is a pair: [ lowercased name used as the sort key, finished output line ]
1753 | # Visit every tr element; only rows that yield exactly five text fields are treated as data rows.
1754 | foreach my $tr ($root->find_by_tag_name('tr')) {
1755 | my @f = map $_->as_text(), $tr->content_list();
1756 | #print map("<$_> ", @f), "\n";
1757 | next unless @f == 5; # skip rows that do not have exactly five fields
1758 | pop @f; # nix the French name
1759 | next if $f[-1] eq 'Language Name (English)'; # it's a header line
1760 | my $xx = splice(@f, 2,1); # pull out the two-letter code
1761 | $f[-1] =~ s/^\s+//; # trim leading whitespace from the last remaining field (the English name)
1762 | $f[-1] =~ s/\s+$//; # ...and trailing whitespace
1763 | if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it
1764 | push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ];
1765 | } else { # print the three-letter codes.
1766 | if($f[0] eq $f[1]) { # NOTE(review): presumably the two three-letter code columns; confirm against the page layout
1767 | push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
1768 | } else { # shouldn't happen
1769 | push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; # keep the row but flag it visibly in the output
1770 | }
1771 | }
1772 | }
1773 | # Emit the collected lines ordered by their lowercased-name key, followed by a provenance footer.
1774 | print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
1775 | print "[ based on $url\n at ", scalar(localtime), "]\n",
1776 | "[Note: doesn't include IANA-registered codes.]\n";
1777 | exit;
1778 | __END__
1779 | |