Commit | Line | Data |
21aeefd5 |
1 | |
2 | require 5; |
3 | package I18N::LangTags::List; |
483dd220 |
4 | # Time-stamp: "2002-02-02 20:13:58 MST" |
21aeefd5 |
5 | use strict; |
6 | use vars qw(%Name $Debug $VERSION); |
483dd220 |
7 | $VERSION = '0.25'; |
21aeefd5 |
8 | # POD at the end. |
9 | |
10 | #---------------------------------------------------------------------- |
{
  # Populate %Name by reading the tag table out of this file's own POD,
  # which sits after __DATA__.  Everything up to the "=for woohah" marker
  # line is skipped; after it, every line that contains a "{tag}" followed
  # by a name contributes one tag => name entry.
  #
  # Dies with "No tags read??" if no entry at all was found.
  my $seeking = 1;   # true until the "=for woohah" marker has been seen
  my $count   = 0;   # number of entries stored
  my($tag, $name);

  # Read into a lexical instead of the global $_.  This block runs while
  # the module is being loaded, and a bare while(<FH>) does not localize
  # $_, so it would overwrite the $_ of whatever code triggered the load.
  while(defined(my $line = <I18N::LangTags::List::DATA>)) {
    if($seeking) {
      $seeking = 0 if $line =~ m/=for woohah/;
    } else {
      # "{tag}", an optional ":", then the name -- which stops at the
      # first square bracket, so bracketed remarks are not captured.
      next unless ($tag, $name) =
        $line =~ m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;
      # Drop trailing whitespace and any trailing ";" or "." punctuation.
      $name =~ s/\s*[;\.]*\s*$//g;
      next unless $name;
      ++$count;
      print "<$tag> <$name>\n" if $Debug;
      $Name{$tag} = $name;
    }
  }

  # The table has been read; release the handle.
  close(I18N::LangTags::List::DATA);

  die "No tags read??" unless $count;
}
31 | #---------------------------------------------------------------------- |
32 | |
# name( $langtag )
#
# Look up an English name for a language tag in %Name.
#
# The tag is lowercased and stripped of surrounding whitespace.  If it is
# not in the table as given, trailing "-subtag" pieces are removed one at
# a time until a known tag is reached.  A tag starting with "x-" is also
# tried with "i-" in its place, and vice versa.
#
# Returns:
#   * nothing (a bare return) if the argument is false or no name is found;
#   * the name alone when the lookup hit without removing any subtags;
#   * 'NAME (Subform "removed-subtags")' otherwise.
#
# When $Debug is true, progress is printed to the selected output handle.
sub name {
  my $wanted = lc($_[0] || return);
  $wanted =~ s/^\s+//s;
  $wanted =~ s/\s+$//s;

  # The same tag under the other private/IANA prefix, or '' if the tag
  # carries neither prefix.
  my $twin = $wanted =~ m/^x-(.+)/ ? "i-$1"
           : $wanted =~ m/^i-(.+)/ ? "x-$1"
           :                         '';

  my $found  = '';   # name located in %Name, if any
  my $shaved = '';   # subtags removed so far, each with its leading "-"
  print "Input: {$wanted}\n" if $Debug;

  while(length $wanted) {
    $found = $Name{$wanted};
    last if $found;
    $found = $Name{$twin};
    last if $found;

    if($wanted =~ s/(-[a-z0-9]+)$//s) {
      my $piece = $1;
      print "Shaving off: $piece leaving $wanted\n" if $Debug;
      $shaved = "$piece$shaved";

      # Keep the twin in step with the main tag.
      if($twin =~ s/(-[a-z0-9]+)$//s) {
        print " alt -> $twin\n" if $Debug;
      }
    } else {
      # Nothing left to remove from a primary tag: give up.
      print "Aborting on: {$found}{$shaved}\n" if $Debug;
      last;
    }
  }
  print "Output: {$found}{$shaved}\n" if $Debug;

  return unless $found;           # no name at all
  return $found unless $shaved;   # the tag itself was in the table

  $shaved =~ s/^-//s;
  $shaved =~ s/-$//s;
  return "$found (Subform \"$shaved\")";
}
73 | |
74 | 1; |
75 | |
76 | __DATA__ |
77 | |
78 | =head1 NAME |
79 | |
80 | I18N::LangTags::List -- tags and names for human languages |
81 | |
82 | =head1 SYNOPSIS |
83 | |
84 | use I18N::LangTags::List; |
85 | print "Parlez-vous... ", join(', ', |
86 | I18N::LangTags::List::name('elx') || 'unknown_language', |
87 | I18N::LangTags::List::name('ar-Kw') || 'unknown_language', |
88 | I18N::LangTags::List::name('en') || 'unknown_language', |
89 | I18N::LangTags::List::name('en-CA') || 'unknown_language', |
90 | ), "?\n"; |
91 | |
92 | prints: |
93 | |
94 | Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English? |
95 | |
96 | =head1 DESCRIPTION |
97 | |
98 | This module provides a function |
99 | C<I18N::LangTags::List::name( I<langtag> ) > that takes |
100 | a language tag (see L<I18N::LangTags|I18N::LangTags>) |
101 | and returns the best attempt at an English name for it, or |
102 | undef if it can't make sense of the tag. |
103 | |
104 | The function I18N::LangTags::List::name(...) is not exported. |
105 | |
106 | The map of tags-to-names that it uses is accessible as |
107 | %I18N::LangTags::List::Name, and it's the same as the list |
108 | that follows in this documentation, which should be useful |
109 | to you even if you don't use this module. |
110 | |
111 | =head1 ABOUT LANGUAGE TAGS |
112 | |
113 | Internet language tags, as defined in RFC 3066, are a formalism |
114 | for denoting human languages. The two-letter ISO 639-1 language |
115 | codes are well known (as "en" for English), as are their forms |
116 | when qualified by a country code ("en-US"). Less well-known are the |
117 | arbitrary-length non-ISO codes (like "i-mingo"), and the |
118 | recently (in 2001) introduced three-letter ISO-639-2 codes. |
119 | |
483dd220 |
120 | Remember these important facts: |
21aeefd5 |
121 | |
122 | =over |
123 | |
124 | =item * |
125 | |
126 | Language tags are not locale IDs. A locale ID is written with a "_" |
127 | instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and |
128 | I<means> something different than a language tag. A language tag |
129 | denotes a language. A locale ID denotes a language I<as used in> |
130 | a particular place, in combination with non-linguistic |
483dd220 |
131 | location-specific information such as what currency is used |
21aeefd5 |
132 | there. Locales I<also> often denote character set information, |
133 | as in "en_US.ISO8859-1". |
134 | |
135 | =item * |
136 | |
137 | Language tags are not for computer languages. |
138 | |
139 | =item * |
140 | |
141 | "Dialect" is not a useful term, since there is no objective |
483dd220 |
142 | criterion for establishing when two language-forms are |
21aeefd5 |
143 | dialects of each other, or are separate languages. |
144 | |
145 | =item * |
146 | |
147 | Language tags are not case-sensitive. en-US, en-us, En-Us, etc., |
148 | are all the same tag, and denote the same language. |
149 | |
150 | =item * |
151 | |
152 | Not every language tag really refers to a single language. Some |
153 | language tags refer to conditions: i-default (system-message text |
154 | in English plus maybe other languages), und (undetermined |
155 | language). Others (notably lots of the three-letter codes) are |
156 | bibliographic tags that classify whole groups of languages, as |
157 | with cus "Cushitic (Other)" (i.e., a |
158 | language that has been classed as Cushitic, but which has no more |
159 | specific code) or the even less linguistically coherent |
483dd220 |
160 | sai for "South American Indian (Other)". Though useful in |
21aeefd5 |
161 | bibliography, B<SUCH TAGS ARE NOT |
162 | FOR GENERAL USE>. For further guidance, email me. |
163 | |
164 | =item * |
165 | |
166 | Language tags are not country codes. In fact, they are often |
167 | distinct codes, as with language tag ja for Japanese, and |
168 | ISO 3166 country code C<.jp> for Japan. |
169 | |
170 | =back |
171 | |
172 | =head1 LIST OF LANGUAGES |
173 | |
174 | The first part of each item is the language tag, between |
175 | {...}. It |
176 | is followed by an English name for the language or language-group. |
177 | Language tags that I judge to be not for general use, are bracketed. |
178 | |
179 | This list is in alphabetical order by English name of the language. |
180 | |
181 | =for reminder |
182 | The name in the =item line MUST NOT have E<...>'s in it!! |
183 | |
184 | =for woohah START |
185 | |
186 | =over |
187 | |
188 | =item {ab} : Abkhazian |
189 | |
190 | eq Abkhaz |
191 | |
192 | =item {ace} : Achinese |
193 | |
194 | =item {ach} : Acoli |
195 | |
196 | =item {ada} : Adangme |
197 | |
198 | =item {aa} : Afar |
199 | |
200 | =item {afh} : Afrihili |
201 | |
202 | (Artificial) |
203 | |
204 | =item {af} : Afrikaans |
205 | |
206 | =item [{afa} : Afro-Asiatic (Other)] |
207 | |
208 | =item {aka} : Akan |
209 | |
210 | =item {akk} : Akkadian |
211 | |
212 | (Historical) |
213 | |
214 | =item {sq} : Albanian |
215 | |
216 | =item {ale} : Aleut |
217 | |
218 | =item [{alg} : Algonquian languages] |
219 | |
220 | NOT Algonquin! |
221 | |
222 | =item [{tut} : Altaic (Other)] |
223 | |
224 | =item {am} : Amharic |
225 | |
226 | NOT Aramaic! |
227 | |
228 | =item {i-ami} : Ami |
229 | |
230 | eq Amis. eq 'Amis. eq Pangca. |
231 | |
232 | =item [{apa} : Apache languages] |
233 | |
234 | =item {ar} : Arabic |
235 | |
236 | Many forms are mutually un-intelligible in spoken media. |
237 | Notable forms: |
238 | {ar-ae} UAE Arabic; |
239 | {ar-bh} Bahrain Arabic; |
240 | {ar-dz} Algerian Arabic; |
241 | {ar-eg} Egyptian Arabic; |
242 | {ar-iq} Iraqi Arabic; |
243 | {ar-jo} Jordanian Arabic; |
244 | {ar-kw} Kuwait Arabic; |
245 | {ar-lb} Lebanese Arabic; |
246 | {ar-ly} Libyan Arabic; |
247 | {ar-ma} Moroccan Arabic; |
248 | {ar-om} Omani Arabic; |
249 | {ar-qa} Qatari Arabic; |
250 | {ar-sa} Saudi Arabic; |
251 | {ar-sy} Syrian Arabic; |
252 | {ar-tn} Tunisian Arabic; |
253 | {ar-ye} Yemen Arabic. |
254 | |
255 | =item {arc} : Aramaic |
256 | |
257 | NOT Amharic! NOT Samaritan Aramaic! |
258 | |
259 | =item {arp} : Arapaho |
260 | |
261 | =item {arn} : Araucanian |
262 | |
263 | =item {arw} : Arawak |
264 | |
265 | =item {hy} : Armenian |
266 | |
267 | =item [{art} : Artificial (Other)] |
268 | |
269 | =item {as} : Assamese |
270 | |
271 | =item [{ath} : Athapascan languages] |
272 | |
273 | eq Athabaskan. eq Athapaskan. eq Athabascan. |
274 | |
275 | =item [{aus} : Australian languages] |
276 | |
277 | =item [{map} : Austronesian (Other)] |
278 | |
279 | =item {ava} : Avaric |
280 | |
281 | =item {ae} : Avestan |
282 | |
283 | eq Zend |
284 | |
285 | =item {awa} : Awadhi |
286 | |
287 | =item {ay} : Aymara |
288 | |
289 | =item {az} : Azerbaijani |
290 | |
291 | eq Azeri |
292 | |
293 | =item {ban} : Balinese |
294 | |
295 | =item [{bat} : Baltic (Other)] |
296 | |
297 | =item {bal} : Baluchi |
298 | |
299 | =item {bam} : Bambara |
300 | |
301 | =item [{bai} : Bamileke languages] |
302 | |
303 | =item {bad} : Banda |
304 | |
305 | =item [{bnt} : Bantu (Other)] |
306 | |
307 | =item {bas} : Basa |
308 | |
309 | =item {ba} : Bashkir |
310 | |
311 | =item {eu} : Basque |
312 | |
313 | =item {btk} : Batak (Indonesia) |
314 | |
315 | =item {bej} : Beja |
316 | |
317 | =item {be} : Belarusian |
318 | |
319 | eq Belarussian. eq Byelarussian. |
320 | eq Belorussian. eq Byelorussian. |
321 | eq White Russian. eq White Ruthenian. |
322 | NOT Ruthenian! |
323 | |
324 | =item {bem} : Bemba |
325 | |
326 | =item {bn} : Bengali |
327 | |
328 | eq Bangla. |
329 | |
330 | =item [{ber} : Berber (Other)] |
331 | |
332 | =item {bho} : Bhojpuri |
333 | |
334 | =item {bh} : Bihari |
335 | |
336 | =item {bik} : Bikol |
337 | |
338 | =item {bin} : Bini |
339 | |
340 | =item {bi} : Bislama |
341 | |
342 | eq Bichelamar. |
343 | |
344 | =item {bs} : Bosnian |
345 | |
346 | =item {bra} : Braj |
347 | |
348 | =item {br} : Breton |
349 | |
350 | =item {bug} : Buginese |
351 | |
352 | =item {bg} : Bulgarian |
353 | |
354 | =item {i-bnn} : Bunun |
355 | |
356 | =item {bua} : Buriat |
357 | |
358 | =item {my} : Burmese |
359 | |
360 | =item {cad} : Caddo |
361 | |
362 | =item {car} : Carib |
363 | |
364 | =item {ca} : Catalan |
365 | |
366 | eq CatalE<aacute>n. eq Catalonian. |
367 | |
368 | =item [{cau} : Caucasian (Other)] |
369 | |
370 | =item {ceb} : Cebuano |
371 | |
372 | =item [{cel} : Celtic (Other)] |
373 | |
374 | Notable forms: |
375 | {cel-gaulish} Gaulish (Historical) |
376 | |
377 | =item [{cai} : Central American Indian (Other)] |
378 | |
379 | =item {chg} : Chagatai |
380 | |
381 | (Historical?) |
382 | |
383 | =item [{cmc} : Chamic languages] |
384 | |
385 | =item {ch} : Chamorro |
386 | |
387 | =item {ce} : Chechen |
388 | |
389 | =item {chr} : Cherokee |
390 | |
391 | eq Tsalagi |
392 | |
393 | =item {chy} : Cheyenne |
394 | |
395 | =item {chb} : Chibcha |
396 | |
397 | (Historical) NOT Chibchan (which is a language family). |
398 | |
399 | =item {ny} : Chichewa |
400 | |
401 | eq Nyanja. eq Chinyanja. |
402 | |
403 | =item {zh} : Chinese |
404 | |
405 | Many forms are mutually un-intelligible in spoken media. |
406 | Notable subforms: |
407 | {zh-cn} PRC Chinese; |
408 | {zh-hk} Hong Kong Chinese; |
409 | {zh-mo} Macau Chinese; |
410 | {zh-sg} Singapore Chinese; |
411 | {zh-tw} Taiwan Chinese; |
412 | {zh-guoyu} Mandarin [Putonghua/Guoyu]; |
413 | {zh-hakka} Hakka [formerly i-hakka]; |
414 | {zh-min} Hokkien; |
415 | {zh-min-nan} Southern Hokkien; |
416 | {zh-wuu} Shanghaiese; |
417 | {zh-xiang} Hunanese; |
418 | {zh-gan} Gan; |
419 | {zh-yue} Cantonese. |
420 | |
421 | =for etc |
422 | {i-hakka} Hakka (old tag) |
423 | |
424 | =item {chn} : Chinook Jargon |
425 | |
426 | eq Chinook Wawa. |
427 | |
428 | =item {chp} : Chipewyan |
429 | |
430 | =item {cho} : Choctaw |
431 | |
432 | =item {cu} : Church Slavic |
433 | |
434 | eq Old Church Slavonic. |
435 | |
436 | =item {chk} : Chuukese |
437 | |
438 | eq Trukese. eq Chuuk. eq Truk. eq Ruk. |
439 | |
440 | =item {cv} : Chuvash |
441 | |
442 | =item {cop} : Coptic |
443 | |
444 | =item {kw} : Cornish |
445 | |
446 | =item {co} : Corsican |
447 | |
448 | eq Corse. |
449 | |
450 | =item {cre} : Cree |
451 | |
452 | NOT Creek! |
453 | |
454 | =item {mus} : Creek |
455 | |
456 | NOT Cree! |
457 | |
458 | =item [{cpe} : English-based Creoles and pidgins (Other)] |
459 | |
460 | =item [{cpf} : French-based Creoles and pidgins (Other)] |
461 | |
462 | =item [{cpp} : Portuguese-based Creoles and pidgins (Other)] |
463 | |
464 | =item [{crp} : Creoles and pidgins (Other)] |
465 | |
466 | =item {hr} : Croatian |
467 | |
468 | eq Croat. |
469 | |
470 | =item [{cus} : Cushitic (Other)] |
471 | |
472 | =item {cs} : Czech |
473 | |
474 | =item {dak} : Dakota |
475 | |
476 | eq Nakota. eq Lakota. |
477 | |
478 | =item {da} : Danish |
479 | |
480 | =item {day} : Dayak |
481 | |
482 | =item {i-default} : Default (Fallthru) Language |
483 | |
484 | Defined in RFC 2277, this is for tagging text |
485 | (which must include English text, and might/should include text |
486 | in other appropriate languages) that is emitted in a context |
487 | where language-negotiation wasn't possible -- in SMTP mail failure |
488 | messages, for example. |
489 | |
490 | =item {del} : Delaware |
491 | |
492 | =item {din} : Dinka |
493 | |
494 | =item {div} : Divehi |
495 | |
496 | =item {doi} : Dogri |
497 | |
498 | NOT Dogrib! |
499 | |
500 | =item {dgr} : Dogrib |
501 | |
502 | NOT Dogri! |
503 | |
504 | =item [{dra} : Dravidian (Other)] |
505 | |
506 | =item {dua} : Duala |
507 | |
508 | =item {nl} : Dutch |
509 | |
510 | eq Netherlander. Notable forms: |
511 | {nl-nl} Netherlands Dutch; |
512 | {nl-be} Belgian Dutch. |
513 | |
514 | =item {dum} : Middle Dutch (ca.1050-1350) |
515 | |
516 | (Historical) |
517 | |
518 | =item {dyu} : Dyula |
519 | |
520 | =item {dz} : Dzongkha |
521 | |
522 | =item {efi} : Efik |
523 | |
524 | =item {egy} : Ancient Egyptian |
525 | |
526 | (Historical) |
527 | |
528 | =item {eka} : Ekajuk |
529 | |
530 | =item {elx} : Elamite |
531 | |
532 | (Historical) |
533 | |
534 | =item {en} : English |
535 | |
536 | Notable forms: |
537 | {en-au} Australian English; |
538 | {en-bz} Belize English; |
539 | {en-ca} Canadian English; |
540 | {en-gb} UK English; |
541 | {en-ie} Irish English; |
542 | {en-jm} Jamaican English; |
543 | {en-nz} New Zealand English; |
544 | {en-ph} Philippine English; |
545 | {en-tt} Trinidad English; |
546 | {en-us} US English; |
547 | {en-za} South African English; |
548 | {en-zw} Zimbabwe English. |
549 | |
550 | =item {enm} : Middle English (1100-1500) |
551 | |
552 | (Historical) |
553 | |
554 | =item {ang} : Old English (ca.450-1100) |
555 | |
556 | eq Anglo-Saxon. (Historical) |
557 | |
558 | =item {eo} : Esperanto |
559 | |
560 | (Artificial) |
561 | |
562 | =item {et} : Estonian |
563 | |
564 | =item {ewe} : Ewe |
565 | |
566 | =item {ewo} : Ewondo |
567 | |
568 | =item {fan} : Fang |
569 | |
570 | =item {fat} : Fanti |
571 | |
572 | =item {fo} : Faroese |
573 | |
574 | =item {fj} : Fijian |
575 | |
576 | =item {fi} : Finnish |
577 | |
578 | =item [{fiu} : Finno-Ugrian (Other)] |
579 | |
580 | eq Finno-Ugric. NOT Ugaritic! |
581 | |
582 | =item {fon} : Fon |
583 | |
584 | =item {fr} : French |
585 | |
586 | Notable forms: |
587 | {fr-fr} France French; |
588 | {fr-be} Belgian French; |
589 | {fr-ca} Canadian French; |
590 | {fr-ch} Swiss French; |
591 | {fr-lu} Luxembourg French; |
592 | {fr-mc} Monaco French. |
593 | |
594 | =item {frm} : Middle French (ca.1400-1600) |
595 | |
596 | (Historical) |
597 | |
598 | =item {fro} : Old French (842-ca.1400) |
599 | |
600 | (Historical) |
601 | |
602 | =item {fy} : Frisian |
603 | |
604 | =item {fur} : Friulian |
605 | |
606 | =item {ful} : Fulah |
607 | |
608 | =item {gaa} : Ga |
609 | |
610 | =item {gd} : Scots Gaelic |
611 | |
612 | NOT Scots! |
613 | |
614 | =item {gl} : Gallegan |
615 | |
616 | eq Galician |
617 | |
618 | =item {lug} : Ganda |
619 | |
620 | =item {gay} : Gayo |
621 | |
622 | =item {gba} : Gbaya |
623 | |
624 | =item {gez} : Geez |
625 | |
626 | eq Ge'ez |
627 | |
628 | =item {ka} : Georgian |
629 | |
630 | =item {de} : German |
631 | |
632 | Notable forms: |
633 | {de-at} Austrian German; |
634 | {de-be} Belgian German; |
635 | {de-ch} Swiss German; |
636 | {de-de} Germany German; |
637 | {de-li} Liechtenstein German; |
638 | {de-lu} Luxembourg German. |
639 | |
640 | =item {gmh} : Middle High German (ca.1050-1500) |
641 | |
642 | (Historical) |
643 | |
644 | =item {goh} : Old High German (ca.750-1050) |
645 | |
646 | (Historical) |
647 | |
648 | =item [{gem} : Germanic (Other)] |
649 | |
650 | =item {gil} : Gilbertese |
651 | |
652 | =item {gon} : Gondi |
653 | |
654 | =item {gor} : Gorontalo |
655 | |
656 | =item {got} : Gothic |
657 | |
658 | (Historical) |
659 | |
660 | =item {grb} : Grebo |
661 | |
4cf5bee0 |
662 | =item {grc} : Ancient Greek |
21aeefd5 |
663 | |
4cf5bee0 |
664 | (Historical) (Until 15th century or so.) |
665 | |
666 | =item {el} : Modern Greek |
21aeefd5 |
667 | |
4cf5bee0 |
668 | (Since 15th century or so.) |
21aeefd5 |
669 | |
670 | =item {gn} : Guarani |
671 | |
672 | GuaranE<iacute> |
673 | |
674 | =item {gu} : Gujarati |
675 | |
676 | =item {gwi} : Gwich'in |
677 | |
678 | eq Gwichin |
679 | |
680 | =item {hai} : Haida |
681 | |
682 | =item {ha} : Hausa |
683 | |
684 | =item {haw} : Hawaiian |
685 | |
686 | Hawai'ian |
687 | |
688 | =item {he} : Hebrew |
689 | |
690 | (Formerly "iw".) |
691 | |
692 | =for etc |
693 | {iw} Hebrew (old tag) |
694 | |
695 | =item {hz} : Herero |
696 | |
697 | =item {hil} : Hiligaynon |
698 | |
699 | =item {him} : Himachali |
700 | |
701 | =item {hi} : Hindi |
702 | |
703 | =item {ho} : Hiri Motu |
704 | |
705 | =item {hit} : Hittite |
706 | |
707 | (Historical) |
708 | |
709 | =item {hmn} : Hmong |
710 | |
711 | =item {hu} : Hungarian |
712 | |
713 | =item {hup} : Hupa |
714 | |
715 | =item {iba} : Iban |
716 | |
717 | =item {is} : Icelandic |
718 | |
719 | =item {ibo} : Igbo |
720 | |
721 | =item {ijo} : Ijo |
722 | |
723 | =item {ilo} : Iloko |
724 | |
725 | =item [{inc} : Indic (Other)] |
726 | |
727 | =item [{ine} : Indo-European (Other)] |
728 | |
729 | =item {id} : Indonesian |
730 | |
731 | (Formerly "in".) |
732 | |
733 | =for etc |
734 | {in} Indonesian (old tag) |
735 | |
736 | =item {ia} : Interlingua (International Auxiliary Language Association) |
737 | |
738 | (Artificial) NOT Interlingue! |
739 | |
740 | =item {ie} : Interlingue |
741 | |
742 | (Artificial) NOT Interlingua! |
743 | |
744 | =item {iu} : Inuktitut |
745 | |
746 | A subform of "Eskimo". |
747 | |
748 | =item {ik} : Inupiaq |
749 | |
750 | A subform of "Eskimo". |
751 | |
752 | =item [{ira} : Iranian (Other)] |
753 | |
754 | =item {ga} : Irish |
755 | |
756 | =item {mga} : Middle Irish (900-1200) |
757 | |
758 | (Historical) |
759 | |
760 | =item {sga} : Old Irish (to 900) |
761 | |
762 | (Historical) |
763 | |
764 | =item [{iro} : Iroquoian languages] |
765 | |
766 | =item {it} : Italian |
767 | |
768 | Notable forms: |
769 | {it-it} Italy Italian; |
770 | {it-ch} Swiss Italian. |
771 | |
772 | =item {ja} : Japanese |
773 | |
774 | (NOT "jp"!) |
775 | |
776 | =item {jw} : Javanese |
777 | |
778 | =item {jrb} : Judeo-Arabic |
779 | |
780 | =item {jpr} : Judeo-Persian |
781 | |
782 | =item {kab} : Kabyle |
783 | |
784 | =item {kac} : Kachin |
785 | |
786 | =item {kl} : Kalaallisut |
787 | |
788 | eq Greenlandic "Eskimo" |
789 | |
790 | =item {kam} : Kamba |
791 | |
792 | =item {kn} : Kannada |
793 | |
794 | eq Kanarese. NOT Canadian! |
795 | |
796 | =item {kau} : Kanuri |
797 | |
798 | =item {kaa} : Kara-Kalpak |
799 | |
800 | =item {kar} : Karen |
801 | |
802 | =item {ks} : Kashmiri |
803 | |
804 | =item {kaw} : Kawi |
805 | |
806 | =item {kk} : Kazakh |
807 | |
808 | =item {kha} : Khasi |
809 | |
810 | =item {km} : Khmer |
811 | |
812 | eq Cambodian. eq Kampuchean. |
813 | |
814 | =item [{khi} : Khoisan (Other)] |
815 | |
816 | =item {kho} : Khotanese |
817 | |
818 | =item {ki} : Kikuyu |
819 | |
820 | eq Gikuyu. |
821 | |
822 | =item {kmb} : Kimbundu |
823 | |
824 | =item {rw} : Kinyarwanda |
825 | |
826 | =item {ky} : Kirghiz |
827 | |
828 | =item {i-klingon} : Klingon |
829 | |
830 | =item {kv} : Komi |
831 | |
832 | =item {kon} : Kongo |
833 | |
834 | =item {kok} : Konkani |
835 | |
836 | =item {ko} : Korean |
837 | |
838 | =item {kos} : Kosraean |
839 | |
840 | =item {kpe} : Kpelle |
841 | |
842 | =item {kro} : Kru |
843 | |
844 | =item {kj} : Kuanyama |
845 | |
846 | =item {kum} : Kumyk |
847 | |
848 | =item {ku} : Kurdish |
849 | |
850 | =item {kru} : Kurukh |
851 | |
852 | =item {kut} : Kutenai |
853 | |
854 | =item {lad} : Ladino |
855 | |
856 | eq Judeo-Spanish. NOT Ladin (a minority language in Italy). |
857 | |
858 | =item {lah} : Lahnda |
859 | |
860 | NOT Lamba! |
861 | |
862 | =item {lam} : Lamba |
863 | |
864 | NOT Lahnda! |
865 | |
866 | =item {lo} : Lao |
867 | |
868 | eq Laotian. |
869 | |
870 | =item {la} : Latin |
871 | |
872 | (Historical) NOT Ladin! NOT Ladino! |
873 | |
874 | =item {lv} : Latvian |
875 | |
876 | eq Lettish. |
877 | |
878 | =item {lb} : Letzeburgesch |
879 | |
880 | eq Luxemburgian, eq Luxemburger. (Formerly i-lux.) |
881 | |
882 | =for etc |
883 | {i-lux} Letzeburgesch (old tag) |
884 | |
885 | =item {lez} : Lezghian |
886 | |
887 | =item {ln} : Lingala |
888 | |
889 | =item {lt} : Lithuanian |
890 | |
891 | =item {nds} : Low German |
892 | |
893 | eq Low Saxon. eq Low German. eq Low Saxon. |
894 | |
895 | =item {loz} : Lozi |
896 | |
897 | =item {lub} : Luba-Katanga |
898 | |
899 | =item {lua} : Luba-Lulua |
900 | |
901 | =item {lui} : Luiseno |
902 | |
903 | eq LuiseE<ntilde>o. |
904 | |
905 | =item {lun} : Lunda |
906 | |
907 | =item {luo} : Luo (Kenya and Tanzania) |
908 | |
909 | =item {lus} : Lushai |
910 | |
911 | =item {mk} : Macedonian |
912 | |
913 | eq the modern Slavic language spoken in what was Yugoslavia. |
914 | NOT the form of Greek spoken in Greek Macedonia! |
915 | |
916 | =item {mad} : Madurese |
917 | |
918 | =item {mag} : Magahi |
919 | |
920 | =item {mai} : Maithili |
921 | |
922 | =item {mak} : Makasar |
923 | |
924 | =item {mg} : Malagasy |
925 | |
926 | =item {ms} : Malay |
927 | |
928 | NOT Malayalam! |
929 | |
930 | =item {ml} : Malayalam |
931 | |
932 | NOT Malay! |
933 | |
934 | =item {mt} : Maltese |
935 | |
936 | =item {mnc} : Manchu |
937 | |
938 | =item {mdr} : Mandar |
939 | |
940 | NOT Mandarin! |
941 | |
942 | =item {man} : Mandingo |
943 | |
944 | =item {mni} : Manipuri |
945 | |
946 | eq Meithei. |
947 | |
948 | =item [{mno} : Manobo languages] |
949 | |
950 | =item {gv} : Manx |
951 | |
952 | =item {mi} : Maori |
953 | |
954 | NOT Mari! |
955 | |
956 | =item {mr} : Marathi |
957 | |
958 | =item {chm} : Mari |
959 | |
960 | NOT Maori! |
961 | |
962 | =item {mh} : Marshall |
963 | |
964 | eq Marshallese. |
965 | |
966 | =item {mwr} : Marwari |
967 | |
968 | =item {mas} : Masai |
969 | |
970 | =item [{myn} : Mayan languages] |
971 | |
972 | =item {men} : Mende |
973 | |
974 | =item {mic} : Micmac |
975 | |
976 | =item {min} : Minangkabau |
977 | |
978 | =item {i-mingo} : Mingo |
979 | |
980 | eq the Iroquoian language West Virginia Seneca. NOT New York Seneca! |
981 | |
982 | =item [{mis} : Miscellaneous languages] |
983 | |
984 | Don't use this. |
985 | |
986 | =item {moh} : Mohawk |
987 | |
988 | =item {mo} : Moldavian |
989 | |
990 | eq Moldovan. |
991 | |
992 | =item [{mkh} : Mon-Khmer (Other)] |
993 | |
994 | =item {lol} : Mongo |
995 | |
996 | =item {mn} : Mongolian |
997 | |
998 | eq Mongol. |
999 | |
1000 | =item {mos} : Mossi |
1001 | |
1002 | =item [{mul} : Multiple languages] |
1003 | |
1004 | Not for normal use. |
1005 | |
1006 | =item [{mun} : Munda languages] |
1007 | |
1008 | =item {nah} : Nahuatl |
1009 | |
1010 | =item {na} : Nauru |
1011 | |
1012 | =item {nv} : Navajo |
1013 | |
1014 | eq Navaho. (Formerly i-navajo.) |
1015 | |
1016 | =for etc |
1017 | {i-navajo} Navajo (old tag) |
1018 | |
1019 | =item {nd} : North Ndebele |
1020 | |
1021 | =item {nr} : South Ndebele |
1022 | |
1023 | =item {ng} : Ndonga |
1024 | |
1025 | =item {ne} : Nepali |
1026 | |
1027 | eq Nepalese. Notable forms: |
1028 | {ne-np} Nepal Nepali; |
1029 | {ne-in} India Nepali. |
1030 | |
1031 | =item {new} : Newari |
1032 | |
1033 | =item {nia} : Nias |
1034 | |
1035 | =item [{nic} : Niger-Kordofanian (Other)] |
1036 | |
1037 | =item [{ssa} : Nilo-Saharan (Other)] |
1038 | |
1039 | =item {niu} : Niuean |
1040 | |
1041 | =item {non} : Old Norse |
1042 | |
1043 | (Historical) |
1044 | |
1045 | =item [{nai} : North American Indian] |
1046 | |
1047 | Do not use this. |
1048 | |
1049 | =item {se} : Northern Sami |
1050 | |
1051 | eq Lappish. eq Lapp. eq (Northern) Saami. |
1052 | |
1053 | =item {no} : Norwegian |
1054 | |
1055 | Note the two following forms: |
1056 | |
1057 | =item {nb} : Norwegian Bokmal |
1058 | |
1059 | eq BokmE<aring>l, (A form of Norwegian.) (Formerly no-bok.) |
1060 | |
1061 | =for etc |
1062 | {no-bok} Norwegian Bokmal (old tag) |
1063 | |
1064 | =item {nn} : Norwegian Nynorsk |
1065 | |
1066 | (A form of Norwegian.) (Formerly no-nyn.) |
1067 | |
1068 | =for etc |
1069 | {no-nyn} Norwegian Nynorsk (old tag) |
1070 | |
1071 | =item [{nub} : Nubian languages] |
1072 | |
1073 | =item {nym} : Nyamwezi |
1074 | |
1075 | =item {nyn} : Nyankole |
1076 | |
1077 | =item {nyo} : Nyoro |
1078 | |
1079 | =item {nzi} : Nzima |
1080 | |
1081 | =item {oc} : Occitan (post 1500) |
1082 | |
1083 | eq ProvenE<ccedil>al, eq Provencal |
1084 | |
1085 | =item {oji} : Ojibwa |
1086 | |
1087 | eq Ojibwe. |
1088 | |
1089 | =item {or} : Oriya |
1090 | |
1091 | =item {om} : Oromo |
1092 | |
1093 | =item {osa} : Osage |
1094 | |
1095 | =item {os} : Ossetian; Ossetic |
1096 | |
1097 | =item [{oto} : Otomian languages] |
1098 | |
1099 | Group of languages collectively called "OtomE<iacute>". |
1100 | |
1101 | =item {pal} : Pahlavi |
1102 | |
1103 | eq Pahlevi |
1104 | |
1105 | =item {i-pwn} : Paiwan |
1106 | |
1107 | eq Pariwan |
1108 | |
1109 | =item {pau} : Palauan |
1110 | |
1111 | =item {pi} : Pali |
1112 | |
1113 | (Historical?) |
1114 | |
1115 | =item {pam} : Pampanga |
1116 | |
1117 | =item {pag} : Pangasinan |
1118 | |
1119 | =item {pa} : Panjabi |
1120 | |
1121 | eq Punjabi |
1122 | |
1123 | =item {pap} : Papiamento |
1124 | |
1125 | eq Papiamentu. |
1126 | |
1127 | =item [{paa} : Papuan (Other)] |
1128 | |
1129 | =item {fa} : Persian |
1130 | |
1131 | eq Farsi. eq Iranian. |
1132 | |
1133 | =item {peo} : Old Persian (ca.600-400 B.C.) |
1134 | |
1135 | =item [{phi} : Philippine (Other)] |
1136 | |
1137 | =item {phn} : Phoenician |
1138 | |
1139 | (Historical) |
1140 | |
1141 | =item {pon} : Pohnpeian |
1142 | |
1143 | NOT Pompeiian! |
1144 | |
1145 | =item {pl} : Polish |
1146 | |
1147 | =item {pt} : Portuguese |
1148 | |
1149 | eq Portugese. Notable forms: |
1150 | {pt-pt} Portugal Portuguese; |
1151 | {pt-br} Brazilian Portuguese. |
1152 | |
1153 | =item [{pra} : Prakrit languages] |
1154 | |
1155 | =item {pro} : Old Provencal (to 1500) |
1156 | |
1157 | eq Old ProvenE<ccedil>al. (Historical.) |
1158 | |
1159 | =item {ps} : Pushto |
1160 | |
1161 | eq Pashto. eq Pushtu. |
1162 | |
1163 | =item {qu} : Quechua |
1164 | |
1165 | eq Quecha. |
1166 | |
1167 | =item {rm} : Raeto-Romance |
1168 | |
1169 | eq Romansh. |
1170 | |
1171 | =item {raj} : Rajasthani |
1172 | |
1173 | =item {rap} : Rapanui |
1174 | |
1175 | =item {rar} : Rarotongan |
1176 | |
1177 | =item [{qaa - qtz} : Reserved for local use.] |
1178 | |
1179 | =item [{roa} : Romance (Other)] |
1180 | |
1181 | NOT Romanian! NOT Romany! NOT Romansh! |
1182 | |
1183 | =item {ro} : Romanian |
1184 | |
1185 | eq Rumanian. NOT Romany! |
1186 | |
1187 | =item {rom} : Romany |
1188 | |
1189 | eq Rom. NOT Romanian! |
1190 | |
1191 | =item {rn} : Rundi |
1192 | |
1193 | =item {ru} : Russian |
1194 | |
1195 | NOT White Russian! NOT Rusyn! |
1196 | |
1197 | =item [{sal} : Salishan languages] |
1198 | |
1199 | Large language group. |
1200 | |
1201 | =item {sam} : Samaritan Aramaic |
1202 | |
1203 | NOT Aramaic! |
1204 | |
1205 | =item [{smi} : Sami languages (Other)] |
1206 | |
1207 | =item {sm} : Samoan |
1208 | |
1209 | =item {sad} : Sandawe |
1210 | |
1211 | =item {sg} : Sango |
1212 | |
1213 | =item {sa} : Sanskrit |
1214 | |
1215 | (Historical) |
1216 | |
1217 | =item {sat} : Santali |
1218 | |
1219 | =item {sc} : Sardinian |
1220 | |
1221 | eq Sard. |
1222 | |
1223 | =item {sas} : Sasak |
1224 | |
1225 | =item {sco} : Scots |
1226 | |
1227 | NOT Scots Gaelic! |
1228 | |
1229 | =item {sel} : Selkup |
1230 | |
1231 | =item [{sem} : Semitic (Other)] |
1232 | |
1233 | =item {sr} : Serbian |
1234 | |
1235 | eq Serb. NOT Sorbian. |
1236 | |
1237 | =item {srr} : Serer |
1238 | |
1239 | =item {shn} : Shan |
1240 | |
1241 | =item {sn} : Shona |
1242 | |
1243 | =item {sid} : Sidamo |
1244 | |
1245 | =item {sgn-...} : Sign Languages |
1246 | |
1247 | Always use with a subtag. Notable forms: |
1248 | {sgn-gb} British Sign Language (BSL); |
1249 | {sgn-ie} Irish Sign Language (ESL); |
1250 | {sgn-ni} Nicaraguan Sign Language (ISN); |
1251 | {sgn-us} American Sign Language (ASL). |
1252 | |
1253 | =item {bla} : Siksika |
1254 | |
1255 | eq Blackfoot. eq Pikanii. |
1256 | |
1257 | =item {sd} : Sindhi |
1258 | |
1259 | =item {si} : Sinhalese |
1260 | |
1261 | eq Sinhala. |
1262 | |
1263 | =item [{sit} : Sino-Tibetan (Other)] |
1264 | |
1265 | =item [{sio} : Siouan languages] |
1266 | |
1267 | =item {den} : Slave (Athapascan) |
1268 | |
1269 | ("Slavey" is a subform.) |
1270 | |
1271 | =item [{sla} : Slavic (Other)] |
1272 | |
1273 | =item {sk} : Slovak |
1274 | |
1275 | eq Slovakian. |
1276 | |
1277 | =item {sl} : Slovenian |
1278 | |
1279 | eq Slovene. |
1280 | |
1281 | =item {sog} : Sogdian |
1282 | |
1283 | =item {so} : Somali |
1284 | |
1285 | =item {son} : Songhai |
1286 | |
1287 | =item {snk} : Soninke |
1288 | |
1289 | =item {wen} : Sorbian languages |
1290 | |
1291 | eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! |
1292 | |
1293 | =item {nso} : Northern Sotho |
1294 | |
1295 | =item {st} : Southern Sotho |
1296 | |
1297 | eq Sutu. eq Sesotho. |
1298 | |
1299 | =item [{sai} : South American Indian (Other)] |
1300 | |
1301 | =item {es} : Spanish |
1302 | |
1303 | Notable forms: |
1304 | {es-ar} Argentine Spanish; |
1305 | {es-bo} Bolivian Spanish; |
1306 | {es-cl} Chilean Spanish; |
1307 | {es-co} Colombian Spanish; |
1308 | {es-do} Dominican Spanish; |
1309 | {es-ec} Ecuadorian Spanish; |
1310 | {es-es} Spain Spanish; |
1311 | {es-gt} Guatemalan Spanish; |
1312 | {es-hn} Honduran Spanish; |
1313 | {es-mx} Mexican Spanish; |
1314 | {es-pa} Panamanian Spanish; |
1315 | {es-pe} Peruvian Spanish; |
1316 | {es-pr} Puerto Rican Spanish; |
1317 | {es-py} Paraguay Spanish; |
1318 | {es-sv} Salvadoran Spanish; |
1319 | {es-us} US Spanish; |
1320 | {es-uy} Uruguayan Spanish; |
1321 | {es-ve} Venezuelan Spanish. |
1322 | |
1323 | =item {suk} : Sukuma |
1324 | |
1325 | =item {sux} : Sumerian |
1326 | |
1327 | (Historical) |
1328 | |
1329 | =item {su} : Sundanese |
1330 | |
1331 | =item {sus} : Susu |
1332 | |
1333 | =item {sw} : Swahili |
1334 | |
1335 | eq Kiswahili |
1336 | |
1337 | =item {ss} : Swati |
1338 | |
1339 | =item {sv} : Swedish |
1340 | |
1341 | Notable forms: |
483dd220 |
1342 | {sv-se} Sweden Swedish; |
1343 | {sv-fi} Finland Swedish. |
21aeefd5 |
1344 | |
1345 | =item {syr} : Syriac |
1346 | |
1347 | =item {tl} : Tagalog |
1348 | |
1349 | =item {ty} : Tahitian |
1350 | |
1351 | =item [{tai} : Tai (Other)] |
1352 | |
1353 | NOT Thai! |
1354 | |
1355 | =item {tg} : Tajik |
1356 | |
1357 | =item {tmh} : Tamashek |
1358 | |
1359 | =item {ta} : Tamil |
1360 | |
1361 | =item {i-tao} : Tao |
1362 | |
1363 | eq Yami. |
1364 | |
1365 | =item {tt} : Tatar |
1366 | |
1367 | =item {i-tay} : Tayal |
1368 | |
1369 | eq Atayal. eq Atayan. |
1370 | |
1371 | =item {te} : Telugu |
1372 | |
1373 | =item {ter} : Tereno |
1374 | |
1375 | =item {tet} : Tetum |
1376 | |
1377 | =item {th} : Thai |
1378 | |
1379 | NOT Tai! |
1380 | |
1381 | =item {bo} : Tibetan |
1382 | |
1383 | =item {tig} : Tigre |
1384 | |
1385 | =item {ti} : Tigrinya |
1386 | |
1387 | =item {tem} : Timne |
1388 | |
1389 | eq Themne. eq Timene. |
1390 | |
1391 | =item {tiv} : Tiv |
1392 | |
1393 | =item {tli} : Tlingit |
1394 | |
1395 | =item {tpi} : Tok Pisin |
1396 | |
1397 | =item {tkl} : Tokelau |
1398 | |
1399 | =item {tog} : Tonga (Nyasa) |
1400 | |
1401 | NOT Tsonga! |
1402 | |
1403 | =item {to} : Tonga (Tonga Islands) |
1404 | |
1405 | (Pronounced "Tong-a", not "Tong-ga") |
1406 | |
1407 | NOT Tsonga! |
1408 | |
1409 | =item {tsi} : Tsimshian |
1410 | |
1411 | eq Sm'algyax. |
1412 | |
1413 | =item {ts} : Tsonga |
1414 | |
1415 | NOT Tonga! |
1416 | |
1417 | =item {i-tsu} : Tsou |
1418 | |
1419 | =item {tn} : Tswana |
1420 | |
1421 | Same as Setswana. |
1422 | |
1423 | =item {tum} : Tumbuka |
1424 | |
1425 | =item {tr} : Turkish |
1426 | |
1427 | (Typically in Roman script) |
1428 | |
1429 | =item {ota} : Ottoman Turkish (1500-1928) |
1430 | |
1431 | (Typically in Arabic script) (Historical) |
1432 | |
1433 | =item {tk} : Turkmen |
1434 | |
1435 | eq Turkmeni. |
1436 | |
1437 | =item {tvl} : Tuvalu |
1438 | |
1439 | =item {tyv} : Tuvinian |
1440 | |
1441 | eq Tuvan. eq Tuvin. |
1442 | |
1443 | =item {tw} : Twi |
1444 | |
1445 | =item {uga} : Ugaritic |
1446 | |
1447 | NOT Ugric! |
1448 | |
1449 | =item {ug} : Uighur |
1450 | |
1451 | =item {uk} : Ukrainian |
1452 | |
1453 | =item {umb} : Umbundu |
1454 | |
1455 | =item {und} : Undetermined |
1456 | |
1457 | Not a tag for normal use. |
1458 | |
1459 | =item {ur} : Urdu |
1460 | |
1461 | =item {uz} : Uzbek |
1462 | |
1463 | eq E<Ouml>zbek. |
1464 | |
1465 | =item {vai} : Vai |
1466 | |
1467 | =item {ven} : Venda |
1468 | |
1469 | NOT Wendish! NOT Wend! NOT Avestan! |
1470 | |
1471 | =item {vi} : Vietnamese |
1472 | |
1473 | eq Viet. |
1474 | |
1475 | =item {vo} : Volapuk |
1476 | |
1477 | eq VolapE<uuml>k. (Artificial) |
1478 | |
1479 | =item {vot} : Votic |
1480 | |
1481 | eq Votian. eq Vod. |
1482 | |
1483 | =item [{wak} : Wakashan languages] |
1484 | |
1485 | =item {wal} : Walamo |
1486 | |
1487 | eq Wolaytta. |
1488 | |
1489 | =item {war} : Waray |
1490 | |
1491 | Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), |
1492 | not the smaller Philippine language Waray Sorsogon, nor the extinct |
1493 | Australian language Waray. |
1494 | |
1495 | =item {was} : Washo |
1496 | |
1497 | eq Washoe. |
1498 | |
1499 | =item {cy} : Welsh |
1500 | |
1501 | =item {wo} : Wolof |
1502 | |
1503 | =item {x-...} : Unregistered (Semi-Private Use) |
1504 | |
1505 | "x-" is a prefix for language tags that are not registered with ISO |
1506 | or IANA. For example: x-double-dutch |
1507 | |
1508 | =item {xh} : Xhosa |
1509 | |
1510 | =item {sah} : Yakut |
1511 | |
1512 | =item {yao} : Yao |
1513 | |
1514 | (The Yao in Malawi?) |
1515 | |
1516 | =item {yap} : Yapese |
1517 | |
1518 | eq Yap. |
1519 | |
1520 | =item {yi} : Yiddish |
1521 | |
1522 | Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script. |
1523 | |
1524 | =for etc |
1525 | {ji} Yiddish (old tag) |
1526 | |
1527 | =item {yo} : Yoruba |
1528 | |
1529 | =item [{ypk} : Yupik languages] |
1530 | |
1531 | Several "Eskimo" languages. |
1532 | |
1533 | =item {znd} : Zande |
1534 | |
1535 | =item [{zap} : Zapotec] |
1536 | |
1537 | (A group of languages.) |
1538 | |
1539 | =item {zen} : Zenaga |
1540 | |
1541 | NOT Zend. |
1542 | |
1543 | =item {za} : Zhuang |
1544 | |
1545 | =item {zu} : Zulu |
1546 | |
1547 | =item {zun} : Zuni |
1548 | |
1549 | eq ZuE<ntilde>i. |
1550 | |
1551 | =back |
1552 | |
1553 | =for woohah END |
1554 | |
1555 | =head1 SEE ALSO |
1556 | |
1557 | L<I18N::LangTags|I18N::LangTags> and its "See Also" section. |
1558 | |
1559 | =head1 COPYRIGHT AND DISCLAIMER |
1560 | |
483dd220 |
1561 | Copyright (c) 2001,2002 Sean M. Burke. All rights reserved. |
21aeefd5 |
1562 | |
1563 | You can redistribute and/or |
1564 | modify this document under the same terms as Perl itself. |
1565 | |
d1be9408 |
1566 | This document is provided in the hope that it will be |
21aeefd5 |
1567 | useful, but without any warranty; |
1568 | without even the implied warranty of accuracy, authoritativeness, |
1569 | completeness, merchantability, or fitness for a particular purpose. |
1570 | |
1571 | Email any corrections or questions to me. |
1572 | |
1573 | =head1 AUTHOR |
1574 | |
1575 | Sean M. Burke, sburkeE<64>cpan.org |
1576 | |
1577 | =cut |
1578 | |
1579 | |
1580 | # To generate a list of just the two- and three-letter codes, run the following standalone script: |
1581 | |
1582 | #!/usr/local/bin/perl -w |
1583 | |
1584 | require 5; # Time-stamp: "2001-03-13 21:53:39 MST" |
1585 | # Sean M. Burke, sburke@cpan.org |
1586 | # This program is for generating the language_codes.txt file: it fetches the HTML table at $url and prints "code<TAB>English name" lines, sorted by lowercased English name, followed by a provenance note |
1587 | use strict; |
1588 | use LWP::Simple; |
1589 | use HTML::TreeBuilder 3.10; |
1590 | my $root = HTML::TreeBuilder->new(); |
1591 | my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; |
1592 | $root->parse(get($url) || die "Can't get $url"); |
1593 | $root->eof(); |
1594 | |
1595 | my @codes; |
1596 | |
1597 | foreach my $tr ($root->find_by_tag_name('tr')) { |
1598 | my @f = map $_->as_text(), $tr->content_list(); |
1599 | #print map("<$_> ", @f), "\n"; |
1600 | next unless @f == 5; |
1601 | pop @f; # drop the last cell (the French name) |
1602 | next if $f[-1] eq 'Language Name (English)'; # skip the table's header row |
1603 | my $xx = splice(@f, 2,1); # remove the third cell (the two-letter code, if any) from @f, leaving the English name last |
1604 | $f[-1] =~ s/^\s+//; |
1605 | $f[-1] =~ s/\s+$//; |
1606 | if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it: list the language under that code |
1607 | push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; |
1608 | } else { # no two-letter code: fall back to the three-letter code |
1609 | if($f[0] eq $f[1]) { |
1610 | push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; |
1611 | } else { # shouldn't happen (the first two cells differ); the row is emitted with a "!!!!!!!!!!" marker so it stands out |
1612 | push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; |
1613 | } |
1614 | } |
1615 | } |
1616 | |
1617 | print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; |
1618 | print "[ based on $url\n at ", scalar(localtime), "]\n", |
1619 | "[Note: doesn't include IANA-registered codes.]\n"; |
1620 | exit; |
1621 | __END__ |
1622 | |