Update to I18N::LangTags 0.24, from Sean Burke.

diff --git a/MANIFEST b/MANIFEST

index 0e54da7..0ca611b 100644 (file)
--- a/MANIFEST
+++ b/MANIFEST
@@ -974,7 +974,9 @@ lib/Getopt/Std.t            See if Getopt::Std and Getopt::Long work
 lib/I18N/Collate.pm            Routines to do strxfrm-based collation
 lib/I18N/Collate.t             See if I18N::Collate works
 lib/I18N/LangTags.pm           I18N::LangTags
-lib/I18N/LangTags/List.pod     list of tags for human languages
+lib/I18N/LangTags/ChangeLog    I18N::LangTags
+lib/I18N/LangTags/List.pm      List of tags for human languages
+lib/I18N/LangTags/README       I18N::LangTags
 lib/I18N/LangTags/test.pl      See if I18N::LangTags works
 lib/IPC/Open2.pm               Open a two-ended pipe
 lib/IPC/Open2.t                        See if IPC::Open2 works
diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm

index f5db282..58f2464 100644 (file)
--- a/lib/I18N/LangTags.pm
+++ b/lib/I18N/LangTags.pm
@@ -1,23 +1,23 @@
 
-# Time-stamp: "2001-05-27 19:53:11 MDT"
+# Time-stamp: "2001-06-20 01:28:32 MDT"
 # Sean M. Burke <sburke@cpan.org>
 
 require 5.000;
 package I18N::LangTags;
 use strict;
-use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); # $Debug
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION %Panic);
 require Exporter;
-# $Debug = 0;
 @ISA = qw(Exporter);
 @EXPORT = qw();
 @EXPORT_OK = qw(is_language_tag same_language_tag
                 extract_language_tags super_languages
                 similarity_language_tag is_dialect_of
                 locale2language_tag alternate_language_tags
-                encode_language_tag
+                encode_language_tag panic_languages
                );
+%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
 
-$VERSION = "0.22";
+$VERSION = "0.24";
 
 =head1 NAME
 
@@ -29,12 +29,17 @@ I18N::LangTags - functions for dealing with RFC3066-style language tags
                           extract_language_tags super_languages
                           similarity_language_tag is_dialect_of
                           locale2language_tag alternate_language_tags
-                          encode_language_tag
+                          encode_language_tag panic_languages
                          );
 
 ...or whatever of those functions you want to import.  Those are
 all the exportable functions -- you're free to import only some,
-or none at all.  By default, none are imported.
+or none at all.  By default, none are imported.  If you say:
+
+    use I18N::LangTags qw(:ALL)
+
+...then all are exported.  (This saves you from having to use
+something less obvious like C<use I18N::LangTags qw(/./)>.)
 
 If you don't import any of these functions, assume a C<&I18N::LangTags::>
 in front of all the function names in the following examples.
@@ -92,7 +97,7 @@ sub is_language_tag {
   my($tag) = lc($_[0]);
 
   return 0 if $tag eq "i" or $tag eq "x";
-  # Bad degenerate cases the following
+  # Bad degenerate cases that the following
   #  regexp would erroneously let pass
 
   return $tag =~ 
@@ -166,6 +171,8 @@ representing the same language-form.
       (all-English is not the SAME as US English)
    same_language_tag('x-kadara', 'x-kadar')   is FALSE
       (these are totally unrelated tags)
+   same_language_tag('no-bok',    'nb')       is TRUE
+      (no-bok is a legacy tag for nb (Norwegian Bokmal))
 
 C<same_language_tag> works by just seeing whether
 C<encode_language_tag($lang1)> is the same as
@@ -221,7 +228,9 @@ without regard to case and to x/i- alternation.
 sub similarity_language_tag {
   my $lang1 = &encode_language_tag($_[0]);
   my $lang2 = &encode_language_tag($_[1]);
-
+   # And encode_language_tag takes care of the whole
+   #  no-nyn==nn, i-hakka==zh-hakka, etc, things
+   
   # NB: (i-sil-...)?  (i-sgn-...)?
 
   return undef if !defined($lang1) and !defined($lang2);
@@ -245,7 +254,7 @@ sub similarity_language_tag {
 
 =item * the function is_dialect_of($lang1, $lang2)
 
-Returns true iff language tag $lang1 represents a subdialect of
+Returns true iff language tag $lang1 represents a subform of
 language tag $lang2.
 
 B<Get the order right!  It doesn't work the other way around!>
@@ -262,13 +271,18 @@ B<Get the order right!  It doesn't work the other way around!>
 
    is_dialect_of('fr', 'en-CA')            is FALSE
 
-   is_dialect_of('en',    'en'   )            is TRUE
-   is_dialect_of('en-US', 'en-US')            is TRUE
+   is_dialect_of('en',    'en'   )         is TRUE
+   is_dialect_of('en-US', 'en-US')         is TRUE
      (B<Note:> these are degenerate cases)
 
    is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE
      (the x/i thing doesn't matter, nor does case)
 
+   is_dialect_of('nn', 'no')               is TRUE
+     (because 'nn' (New Norse) is aliased to 'no-nyn',
+      as a special legacy case, and 'no-nyn' is a
+      subform of 'no' (Norwegian))
+
 =cut
 
 sub is_dialect_of {
@@ -324,6 +338,13 @@ carefully.
 sub super_languages {
   my $lang1 = $_[0];
   return() unless defined($lang1) && &is_language_tag($lang1);
+
+  # a hack for those annoying new (2001) tags:
+  $lang1 =~ s/^nb\b/no-bok/i; # yes, backwards
+  $lang1 =~ s/^nn\b/no-nyn/i; # yes, backwards
+  $lang1 =~ s/^[ix](-hakka\b)/zh$1/i; # goes the right way
+   # i-hakka-bork-bjork-bjark => zh-hakka-bork-bjork-bjark
+
   my @l1_subtags = split('-', $lang1);
 
   ## Changes in the language tagging standards may have to be reflected here.
@@ -501,15 +522,29 @@ sub encode_language_tag {
 
   ## Changes in the language tagging standards may have to be reflected here.
 
-  my($tag) = uc($_[0]); # smash case
+  my($tag) = $_[0] || return undef;
   return undef unless &is_language_tag($tag);
-   # If it's not a language tag, its encoding is undef
+
+  # For the moment, these legacy variances are few enough that
+  #  we can just handle them here with regexps.
+  $tag =~ s/^iw\b/he/i; # Hebrew
+  $tag =~ s/^in\b/id/i; # Indonesian
+  $tag =~ s/^[ix]-lux\b/lb/i;  # Luxemburger
+  $tag =~ s/^[ix]-navajo\b/nv/i;  # Navajo
+  $tag =~ s/^ji\b/yi/i;  # Yiddish
+  #
+  # These go FROM the simplex to complex form, to get
+  #  similarity-comparison right.  And that's okay, since
+  #  similarity_language_tag is the only thing that
+  #  analyzes our output.
+  $tag =~ s/^[ix]-hakka\b/zh-hakka/i;  # Hakka
+  $tag =~ s/^nb\b/no-bok/i;  # BACKWARDS for Bokmal
+  $tag =~ s/^nn\b/no-nyn/i;  # BACKWARDS for Nynorsk
 
   $tag =~ s/^[xiXI]-//s;
    # Just lop off any leading "x/i-"
-   # Or I suppose I could do s/^[xiXI]-/_/s or something.
 
-  return "~$tag";
+  return "~" . uc($tag);
 }
 
 #--------------------------------------------------------------------------
@@ -517,39 +552,184 @@ sub encode_language_tag {
 =item * the function alternate_language_tags($lang1)
 
 This function, if given a language tag, returns all language tags that
-are alternate forms of this language tag.  (There is little
-alternation in the C<current> language tagging formalism, but
-extensions to the formalism are under consideration which could add a
-great deal of alternation.)
-
-Examples from the current formalism:
-
-          alternate_language_tags('en')           is   ()
-          alternate_language_tags('x-mingo-tom')  is   ('i-mingo-tom')
-          alternate_language_tags('x-klikitat')   is   ('i-klikitat')
-          alternate_language_tags('i-klikitat')   is   ('x-klikitat')
-
-This function returns undef if given anything other than a formally
+are alternate forms of this language tag.  (I.e., tags which refer to
+the same language.)  This is meant to handle legacy tags caused by
+the minor changes in language tag standards over the years; and
+the x-/i- alternation is also dealt with.
+
+Note that this function does I<not> try to equate new (and never-used,
+and unusable)
+ISO639-2 three-letter tags to old (and still in use) ISO639-1
+two-letter equivalents -- like "ara" -> "ar" -- because
+"ara" has I<never> been in use as an Internet language tag,
+and RFC 3066 stipulates that it never should be, since a shorter
+tag ("ar") exists.
+
+Examples:
+
+          alternate_language_tags('no-bok')       is ('nb')
+          alternate_language_tags('nb')           is ('no-bok')
+          alternate_language_tags('he')           is ('iw')
+          alternate_language_tags('iw')           is ('he')
+          alternate_language_tags('i-hakka')      is ('zh-hakka', 'x-hakka')
+          alternate_language_tags('zh-hakka')     is ('x-hakka', 'i-hakka')
+          alternate_language_tags('en')           is ()
+          alternate_language_tags('x-mingo-tom')  is ('i-mingo-tom')
+          alternate_language_tags('x-klikitat')   is ('i-klikitat')
+          alternate_language_tags('i-klikitat')   is ('x-klikitat')
+
+This function returns empty-list if given anything other than a formally
 valid language tag.
 
 =cut
 
 my %alt = qw( i x   x i   I X   X I );
 sub alternate_language_tags {
-  ## Changes in the language tagging standards may have to be reflected here.
   my $tag = $_[0];
   return() unless &is_language_tag($tag);
 
- # might as well preserve case
+  my @em; # the alternate tags collected so far ("push 'em real goood!")
+
+  # For the moment, these legacy variances are few enough that
+  #  we can just handle them here with regexps.  $1 carries any trailing subtags.
+  
+  if(     $tag =~ m/^[ix]-hakka\b(.*)/i) {push @em, "zh-hakka$1";
+  } elsif($tag =~ m/^zh-hakka\b(.*)/i) {  push @em, "x-hakka$1", "i-hakka$1";
+
+  } elsif($tag =~ m/^he\b(.*)/i) { push @em, "iw$1";
+  } elsif($tag =~ m/^iw\b(.*)/i) { push @em, "he$1";
+
+  } elsif($tag =~ m/^in\b(.*)/i) { push @em, "id$1";
+  } elsif($tag =~ m/^id\b(.*)/i) { push @em, "in$1";
+
+  } elsif($tag =~ m/^[ix]-lux\b(.*)/i) { push @em, "lb$1";
+  } elsif($tag =~ m/^lb\b(.*)/i) {       push @em, "i-lux$1", "x-lux$1";
+
+  } elsif($tag =~ m/^[ix]-navajo\b(.*)/i) { push @em, "nv$1";
+  } elsif($tag =~ m/^nv\b(.*)/i) {          push @em, "i-navajo$1", "x-navajo$1";
+
+  } elsif($tag =~ m/^yi\b(.*)/i) { push @em, "ji$1";
+  } elsif($tag =~ m/^ji\b(.*)/i) { push @em, "yi$1";
+
+  } elsif($tag =~ m/^nb\b(.*)/i) {     push @em, "no-bok$1";
+  } elsif($tag =~ m/^no-bok\b(.*)/i) { push @em, "nb$1";
+  
+  } elsif($tag =~ m/^nn\b(.*)/i) {     push @em, "no-nyn$1";
+  } elsif($tag =~ m/^no-nyn\b(.*)/i) { push @em, "nn$1";
+  }
+
+  push @em, $alt{$1} . $2 if $tag =~ /^([XIxi])(-.+)/;  # generic x-/i- swap, keeping the prefix's case
+  return @em;  # empty list when the tag has no alternates
+}
+
+###########################################################################
+
+{
+  # Init %Panic: each lowercase tag maps to an arrayref of fallback tags.
+  
+  my @panic = (  # MUST all be lowercase!
+   # Only large ("national") languages make it in this list.
+   #  If you, as a user, are so bizarre that the /only/ language
+   #  you claim to accept is Galician, then no, we won't do you
+   #  the favor of providing Catalan as a panic-fallback for
+   #  you.  Because if I start trying to add "little languages" in
+   #  here, I'll just go crazy.
+
+   # In a pinch, consider Scandinavian languages (minus
+   #  Icelandic?) to be mutually intelligible.
+   ([qw(no nn nb sv da fo)]) x 2,  # 'sv' is Swedish ('se' would be Northern Sami)
+   
+   # I think this is about the extent of tolerable intelligibility
+   #  among large modern Romance languages.
+   'pt' => [qw(es ca it fr)], # Portuguese, Spanish, Catalan, Italian, French
+   'ca' => [qw(es pt it fr)],
+   'es' => [qw(ca it fr pt)],
+   'it' => [qw(es fr ca pt)],
+   'fr' => [qw(es it ca pt)],
+   
+   # Also assume that speakers of the main Indian languages prefer
+   #  to read/hear Hindi over English
+   [qw(
+     as bn gu kn ks kok ml mni mr ne or pa sa sd te ta ur
+   )] => 'hi',
+    # Assamese, Bengali, Gujarati, [Hindi,] Kannada (Kanarese), Kashmiri,
+    # Konkani, Malayalam, Meithei (Manipuri), Marathi, Nepali, Oriya,
+    # Punjabi, Sanskrit, Sindhi, Telugu, Tamil, and Urdu.
+   'hi' => [qw(bn pa as or)],
+   # I welcome finer data for the other Indian languages.
+   #  E.g., what should Oriya's list be, besides just Hindi?
+   
+   # And the list of panic languages for English is, of course, nil!
+
+   # My guesses at Slavic intelligibility:
+   ([qw(ru be uk)]) x 2,  # Russian, Belarusian, Ukrainian
+   'sr' => 'hr', 'hr' => 'sr', # Serb + Croat
+   'cs' => 'sk', 'sk' => 'cs', # Czech + Slovak
+
+   'ms' => 'id', 'id' => 'ms', # Malay + Indonesian
+
+   'et' => 'fi', 'fi' => 'et', # Estonian + Finnish
+
+   #?? 'lo' => 'th', 'th' => 'lo', # Lao + Thai
+
+  );
+  my($k,$v);
+  while(@panic) {
+    ($k,$v) = splice(@panic,0,2);  # consume the list one key/value pair at a time
+    foreach my $from (ref($k) ? @$k : $k) {  # either side may be a tag or an arrayref of tags
+      foreach my $to (ref($v) ? @$v : $v) {
+        push @{$Panic{$from} ||= []}, $to unless $from eq $to;  # never a tag's own fallback
+      }
+    }
+  }
+}
+
+=item * the function @langs = panic_languages(@accept_languages)
+
+This function takes a list of 0 or more language
+tags that constitute a given user's Accept-Language list, and
+returns a list of tags for I<other> (non-super)
+languages that are probably acceptable to the user, to be
+used I<if all else fails>.
+
+For example, if a user accepts only 'ca' (Catalan) and
+'es' (Spanish), and the documents/interfaces you have
+available are just in German, Italian, and Chinese, then
+the user will most likely want the Italian one (and not
+the Chinese or German one!), instead of getting
+nothing.  So C<panic_languages('ca', 'es')> returns
+a list containing 'it' (Italian).
+
+English ('en') is I<always> in the return list (unless it was
+one of the input tags), but whether it's at the very end or not
+depends on the input languages.  This function works by consulting
+an internal table that stipulates what common
+languages are "close" to each other.
+
+A useful construct you might consider using is:
+
+  @fallbacks = super_languages(@accept_languages);
+  push @fallbacks, panic_languages(
+    @accept_languages, @fallbacks,
+  );
+
+=cut
 
-  if($tag =~ /^([XIxi])(-.+)/) {
-    # This handles all the alternation that exists CURRENTLY
-    return($alt{$1} . $2);
+sub panic_languages {  # (@accept_languages) => fallback tags from %Panic, plus 'en'
+  # When in panic or in doubt, run in circles, scream, and shout!
+  my(@out, %seen);
+  foreach my $t (@_) {
+    next unless $t;
+    next if $seen{lc $t}++; # tags are case-insensitive; don't return it or hit it again
+    # push @out, super_languages($t); # nah, keep that separate
+    push @out, @{ $Panic{lc $t} || next };  # tags with no table entry contribute nothing
   }
-  return();
+  return grep !$seen{$_}++,  @out, 'en';  # drop duplicates and anything given as input
 }
 
 ###########################################################################
+1;
+__END__
 
 =back
 
@@ -615,6 +795,3 @@ Sean M. Burke C<sburke@cpan.org>
 
 =cut
 
-1;
-
-__END__
diff --git a/lib/I18N/LangTags/ChangeLog b/lib/I18N/LangTags/ChangeLog

new file mode 100644 (file)

index 0000000..55b84ac
--- /dev/null
+++ b/lib/I18N/LangTags/ChangeLog
@@ -0,0 +1,90 @@
+Revision history for Perl module I18N::LangTags.
+                                        Time-stamp: "2001-06-20 01:52:29 MDT"
+
+2001-06-20  Sean M. Burke  sburke@cpan.org
+       
+       * Release 0.24
+
+       * I18N::LangTags -- some elaborate hacks to make us
+       recognize legacy aliases like no-nyn == nn.
+       Added panic_languages().
+       Added :ALL export tag.
+       Minor docs fixes, and spiffing up test.pl.
+       
+       * I18N::LangTags::List -- minor corrections; added
+       a few aliases.
+       
+2001-05-29  Sean M. Burke  sburke@cpan.org
+
+       * Release 0.23
+
+       * I18N::LangTags::List -- minor corrections.  And is now
+       a module, not just documentation.
+
+2001-05-27  Sean M. Burke  sburke@cpan.org
+
+       * Release 0.22
+        
+       * Now bundling I18N::LangTags::List, a reference for lang tags,
+       replacing generate_language_table.plx and language_codes.txt
+       
+2001-05-25  Sean M. Burke  sburke@cpan.org
+
+       * Release 0.21
+
+       * extract_language_tags and locale2language_tag now
+       return untainted output.  Useful if you feed tainted
+       things, like $ENV{'LANG'}.
+       
+2001-03-13  Sean M. Burke  sburke@cpan.org
+
+       * Release 0.20
+
+       * Added support for RFC 3066 tags: allowing three-letter primary
+       tags ("nav"), and allowing digits in subtags ("x-borg-prot3252").
+
+       * Changed all references from RFC 1766 to RFC 3066.
+
+       * Now bundling fulltext of RFC 3066 in the dist.
+
+       * Now bundling generate_language_table.plx and language_codes.txt
+       
+       * Added some nice tests to test.pl
+
+       * Inverting order of listings in this ChangeLog file.
+
+2000-05-13  Sean M. Burke  sburke@cpan.org
+
+       * Release 0.13
+
+       * Just noting my new email address.
+
+1999-03-06  Sean M. Burke  sburke@netadventure.net
+
+       * Release 0.11
+
+       * Added functions
+          similarity_language_tag, is_dialect_of,
+         locale2language_tag, alternate_language_tags, and
+         encode_language_tag
+
+1998-12-14  Sean M. Burke  sburke@netadventure.net
+
+       * Release 0.09
+
+       * Added function super_languages()
+
+1998-10-31  Sean M. Burke  sburke@netadventure.net
+
+       * Release 0.08
+
+       * Just changes in the docs and bundle -- no change
+       in functionality.
+
+1998-04-02 Sean M. Burke  sburke@netadventure.net
+
+       * Release 0.07
+
+       * First public release.
+
+[END OF CHANGELOG]
diff --git a/lib/I18N/LangTags/List.pm b/lib/I18N/LangTags/List.pm

new file mode 100644 (file)

index 0000000..dec2a3f
--- /dev/null
+++ b/lib/I18N/LangTags/List.pm
@@ -0,0 +1,1620 @@
+
+require 5;
+package I18N::LangTags::List;
+#  Time-stamp: "2001-05-27 19:55:19 MDT"
+use strict;
+use vars qw(%Name $Debug $VERSION);
+$VERSION = '0.24';
+# POD at the end.
+
+#----------------------------------------------------------------------
+{
+# read the table out of our own POD (the text after __DATA__)!
+  my $seeking = 1;  # true until the "=for woohah" marker line has been seen
+  my $count = 0;    # number of tag/name pairs stored in %Name
+  my($tag,$name);
+  while(<I18N::LangTags::List::DATA>) {
+    if($seeking) {
+      $seeking = 0 if m/=for woohah/;
+    } else {
+      next unless ($tag, $name) =   # "{tag}", optional colon, then name text up to any bracket
+       m/\{([-0-9a-zA-Z]+)\}(?:\s*:)?\s*([^\[\]]+)/;
+      $name =~ s/\s*[;\.]*\s*$//g;  # trim trailing whitespace, semicolons and periods
+      next unless $name;  # nothing left to record
+      ++$count;
+      print "<$tag> <$name>\n" if $Debug;
+      $Name{$tag} = $name;  # keyed by the tag exactly as written in the POD
+    }
+  }
+  die "No tags read??" unless $count;  # sanity check: at least one entry must have been stored
+}
+#----------------------------------------------------------------------
+
+sub name {  # name($tag): English name looked up in %Name; empty return if none found
+  my $tag = lc($_[0] || return);  # lookups are done in lowercase; a false argument returns at once
+  $tag =~ s/^\s+//s;  # strip leading (and, on the next line, trailing) whitespace
+  $tag =~ s/\s+$//s;
+  
+  my $alt;  # the same tag with its x-/i- prefix swapped, or '' if it has neither
+  if($tag =~ m/^x-(.+)/) {
+    $alt = "i-$1";
+  } elsif($tag =~ m/^i-(.+)/) {
+    $alt = "x-$1";
+  } else {
+    $alt = '';
+  }
+  
+  my $subform = '';  # subtags shaved off the end of $tag so far
+  my $name = '';
+  print "Input: {$tag}\n" if $Debug;
+  while(length $tag) {
+    last if $name = $Name{$tag};  # found the (possibly shortened) tag itself
+    last if $name = $Name{$alt};  # found its x-/i- counterpart
+    if($tag =~ s/(-[a-z0-9]+)$//s) {
+      print "Shaving off: $1 leaving $tag\n" if $Debug;
+      $subform = "$1$subform";
+       # and loop around again
+       
+      $alt =~ s/(-[a-z0-9]+)$//s && $Debug && print " alt -> $alt\n";
+    } else {
+      # we're trying to pull a subform off a primary tag. TILT!
+      print "Aborting on: {$name}{$subform}\n" if $Debug;
+      last;
+    }
+  }
+  print "Output: {$name}{$subform}\n" if $Debug;
+  
+  return unless $name;   # Failure
+  return $name unless $subform;   # Exact match
+  $subform =~ s/^-//s;  # drop the hyphen that led the first shaved subtag
+  $subform =~ s/-$//s;
+  return "$name (Subform \"$subform\")";
+}
+
+1;
+
+__DATA__
+
+=head1 NAME
+
+I18N::LangTags::List -- tags and names for human languages
+
+=head1 SYNOPSIS
+
+  use I18N::LangTags::List;
+  print "Parlez-vous... ", join(', ',
+      I18N::LangTags::List::name('elx') || 'unknown_language',
+      I18N::LangTags::List::name('ar-Kw') || 'unknown_language',
+      I18N::LangTags::List::name('en') || 'unknown_language',
+      I18N::LangTags::List::name('en-CA') || 'unknown_language',
+    ), "?\n";
+
+prints:
+
+  Parlez-vous... Elamite, Kuwait Arabic, English, Canadian English?
+
+=head1 DESCRIPTION
+
+This module provides a function 
+C<I18N::LangTags::List::name( I<langtag> ) > that takes
+a language tag (see L<I18N::LangTags|I18N::LangTags>)
+and returns the best attempt at an English name for it, or
+undef if it can't make sense of the tag.
+
+The function I18N::LangTags::List::name(...) is not exported.
+
+The map of tags-to-names that it uses is accessible as
+%I18N::LangTags::List::Name, and it's the same as the list
+that follows in this documentation, which should be useful
+to you even if you don't use this module.
+
+=head1 ABOUT LANGUAGE TAGS
+
+Internet language tags, as defined in RFC 3066, are a formalism
+for denoting human languages.  The two-letter ISO 639-1 language
+codes are well known (as "en" for English), as are their forms
+when qualified by a country code ("en-US").  Less well-known are the
+arbitrary-length non-ISO codes (like "i-mingo"), and the 
+recently (in 2001) introduced three-letter ISO-639-2 codes.
+
+Remember these important facts:
+
+=over
+
+=item *
+
+Language tags are not locale IDs.  A locale ID is written with a "_"
+instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
+I<means> something different than a language tag.  A language tag
+denotes a language.  A locale ID denotes a language I<as used in>
+a particular place, in combination with non-linguistic
+location-specific information such as what currency is used
+there.  Locales I<also> often denote character set information,
+as in "en_US.ISO8859-1".
+
+=item *
+
+Language tags are not for computer languages.
+
+=item *
+
+"Dialect" is not a useful term, since there is no objective
+criterion for establishing when two languages are
+dialects of each other, or are separate languages.
+
+=item *
+
+Language tags are not case-sensitive.  en-US, en-us, En-Us, etc.,
+are all the same tag, and denote the same language.
+
+=item *
+
+Not every language tag really refers to a single language.  Some
+language tags refer to conditions: i-default (system-message text
+in English plus maybe other languages), und (undetermined
+language).  Others (notably lots of the three-letter codes) are
+bibliographic tags that classify whole groups of languages, as
+with cus "Cushitic (Other)" (i.e., a
+language that has been classed as Cushitic, but which has no more
+specific code) or the even less linguistically coherent
+sai for "South American Indian (Other)".  While useful in
+bibliography, B<SUCH TAGS ARE NOT
+FOR GENERAL USE>.  For further guidance, email me.
+
+=item *
+
+Language tags are not country codes.  In fact, they are often
+distinct codes, as with language tag ja for Japanese, and
+ISO 3166 country code C<.jp> for Japan.
+
+=back
+
+=head1 LIST OF LANGUAGES
+
+The first part of each item is the language tag, between
+{...}.  It
+is followed by an English name for the language or language-group.
+Language tags that I judge to be not for general use, are bracketed.
+
+This list is in alphabetical order by English name of the language.
+
+=for reminder
+ The name in the =item line MUST NOT have E<...>'s in it!!
+
+=for woohah START
+
+=over
+
+=item {ab} : Abkhazian
+
+eq Abkhaz
+
+=item {ace} : Achinese
+
+=item {ach} : Acoli
+
+=item {ada} : Adangme
+
+=item {aa} : Afar
+
+=item {afh} : Afrihili
+
+(Artificial)
+
+=item {af} : Afrikaans
+
+=item [{afa} : Afro-Asiatic (Other)]
+
+=item {aka} : Akan
+
+=item {akk} : Akkadian
+
+(Historical)
+
+=item {sq} : Albanian
+
+=item {ale} : Aleut
+
+=item [{alg} : Algonquian languages]
+
+NOT Algonquin!
+
+=item [{tut} : Altaic (Other)]
+
+=item {am} : Amharic
+
+NOT Aramaic!
+
+=item {i-ami} : Ami
+
+eq Amis.  eq 'Amis.  eq Pangca.
+
+=item [{apa} : Apache languages]
+
+=item {ar} : Arabic
+
+Many forms are mutually un-intelligible in spoken media.
+Notable forms:
+{ar-ae} UAE Arabic;
+{ar-bh} Bahrain Arabic;
+{ar-dz} Algerian Arabic;
+{ar-eg} Egyptian Arabic;
+{ar-iq} Iraqi Arabic;
+{ar-jo} Jordanian Arabic;
+{ar-kw} Kuwait Arabic;
+{ar-lb} Lebanese Arabic;
+{ar-ly} Libyan Arabic;
+{ar-ma} Moroccan Arabic;
+{ar-om} Omani Arabic;
+{ar-qa} Qatari Arabic;
+{ar-sa} Saudi Arabic;
+{ar-sy} Syrian Arabic;
+{ar-tn} Tunisian Arabic;
+{ar-ye} Yemen Arabic.
+
+=item {arc} : Aramaic
+
+NOT Amharic!  NOT Samaritan Aramaic!
+
+=item {arp} : Arapaho
+
+=item {arn} : Araucanian
+
+=item {arw} : Arawak
+
+=item {hy} : Armenian
+
+=item [{art} : Artificial (Other)]
+
+=item {as} : Assamese
+
+=item [{ath} : Athapascan languages]
+
+eq Athabaskan.  eq Athapaskan.  eq Athabascan.
+
+=item [{aus} : Australian languages]
+
+=item [{map} : Austronesian (Other)]
+
+=item {ava} : Avaric
+
+=item {ae} : Avestan
+
+eq Zend
+
+=item {awa} : Awadhi
+
+=item {ay} : Aymara
+
+=item {az} : Azerbaijani
+
+eq Azeri
+
+=item {ban} : Balinese
+
+=item [{bat} : Baltic (Other)]
+
+=item {bal} : Baluchi
+
+=item {bam} : Bambara
+
+=item [{bai} : Bamileke languages]
+
+=item {bad} : Banda
+
+=item [{bnt} : Bantu (Other)]
+
+=item {bas} : Basa
+
+=item {ba} : Bashkir
+
+=item {eu} : Basque
+
+=item {btk} : Batak (Indonesia)
+
+=item {bej} : Beja
+
+=item {be} : Belarusian
+
+eq Belarussian.  eq Byelarussian.
+eq Belorussian.  eq Byelorussian.
+eq White Russian.  eq White Ruthenian.
+NOT Ruthenian!
+
+=item {bem} : Bemba
+
+=item {bn} : Bengali
+
+eq Bangla.
+
+=item [{ber} : Berber (Other)]
+
+=item {bho} : Bhojpuri
+
+=item {bh} : Bihari
+
+=item {bik} : Bikol
+
+=item {bin} : Bini
+
+=item {bi} : Bislama
+
+eq Bichelamar.
+
+=item {bs} : Bosnian
+
+=item {bra} : Braj
+
+=item {br} : Breton
+
+=item {bug} : Buginese
+
+=item {bg} : Bulgarian
+
+=item {i-bnn} : Bunun
+
+=item {bua} : Buriat
+
+=item {my} : Burmese
+
+=item {cad} : Caddo
+
+=item {car} : Carib
+
+=item {ca} : Catalan
+
+eq CatalE<aacute>n.  eq Catalonian.
+
+=item [{cau} : Caucasian (Other)]
+
+=item {ceb} : Cebuano
+
+=item [{cel} : Celtic (Other)]
+
+Notable forms:
+{cel-gaulish} Gaulish (Historical)
+
+=item [{cai} : Central American Indian (Other)]
+
+=item {chg} : Chagatai
+
+(Historical?)
+
+=item [{cmc} : Chamic languages]
+
+=item {ch} : Chamorro
+
+=item {ce} : Chechen
+
+=item {chr} : Cherokee
+
+eq Tsalagi
+
+=item {chy} : Cheyenne
+
+=item {chb} : Chibcha
+
+(Historical)  NOT Chibchan (which is a language family).
+
+=item {ny} : Chichewa
+
+eq Nyanja.  eq Chinyanja.
+
+=item {zh} : Chinese
+
+Many forms are mutually un-intelligible in spoken media.
+Notable subforms:
+{zh-cn} PRC Chinese;
+{zh-hk} Hong Kong Chinese;
+{zh-mo} Macau Chinese;
+{zh-sg} Singapore Chinese;
+{zh-tw} Taiwan Chinese;
+{zh-guoyu} Mandarin [Putonghua/Guoyu];
+{zh-hakka} Hakka [formerly i-hakka];
+{zh-min} Hokkien;
+{zh-min-nan} Southern Hokkien;
+{zh-wuu} Shanghaiese;
+{zh-xiang} Hunanese;
+{zh-gan} Gan;
+{zh-yue} Cantonese.
+
+=for etc
+{i-hakka} Hakka (old tag)
+
+=item {chn} : Chinook Jargon
+
+eq Chinook Wawa.
+
+=item {chp} : Chipewyan
+
+=item {cho} : Choctaw
+
+=item {cu} : Church Slavic
+
+eq Old Church Slavonic.
+
+=item {chk} : Chuukese
+
+eq Trukese.  eq Chuuk.  eq Truk.  eq Ruk.
+
+=item {cv} : Chuvash
+
+=item {cop} : Coptic
+
+=item {kw} : Cornish
+
+=item {co} : Corsican
+
+eq Corse.
+
+=item {cre} : Cree
+
+NOT Creek!
+
+=item {mus} : Creek
+
+NOT Cree!
+
+=item [{cpe} : English-based Creoles and pidgins (Other)]
+
+=item [{cpf} : French-based Creoles and pidgins (Other)]
+
+=item [{cpp} : Portuguese-based Creoles and pidgins (Other)]
+
+=item [{crp} : Creoles and pidgins (Other)]
+
+=item {hr} : Croatian
+
+eq Croat.
+
+=item [{cus} : Cushitic (Other)]
+
+=item {cs} : Czech
+
+=item {dak} : Dakota
+
+eq Nakota.  eq Lakota.
+
+=item {da} : Danish
+
+=item {day} : Dayak
+
+=item {i-default} : Default (Fallthru) Language
+
+Defined in RFC 2277, this is for tagging text
+(which must include English text, and might/should include text
+in other appropriate languages) that is emitted in a context
+where language-negotiation wasn't possible -- in SMTP mail failure
+messages, for example.
+
+=item {del} : Delaware
+
+=item {din} : Dinka
+
+=item {div} : Divehi
+
+=item {doi} : Dogri
+
+NOT Dogrib!
+
+=item {dgr} : Dogrib
+
+NOT Dogri!
+
+=item [{dra} : Dravidian (Other)]
+
+=item {dua} : Duala
+
+=item {nl} : Dutch
+
+eq Netherlander.  Notable forms:
+{nl-nl} Netherlands Dutch;
+{nl-be} Belgian Dutch.
+
+=item {dum} : Middle Dutch (ca.1050-1350)
+
+(Historical)
+
+=item {dyu} : Dyula
+
+=item {dz} : Dzongkha
+
+=item {efi} : Efik
+
+=item {egy} : Ancient Egyptian
+
+(Historical)
+
+=item {eka} : Ekajuk
+
+=item {elx} : Elamite
+
+(Historical)
+
+=item {en} : English
+
+Notable forms:
+{en-au} Australian English;
+{en-bz} Belize English;
+{en-ca} Canadian English;
+{en-gb} UK English;
+{en-ie} Irish English;
+{en-jm} Jamaican English;
+{en-nz} New Zealand English;
+{en-ph} Philippine English;
+{en-tt} Trinidad English;
+{en-us} US English;
+{en-za} South African English;
+{en-zw} Zimbabwe English.
+
+=item {enm} : Middle English (1100-1500)
+
+(Historical)
+
+=item {ang} : Old English (ca.450-1100)
+
+eq Anglo-Saxon.  (Historical)
+
+=item {eo} : Esperanto
+
+(Artificial)
+
+=item {et} : Estonian
+
+=item {ewe} : Ewe
+
+=item {ewo} : Ewondo
+
+=item {fan} : Fang
+
+=item {fat} : Fanti
+
+=item {fo} : Faroese
+
+=item {fj} : Fijian
+
+=item {fi} : Finnish
+
+=item [{fiu} : Finno-Ugrian (Other)]
+
+eq Finno-Ugric.  NOT Ugaritic!
+
+=item {fon} : Fon
+
+=item {fr} : French
+
+Notable forms:
+{fr-fr} France French;
+{fr-be} Belgian French;
+{fr-ca} Canadian French;
+{fr-ch} Swiss French;
+{fr-lu} Luxembourg French;
+{fr-mc} Monaco French.
+
+=item {frm} : Middle French (ca.1400-1600)
+
+(Historical)
+
+=item {fro} : Old French (842-ca.1400)
+
+(Historical)
+
+=item {fy} : Frisian
+
+=item {fur} : Friulian
+
+=item {ful} : Fulah
+
+=item {gaa} : Ga
+
+=item {gd} : Scots Gaelic
+
+NOT Scots!
+
+=item {gl} : Gallegan
+
+eq Galician
+
+=item {lug} : Ganda
+
+=item {gay} : Gayo
+
+=item {gba} : Gbaya
+
+=item {gez} : Geez
+
+eq Ge'ez
+
+=item {ka} : Georgian
+
+=item {de} : German
+
+Notable forms:
+{de-at} Austrian German;
+{de-be} Belgian German;
+{de-ch} Swiss German;
+{de-de} Germany German;
+{de-li} Liechtenstein German;
+{de-lu} Luxembourg German.
+
+=item {gmh} : Middle High German (ca.1050-1500)
+
+(Historical)
+
+=item {goh} : Old High German (ca.750-1050)
+
+(Historical)
+
+=item [{gem} : Germanic (Other)]
+
+=item {gil} : Gilbertese
+
+=item {gon} : Gondi
+
+=item {gor} : Gorontalo
+
+=item {got} : Gothic
+
+(Historical)
+
+=item {grb} : Grebo
+
+=item {grc} : Ancient Greek (to 1453)
+
+(Historical)
+
+=item {el} : Modern Greek (1453-)
+
+=item {gn} : Guarani
+
+eq GuaranE<iacute>.
+
+=item {gu} : Gujarati
+
+=item {gwi} : Gwich'in
+
+eq Gwichin
+
+=item {hai} : Haida
+
+=item {ha} : Hausa
+
+=item {haw} : Hawaiian
+
+eq Hawai'ian.
+
+=item {he} : Hebrew
+
+(Formerly "iw".)
+
+=for etc
+{iw} Hebrew (old tag)
+
+=item {hz} : Herero
+
+=item {hil} : Hiligaynon
+
+=item {him} : Himachali
+
+=item {hi} : Hindi
+
+=item {ho} : Hiri Motu
+
+=item {hit} : Hittite
+
+(Historical)
+
+=item {hmn} : Hmong
+
+=item {hu} : Hungarian
+
+=item {hup} : Hupa
+
+=item {iba} : Iban
+
+=item {is} : Icelandic
+
+=item {ibo} : Igbo
+
+=item {ijo} : Ijo
+
+=item {ilo} : Iloko
+
+=item [{inc} : Indic (Other)]
+
+=item [{ine} : Indo-European (Other)]
+
+=item {id} : Indonesian
+
+(Formerly "in".)
+
+=for etc
+{in} Indonesian (old tag)
+
+=item {ia} : Interlingua (International Auxiliary Language Association)
+
+(Artificial)  NOT Interlingue!
+
+=item {ie} : Interlingue
+
+(Artificial)  NOT Interlingua!
+
+=item {iu} : Inuktitut
+
+A subform of "Eskimo".
+
+=item {ik} : Inupiaq
+
+A subform of "Eskimo".
+
+=item [{ira} : Iranian (Other)]
+
+=item {ga} : Irish
+
+=item {mga} : Middle Irish (900-1200)
+
+(Historical)
+
+=item {sga} : Old Irish (to 900)
+
+(Historical)
+
+=item [{iro} : Iroquoian languages]
+
+=item {it} : Italian
+
+Notable forms:
+{it-it} Italy Italian;
+{it-ch} Swiss Italian.
+
+=item {ja} : Japanese
+
+(NOT "jp"!)
+
+=item {jw} : Javanese
+
+=item {jrb} : Judeo-Arabic
+
+=item {jpr} : Judeo-Persian
+
+=item {kab} : Kabyle
+
+=item {kac} : Kachin
+
+=item {kl} : Kalaallisut
+
+eq Greenlandic "Eskimo"
+
+=item {kam} : Kamba
+
+=item {kn} : Kannada
+
+eq Kanarese.  NOT Canadian!
+
+=item {kau} : Kanuri
+
+=item {kaa} : Kara-Kalpak
+
+=item {kar} : Karen
+
+=item {ks} : Kashmiri
+
+=item {kaw} : Kawi
+
+=item {kk} : Kazakh
+
+=item {kha} : Khasi
+
+=item {km} : Khmer
+
+eq Cambodian.  eq Kampuchean.
+
+=item [{khi} : Khoisan (Other)]
+
+=item {kho} : Khotanese
+
+=item {ki} : Kikuyu
+
+eq Gikuyu.
+
+=item {kmb} : Kimbundu
+
+=item {rw} : Kinyarwanda
+
+=item {ky} : Kirghiz
+
+=item {i-klingon} : Klingon
+
+=item {kv} : Komi
+
+=item {kon} : Kongo
+
+=item {kok} : Konkani
+
+=item {ko} : Korean
+
+=item {kos} : Kosraean
+
+=item {kpe} : Kpelle
+
+=item {kro} : Kru
+
+=item {kj} : Kuanyama
+
+=item {kum} : Kumyk
+
+=item {ku} : Kurdish
+
+=item {kru} : Kurukh
+
+=item {kut} : Kutenai
+
+=item {lad} : Ladino
+
+eq Judeo-Spanish.  NOT Ladin (a minority language in Italy).
+
+=item {lah} : Lahnda
+
+NOT Lamba!
+
+=item {lam} : Lamba
+
+NOT Lahnda!
+
+=item {lo} : Lao
+
+eq Laotian.
+
+=item {la} : Latin
+
+(Historical)  NOT Ladin!  NOT Ladino!
+
+=item {lv} : Latvian
+
+eq Lettish.
+
+=item {lb} : Letzeburgesch
+
+eq Luxemburgian, eq Luxemburger.  (Formerly i-lux.)
+
+=for etc
+{i-lux} Letzeburgesch (old tag)
+
+=item {lez} : Lezghian
+
+=item {ln} : Lingala
+
+=item {lt} : Lithuanian
+
+=item {nds} : Low German
+
+eq Low Saxon.
+
+=item {loz} : Lozi
+
+=item {lub} : Luba-Katanga
+
+=item {lua} : Luba-Lulua
+
+=item {lui} : Luiseno
+
+eq LuiseE<ntilde>o.
+
+=item {lun} : Lunda
+
+=item {luo} : Luo (Kenya and Tanzania)
+
+=item {lus} : Lushai
+
+=item {mk} : Macedonian
+
+eq the modern Slavic language spoken in what was Yugoslavia.
+NOT the form of Greek spoken in Greek Macedonia!
+
+=item {mad} : Madurese
+
+=item {mag} : Magahi
+
+=item {mai} : Maithili
+
+=item {mak} : Makasar
+
+=item {mg} : Malagasy
+
+=item {ms} : Malay
+
+NOT Malayalam!
+
+=item {ml} : Malayalam
+
+NOT Malay!
+
+=item {mt} : Maltese
+
+=item {mnc} : Manchu
+
+=item {mdr} : Mandar
+
+NOT Mandarin!
+
+=item {man} : Mandingo
+
+=item {mni} : Manipuri
+
+eq Meithei.
+
+=item [{mno} : Manobo languages]
+
+=item {gv} : Manx
+
+=item {mi} : Maori
+
+NOT Mari!
+
+=item {mr} : Marathi
+
+=item {chm} : Mari
+
+NOT Maori!
+
+=item {mh} : Marshall
+
+eq Marshallese.
+
+=item {mwr} : Marwari
+
+=item {mas} : Masai
+
+=item [{myn} : Mayan languages]
+
+=item {men} : Mende
+
+=item {mic} : Micmac
+
+=item {min} : Minangkabau
+
+=item {i-mingo} : Mingo
+
+eq the Iroquoian language West Virginia Seneca.  NOT New York Seneca!
+
+=item [{mis} : Miscellaneous languages]
+
+Don't use this.
+
+=item {moh} : Mohawk
+
+=item {mo} : Moldavian
+
+eq Moldovan.
+
+=item [{mkh} : Mon-Khmer (Other)]
+
+=item {lol} : Mongo
+
+=item {mn} : Mongolian
+
+eq Mongol.
+
+=item {mos} : Mossi
+
+=item [{mul} : Multiple languages]
+
+Not for normal use.
+
+=item [{mun} : Munda languages]
+
+=item {nah} : Nahuatl
+
+=item {na} : Nauru
+
+=item {nv} : Navajo
+
+eq Navaho.  (Formerly i-navajo.)
+
+=for etc
+{i-navajo} Navajo (old tag)
+
+=item {nd} : North Ndebele
+
+=item {nr} : South Ndebele
+
+=item {ng} : Ndonga
+
+=item {ne} : Nepali
+
+eq Nepalese.  Notable forms:
+{ne-np} Nepal Nepali;
+{ne-in} India Nepali.
+
+=item {new} : Newari
+
+=item {nia} : Nias
+
+=item [{nic} : Niger-Kordofanian (Other)]
+
+=item [{ssa} : Nilo-Saharan (Other)]
+
+=item {niu} : Niuean
+
+=item {non} : Old Norse
+
+(Historical)
+
+=item [{nai} : North American Indian]
+
+Do not use this.
+
+=item {se} : Northern Sami
+
+eq Lappish.  eq Lapp.  eq (Northern) Saami.
+
+=item {no} : Norwegian
+
+Note the two following forms:
+
+=item {nb} : Norwegian Bokmal
+
+eq BokmE<aring>l.  (A form of Norwegian.)  (Formerly no-bok.)
+
+=for etc
+{no-bok} Norwegian Bokmal (old tag)
+
+=item {nn} : Norwegian Nynorsk
+
+(A form of Norwegian.)  (Formerly no-nyn.)
+
+=for etc
+{no-nyn} Norwegian Nynorsk (old tag)
+
+=item [{nub} : Nubian languages]
+
+=item {nym} : Nyamwezi
+
+=item {nyn} : Nyankole
+
+=item {nyo} : Nyoro
+
+=item {nzi} : Nzima
+
+=item {oc} : Occitan (post 1500)
+
+eq ProvenE<ccedil>al, eq Provencal
+
+=item {oji} : Ojibwa
+
+eq Ojibwe.
+
+=item {or} : Oriya
+
+=item {om} : Oromo
+
+=item {osa} : Osage
+
+=item {os} : Ossetian; Ossetic
+
+=item [{oto} : Otomian languages]
+
+Group of languages collectively called "OtomE<iacute>".
+
+=item {pal} : Pahlavi
+
+eq Pahlevi
+
+=item {i-pwn} : Paiwan
+
+eq Pariwan
+
+=item {pau} : Palauan
+
+=item {pi} : Pali
+
+(Historical?)
+
+=item {pam} : Pampanga
+
+=item {pag} : Pangasinan
+
+=item {pa} : Panjabi
+
+eq Punjabi
+
+=item {pap} : Papiamento
+
+eq Papiamentu.
+
+=item [{paa} : Papuan (Other)]
+
+=item {fa} : Persian
+
+eq Farsi.  eq Iranian.
+
+=item {peo} : Old Persian (ca.600-400 B.C.)
+
+=item [{phi} : Philippine (Other)]
+
+=item {phn} : Phoenician
+
+(Historical)
+
+=item {pon} : Pohnpeian
+
+NOT Pompeiian!
+
+=item {pl} : Polish
+
+=item {pt} : Portuguese
+
+eq Portugese.  Notable forms:
+{pt-pt} Portugal Portuguese;
+{pt-br} Brazilian Portuguese.
+
+=item [{pra} : Prakrit languages]
+
+=item {pro} : Old Provencal (to 1500)
+
+eq Old ProvenE<ccedil>al.  (Historical.)
+
+=item {ps} : Pushto
+
+eq Pashto.  eq Pushtu.
+
+=item {qu} : Quechua
+
+eq Quecha.
+
+=item {rm} : Raeto-Romance
+
+eq Romansh.
+
+=item {raj} : Rajasthani
+
+=item {rap} : Rapanui
+
+=item {rar} : Rarotongan
+
+=item [{qaa - qtz} : Reserved for local use.]
+
+=item [{roa} : Romance (Other)]
+
+NOT Romanian!  NOT Romany!  NOT Romansh!
+
+=item {ro} : Romanian
+
+eq Rumanian.  NOT Romany!
+
+=item {rom} : Romany
+
+eq Rom.  NOT Romanian!
+
+=item {rn} : Rundi
+
+=item {ru} : Russian
+
+NOT White Russian!  NOT Rusyn!
+
+=item [{sal} : Salishan languages]
+
+Large language group.
+
+=item {sam} : Samaritan Aramaic
+
+NOT Aramaic!
+
+=item [{smi} : Sami languages (Other)]
+
+=item {sm} : Samoan
+
+=item {sad} : Sandawe
+
+=item {sg} : Sango
+
+=item {sa} : Sanskrit
+
+(Historical)
+
+=item {sat} : Santali
+
+=item {sc} : Sardinian
+
+eq Sard.
+
+=item {sas} : Sasak
+
+=item {sco} : Scots
+
+NOT Scots Gaelic!
+
+=item {sel} : Selkup
+
+=item [{sem} : Semitic (Other)]
+
+=item {sr} : Serbian
+
+eq Serb.  NOT Sorbian.
+
+=item {srr} : Serer
+
+=item {shn} : Shan
+
+=item {sn} : Shona
+
+=item {sid} : Sidamo
+
+=item {sgn-...} : Sign Languages
+
+Always use with a subtag.  Notable forms:
+{sgn-gb} British Sign Language (BSL);
+{sgn-ie} Irish Sign Language (ESL);
+{sgn-ni} Nicaraguan Sign Language (ISN);
+{sgn-us} American Sign Language (ASL).
+
+=item {bla} : Siksika
+
+eq Blackfoot.  eq Pikanii.
+
+=item {sd} : Sindhi
+
+=item {si} : Sinhalese
+
+eq Sinhala.
+
+=item [{sit} : Sino-Tibetan (Other)]
+
+=item [{sio} : Siouan languages]
+
+=item {den} : Slave (Athapascan)
+
+("Slavey" is a subform.)
+
+=item [{sla} : Slavic (Other)]
+
+=item {sk} : Slovak
+
+eq Slovakian.
+
+=item {sl} : Slovenian
+
+eq Slovene.
+
+=item {sog} : Sogdian
+
+=item {so} : Somali
+
+=item {son} : Songhai
+
+=item {snk} : Soninke
+
+=item {wen} : Sorbian languages
+
+eq Wendish.  eq Sorb.  eq Lusatian.  eq Wend.  NOT Venda!  NOT Serbian!
+
+=item {nso} : Northern Sotho
+
+=item {st} : Southern Sotho
+
+eq Sutu.  eq Sesotho.
+
+=item [{sai} : South American Indian (Other)]
+
+=item {es} : Spanish
+
+Notable forms:
+{es-ar} Argentine Spanish;
+{es-bo} Bolivian Spanish;
+{es-cl} Chilean Spanish;
+{es-co} Colombian Spanish;
+{es-do} Dominican Spanish;
+{es-ec} Ecuadorian Spanish;
+{es-es} Spain Spanish;
+{es-gt} Guatemalan Spanish;
+{es-hn} Honduran Spanish;
+{es-mx} Mexican Spanish;
+{es-pa} Panamanian Spanish;
+{es-pe} Peruvian Spanish;
+{es-pr} Puerto Rican Spanish;
+{es-py} Paraguay Spanish;
+{es-sv} Salvadoran Spanish;
+{es-us} US Spanish;
+{es-uy} Uruguayan Spanish;
+{es-ve} Venezuelan Spanish.
+
+=item {suk} : Sukuma
+
+=item {sux} : Sumerian
+
+(Historical)
+
+=item {su} : Sundanese
+
+=item {sus} : Susu
+
+=item {sw} : Swahili
+
+eq Kiswahili
+
+=item {ss} : Swati
+
+=item {sv} : Swedish
+
+Notable forms:
+{sv-se} Sweden Swedish;
+{sv-fi} Finland Swedish.
+
+=item {syr} : Syriac
+
+=item {tl} : Tagalog
+
+=item {ty} : Tahitian
+
+=item [{tai} : Tai (Other)]
+
+NOT Thai!
+
+=item {tg} : Tajik
+
+=item {tmh} : Tamashek
+
+=item {ta} : Tamil
+
+=item {i-tao} : Tao
+
+eq Yami.
+
+=item {tt} : Tatar
+
+=item {i-tay} : Tayal
+
+eq Atayal.  eq Atayan.
+
+=item {te} : Telugu
+
+=item {ter} : Tereno
+
+=item {tet} : Tetum
+
+=item {th} : Thai
+
+NOT Tai!
+
+=item {bo} : Tibetan
+
+=item {tig} : Tigre
+
+=item {ti} : Tigrinya
+
+=item {tem} : Timne
+
+eq Themne.  eq Timene.
+
+=item {tiv} : Tiv
+
+=item {tli} : Tlingit
+
+=item {tpi} : Tok Pisin
+
+=item {tkl} : Tokelau
+
+=item {tog} : Tonga (Nyasa)
+
+NOT Tsonga!
+
+=item {to} : Tonga (Tonga Islands)
+
+(Pronounced "Tong-a", not "Tong-ga")
+
+NOT Tsonga!
+
+=item {tsi} : Tsimshian
+
+eq Sm'algyax
+
+=item {ts} : Tsonga
+
+NOT Tonga!
+
+=item {i-tsu} : Tsou
+
+=item {tn} : Tswana
+
+Same as Setswana.
+
+=item {tum} : Tumbuka
+
+=item {tr} : Turkish
+
+(Typically in Roman script)
+
+=item {ota} : Ottoman Turkish (1500-1928)
+
+(Typically in Arabic script)  (Historical)
+
+=item {tk} : Turkmen
+
+eq Turkmeni.
+
+=item {tvl} : Tuvalu
+
+=item {tyv} : Tuvinian
+
+eq Tuvan.  eq Tuvin.
+
+=item {tw} : Twi
+
+=item {uga} : Ugaritic
+
+NOT Ugric!
+
+=item {ug} : Uighur
+
+=item {uk} : Ukrainian
+
+=item {umb} : Umbundu
+
+=item {und} : Undetermined
+
+Not a tag for normal use.
+
+=item {ur} : Urdu
+
+=item {uz} : Uzbek
+
+eq E<Ouml>zbek
+
+=item {vai} : Vai
+
+=item {ven} : Venda
+
+NOT Wendish!  NOT Wend!  NOT Avestan!
+
+=item {vi} : Vietnamese
+
+eq Viet.
+
+=item {vo} : Volapuk
+
+eq VolapE<uuml>k.  (Artificial)
+
+=item {vot} : Votic
+
+eq Votian.  eq Vod.
+
+=item [{wak} : Wakashan languages]
+
+=item {wal} : Walamo
+
+eq Wolaytta.
+
+=item {war} : Waray
+
+Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
+not the smaller Philippine language Waray Sorsogon, nor the extinct
+Australian language Waray.
+
+=item {was} : Washo
+
+eq Washoe
+
+=item {cy} : Welsh
+
+=item {wo} : Wolof
+
+=item {x-...} : Unregistered (Semi-Private Use)
+
+"x-" is a prefix for language tags that are not registered with ISO
+or IANA.  Example: x-double-dutch.
+
+=item {xh} : Xhosa
+
+=item {sah} : Yakut
+
+=item {yao} : Yao
+
+(The Yao in Malawi?)
+
+=item {yap} : Yapese
+
+eq Yap
+
+=item {yi} : Yiddish
+
+Formerly "ji".  Sometimes in Roman script, sometimes in Hebrew script.
+
+=for etc
+{ji} Yiddish (old tag)
+
+=item {yo} : Yoruba
+
+=item [{ypk} : Yupik languages]
+
+Several "Eskimo" languages.
+
+=item {znd} : Zande
+
+=item [{zap} : Zapotec]
+
+(A group of languages.)
+
+=item {zen} : Zenaga
+
+NOT Zend.
+
+=item {za} : Zhuang
+
+=item {zu} : Zulu
+
+=item {zun} : Zuni
+
+eq ZuE<ntilde>i
+
+=back
+
+=for woohah END
+
+=head1 SEE ALSO
+
+L<I18N::LangTags|I18N::LangTags> and its "See Also" section.
+
+=head1 COPYRIGHT AND DISCLAIMER
+
+Copyright (c) 2001 Sean M. Burke. All rights reserved.
+
+You can redistribute and/or
+modify this document under the same terms as Perl itself.
+
+This document is provided in the hope that it will be
+useful, but without any warranty;
+without even the implied warranty of accuracy, authoritativeness,
+completeness, merchantability, or fitness for a particular purpose.
+
+Email any corrections or questions to me.
+
+=head1 AUTHOR
+
+Sean M. Burke, sburkeE<64>cpan.org
+
+=cut
+
+
+# To generate a list of just the two and three-letter codes:
+
+#!/usr/local/bin/perl -w
+
+require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
+ # Sean M. Burke, sburke@cpan.org
+ # This program is for generating the language_codes.txt file: it prints one "code<TAB>English name" line per table row found at $url, sorted by name.
+use strict;
+use LWP::Simple; # supplies get()
+use HTML::TreeBuilder 3.10;
+my $root = HTML::TreeBuilder->new();
+my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
+$root->parse(get($url) || die "Can't get $url"); # die if the fetch returns nothing
+$root->eof(); # tell the parser the document is complete
+
+my @codes; # each entry: [ lowercased English name (sort key), output line ]
+
+foreach my $tr ($root->find_by_tag_name('tr')) { # visit every table row in the page
+  my @f = map $_->as_text(), $tr->content_list(); # text of each child of the row
+  #print map("<$_> ", @f), "\n";
+  next unless @f == 5; # rows without exactly five cells are ignored
+  pop @f; # nix the French name
+  next if $f[-1] eq 'Language Name (English)'; # it's a header line
+  my $xx = splice(@f, 2,1); # pull out the two-letter code
+  $f[-1] =~ s/^\s+//; # trim the English name, which is now the last field
+  $f[-1] =~ s/\s+$//;
+  if($xx =~ m/[a-zA-Z]/) {   # there's a two-letter code for it
+    push   @codes, [ lc($f[-1]),   "$xx\t$f[-1]\n" ];
+  } else { # print the three-letter codes.
+    if($f[0] eq $f[1]) { # the two remaining code cells agree (presumably both three-letter columns -- confirm against the page layout)
+      push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
+    } else { # shouldn't happen
+      push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; # emit all fields, visibly flagged for manual review
+    }
+  }
+}
+
+print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; # output lines in string order of the lowercased name
+print "[ based on $url\n at ", scalar(localtime), "]\n",
+  "[Note: doesn't include IANA-registered codes.]\n";
+exit;
+__END__
+
diff --git a/lib/I18N/LangTags/List.pod b/lib/I18N/LangTags/List.pod

deleted file mode 100644 (file)

index 9bb5e07..0000000
--- a/lib/I18N/LangTags/List.pod
+++ /dev/null
@@ -1,1446 +0,0 @@
-=head1 NAME
-
-I18n::LangTags::List -- list of tags for human languages
-
-=head1 SYNOPSIS
-
-  Time-stamp: "2001-05-27 19:55:19 MDT"
-  [This is not a module; it is documentation]
-
-=head1 ABOUT LANGUAGE TAGS
-
-Internet language tags, as defined in RFC 3066, are a formalism
-for denoting human languages.  The two-letter ISO 639-1 language
-codes are well known (as "en" for English), as are their forms
-when qualified by a country code ("en-US").  Less well-known are the
-arbitrary-length non-ISO codes (like "i-mingo"), and the 
-recently (in 2001) introduced three-letter ISO-639-2 codes.
-
-Remember this important facts:
-
-=over
-
-=item *
-
-Language tags are not locale IDs.  A locale ID is written with a "_"
-instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and
-I<means> something different than a language tag.  A language tag
-denotes a language.  A locale ID denotes a language I<as used in>
-a particular place, in combination with non-linguistic
-location-specific information such as what currency in used
-there.  Locales I<also> often denote character set information,
-as in "en_US.ISO8859-1".
-
-=item *
-
-Language tags are not for computer languages.
-
-=item *
-
-"Dialect" is not a useful term, since there is no objective
-criterion for establishing when two languages are
-dialects of eachother, or are separate languages.
-
-=item *
-
-Language tags are not case-sensitive.  en-US, en-us, En-Us, etc.,
-are all the same tag, and denote the same language.
-
-=item *
-
-Not every language tag really refers to a single language.  Some
-language tags refer to conditions: i-default (system-message text
-in English plus maybe other languages), und (undetermined
-language).  Others (notably lots of the three-letter codes) are
-bibliographic tags that classify whole groups of languages, as
-with cus "Cushitic (Other)" (i.e., a
-language that has been classed as Cushtic, but which has no more
-specific code) or the even less linguistically coherent
-sai for "South American Indian (Other)".  While useful in
-bibliography, B<SUCH TAGS ARE NOT
-FOR GENERAL USE>.  For further guidance, email me.
-
-=item *
-
-Language tags are not country codes.  In fact, they are often
-distinct codes, as with language tag ja for Japanese, and
-ISO 3166 country code C<.jp> for Japan.
-
-=back
-
-=head1 LIST OF LANGUAGES
-
-The first part of each item is the language tag, between
-{...} and in italic characters.  It
-is followed by an English name for the language or language-group.
-Language tags that I judge to be not for general use, are bracketed.
-
-This list is in alphabetical order by English name of the language.
-
-=over
-
-=item I<{ab}> : Abkhazian
-
-eq Abkhaz
-
-=item I<{ace}> : Achinese
-
-=item I<{ach}> : Acoli
-
-=item I<{ada}> : Adangme
-
-=item I<{aa}> : Afar
-
-=item I<{afh}> : Afrihili
-
-(Artificial)
-
-=item I<{af}> : Afrikaans
-
-=item [I<{afa}> : Afro-Asiatic (Other)]
-
-=item I<{aka}> : Akan
-
-=item I<{akk}> : Akkadian
-
-(Historical)
-
-=item I<{sq}> : Albanian
-
-=item I<{ale}> : Aleut
-
-=item [I<{alg}> : Algonquian languages]
-
-NOT Algonquin!
-
-=item [I<{tut}> : Altaic (Other)]
-
-=item I<{am}> : Amharic
-
-NOT Aramaic!
-
-=item I<{i-ami}> : Ami
-
-eq Amis.  eq 'Amis.  eq Pangca.
-
-=item [I<{apa}> : Apache languages]
-
-=item I<{ar}> : Arabic
-
-Many forms are mutually un-intelligible in spoken media.
-Notable forms:
-ar-ae
-ar-bh
-ar-dz
-ar-eg
-ar-iq
-ar-jo
-ar-kw
-ar-lb
-ar-ly
-ar-ma
-ar-om
-ar-qa
-ar-sa
-ar-sy
-ar-tn
-ar-ye.
-
-=item I<{arc}> : Aramaic
-
-NOT Amharic!  NOT Samaritan Aramaic!
-
-=item I<{arp}> : Arapaho
-
-=item I<{arn}> : Araucanian
-
-=item I<{arw}> : Arawak
-
-=item I<{hy}> : Armenian
-
-=item [I<{art}> : Artificial (Other)]
-
-=item I<{as}> : Assamese
-
-=item [I<{ath}> : Athapascan languages]
-
-eq Athabaskan.  eq Athapaskan.  eq Athabascan.
-
-=item [I<{aus}> : Australian languages]
-
-=item [I<{map}> : Austronesian (Other)]
-
-=item I<{ava}> : Avaric
-
-=item I<{ae}> : Avestan
-
-eq Zend
-
-=item I<{awa}> : Awadhi
-
-=item I<{ay}> : Aymara
-
-=item I<{az}> : Azerbaijani
-
-eq Azeri
-
-=item I<{ban}> : Balinese
-
-=item [I<{bat}> : Baltic (Other)]
-
-=item I<{bal}> : Baluchi
-
-=item I<{bam}> : Bambara
-
-=item [I<{bai}> : Bamileke languages]
-
-=item I<{bad}> : Banda
-
-=item [I<{bnt}> : Bantu (Other)]
-
-=item I<{bas}> : Basa
-
-=item I<{ba}> : Bashkir
-
-=item I<{eu}> : Basque
-
-=item I<{btk}> : Batak (Indonesia)
-
-=item I<{bej}> : Beja
-
-=item I<{be}> : Belarusian
-
-eq Belarussian.  eq Byelarussian.
-eq Belorussian.  eq Byelorussian.
-eq White Russian.  eq White Ruthenian.
-NOT Ruthenian!
-
-=item I<{bem}> : Bemba
-
-=item I<{bn}> : Bengali
-
-=item [I<{ber}> : Berber (Other)]
-
-=item I<{bho}> : Bhojpuri
-
-=item I<{bh}> : Bihari
-
-=item I<{bik}> : Bikol
-
-=item I<{bin}> : Bini
-
-=item I<{bi}> : Bislama
-
-=item I<{bs}> : Bosnian
-
-=item I<{bra}> : Braj
-
-=item I<{br}> : Breton
-
-=item I<{bug}> : Buginese
-
-=item I<{bg}> : Bulgarian
-
-=item I<{i-bnn}> : Bunun
-
-=item I<{bua}> : Buriat
-
-=item I<{my}> : Burmese
-
-=item I<{cad}> : Caddo
-
-=item I<{car}> : Carib
-
-=item I<{ca}> : Catalan
-
-eq CatalE<aacute>n.  eq Catalonian.
-
-=item [I<{cau}> : Caucasian (Other)]
-
-=item I<{ceb}> : Cebuano
-
-=item [I<{cel}> : Celtic (Other)]
-
-Notable forms: cel-gaulish.
-
-=item [I<{cai}> : Central American Indian (Other)]
-
-=item I<{chg}> : Chagatai
-
-(Historical?)
-
-=item [I<{cmc}> : Chamic languages]
-
-=item I<{ch}> : Chamorro
-
-=item I<{ce}> : Chechen
-
-=item I<{chr}> : Cherokee
-
-eq Tsalagi
-
-=item I<{chy}> : Cheyenne
-
-=item I<{chb}> : Chibcha
-
-(Historical)  NOT Chibchan (which is a language family).
-
-=item I<{ny}> : Chichewa
-
-eq Nyanja.  eq Chinyanja.
-
-=item I<{zh}> : Chinese
-
-Many forms are mutually un-intelligible in spoken media.
-Notable subforms:
-zh-cn (PRC Chinese),
-zh-hk (Hong Kong Chinese),
-zh-mo (Macau Chinese), 
-zh-sg (Singapore Chinese),
-zh-tw (Taiwan Chinese),
-zh-guoyu (Putonghua/Guoyu/Mandarin),
-zh-hakka (Hakka; formerly i-hakka),
-zh-min (Hokkien),
-zh-min-nan (Southern Hokkien),
-zh-wuu (Shanghaiese),
-zh-xiang (Hunanese),
-zh-yue (Cantonese).
-
-=item I<{chn}> : Chinook Jargon
-
-eq Chinook Wawa.
-
-=item I<{chp}> : Chipewyan
-
-=item I<{cho}> : Choctaw
-
-=item I<{cu}> : Church Slavic
-
-eq Old Church Slavonic.
-
-=item I<{chk}> : Chuukese
-
-eq Trukese.  eq Chuuk.  eq Truk.  eq Ruk.
-
-=item I<{cv}> : Chuvash
-
-=item I<{cop}> : Coptic
-
-=item I<{kw}> : Cornish
-
-=item I<{co}> : Corsican
-
-eq Corse.
-
-=item I<{cre}> : Cree
-
-NOT Creek!
-
-=item I<{mus}> : Creek
-
-NOT Cree!
-
-=item [I<{cpe}> : English-based Creoles and pidgins (Other)]
-
-=item [I<{cpf}> : French-based Creoles and pidgins (Other)]
-
-=item [I<{cpp}> : Portuguese-based Creoles and pidgins (Other)]
-
-=item [I<{crp}> : Creoles and pidgins (Other)]
-
-=item I<{hr}> : Croatian
-
-eq Croat.
-
-=item [I<{cus}> : Cushitic (Other)]
-
-=item I<{cs}> : Czech
-
-=item I<{dak}> : Dakota
-
-eq Nakota.  eq Latoka.
-
-=item I<{da}> : Danish
-
-=item I<{day}> : Dayak
-
-=item I<{i-default}> : Default (Fallthru) Language
-
-Defined in RFC 2277, this is for tagging text
-(which must include English text, and might/should include text
-in other appropriate languages) that is emitted in a context
-where language-negotiation wasn't possible -- in SMTP mail failure
-messages, for example.
-
-=item I<{del}> : Delaware
-
-=item I<{din}> : Dinka
-
-=item I<{div}> : Divehi
-
-=item I<{doi}> : Dogri
-
-NOT Dogrib!
-
-=item I<{dgr}> : Dogrib
-
-NOT Dogri!
-
-=item [I<{dra}> : Dravidian (Other)]
-
-=item I<{dua}> : Duala
-
-=item I<{nl}> : Dutch
-
-eq Netherlander.  Notable forms: nl-nl, nl-be.
-
-=item I<{dum}> : Middle Dutch (ca.1050-1350)
-
-(Historical)
-
-=item I<{dyu}> : Dyula
-
-=item I<{dz}> : Dzongkha
-
-=item I<{efi}> : Efik
-
-=item I<{egy}> : Ancient Egyptian
-
-(Historical)
-
-=item I<{eka}> : Ekajuk
-
-=item I<{elx}> : Elamite
-
-(Historical)
-
-=item I<{en}> : English
-
-Notable forms:
-en-au
-en-bz
-en-ca
-en-gb
-en-ie
-en-jm
-en-nz
-en-ph
-en-tt
-en-us
-en-za
-en-zw.
-
-=item I<{enm}> : Old English (1100-1500)
-
-(Historical)
-
-=item I<{ang}> : Old English (ca.450-1100)
-
-eq Anglo-Saxon.  (Historical)
-
-=item I<{eo}> : Esperanto
-
-(Artificial)
-
-=item I<{et}> : Estonian
-
-=item I<{ewe}> : Ewe
-
-=item I<{ewo}> : Ewondo
-
-=item I<{fan}> : Fang
-
-=item I<{fat}> : Fanti
-
-=item I<{fo}> : Faroese
-
-=item I<{fj}> : Fijian
-
-=item I<{fi}> : Finnish
-
-=item [I<{fiu}> : Finno-Ugrian (Other)]
-
-eq Finno-Ugric.  NOT Ugaritic!
-
-=item I<{fon}> : Fon
-
-=item I<{fr}> : French
-
-Notable forms:
-fr-fr
-fr-be
-fr-ca
-fr-ch
-fr-lu
-fr-mc.
-
-=item I<{frm}> : Middle French (ca.1400-1600)
-
-(Historical)
-
-=item I<{fro}> : Old French (842-ca.1400)
-
-(Historical)
-
-=item I<{fy}> : Frisian
-
-=item I<{fur}> : Friulian
-
-=item I<{ful}> : Fulah
-
-=item I<{gaa}> : Ga
-
-=item I<{gd}> : Scots Gaelic
-
-NOT Scots!
-
-=item I<{gl}> : Gallegan
-
-eq Galician
-
-=item I<{lug}> : Ganda
-
-=item I<{gay}> : Gayo
-
-=item I<{gba}> : Gbaya
-
-=item I<{gez}> : Geez
-
-eq Ge'ez
-
-=item I<{ka}> : Georgian
-
-=item I<{de}> : German
-
-Notable forms: de-at
-de-be
-de-ch
-de-de
-de-li
-de-lu.
-
-=item I<{gmh}> : Middle High German (ca.1050-1500)
-
-(Historical)
-
-=item I<{goh}> : Old High German (ca.750-1050)
-
-(Historical)
-
-=item [I<{gem}> : Germanic (Other)]
-
-=item I<{gil}> : Gilbertese
-
-=item I<{gon}> : Gondi
-
-=item I<{gor}> : Gorontalo
-
-=item I<{got}> : Gothic
-
-(Historical)
-
-=item I<{grb}> : Grebo
-
-=item I<{grc}> : Ancient Greek (to 1453)
-
-(Historical)
-
-=item I<{el}> : Modern Greek (1453-)
-
-=item I<{gn}> : Guarani
-
-GuaranE<iacute>
-
-=item I<{gu}> : Gujarati
-
-=item I<{gwi}> : Gwich'in
-
-eq Gwichin
-
-=item I<{hai}> : Haida
-
-=item I<{ha}> : Hausa
-
-=item I<{haw}> : Hawaiian
-
-Hawai'ian
-
-=item I<{he}> : Hebrew
-
-(Formerly "iw".)
-
-=item I<{hz}> : Herero
-
-=item I<{hil}> : Hiligaynon
-
-=item I<{him}> : Himachali
-
-=item I<{hi}> : Hindi
-
-=item I<{ho}> : Hiri Motu
-
-=item I<{hit}> : Hittite
-
-(Historical)
-
-=item I<{hmn}> : Hmong
-
-=item I<{hu}> : Hungarian
-
-=item I<{hup}> : Hupa
-
-=item I<{iba}> : Iban
-
-=item I<{is}> : Icelandic
-
-=item I<{ibo}> : Igbo
-
-=item I<{ijo}> : Ijo
-
-=item I<{ilo}> : Iloko
-
-=item [I<{inc}> : Indic (Other)]
-
-=item [I<{ine}> : Indo-European (Other)]
-
-=item I<{id}> : Indonesian
-
-(Formerly "in".)
-
-=item I<{ia}> : Interlingua (International Auxiliary Language Association)
-
-(Artificial)  NOT Interlingue!
-
-=item I<{ie}> : Interlingue
-
-(Artificial)  NOT Interlingua!
-
-=item I<{iu}> : Inuktitut
-
-A subform of "Eskimo".
-
-=item I<{ik}> : Inupiaq
-
-A subform of "Eskimo".
-
-=item [I<{ira}> : Iranian (Other)]
-
-=item I<{ga}> : Irish
-
-=item I<{mga}> : Middle Irish (900-1200)
-
-(Historical)
-
-=item I<{sga}> : Old Irish (to 900)
-
-(Historical)
-
-=item [I<{iro}> : Iroquoian languages]
-
-=item I<{it}> : Italian
-
-Notable forms: it-it, it-ch
-
-=item I<{ja}> : Japanese
-
-(NOT "jp"!)
-
-=item I<{jw}> : Javanese
-
-=item I<{jrb}> : Judeo-Arabic
-
-=item I<{jpr}> : Judeo-Persian
-
-=item I<{kab}> : Kabyle
-
-=item I<{kac}> : Kachin
-
-=item I<{kl}> : Kalaallisut
-
-eq Greenlandic "Eskimo"
-
-=item I<{kam}> : Kamba
-
-=item I<{kn}> : Kannada
-
-NOT Canadian!
-
-=item I<{kau}> : Kanuri
-
-=item I<{kaa}> : Kara-Kalpak
-
-=item I<{kar}> : Karen
-
-=item I<{ks}> : Kashmiri
-
-=item I<{kaw}> : Kawi
-
-=item I<{kk}> : Kazakh
-
-=item I<{kha}> : Khasi
-
-=item I<{km}> : Khmer
-
-eq Cambodian.  eq Kampuchean.
-
-=item [I<{khi}> : Khoisan (Other)]
-
-=item I<{kho}> : Khotanese
-
-=item I<{ki}> : Kikuyu
-
-eq Gikuyu.
-
-=item I<{kmb}> : Kimbundu
-
-=item I<{rw}> : Kinyarwanda
-
-=item I<{ky}> : Kirghiz
-
-=item I<{i-klingon}> : Klingon
-
-=item I<{kv}> : Komi
-
-=item I<{kon}> : Kongo
-
-=item I<{kok}> : Konkani
-
-=item I<{ko}> : Korean
-
-=item I<{kos}> : Kosraean
-
-=item I<{kpe}> : Kpelle
-
-=item I<{kro}> : Kru
-
-=item I<{kj}> : Kuanyama
-
-=item I<{kum}> : Kumyk
-
-=item I<{ku}> : Kurdish
-
-=item I<{kru}> : Kurukh
-
-=item I<{kut}> : Kutenai
-
-=item I<{lad}> : Ladino
-
-eq Judeo-Spanish.  NOT Ladin (a minority language in Italy).
-
-=item I<{lah}> : Lahnda
-
-NOT Lamba!
-
-=item I<{lam}> : Lamba
-
-NOT Lahnda!
-
-=item I<{lo}> : Lao
-
-=item I<{la}> : Latin
-
-(Historical)  NOT Ladin!  NOT Ladino!
-
-=item I<{lv}> : Latvian
-
-eq Lettish.
-
-=item I<{lb}> : Letzeburgesch
-
-eq Luxemburgian, eq Luxemburger.  (Formerly i-lux.)
-
-=item I<{lez}> : Lezghian
-
-=item I<{ln}> : Lingala
-
-=item I<{lt}> : Lithuanian
-
-=item I<{nds}> : Low German
-
-eq Low Saxon.  eq Low German.  eq Low Saxon.
-
-=item I<{loz}> : Lozi
-
-=item I<{lub}> : Luba-Katanga
-
-=item I<{lua}> : Luba-Lulua
-
-=item I<{lui}> : Luiseno
-
-eq LuiseE<ntilde>o.
-
-=item I<{lun}> : Lunda
-
-=item I<{luo}> : Luo (Kenya and Tanzania)
-
-=item I<{lus}> : Lushai
-
-=item I<{mk}> : Macedonian
-
-eq the modern Slavic language spoken in what was Yugoslavia.
-NOT the form of Greek spoken in Greek Macedonia!
-
-=item I<{mad}> : Madurese
-
-=item I<{mag}> : Magahi
-
-=item I<{mai}> : Maithili
-
-=item I<{mak}> : Makasar
-
-=item I<{mg}> : Malagasy
-
-=item I<{ms}> : Malay
-
-NOT Malayalam!
-
-=item I<{ml}> : Malayalam
-
-NOT Malay!
-
-=item I<{mt}> : Maltese
-
-=item I<{mnc}> : Manchu
-
-=item I<{mdr}> : Mandar
-
-NOT Mandarin!
-
-=item I<{man}> : Mandingo
-
-=item I<{mni}> : Manipuri
-
-=item [I<{mno}> : Manobo languages]
-
-=item I<{gv}> : Manx
-
-=item I<{mi}> : Maori
-
-NOT Mari!
-
-=item I<{mr}> : Marathi
-
-=item I<{chm}> : Mari
-
-NOT Maori!
-
-=item I<{mh}> : Marshall
-
-eq Marshallese.
-
-=item I<{mwr}> : Marwari
-
-=item I<{mas}> : Masai
-
-=item [I<{myn}> : Mayan languages]
-
-=item I<{men}> : Mende
-
-=item I<{mic}> : Micmac
-
-=item I<{min}> : Minangkabau
-
-=item I<{i-mingo}> : Mingo
-
-eq the Irquoian language West Virginia Seneca.  NOT New York Seneca!
-
-=item [I<{mis}> : Miscellaneous languages]
-
-Don't use this.
-
-=item I<{moh}> : Mohawk
-
-=item I<{mo}> : Moldavian
-
-eq Moldovan.
-
-=item [I<{mkh}> : Mon-Khmer (Other)]
-
-=item I<{lol}> : Mongo
-
-=item I<{mn}> : Mongolian
-
-eq Mongol.
-
-=item I<{mos}> : Mossi
-
-=item [I<{mul}> : Multiple languages]
-
-Not for normal use.
-
-=item [I<{mun}> : Munda languages]
-
-=item I<{nah}> : Nahuatl
-
-=item I<{na}> : Nauru
-
-=item I<{nv}> : Navajo
-
-eq Navaho.  (Formerly i-navajo.)
-
-=item I<{nd}> : North Ndebele
-
-=item I<{nr}> : South Ndebele
-
-=item I<{ng}> : Ndonga
-
-=item I<{ne}> : Nepali
-
-eq Nepalese.  Notable forms: ne-np ne-in.
-
-=item I<{new}> : Newari
-
-=item I<{nia}> : Nias
-
-=item [I<{nic}> : Niger-Kordofanian (Other)]
-
-=item [I<{ssa}> : Nilo-Saharan (Other)]
-
-=item I<{niu}> : Niuean
-
-=item I<{non}> : Old Norse
-
-(Historical)
-
-=item [I<{nai}> : North American Indian]
-
-Do not use this.
-
-=item I<{se}> : Northern Sami
-
-eq Lappish.  eq Lapp.  eq (Northern) Saami.
-
-=item I<{no}> : Norwegian
-
-Note the two following forms:
-
-=item I<{nb}> : Norwegian BokmE<aring>l
-
-(A form of Norwegian.)  (Formerly no-bok.)
-
-=item I<{nn}> : Norwegian Nynorsk
-
-(A form of Norwegian.)  (Formerly no-nyn.)
-
-=item [I<{nub}> : Nubian languages]
-
-=item I<{nym}> : Nyamwezi
-
-=item I<{nyn}> : Nyankole
-
-=item I<{nyo}> : Nyoro
-
-=item I<{nzi}> : Nzima
-
-=item I<{oc}> : Occitan (post 1500)
-
-eq ProvenE<ccedil>al, eq Provencal
-
-=item I<{oji}> : Ojibwa
-
-eq Ojibwe.
-
-=item I<{or}> : Oriya
-
-=item I<{om}> : Oromo
-
-=item I<{osa}> : Osage
-
-=item I<{os}> : Ossetian; Ossetic
-
-=item [I<{oto}> : Otomian languages]
-
-Group of languages collectively called "OtomE<iacute>".
-
-=item I<{pal}> : Pahlavi
-
-eq Pahlevi
-
-=item I<{i-pwn}> : Paiwan
-
-eq Pariwan
-
-=item I<{pau}> : Palauan
-
-=item I<{pi}> : Pali
-
-(Historical?)
-
-=item I<{pam}> : Pampanga
-
-=item I<{pag}> : Pangasinan
-
-=item I<{pa}> : Panjabi
-
-eq Punjabi
-
-=item I<{pap}> : Papiamento
-
-eq Papiamentu.
-
-=item [I<{paa}> : Papuan (Other)]
-
-=item I<{fa}> : Persian
-
-eq Farsi.
-
-=item I<{peo}> : Old Persian (ca.600-400 B.C.)
-
-=item [I<{phi}> : Philippine (Other)]
-
-=item I<{phn}> : Phoenician
-
-(Historical)
-
-=item I<{pon}> : Pohnpeian
-
-=item I<{pl}> : Polish
-
-=item I<{pt}> : Portuguese
-
-eq Portugese.  Notable forms: pt-pt pt-br.
-
-=item [I<{pra}> : Prakrit languages]
-
-=item I<{pro}> : Old ProvenE<ccedil>al (to 1500)
-
-eq Old Provencal.  (Historical.)
-
-=item I<{ps}> : Pushto
-
-eq Pashto.  eq Pushtu.
-
-=item I<{qu}> : Quechua
-
-eq Quecha.
-
-=item I<{rm}> : Raeto-Romance
-
-eq Romansh.
-
-=item I<{raj}> : Rajasthani
-
-=item I<{rap}> : Rapanui
-
-=item I<{rar}> : Rarotongan
-
-=item [I<{qaa}>-I<qtz> : Reserved for local use.]
-
-=item [I<{roa}> : Romance (Other)]
-
-NOT Romanian!  NOT Romany!  NOT Romansh!
-
-=item I<{ro}> : Romanian
-
-eq Rumanian.  NOT Romany!
-
-=item I<{rom}> : Romany
-
-eq Rom.  NOT Romanian!
-
-=item I<{rn}> : Rundi
-
-=item I<{ru}> : Russian
-
-NOT White Russian!  NOT Rusyn!
-
-=item [I<{sal}> : Salishan languages]
-
-Large language group.
-
-=item I<{sam}> : Samaritan Aramaic
-
-NOT Aramaic!
-
-=item [I<{smi}> : Sami languages (Other)]
-
-=item I<{sm}> : Samoan
-
-=item I<{sad}> : Sandawe
-
-=item I<{sg}> : Sango
-
-=item I<{sa}> : Sanskrit
-
-(Historical)
-
-=item I<{sat}> : Santali
-
-=item I<{sc}> : Sardinian
-
-eq Sard.
-
-=item I<{sas}> : Sasak
-
-=item I<{sco}> : Scots
-
-NOT Scots Gaelic!
-
-=item I<{sel}> : Selkup
-
-=item [I<{sem}> : Semitic (Other)]
-
-=item I<{sr}> : Serbian
-
-eq Serb.  NOT Sorbian.
-
-=item I<{srr}> : Serer
-
-=item I<{shn}> : Shan
-
-=item I<{sn}> : Shona
-
-=item I<{sid}> : Sidamo
-
-=item I<{sgn-...}> : Sign Languages
-
-Always use with a subtag.  Notable forms: sgn-gb sgn-ie sgn-ni sgn-us.
-
-=item I<{bla}> : Siksika
-
-eq Blackfoot.  eq Pikanii.
-
-=item I<{sd}> : Sindhi
-
-=item I<{si}> : Sinhalese
-
-eq Sinhala.
-
-=item [I<{sit}> : Sino-Tibetan (Other)]
-
-=item [I<{sio}> : Siouan languages]
-
-=item I<{den}> : Slave (Athapascan)
-
-("Slavey" is a subform.)
-
-=item [I<{sla}> : Slavic (Other)]
-
-=item I<{sk}> : Slovak
-
-eq Slovakian.
-
-=item I<{sl}> : Slovenian
-
-eq Slovene.
-
-=item I<{sog}> : Sogdian
-
-=item I<{so}> : Somali
-
-=item I<{son}> : Songhai
-
-=item I<{snk}> : Soninke
-
-=item I<{wen}> : Sorbian languages
-
-eq Wendish.  eq Sorb.  eq Lusatian.  eq Wend.  NOT Venda!  NOT Serbian!
-
-=item I<{nso}> : Northern Sotho
-
-=item I<{st}> : Southern Sotho
-
-eq Sutu.  eq Sesotho.
-
-=item [I<{sai}> : South American Indian (Other)]
-
-=item I<{es}> : Spanish
-
-Notable forms:
-es-ar es-bo es-cl es-co es-do es-ec es-es es-gt
-es-hn es-mx es-pa es-pe es-pr es-py es-sv es-us
-es-uy es-ve 
-
-=item I<{suk}> : Sukuma
-
-=item I<{sux}> : Sumerian
-
-(Historical)
-
-=item I<{su}> : Sundanese
-
-=item I<{sus}> : Susu
-
-=item I<{sw}> : Swahili
-
-eq Kiswahili
-
-=item I<{ss}> : Swati
-
-=item I<{sv}> : Swedish
-
-Notable forms: sv-se sv-fi.
-
-=item I<{syr}> : Syriac
-
-=item I<{tl}> : Tagalog
-
-=item I<{ty}> : Tahitian
-
-=item [I<{tai}> : Tai (Other)]
-
-NOT Thai!
-
-=item I<{tg}> : Tajik
-
-=item I<{tmh}> : Tamashek
-
-=item I<{ta}> : Tamil
-
-=item I<{i-tao}> : Tao
-
-eq Yami.
-
-=item I<{tt}> : Tatar
-
-=item I<{i-tay}> : Tayal
-
-eq Atayal.  eq Atayan.
-
-=item I<{te}> : Telugu
-
-=item I<{ter}> : Tereno
-
-=item I<{tet}> : Tetum
-
-=item I<{th}> : Thai
-
-NOT Tai!
-
-=item I<{bo}> : Tibetan
-
-=item I<{tig}> : Tigre
-
-=item I<{ti}> : Tigrinya
-
-=item I<{tem}> : Timne
-
-eq Themne.  eq Timene.
-
-=item I<{tiv}> : Tiv
-
-=item I<{tli}> : Tlingit
-
-=item I<{tpi}> : Tok Pisin
-
-=item I<{tkl}> : Tokelau
-
-=item I<{tog}> : Tonga (Nyasa)
-
-NOT Tsonga!
-
-=item I<{to}> : Tonga (Tonga Islands)
-
-(Pronounced "Tong-a", not "Tong-ga")
-
-NOT Tsonga!
-
-=item I<{tsi}> : Tsimshian
-
-eq Sm'algyax
-
-=item I<{ts}> : Tsonga
-
-NOT Tonga!
-
-=item I<{i-tsu}> : Tsou
-
-=item I<{tn}> : Tswana
-
-Same as Setswana.
-
-=item I<{tum}> : Tumbuka
-
-=item I<{tr}> : Turkish
-
-(Typically in Roman script)
-
-=item I<{ota}> : Ottoman Turkish (1500-1928)
-
-(Typically in Arabic script)  (Historical)
-
-=item I<{tk}> : Turkmen
-
-eq Turkmeni.
-
-=item I<{tvl}> : Tuvalu
-
-=item I<{tyv}> : Tuvinian
-
-eq Tuvan.  eq Tuvin.
-
-=item I<{tw}> : Twi
-
-=item I<{uga}> : Ugaritic
-
-NOT Ugric!
-
-=item I<{ug}> : Uighur
-
-=item I<{uk}> : Ukrainian
-
-=item I<{umb}> : Umbundu
-
-=item I<{und}> : Undetermined
-
-Not a tag for normal use.
-
-=item I<{ur}> : Urdu
-
-=item I<{uz}> : Uzbek
-
-eq E<Ouml>zbek
-
-=item I<{vai}> : Vai
-
-=item I<{ven}> : Venda
-
-NOT Wendish!  NOT Wend!  NOT Avestan!
-
-=item I<{vi}> : Vietnamese
-
-eq Viet.
-
-=item I<{vo}> : VolapE<uuml>k
-
-eq Volapuk.  (Artificial)
-
-=item I<{vot}> : Votic
-
-eq Votian.  eq Vod.
-
-=item [I<{wak}> : Wakashan languages]
-
-=item I<{wal}> : Walamo
-
-eq Wolaytta.
-
-=item I<{war}> : Waray
-
-Presumably the Philippine language Waray-Waray (SamareE<ntilde>o),
-not the smaller Philippine language Waray Sorsogon, nor the extinct
-Australian language Waray.
-
-=item I<{was}> : Washo
-
-eq Washoe
-
-=item I<{cy}> : Welsh
-
-=item I<{wo}> : Wolof
-
-=item I<{x-...}> : Unregistered (Private Use)
-
-"x-" is a prefix for language tags that are not registered with ISO
-or IANA.  Example, x-double-dutch
-
-=item I<{xh}> : Xhosa
-
-=item I<{sah}> : Yakut
-
-=item I<{yao}> : Yao
-
-(The Yao in Malawi?)
-
-=item I<{yap}> : Yapese
-
-eq Yap
-
-=item I<{yi}> : Yiddish
-
-Formerly "ji".  Sometimes in Roman script, sometimes in Hebrew script.
-
-=item I<{yo}> : Yoruba
-
-=item [I<{ypk}> : Yupik languages]
-
-Several "Eskimo" languages.
-
-=item I<{znd}> : Zande
-
-=item [I<{zap}> : Zapotec]
-
-(A group of languages.)
-
-=item I<{zen}> : Zenaga
-
-NOT Zend.
-
-=item I<{za}> : Zhuang
-
-=item I<{zu}> : Zulu
-
-=item I<{zun}> : Zuni
-
-eq ZuE<ntilde>i
-
-=back
-
-=head1 SEE ALSO
-
-L<I18N::LangTags|I18N::LangTags>
-
-=head1 COPYRIGHT AND DISCLAIMER
-
-Copyright (c) 2001 Sean M. Burke. All rights reserved.
-
-You can redistribute and/or
-modify this document under the same terms as Perl itself.
-
-This document is provided in the the hope that it will be
-useful, but without any warranty;
-without even the implied warranty of accuracy, authoritativeness,
-completeness, merchantability, or fitness for a particular purpose.
-
-Email any corrections or questions to me.
-
-=head1 AUTHOR
-
-Sean M. Burke, sburkeE<64>cpan.org
-
-=cut
-
-
-# To generate a list of just the two and three-letter codes:
-
-#!/usr/local/bin/perl -w
-
-require 5; # Time-stamp: "2001-03-13 21:53:39 MST"
- # Sean M. Burke, sburke@cpan.org
- # This program is for generating the language_codes.txt file
-use strict;
-use LWP::Simple;
-use HTML::TreeBuilder 3.10;
-my $root = HTML::TreeBuilder->new();
-my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html';
-$root->parse(get($url) || die "Can't get $url");
-$root->eof();
-
-my @codes;
-
-foreach my $tr ($root->find_by_tag_name('tr')) {
-  my @f = map $_->as_text(), $tr->content_list();
-  #print map("<$_> ", @f), "\n";
-  next unless @f == 5;
-  pop @f; # nix the French name
-  next if $f[-1] eq 'Language Name (English)'; # it's a header line
-  my $xx = splice(@f, 2,1); # pull out the two-letter code
-  $f[-1] =~ s/^\s+//;
-  $f[-1] =~ s/\s+$//;
-  if($xx =~ m/[a-zA-Z]/) {   # there's a two-letter code for it
-    push   @codes, [ lc($f[-1]),   "$xx\t$f[-1]\n" ];
-  } else { # print the three-letter codes.
-    if($f[0] eq $f[1]) {
-      push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ];
-    } else { # shouldn't happen
-      push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; 
-    }
-  }
-}
-
-print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes;
-print "[ based on $url\n at ", scalar(localtime), "]\n",
-  "[Note: doesn't include IANA-registered codes.]\n";
-exit;
-__END__
-
diff --git a/lib/I18N/LangTags/README b/lib/I18N/LangTags/README

new file mode 100644 (file)

index 0000000..2ac6053
--- /dev/null
+++ b/lib/I18N/LangTags/README
@@ -0,0 +1,78 @@
+README for I18N::LangTags
+                                        Time-stamp: "2001-05-29 21:52:15 MDT"
+
+                           I18N::LangTags
+
+I18N::LangTags - functions for dealing with RFC3066-style language
+tags
+
+Language tags are a formalism, described in RFC 3066 (obsoleting
+1766), for declaring what language form (language and possibly
+dialect) a given chunk of information is in.
+
+This library provides functions for common tasks involving language
+tags (notably the extraction of them, comparing them, and testing the
+formal validity of them) as is needed in a variety of protocols and
+applications.
+
+
+I18N::LangTags::List -- tags and names for human languages.  This
+module maps known language tags ("fr-CA") to their English
+names ("Canadian French").  Its documentation also lists the several
+hundred known tags and some common subforms.  You may find this useful
+as a reference.
+
+
+See the POD for more information.
+
+
+INSTALLATION
+
+You install I18N::LangTags and I18N::LangTags::List, as you would
+install any perl module library, by running these commands:
+
+   perl Makefile.PL
+   make
+   make test
+   make install
+
+If you want to install a private copy of I18N::LangTags in your home
+directory, then you should try to produce the initial Makefile with
+something like this command:
+
+  perl Makefile.PL LIB=~/perl
+
+See perldoc perlmodinstall for more information on installing modules.
+
+
+DOCUMENTATION
+
+POD-format documentation is included in LangTags.pm and List.pm.  POD is
+readable with the 'perldoc' utility.  See ChangeLog for recent changes.
+
+
+SUPPORT
+
+Questions, bug reports, useful code bits, and suggestions for
+I18N::LangTags should just be sent to me at sburke@cpan.org
+
+
+AVAILABILITY
+
+The latest version of I18N::LangTags is available from the
+Comprehensive Perl Archive Network (CPAN).  Visit
+<http://www.perl.com/CPAN/> to find a CPAN site near you.
+
+
+COPYRIGHT
+
+Copyright 1998-2001, Sean M. Burke <sburke@cpan.org>, all rights
+reserved.
+
+The programs and documentation in this dist are distributed in
+the hope that they will be useful, but without any warranty; without
+even the implied warranty of merchantability or fitness for a
+particular purpose.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
diff --git a/lib/I18N/LangTags/test.pl b/lib/I18N/LangTags/test.pl

index 06c178e..e9e96e8 100644 (file)
--- a/lib/I18N/LangTags/test.pl
+++ b/lib/I18N/LangTags/test.pl
@@ -1,21 +1,14 @@
-BEGIN {
-    chdir 't' if -d 't';
-    @INC = '../lib';
-}
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
 
 ######################### We start with some black magic to print on failure.
 require 5;
-
+ # Time-stamp: "2001-06-20 01:43:31 MDT"
 use strict;
 use Test;
-BEGIN { plan tests => 23 };
+BEGIN { plan tests => 46 };
 BEGIN { ok 1 }
-use I18N::LangTags qw(is_language_tag same_language_tag
-                     extract_language_tags super_languages
-                     similarity_language_tag is_dialect_of
-                     locale2language_tag alternate_language_tags
-                     encode_language_tag
-                    );
+use I18N::LangTags (':ALL');
 
 ok !is_language_tag('');
 ok  is_language_tag('fr');
@@ -41,5 +34,44 @@ ok 1 == similarity_language_tag('en-ca', 'en-us');
 ok 2 == similarity_language_tag('en-us-southern', 'en-us-western');
 ok 2 == similarity_language_tag('en-us-southern', 'en-us');
 
-# print "So there!\n";
+ok grep $_ eq 'hi', panic_languages('kok');
+ok grep $_ eq 'en', panic_languages('x-woozle-wuzzle');
+ok ! grep $_ eq 'mr', panic_languages('it');
+ok grep $_ eq 'es', panic_languages('it');
+ok grep $_ eq 'it', panic_languages('es');
+
+
+print "# Now the ::List tests...\n";
+use I18N::LangTags::List;
+foreach my $lt (qw(
+ en
+ en-us
+ en-kr
+ el
+ elx
+ i-mingo
+ i-mingo-tom
+ x-mingo-tom
+ it
+ it-it
+ it-IT
+ it-FR
+ yi
+ ji
+ cre-syllabic
+ cre-syllabic-western
+ cre-western
+ cre-latin
+)) {
+  my $name = I18N::LangTags::List::name($lt);
+  if($name) {
+    ok(1);
+    print "#        $lt -> $name\n";
+  } else {
+    ok(0);
+    print "#        Failed lookup on $lt\n";
+  }
+}
+
+print "# So there!\n";
MANIFEST		patch \| blob \| blame \| history
lib/I18N/LangTags.pm		patch \| blob \| blame \| history
lib/I18N/LangTags/ChangeLog	[new file with mode: 0644]	patch \| blob
lib/I18N/LangTags/List.pm	[new file with mode: 0644]	patch \| blob
lib/I18N/LangTags/List.pod	[deleted file]	patch \| blob \| blame \| history
lib/I18N/LangTags/README	[new file with mode: 0644]	patch \| blob
lib/I18N/LangTags/test.pl		patch \| blob \| blame \| history