+++ /dev/null
-#!/usr/bin/perl -w
-# Copyright (c) 2010-2010 Sullivan Beck. All rights reserved.
-# This program is free software; you can redistribute it and/or modify it
-# under the same terms as Perl itself.
-
-# This is used to match country names from one source with those from
-# an existing source.
-#
-%country_alias =
- (
- "Bahamas, The" => "Bahamas",
- "Bolivia (Plurinational State of)" => "Bolivia, Plurinational State of",
- "Bolivia" => "Bolivia, Plurinational State of",
- "British Virgin Islands" => "Virgin Islands, British",
- "Brunei" => "Brunei Darussalam",
- "Burma" => "Myanmar",
- "China, Hong Kong Special Administrative Region" => "Hong Kong",
- "China, Macao Special Administrative Region" => "Macao",
- "Cocos Islands" => "Cocos (Keeling) Islands",
- "Congo (Brazzaville)" => "Congo",
- "Democratic People's Republic of Korea" => "Korea, Democratic People's Republic of",
- "Democratic Republic of the Congo" => "Congo, The Democratic Republic of the",
- "East Timor" => "Timor-Leste",
- "Faeroe Islands" => "Faroe Islands",
- "Falkland Islands (Islas Malvinas)" => "Falkland Islands (Malvinas)",
- "Federated States of Micronesia" => "Micronesia, Federated States of",
- "French Southern and Antarctic Lands" => "French Southern Territories",
- "Gambia, The" => "Gambia",
- "Golan Heights (Israeli-occupied)" => "Syrian Arab Republic",
- "Great Britain" => "United Kingdom",
- "Holy See" => "Holy See (Vatican City State)",
- "Hong Kong S.A.R." => "Hong Kong",
- "Hong Kong Special Administrative Region of China" => "Hong Kong",
- "Iran (Islamic Republic of)" => "Iran, Islamic Republic of",
- "Iran" => "Iran, Islamic Republic of",
- "Kazakstan" => "Kazakhstan",
- "Keeling Islands" => "Cocos (Keeling) Islands",
- "Libya" => "Libyan Arab Jamahiriya",
- "Macao Special Administrative Region of China" => "Macao",
- "Macau S.A.R" => "Macao",
- "Macau S.A.R." => "Macao",
- "Macedonia" => "Macedonia, The Former Yugoslav Republic of",
- "Macedonia, Former Yugoslav Republic of" => "Macedonia, The Former Yugoslav Republic of",
- "Micronesia (Federated States of)" => "Micronesia, Federated States of",
- "North Korea" => "Korea, Democratic People's Republic of",
- "Occupied Palestinian Territory" => "Palestinian Territory, Occupied",
- "Pitcairn Island" => "Pitcairn",
- "Pitcairn Islands" => "Pitcairn",
- "Republic of Korea" => "Korea, Republic of",
- "Republic of Moldova" => "Moldova, Republic of",
- "Saint Helena" => "Saint Helena, Ascension and Tristan da Cunha",
- "Saint-Barthelemy" => "Saint Barthelemy",
- "Saint-Martin (French part)" => "Saint Martin",
- "South Korea" => "Korea, Republic of",
- "Svalbard and Jan Mayen Islands" => "Svalbard and Jan Mayen",
- "Taiwan" => "Taiwan, Province of China",
- "The Bahamas" => "Bahamas",
- "The Democratic Republic of the Congo" => "Congo, The Democratic Republic of the",
- "The Republic of the Congo" => "Congo",
- "The former Yugoslav Republic of Macedonia" => "Macedonia, The Former Yugoslav Republic of",
- "UK" => "United Kingdom",
- "US" => "United States",
- "USA" => "United States",
- "United Kingdom of Great Britain and Northern Ireland" => "United Kingdom",
- "United Republic of Tanzania" => "Tanzania, United Republic of",
- "United States Virgin Islands" => "Virgin Islands, U.S.",
- "United States of America" => "United States",
- "Vatican City" => "Holy See (Vatican City State)",
- "Venezuela (Bolivarian Republic of)" => "Venezuela, Bolivarian Republic of",
- "Venezuela" => "Venezuela, Bolivarian Republic of",
- "Vietnam" => "Viet Nam",
- "Virgin Islands (UK)" => "Virgin Islands, British",
- "Virgin Islands (US)" => "Virgin Islands, U.S.",
- "Virgin Islands" => "Virgin Islands, U.S.",
- "Wallis and Futuna Islands" => "Wallis and Futuna",
- );
-
-# From v. 2.xx:
-# Zaire
-# Serbia and Montenegro
-# Yugoslavia
-# Jan Mayen
-# Svalbard
-
-################################################################################
-# ISO 3166-1 countries
-
-%country_iso_orig =
- (
- "AFGHANISTAN" => "Afghanistan",
- "ÅLAND ISLANDS" => "Aland Islands",
- "ALBANIA" => "Albania",
- "ALGERIA" => "Algeria",
- "AMERICAN SAMOA" => "American Samoa",
- "ANDORRA" => "Andorra",
- "ANGOLA" => "Angola",
- "ANGUILLA" => "Anguilla",
- "ANTARCTICA" => "Antarctica",
- "ANTIGUA AND BARBUDA" => "Antigua and Barbuda",
- "ARGENTINA" => "Argentina",
- "ARMENIA" => "Armenia",
- "ARUBA" => "Aruba",
- "AUSTRALIA" => "Australia",
- "AUSTRIA" => "Austria",
- "AZERBAIJAN" => "Azerbaijan",
- "BAHAMAS" => "Bahamas",
- "BAHRAIN" => "Bahrain",
- "BANGLADESH" => "Bangladesh",
- "BARBADOS" => "Barbados",
- "BELARUS" => "Belarus",
- "BELGIUM" => "Belgium",
- "BELIZE" => "Belize",
- "BENIN" => "Benin",
- "BERMUDA" => "Bermuda",
- "BHUTAN" => "Bhutan",
- "BOLIVIA, PLURINATIONAL STATE OF" => "Bolivia, Plurinational State of",
- "BOSNIA AND HERZEGOVINA" => "Bosnia and Herzegovina",
- "BOTSWANA" => "Botswana",
- "BOUVET ISLAND" => "Bouvet Island",
- "BRAZIL" => "Brazil",
- "BRITISH INDIAN OCEAN TERRITORY" => "British Indian Ocean Territory",
- "BRUNEI DARUSSALAM" => "Brunei Darussalam",
- "BULGARIA" => "Bulgaria",
- "BURKINA FASO" => "Burkina Faso",
- "BURUNDI" => "Burundi",
- "CAMBODIA" => "Cambodia",
- "CAMEROON" => "Cameroon",
- "CANADA" => "Canada",
- "CAPE VERDE" => "Cape Verde",
- "CAYMAN ISLANDS" => "Cayman Islands",
- "CENTRAL AFRICAN REPUBLIC" => "Central African Republic",
- "CHAD" => "Chad",
- "CHILE" => "Chile",
- "CHINA" => "China",
- "CHRISTMAS ISLAND" => "Christmas Island",
- "COCOS (KEELING) ISLANDS" => "Cocos (Keeling) Islands",
- "COLOMBIA" => "Colombia",
- "COMOROS" => "Comoros",
- "CONGO" => "Congo",
- "CONGO, THE DEMOCRATIC REPUBLIC OF THE" => "Congo, The Democratic Republic of the",
- "COOK ISLANDS" => "Cook Islands",
- "COSTA RICA" => "Costa Rica",
- "CÔTE D'IVOIRE" => "Cote d'Ivoire",
- "CROATIA" => "Croatia",
- "CUBA" => "Cuba",
- "CYPRUS" => "Cyprus",
- "CZECH REPUBLIC" => "Czech Republic",
- "DENMARK" => "Denmark",
- "DJIBOUTI" => "Djibouti",
- "DOMINICA" => "Dominica",
- "DOMINICAN REPUBLIC" => "Dominican Republic",
- "ECUADOR" => "Ecuador",
- "EGYPT" => "Egypt",
- "EL SALVADOR" => "El Salvador",
- "EQUATORIAL GUINEA" => "Equatorial Guinea",
- "ERITREA" => "Eritrea",
- "ESTONIA" => "Estonia",
- "ETHIOPIA" => "Ethiopia",
- "FALKLAND ISLANDS (MALVINAS)" => "Falkland Islands (Malvinas)",
- "FAROE ISLANDS" => "Faroe Islands",
- "FIJI" => "Fiji",
- "FINLAND" => "Finland",
- "FRANCE" => "France",
- "FRENCH GUIANA" => "French Guiana",
- "FRENCH POLYNESIA" => "French Polynesia",
- "FRENCH SOUTHERN TERRITORIES" => "French Southern Territories",
- "GABON" => "Gabon",
- "GAMBIA" => "Gambia",
- "GEORGIA" => "Georgia",
- "GERMANY" => "Germany",
- "GHANA" => "Ghana",
- "GIBRALTAR" => "Gibraltar",
- "GREECE" => "Greece",
- "GREENLAND" => "Greenland",
- "GRENADA" => "Grenada",
- "GUADELOUPE" => "Guadeloupe",
- "GUAM" => "Guam",
- "GUATEMALA" => "Guatemala",
- "GUERNSEY" => "Guernsey",
- "GUINEA" => "Guinea",
- "GUINEA-BISSAU" => "Guinea-Bissau",
- "GUYANA" => "Guyana",
- "HAITI" => "Haiti",
- "HEARD ISLAND AND MCDONALD ISLANDS" => "Heard Island and Mcdonald Islands",
- "HOLY SEE (VATICAN CITY STATE)" => "Holy See (Vatican City State)",
- "HONDURAS" => "Honduras",
- "HONG KONG" => "Hong Kong",
- "HUNGARY" => "Hungary",
- "ICELAND" => "Iceland",
- "INDIA" => "India",
- "INDONESIA" => "Indonesia",
- "IRAN, ISLAMIC REPUBLIC OF" => "Iran, Islamic Republic of",
- "IRAQ" => "Iraq",
- "IRELAND" => "Ireland",
- "ISLE OF MAN" => "Isle of Man",
- "ISRAEL" => "Israel",
- "ITALY" => "Italy",
- "JAMAICA" => "Jamaica",
- "JAPAN" => "Japan",
- "JERSEY" => "Jersey",
- "JORDAN" => "Jordan",
- "KAZAKHSTAN" => "Kazakhstan",
- "KENYA" => "Kenya",
- "KIRIBATI" => "Kiribati",
- "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF" => "Korea, Democratic People's Republic of",
- "KOREA, REPUBLIC OF" => "Korea, Republic of",
- "KUWAIT" => "Kuwait",
- "KYRGYZSTAN" => "Kyrgyzstan",
- "LAO PEOPLE'S DEMOCRATIC REPUBLIC" => "Lao People's Democratic Republic",
- "LATVIA" => "Latvia",
- "LEBANON" => "Lebanon",
- "LESOTHO" => "Lesotho",
- "LIBERIA" => "Liberia",
- "LIBYAN ARAB JAMAHIRIYA" => "Libyan Arab Jamahiriya",
- "LIECHTENSTEIN" => "Liechtenstein",
- "LITHUANIA" => "Lithuania",
- "LUXEMBOURG" => "Luxembourg",
- "MACAO" => "Macao",
- "MACEDONIA, THE FORMER YUGOSLAV REPUBLIC OF" => "Macedonia, The Former Yugoslav Republic of",
- "MADAGASCAR" => "Madagascar",
- "MALAWI" => "Malawi",
- "MALAYSIA" => "Malaysia",
- "MALDIVES" => "Maldives",
- "MALI" => "Mali",
- "MALTA" => "Malta",
- "MARSHALL ISLANDS" => "Marshall Islands",
- "MARTINIQUE" => "Martinique",
- "MAURITANIA" => "Mauritania",
- "MAURITIUS" => "Mauritius",
- "MAYOTTE" => "Mayotte",
- "MEXICO" => "Mexico",
- "MICRONESIA, FEDERATED STATES OF" => "Micronesia, Federated States of",
- "MOLDOVA, REPUBLIC OF" => "Moldova, Republic of",
- "MONACO" => "Monaco",
- "MONGOLIA" => "Mongolia",
- "MONTENEGRO" => "Montenegro",
- "MONTSERRAT" => "Montserrat",
- "MOROCCO" => "Morocco",
- "MOZAMBIQUE" => "Mozambique",
- "MYANMAR" => "Myanmar",
- "NAMIBIA" => "Namibia",
- "NAURU" => "Nauru",
- "NEPAL" => "Nepal",
- "NETHERLANDS" => "Netherlands",
- "NETHERLANDS ANTILLES" => "Netherlands Antilles",
- "NEW CALEDONIA" => "New Caledonia",
- "NEW ZEALAND" => "New Zealand",
- "NICARAGUA" => "Nicaragua",
- "NIGER" => "Niger",
- "NIGERIA" => "Nigeria",
- "NIUE" => "Niue",
- "NORFOLK ISLAND" => "Norfolk Island",
- "NORTHERN MARIANA ISLANDS" => "Northern Mariana Islands",
- "NORWAY" => "Norway",
- "OMAN" => "Oman",
- "PAKISTAN" => "Pakistan",
- "PALAU" => "Palau",
- "PALESTINIAN TERRITORY, OCCUPIED" => "Palestinian Territory, Occupied",
- "PANAMA" => "Panama",
- "PAPUA NEW GUINEA" => "Papua New Guinea",
- "PARAGUAY" => "Paraguay",
- "PERU" => "Peru",
- "PHILIPPINES" => "Philippines",
- "PITCAIRN" => "Pitcairn",
- "POLAND" => "Poland",
- "PORTUGAL" => "Portugal",
- "PUERTO RICO" => "Puerto Rico",
- "QATAR" => "Qatar",
- "RÉUNION" => "Reunion",
- "ROMANIA" => "Romania",
- "RUSSIAN FEDERATION" => "Russian Federation",
- "RWANDA" => "Rwanda",
- "SAINT BARTHÉLEMY" => "Saint Barthelemy",
- "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA" => "Saint Helena, Ascension and Tristan da Cunha",
- "SAINT KITTS AND NEVIS" => "Saint Kitts and Nevis",
- "SAINT LUCIA" => "Saint Lucia",
- "SAINT MARTIN" => "Saint Martin",
- "SAINT PIERRE AND MIQUELON" => "Saint Pierre and Miquelon",
- "SAINT VINCENT AND THE GRENADINES" => "Saint Vincent and the Grenadines",
- "SAMOA" => "Samoa",
- "SAN MARINO" => "San Marino",
- "SAO TOME AND PRINCIPE" => "Sao Tome and Principe",
- "SAUDI ARABIA" => "Saudi Arabia",
- "SENEGAL" => "Senegal",
- "SERBIA" => "Serbia",
- "SEYCHELLES" => "Seychelles",
- "SIERRA LEONE" => "Sierra Leone",
- "SINGAPORE" => "Singapore",
- "SLOVAKIA" => "Slovakia",
- "SLOVENIA" => "Slovenia",
- "SOLOMON ISLANDS" => "Solomon Islands",
- "SOMALIA" => "Somalia",
- "SOUTH AFRICA" => "South Africa",
- "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS" => "South Georgia and the South Sandwich Islands",
- "SPAIN" => "Spain",
- "SRI LANKA" => "Sri Lanka",
- "SUDAN" => "Sudan",
- "SURINAME" => "Suriname",
- "SVALBARD AND JAN MAYEN" => "Svalbard and Jan Mayen",
- "SWAZILAND" => "Swaziland",
- "SWEDEN" => "Sweden",
- "SWITZERLAND" => "Switzerland",
- "SYRIAN ARAB REPUBLIC" => "Syrian Arab Republic",
- "TAIWAN, PROVINCE OF CHINA" => "Taiwan, Province of China",
- "TAJIKISTAN" => "Tajikistan",
- "TANZANIA, UNITED REPUBLIC OF" => "Tanzania, United Republic of",
- "THAILAND" => "Thailand",
- "TIMOR-LESTE" => "Timor-Leste",
- "TOGO" => "Togo",
- "TOKELAU" => "Tokelau",
- "TONGA" => "Tonga",
- "TRINIDAD AND TOBAGO" => "Trinidad and Tobago",
- "TUNISIA" => "Tunisia",
- "TURKEY" => "Turkey",
- "TURKMENISTAN" => "Turkmenistan",
- "TURKS AND CAICOS ISLANDS" => "Turks and Caicos Islands",
- "TUVALU" => "Tuvalu",
- "UGANDA" => "Uganda",
- "UKRAINE" => "Ukraine",
- "UNITED ARAB EMIRATES" => "United Arab Emirates",
- "UNITED KINGDOM" => "United Kingdom",
- "UNITED STATES" => "United States",
- "UNITED STATES MINOR OUTLYING ISLANDS" => "United States Minor Outlying Islands",
- "URUGUAY" => "Uruguay",
- "UZBEKISTAN" => "Uzbekistan",
- "VANUATU" => "Vanuatu",
- "VENEZUELA, BOLIVARIAN REPUBLIC OF" => "Venezuela, Bolivarian Republic of",
- "VIET NAM" => "Viet Nam",
- "VIRGIN ISLANDS, BRITISH" => "Virgin Islands, British",
- "VIRGIN ISLANDS, U.S." => "Virgin Islands, U.S.",
- "WALLIS AND FUTUNA" => "Wallis and Futuna",
- "WESTERN SAHARA" => "Western Sahara",
- "YEMEN" => "Yemen",
- "ZAMBIA" => "Zambia",
- "ZIMBABWE" => "Zimbabwe",
- );
-
-################################################################################
-# United Nations (source of alpha-3 and numeric codes)
-
-%country_un_orig =
- (
- 'Åland Islands' => "Aland Islands",
- 'Côte d'."'Ivoire" => "Cote d'Ivoire",
- 'Réunion' => "Reunion",
- 'Saint-Barthélemy' => "Saint-Barthelemy",
- );
-
-################################################################################
-# NGA (source of FIPS 10 codes)
-
-%country_nga_orig =
- (
- );
-
-%country_nga_ignore =
- (
- "Golan Heights (Israeli-occupied)" => 1,
- );
-
-################################################################################
-# IANA (source of top level domains)
-
-%country_iana_orig =
- (
- );
-
-################################################################################
-# CIA World Factbook (checks for codes)
-
-%country_cia_orig =
- (
- );
-
-%country_cia_ignore =
- (
- "Western Samoa" => 1,
- "World" => 1,
- "Zaire" => 1,
- );
-
-%country_cia_codes =
- (
- "alpha2" => {"Gaza Strip" => "-",
- "West Bank" => "-",
- "Svalbard" => "-",
- },
- "alpha3" => {"Gaza Strip" => "-",
- "West Bank" => "-",
- "Svalbard" => "-",
- },
- "num" => {"Gaza Strip" => "-",
- "West Bank" => "-",
- "Svalbard" => "-",
- },
- "fips" => {
- },
- "dom" => {"Gaza Strip" => "-",
- "West Bank" => "-",
- "Svalbard" => "-",
- },
- );
-
-1;
-
-# Local Variables:
-# mode: cperl
-# indent-tabs-mode: nil
-# cperl-indent-level: 3
-# cperl-continued-statement-offset: 2
-# cperl-continued-brace-offset: 0
-# cperl-brace-offset: 0
-# cperl-brace-imaginary-offset: 0
-# cperl-label-offset: -2
-# End:
-
+++ /dev/null
-#!/usr/bin/perl -w
-# Copyright (c) 2010-2010 Sullivan Beck. All rights reserved.
-# This program is free software; you can redistribute it and/or modify it
-# under the same terms as Perl itself.
-
-###############################################################################
-###############################################################################
-# This script is used to automatically generate the Locale::Codes module
-# which contain the actual codes.
-
-require 5.000000;
-use YAML;
-use IO::File;
-use strict;
-use warnings;
-use Archive::Zip;
-
-use lib "./internal";
-
-use vars qw($VERSION);
-$VERSION='3.12';
-
-use vars qw($DEBUG);
-$DEBUG = 0;
-
-###############################################################################
-# GLOBAL VARIABLES
-###############################################################################
-
-# We need to create the following variables:
-#
-# %Country{COUNTRY_ID} => [ COUNTRY, COUNTRY, ... ]
-# A list of all valid country names that
-# correspond to a given COUNTRY_ID.
-# The names are all real (i.e. correct
-# spelling and capitalization).
-# %CountryAlias{ALIAS} => [ COUNTRY_ID, I ]
-# A hash of all aliases for a country.
-# Aliases are all lowercase.
-# %Code2CountryID{CODESET}{CODE} => [ COUNTRY_ID, I ]
-# In a given CODESET, CODE corresponds to
-# the I'th entry list of countries.
-# %CountryID2Code{CODESET}{COUNTRY_ID} => CODE
-# In the given CODESET, the COUNTRY_ID
-# corresponds to the given CODE.
-
-use vars qw( $CountryID %Country %CountryAlias %Code2CountryID %CountryID2Code );
-use vars qw(%country_alias);
-
-$CountryID = "0001";
-
-use vars qw( $ModDir $CountryModule );
-
-$ModDir = "lib/Locale/Codes";
-$CountryModule = "Country";
-
-#
-# We'll first read data from the official ISO 3166.
-#
-# Data available consists only of the country names and 2-character
-# codes. Country names include non-ASCII characters encoded in
-# ISO-8859-1. Also, they're all uppercase! Every line in the file ends
-# with one unprintable character. In other words, they're distributed
-# in the most unfriendly fashion you could ask for! We'll store the
-# first country for error checking.
-#
-
-use vars qw($country_iso_url $country_iso_file $country_iso_1st);
-use vars qw(%country_iso_orig);
-
-$country_iso_url = "http://www.iso.org/iso/list-en1-semic-3.txt";
-$country_iso_1st = "AFGHANISTAN";
-($country_iso_file) = $country_iso_url =~ m,/([^/]*)$,;
-
-#
-# The UN Stats Division contains some (but not all) of the ISO 3166
-# 3-character codes and 3-digit codes. Since they are the maintainers
-# of this data, this is an official source.
-#
-
-use vars qw($country_un_url $country_un_file);
-use vars qw(%country_un_orig);
-
-$country_un_url = "http://unstats.un.org/unsd/methods/m49/m49alpha.htm";
-($country_un_file) = $country_un_url =~ m,/([^/]*)$,;
-
-#
-# The National Geospatial-Intelligence Agency is the official source
-# for FIPS 10 codes.
-#
-
-use vars qw($country_nga_url $country_nga_file);
-use vars qw(%country_nga_orig %country_nga_ignore);
-
-$country_nga_url = "http://earth-info.nga.mil/gns/html/digraphs.htm";
-($country_nga_file) = $country_nga_url =~ m,/([^/]*)$,;
-
-#
-# IANA domains
-#
-
-use vars qw($country_iana_url $country_iana_file);
-use vars qw(%country_iana_orig);
-
-$country_iana_url = "http://www.iana.org/domains/root/db/index.html";
-($country_iana_file) = $country_iana_url =~ m,/([^/]*)$,;
-
-#
-# This is the CIA World Factbook, which is assumed to be a reliable
-# source of this data. Due to the complexity of the data, we'll store
-# the last country so we know when to stop.
-#
-# We have to force-override some codes.
-#
-
-use vars qw($country_cia_url $country_cia_file $country_cia_last);
-use vars qw(%country_cia_ignore %country_cia_orig %country_cia_codes);
-
-$country_cia_url = "https://www.cia.gov/library/publications/the-world-factbook/appendix/print_appendix-d.html";
-($country_cia_file) = $country_cia_url =~ m,/([^/]*)$,;
-$country_cia_last = "Zimbabwe";
-
-require "data.country.pl";
-
-########################################
-
-# We need to create the following variables:
-#
-# %Language{LANGUAGE_ID} => [ LANGUAGE, LANGUAGE, ... ]
-# A list of all valid language names that
-# correspond to a given LANGUAGE_ID.
-# The names are all real (i.e. correct
-# spelling and capitalization).
-# %LanguageAlias{ALIAS} => [ LANGUAGE_ID, I ]
-# A hash of all aliases for a language.
-# Aliases are all lowercase.
-# %Code2LanguageID{CODESET}{CODE} => [ LANGUAGE_ID, I ]
-# In a given CODESET, CODE corresponds to
-# the I'th entry in the list of languages.
-# %LanguageID2Code{CODESET}{LANGUAGE_ID} => CODE
-# In the given CODESET, the LANGUAGE_ID
-# corresponds to the given CODE.
-
-use vars qw( $LanguageID %Language %LanguageAlias %Code2LanguageID %LanguageID2Code );
-use vars qw(%language_alias);
-
-$LanguageID = "0001";
-
-use vars qw( $LanguageModule );
-
-$LanguageModule = "Language";
-
-#
-# We'll first read data from the official ISO 639.
-#
-# Data available consists of the language names and 2-letter and
-# 3-letter codes. Language names include non-ASCII characters encoded in
-# UTF-8. And (amazingly enough) it's available in a field delimited file!!!
-#
-
-use vars qw($language_iso_url $language_iso_file);
-use vars qw(%language_iso_orig);
-
-$language_iso_url = "http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt";
-($language_iso_file) = $language_iso_url =~ m,/([^/]*)$,;
-
-require "data.language.pl";
-
-########################################
-
-# We need to create the following variables:
-#
-# %Currency{CURRENCY_ID} => [ CURRENCY, CURRENCY, ... ]
-# A list of all valid currency names that
-# correspond to a given CURRENCY_ID.
-# The names are all real (i.e. correct
-# spelling and capitalization).
-# %CurrencyAlias{ALIAS} => [ CURRENCY_ID, I ]
-# A hash of all aliases for a currency.
-# Aliases are all lowercase.
-# %Code2CurrencyID{CODESET}{CODE} => [ CURRENCY_ID, I ]
-# In a given CODESET, CODE corresponds to
-# the I'th entry in the list of currencies.
-# %CurrencyID2Code{CODESET}{CURRENCY_ID} => CODE
-# In the given CODESET, the CURRENCY_ID
-# corresponds to the given CODE.
-
-use vars qw( $CurrencyID %Currency %CurrencyAlias %Code2CurrencyID %CurrencyID2Code );
-use vars qw(%currency_alias);
-
-$CurrencyID = "0001";
-
-use vars qw( $CurrencyModule );
-
-$CurrencyModule = "Currency";
-
-#
-# We'll first read data from the official ISO 4217.
-#
-
-use vars qw($currency_iso_url $currency_iso_file $currency_iso_last);
-use vars qw(%currency_iso_orig %currency_iso_ignore);
-
-$currency_iso_url = "http://www.iso.org/iso/support/currency_codes_list-1.htm";
-($currency_iso_file) = $currency_iso_url =~ m,/([^/]*)$,;
-$currency_iso_last = "XXX";
-
-require "data.currency.pl";
-
-########################################
-
-# We need to create the following variables:
-#
-# %Script{SCRIPT_ID} => [ SCRIPT, SCRIPT, ... ]
-# A list of all valid script names that
-# correspond to a given SCRIPT_ID.
-# The names are all real (i.e. correct
-# spelling and capitalization).
-# %ScriptAlias{ALIAS} => [ SCRIPT_ID, I ]
-# A hash of all aliases for a script.
-# Aliases are all lowercase.
-# %Code2ScriptID{CODESET}{CODE} => [ SCRIPT_ID, I ]
-# In a given CODESET, CODE corresponds to
-# the I'th entry in the list of scripts.
-# %ScriptID2Code{CODESET}{SCRIPT_ID} => CODE
-# In the given CODESET, the SCRIPT_ID
-# corresponds to the given CODE.
-
-use vars qw( $ScriptID %Script %ScriptAlias %Code2ScriptID %ScriptID2Code );
-use vars qw(%script_alias);
-
-$ScriptID = "0001";
-
-use vars qw( $ScriptModule );
-
-$ScriptModule = "Script";
-
-#
-# We'll first read data from the official ISO 15924.
-#
-# Data available consists of the script names and 2-letter and
-# 3-letter codes. Script names include non-ASCII characters encoded in
-# UTF-8. And (amazingly enough) it's available in a field delimited file!!!
-#
-
-use vars qw($script_iso_url $script_iso_file $script_iso_tmp);
-use vars qw(%script_iso_orig %script_iso_ignore);
-
-$script_iso_url = "http://www.unicode.org/iso15924/iso15924.txt.zip";
-($script_iso_file) = $script_iso_url =~ m,/([^/]*)$,;
-$script_iso_tmp = "iso15924.txt";
-
-require "data.script.pl";
-
-###############################################################################
-# HELP
-###############################################################################
-
-use vars qw($usage);
-my $COM = $0;
-$COM =~ s/^.*\///;
-
-$usage=
- "usage: $COM OPTIONS
- -h/--help : Print help.
-
- -a/--all : Do all steps
-
- -c/--country : Get the country codes
- -l/--language : Get the language codes
- -r/--currency : Get the currency codes
- -s/--script : Get the script codes
- -C/--clean : Clean up all temporary files
-";
-
-###############################################################################
-# PARSE ARGUMENTS
-###############################################################################
-
-my $do_all = 0;
-my $do_country = 0;
-my $do_language = 0;
-my $do_currency = 0;
-my $do_script = 0;
-my $do_clean = 0;
-
-while ($_ = shift) {
-
- (print $usage), exit if ($_ eq "-h" || $_ eq "--help");
-
- $do_all = 1, next if ($_ eq "-a" || $_ eq "--all");
-
- $do_country = 1, next if ($_ eq "-c" || $_ eq "--country");
- $do_language = 1, next if ($_ eq "-l" || $_ eq "--language");
- $do_currency = 1, next if ($_ eq "-r" || $_ eq "--currency");
- $do_script = 1, next if ($_ eq "-s" || $_ eq "--script");
- $do_clean = 1, next if ($_ eq "-C" || $_ eq "--clean");
-}
-
-############################################################################
-# MAIN PROGRAM
-############################################################################
-
-do_country() if ($do_all || $do_country);
-do_language() if ($do_all || $do_language);
-do_currency() if ($do_all || $do_currency);
-do_script() if ($do_all || $do_script);
-do_clean() if ($do_all || $do_clean);
-
-############################################################################
-# DO_COUNTRY
-############################################################################
-
-sub do_country {
- print "Country codes...\n";
-
- do_country_iso();
- print_table("country") if ($DEBUG == 2);
-
- do_country_un();
- print_table("country") if ($DEBUG == 2);
-
- do_country_nga();
- print_table("country") if ($DEBUG == 2);
-
- do_country_iana();
- print_table("country") if ($DEBUG == 2);
-
- do_country_cia();
- print_table("country") if ($DEBUG == 2);
-
- # Go through all aliases to pick up any that haven't already been
- # added (since some aliases are for human convenience rather than
- # dealing with variations between codesets).
- do_aliases("country");
- print_table("country") if ($DEBUG);
-
- write_module("country");
-}
-
-########################################
-sub do_country_iso {
-
- ###
- ### The first set we'll do is the ISO 3166-1 2-character
- ### codes. These country names must be adjusted (since they're all
- ### uppercase). Also, the lines all end with some strange
- ### unprintable character.
- ###
-
- my $codeset = "alpha2";
-
- system("wget -N -q $country_iso_url");
- my @in = `cat $country_iso_file`;
- chomp(@in);
- chop(@in);
-
- # File is a line of text followed by a blank line followed by the
- # codes as ORIGNAME;CODE .
-
- if ($in[1] || $in[2] !~ /^$country_iso_1st;/) {
- die "ERROR [iso]: country code file format changed!\n";
- }
-
- shift(@in);
- shift(@in);
-
- foreach my $line (@in) {
- if ($line !~ /^(.*);(.*)$/) {
- die "ERROR [iso]: line invalid\n" .
- " $line\n";
- }
- my($country,$code) = ($1,$2);
- $code = lc($code);
- if (exists $country_iso_orig{$country}) {
- $country = $country_iso_orig{$country};
- } else {
- print "WARNING [iso]: unknown country: $country\n";
- next;
- }
-
- my $countryID = $CountryID++;
-
- $Country{$countryID} = [ $country ];
- $CountryAlias{lc($country)} = [ $countryID, 0 ];
- $Code2CountryID{$codeset}{$code} = [ $countryID, 0 ];
- $CountryID2Code{$codeset}{$countryID} = $code;
- }
-}
-
-########################################
-sub do_country_un {
-
- print "\nINFO [un]: expect Channel Islands\n";
-
- ###
- ### The UN data contains most of the alpha-3 and numeric code sets.
- ###
-
- system("wget -N -q $country_un_url");
- my @in = `cat $country_un_file`;
- chomp(@in);
- my $in = join("",@in);
-
- # Clean up some things that could cause problems in parsing:
-
- strip_tags(\$in,1,"br","p","strong","div");
- $in =~ s,\ , ,g;
- $in =~ s,\s+, ,g;
-
- # Look for a table who's first row has the header:
- # ISO ALPHA-3 code
-
- my $found = jump_to_row(\$in,"ISO ALPHA-3 code");
- if (! $found) {
- die "ERROR [un]: country code file format changed!\n";
- }
-
- while (1) {
- my @row = get_row("un",\$in);
- last if (! @row);
-
- my($num,$country,$alpha3) = @row;
- $alpha3 = lc($alpha3);
-
- if ($num) {
- if ($num !~ /^\d+$/ || length($num) > 3) {
- print "WARNING [un]: Invalid numeric code: $country => $num\n";
- next;
- }
- }
-
- if ($alpha3 && $alpha3 !~ /^[a-z][a-z][a-z]$/) {
- print "WARNING [un]: Invalid alpha-3 code: $country => $alpha3\n";
- next;
- }
-
- if (exists $country_un_orig{$country}) {
- $country = $country_un_orig{$country};
- }
-
- my($err,$countryID,$i,$t) = get_countryID("un",$country);
- next if ($err);
-
- if ($t eq "new") {
- print "INFO [un]: new country: $alpha3/$num\t$country\n";
- }
-
- if ($num) {
- $num = "0$num" while (length($num) < 3);
- $Code2CountryID{"num"}{$num} = [ $countryID, $i ];
- $CountryID2Code{"num"}{$countryID} = $num;
- }
-
- if ($alpha3) {
- $Code2CountryID{"alpha3"}{$alpha3} = [ $countryID, $i ];
- $CountryID2Code{"alpha3"}{$countryID} = $alpha3;
- }
- }
-}
-
-########################################
-sub do_country_nga {
-
- print "\nINFO [nga]: expect Serbia, Ashmore, Baker, Bassas, Clipperton,\n" .
- " Coral, Europa, Gaza, Glorioso, Howland, Jan Mayen,\n" .
- " Jarvis, Johnston, Juan, Kingman, Midway, Navassa, Palmyra,\n" .
- " Paracel, Spratly, Svalbard, Tromelin, Wake, West\n";
-
- ###
- ### The NGA data contains the FIPS 10 codes
- ###
-
- system("wget -N -q $country_nga_url");
- my @in = `cat $country_nga_file`;
- chomp(@in);
- my $in = join("",@in);
-
- # Clean up some things that could cause problems in parsing:
-
- strip_tags(\$in,1,"br","p","strong","div");
- $in =~ s,\Q[United States}\E,,; # A horrible typo in the HTML
- $in =~ s,\ , ,g;
- $in =~ s,Other:, ,g;
- $in =~ s,\(see note[^\)]*\), ,g;
- $in =~ s,\[[^\]]*\], ,g;
- $in =~ s,\s+, ,g;
-
- # Look for a table who's first row has the header:
- # SHORT FORM NAME
- # and then a table with the header:
- # Short Form Name
-
- foreach my $table ("SHORT FORM NAME","Short Form Name") {
- my $found = jump_to_row(\$in,$table);
- if (! $found) {
- die "ERROR [nga]: country code file format changed!\n";
- }
-
- while (1) {
- my @row = get_row("nga",\$in);
- last if (! @row);
-
- my($short,$long,$code) = @row;
- $code = uc($code);
- next if ($code eq "N/A");
- next if (exists $country_nga_ignore{$short});
-
- if ($code !~ /^[A-Z][A-Z]$/) {
- print "WARNING [nga]: Invalid code: $short => $code\n";
- }
-
- if ($short eq "None") {
- print "ERROR [nga]: no short definition: $code $long\n";
- next;
- }
- $long = "" if ($long eq "None");
-
- if (exists $country_nga_orig{$short}) {
- $short = $country_nga_orig{$short};
- }
- if (exists $country_nga_orig{$long}) {
- $long = $country_nga_orig{$long};
- }
-
- my($countryID,$i);
- my($err,$c,$ii,$t) = get_countryID("nga",$short,1);
- next if ($err);
-
- if ($t ne "new") {
- ($countryID,$i) = ($c,$ii);
- } else {
- ($err,$c,$ii,$t) = get_countryID("nga",$long,1);
- ($countryID,$i) = ($c,-1);
- if ($t eq "new") {
- print "INFO [nga]: new country: $code\t$short\n";
- }
- }
-
- if ($countryID == -1) {
- # New country
- $countryID = $CountryID++;
- $i = 0;
- $Country{$countryID} = [ $short ];
- $CountryAlias{lc($short)} = [ $countryID, $i ];
-
- } elsif ($i == -1) {
- # $short is a new alias
- push @{ $Country{$countryID} },$short;
- $i = $#{ $Country{$countryID} };
- $CountryAlias{lc($short)} = [ $countryID, $i ];
- }
-
- if ($long &&
- ! exists $CountryAlias{lc($long)}) {
- # $long is a new alias
- push @{ $Country{$countryID} },$long;
- $i = $#{ $Country{$countryID} };
- $CountryAlias{lc($long)} = [ $countryID, $i ];
- }
-
- $Code2CountryID{"fips"}{$code} = [ $countryID, $i ];
- $CountryID2Code{"fips"}{$countryID} = $code;
- }
- }
-}
-
-########################################
-sub do_country_iana {
-
- print "\nINFO [iana]: expect Acension, European, Soviet, Portuguese\n";
-
- ###
- ### The IANA data contains the domain names
- ###
-
- system("wget -N -q $country_iana_url");
- my @in = `cat $country_iana_file`;
- chomp(@in);
- my $in = join("",@in);
-
- # Clean up some things that could cause problems in parsing:
-
- strip_tags(\$in,1,"a");
- $in =~ s,\ , ,g;
- $in =~ s,\(being phased out\), ,g;
- $in =~ s,\s+, ,g;
-
- # Look for a table who's first row has the header:
- # Sponsoring Organisation
-
- my $found = jump_to_row(\$in,"Sponsoring Organisation");
- if (! $found) {
- die "ERROR [iana]: country code file format changed!\n";
- }
-
- while (1) {
- my @row = get_row("iana",\$in);
- last if (! @row);
-
- my($dom,$type,$country) = @row;
- next unless ($type eq "country-code");
- $dom =~ s/^\.//;
- $country =~ s,<br.*,,;
-
- if (exists $country_un_orig{$country}) {
- $country = $country_un_orig{$country};
- }
-
- my($err,$countryID,$i,$t) = get_countryID("iana",$country);
- next if ($err);
-
- if ($t eq "new") {
- print "INFO [iana]: new country: $dom\t$country\n";
- }
-
- $Code2CountryID{"dom"}{$dom} = [ $countryID, $i ];
- $CountryID2Code{"dom"}{$countryID} = $dom;
- }
-}
-
-########################################
-sub do_country_cia {
-
- print "\nINFO [cia]: expect Antarctica, Bouvet, British, Christmas, Cocos,\n" .
- " France (Metro), French, Heard, Kosovo, Montenegro,\n" .
- " Saint Barth, Saint Martin, Serbia, Taiwan, US\n";
-
- ###
- ### The CIA data is used to check:
- ### alpha-2, alpha-3, numeric, fips 10
- ###
-
- system("wget -N -q $country_cia_url");
- my @in = `cat $country_cia_file`;
- chomp(@in);
- my $in = join("",@in);
- $in =~ s/\015/ /sg;
-
- # Clean up some things that could cause problems in parsing:
-
- strip_tags(\$in,1,"br","p","strong","div","a","b","img");
- $in =~ s,\ , ,g;
- $in =~ s,\s+, ,g;
-
- # Move to a table with "Entity" as one of the entries. This
- # table is followed by the entires.
-
- my $found = jump_to_entry(\$in,"Entity");
- if (! $found) {
- die "ERROR [cia]: country code file format changed!\n";
- }
-
- # Each entry is quite complicated. Each is a single table (with a
- # table nested in it) of the form:
- #
- # <table>
- # <tr>
- # <td>COUNTRY</td>
- # <td>FIPS</td>
- # <td>
- # <table>
- # <tr>
- # <td>ALPHA-2</td>
- # <td>ALPHA-3</td>
- # <td>NUMERIC</td>
- # </table>
- # </td>
- # ...
- # </tr>
- # ...
- # </table>
- #
- # After all of the "A" entries, a special table containing the headers
- # is given again.
-
- while (1) {
-
- #
- # Read the next entry
- #
-
- my($country,$fips,$alpha2,$alpha3,$num,$stanag,$dom);
-
- my $found = jump_to_table(\$in);
- if (! $found) {
- print "ERROR [cia]: malformed file\n";
- last;
- }
- $country = get_entry(\$in);
- if (! $country) {
- # The first entry is empty if it's at the end of the entries
- # for a given letter.
- $found = jump_to_entry(\$in,"Entity");
- if (! $found) {
- die "ERROR [cia]: country code file format changed!\n";
- }
- next;
- }
-
- if (exists $country_cia_orig{$country}) {
- $country = $country_cia_orig{$country};
- }
-
- $fips = uc(get_entry(\$in));
- $found = jump_to_table(\$in);
- if (! $found) {
- print "ERROR [cia]: malformed file\n";
- last;
- }
- $alpha2 = lc(get_entry(\$in));
- $alpha3 = lc(get_entry(\$in));
- $num = get_entry(\$in);
- $stanag = get_entry(\$in);
- $dom = uc(get_entry(\$in));
- $dom =~ s/^\.//;
-
- next if (exists $country_cia_ignore{$country});
-
- $alpha2 = $country_cia_codes{"alpha2"}{$country}
- if (exists $country_cia_codes{"alpha2"}{$country});
- $alpha3 = $country_cia_codes{"alpha3"}{$country}
- if (exists $country_cia_codes{"alpha3"}{$country});
- $num = $country_cia_codes{"num"}{$country}
- if (exists $country_cia_codes{"num"}{$country});
- $fips = $country_cia_codes{"fips"}{$country}
- if (exists $country_cia_codes{"fips"}{$country});
- $dom = $country_cia_codes{"dom"}{$country}
- if (exists $country_cia_codes{"dom"}{$country});
-
- #
- # Get the countryID if the country or ANY of the codes match.
- #
-
- my($err,$countryID,$i,$type) = get_countryID("cia",$country,1);
- next if ($err);
-
- if ($countryID == -1 && $alpha2 ne "-") {
- if (exists $Code2CountryID{"alpha2"}{$alpha2}) {
- ($countryID,$i) = @{ $Code2CountryID{"alpha2"}{$alpha2} };
- }
- }
-
- if ($countryID == -1 && $alpha3 ne "-") {
- if (exists $Code2CountryID{"alpha3"}{$alpha3}) {
- ($countryID,$i) = @{ $Code2CountryID{"alpha3"}{$alpha3} };
- }
- }
-
- if ($countryID == -1 && $num ne "-") {
- if (exists $Code2CountryID{"num"}{$num}) {
- ($countryID,$i) = @{ $Code2CountryID{"num"}{$num} };
- }
- }
-
- if ($countryID == -1 && $fips ne "-") {
- if (exists $Code2CountryID{"fips"}{$fips}) {
- ($countryID,$i) = @{ $Code2CountryID{"fips"}{$fips} };
- }
- }
-
- if ($countryID == -1 && $dom ne "-") {
- if (exists $Code2CountryID{"dom"}{$dom}) {
- ($countryID,$i) = @{ $Code2CountryID{"dom"}{$dom} };
- }
- }
-
- if ($countryID == -1) {
- $countryID = $CountryID++;
- $i = 0;
- $Country{$countryID} = [ $country ];
- $CountryAlias{lc($country)} = [ $countryID, $i ];
- }
-
- #
- # Now check that any previously defined values match the
- # CIA data.
- #
-
- if ($alpha2 ne "-") {
- my $err = check_code("cia","alpha2",$alpha2,$country,$countryID);
- next if ($err);
- }
-
- if ($alpha3 ne "-") {
- my $err = check_code("cia","alpha3",$alpha3,$country,$countryID);
- next if ($err);
- }
-
- if ($num ne "-") {
- my $err = check_code("cia","num",$num,$country,$countryID);
- next if ($err);
- }
-
- if ($fips ne "-") {
- my $err = check_code("cia","fips",$fips,$country,$countryID);
- next if ($err);
- }
-
- if ($dom ne "-") {
- my $err = check_code("cia","dom",$dom,$country,$countryID);
- next if ($err);
- }
-
- last if ($country eq $country_cia_last);
- }
-}
-
-########################################
-sub check_code {
- my($type,$codeset,$code,$country,$countryID) = @_;
-
- # Check to make sure that the code is defined.
-
- if (exists $Code2CountryID{$codeset}{$code}) {
- return _check_code_exists($type,$codeset,$code,$country,$countryID);
- } else {
- return _check_code_new($type,$codeset,$code,$country,$countryID);
- }
-}
-
-sub _check_code_exists {
- my($type,$codeset,$code,$country,$countryID) = @_;
-
- # Check the countryID for the code. It must be the same as the one
- # passed in.
-
- my $old_countryID = $Code2CountryID{$codeset}{$code}[0];
- if ($countryID != $old_countryID) {
- print "ERROR [$type]: countryID mismatch in code: [$codeset, $country, $code, $countryID != $old_countryID ]\n";
- return 1;
- }
-
- # If the country is defined, it must be the same CountryID. If it
- # is not, create a new alias.
-
- if (exists $CountryAlias{lc($country)}) {
-
- my $alt_countryID = $CountryAlias{lc($country)}[0];
-
- if ($countryID != $alt_countryID) {
- print "ERROR [$type]: countryID mismatch in country: [$codeset, $country, $code, $countryID != $alt_countryID ]\n";
- return 1;
- }
-
- } else {
- push @{ $Country{$countryID} },$country;
- my $i = $#{ $Country{$countryID} };
- $CountryAlias{lc($country)} = [ $countryID, $i ];
- }
-}
-
-# This is a new code.
-sub _check_code_new {
- my($type,$codeset,$code,$country,$countryID) = @_;
-
- print "INFO [$type]: New code: $codeset [$code] => $country\n";
-
- # If this country name isn't defined, create it.
-
- my $i;
- if (exists $CountryAlias{lc($country)}) {
- $i = $CountryAlias{lc($country)}[1];
- } else {
- push @{ $Country{$countryID} },$country;
- $i = $#{ $Country{$countryID} };
- $CountryAlias{lc($country)} = [ $countryID, $i ];
- }
-
- # This country name is the canonical name for the code.
-
- $CountryID2Code{$codeset}{$countryID} = $code;
- $Code2CountryID{$codeset}{$code} = [ $countryID, $i ];
-
- return 0;
-}
-
-########################################
-sub get_countryID {
- my($type,$country,$no_create) = @_;
-
- my($countryID,$i,$t);
- if (exists $CountryAlias{lc($country)}) {
- # The country is the same name as one previously defined
- ($countryID,$i) = @{ $CountryAlias{lc($country)} };
- $t = "same";
-
- } elsif (exists $country_alias{$country}) {
- # It's a new alias for an existing country
- my $c = $country_alias{$country};
- if (! exists $CountryAlias{lc($c)}) {
- print "WARNING [$type]: alias referenced before it is defined: $country => $c\n";
- return (1);
- }
- $countryID = $CountryAlias{lc($c)}[0];
- push @{ $Country{$countryID} },$country;
- $i = $#{ $Country{$countryID} };
- $CountryAlias{lc($country)} = [ $countryID, $i ];
- $t = "alias";
-
- } else {
- # It's a new country.
- if ($no_create) {
- return(0,-1,-1,"new");
- }
- $countryID = $CountryID++;
- $i = 0;
- $Country{$countryID} = [ $country ];
- $CountryAlias{lc($country)} = [ $countryID, $i ];
- $t = "new";
- }
-
- return(0,$countryID,$i,$t);
-}
-
-############################################################################
-# DO_LANGUAGE
-############################################################################
-
-sub do_language {
- print "Language codes...\n";
-
- do_language_iso();
- print_table("language") if ($DEBUG == 2);
-
- # Go through all aliases to pick up any that haven't already been
- # added (since some aliases are for human convenience rather than
- # dealing with variations between codesets).
- do_aliases("language");
- print_table("language") if ($DEBUG);
-
- write_module("language");
-}
-
-########################################
-sub do_language_iso {
- ###
- ### The first set we'll do is the ISO codes.
- ###
-
- system("wget -N -q $language_iso_url");
- open(my $in,'<:encoding(utf8)',$language_iso_file);
- my @in = <$in>;
- close($in);
- chomp(@in);
-
- # File is a set of lines of fields delimited by "|". Fields are:
- #
- # alpha3
- # term
- # alpha2
- # English names (semicolon separated list)
- # French name
-
- foreach my $line (@in) {
- my($alpha3,$term,$alpha2,$english,$french) = split(/\|/,$line);
- # The first line has some binary characters at the start.
- if (length($alpha3)>3) {
- $alpha3 = substr($alpha3,length($alpha3)-3);
- }
-
- if (exists $language_iso_orig{$english}) {
- $english = $language_iso_orig{$english};
- }
- my $languageID = $LanguageID++;
- my @language = split(/\s*;\s*/,$english);
-
- $Language{$languageID} = [ @language ];
- for (my $i=0; $i<=$#language; $i++) {
- my $language = $language[$i];
- $LanguageAlias{lc($language)} = [ $languageID, $i ];
- }
-
- if ($alpha3) {
- $Code2LanguageID{"alpha3"}{$alpha3} = [ $languageID, 0 ];
- $LanguageID2Code{"alpha3"}{$languageID} = $alpha3;
- }
-
- if ($term) {
- $Code2LanguageID{"term"}{$term} = [ $languageID, 0 ];
- $LanguageID2Code{"term"}{$languageID} = $term;
- }
-
- if ($alpha2) {
- $Code2LanguageID{"alpha2"}{$alpha2} = [ $languageID, 0 ];
- $LanguageID2Code{"alpha2"}{$languageID} = $alpha2;
- }
- }
-}
-
-############################################################################
-# DO_CURRENCY
-############################################################################
-
-sub do_currency {
- print "Currency codes...\n";
-
- do_currency_iso();
- print_table("currency") if ($DEBUG == 2);
-
- # Go through all aliases to pick up any that haven't already been
- # added (since some aliases are for human convenience rather than
- # dealing with variations between codesets).
- do_aliases("currency");
- print_table("currency") if ($DEBUG);
-
- write_module("currency");
-}
-
-########################################
-sub do_currency_iso {
- ###
- ### The first set we'll do is the ISO 4217 codes.
- ###
-
- system("wget -N -q $currency_iso_url");
- my @in = `cat $currency_iso_file`;
- chomp(@in);
- my $in = join("",@in);
-
- # Clean up some things that could cause problems in parsing:
-
- strip_tags(\$in,1,"p","a","strong","div");
- $in =~ s,\ , ,g;
- $in =~ s,†,,g;
- $in =~ s,‡,,g;
- $in =~ s,\s+, ,g;
-
- # Look for a table who's first row has the header:
- # Entity
-
- my $found = jump_to_row(\$in,"Entity");
- if (! $found) {
- die "ERROR [iso]: currency code file format changed!\n";
- }
-
- LINE: while (1) {
- my @row = get_row("iso",\$in);
- last if (! @row);
-
- my($ent,$currencies,$alphas,$nums) = @row;
- $nums = "" if ($nums eq "Nil");
- next if (! $alphas && ! $nums);
-
- my(@currency,@num,@alpha,$done);
- @currency = split(/<br\s*\/><br\s*\/>/,$currencies);
- @num = split(/<br\s*\/><br\s*\/>/,$nums);
- @alpha = split(/<br\s*\/><br\s*\/>/,$alphas);
-
- if ( ($nums && $#num != $#currency) ||
- ($alphas && $#alpha != $#currency) ) {
- print "WARNING [iso]: Invalid line: $currencies => $alphas, $nums\n";
- next;
- }
-
- for (my $i=0; $i<=$#currency; $i++) {
- my $currency = $currency[$i];
- my $num = (@num ? $num[$i] : "");
- my $alpha = (@alpha ? $alpha[$i] : "");
- $done = 1 if ($alpha eq $currency_iso_last);
-
- if (exists $currency_iso_orig{$currency}) {
- $currency = $currency_iso_orig{$currency};
- }
-
- if ($num) {
- if ($num !~ /^\d+$/ || length($num) > 3) {
- print "WARNING [iso]: Invalid numeric code: $currency => $num\n";
- next LINE;
- }
- }
-
- $alpha = uc($alpha);
- if ($alpha && $alpha !~ /^[A-Z][A-Z][A-Z]$/) {
- print "WARNING [iso]: Invalid alpha code: $currency => $alpha\n";
- next LINE;
- }
-
- next if (exists $currency_iso_ignore{$alpha});
-
- # There's a lot of duplication, so make sure that if this code
- # exists, it is consistant.
-
- my $new = 1;
- my @currencyID;
- if (exists $Code2CurrencyID{"num"}{$num}) {
- $new = 0;
- my($currencyID,$i) = @{ $Code2CurrencyID{"num"}{$num} };
- push(@currencyID,$currencyID);
- }
- if (exists $Code2CurrencyID{"alpha"}{$alpha}) {
- $new = 0;
- my($currencyID,$i) = @{ $Code2CurrencyID{"alpha"}{$alpha} };
- push(@currencyID,$currencyID);
- }
-
- if ($#currencyID == 1) {
- if ($currencyID[0] != $currencyID[1]) {
- print "WARNING [iso]: CurrencyID mismatch: $currency => $alpha,$num\n";
- next LINE;
- }
- }
-
- my $currencyID;
- if ($new) {
- $currencyID = $CurrencyID++;
- $Currency{$currencyID} = [ $currency ];
- $CurrencyAlias{lc($currency)} = [ $currencyID, 0 ]
- } else {
- $currencyID = $currencyID[0];
- }
-
- if ($num) {
- $num = "0$num" while (length($num) < 3);
- $Code2CurrencyID{"num"}{$num} = [ $currencyID, 0 ];
- $CurrencyID2Code{"num"}{$currencyID} = $num;
- }
-
- if ($alpha) {
- $Code2CurrencyID{"alpha"}{$alpha} = [ $currencyID, 0 ];
- $CurrencyID2Code{"alpha"}{$currencyID} = $alpha;
- }
-
- }
-
- last if ($done);
- }
-}
-
-############################################################################
-# DO_SCRIPT
-############################################################################
-
-sub do_script {
- print "Script codes...\n";
-
- do_script_iso();
- print_table("script") if ($DEBUG == 2);
-
- # Go through all aliases to pick up any that haven't already been
- # added (since some aliases are for human convenience rather than
- # dealing with variations between codesets).
- do_aliases("script");
- print_table("script") if ($DEBUG);
-
- write_module("script");
-}
-
-########################################
-sub do_script_iso {
- ###
- ### The first set we'll do is the ISO 15924 codes. We
- ### can get a zip file which contains the data.
- ###
-
- system("wget -N -q $script_iso_url");
- my $zip = Archive::Zip->new($script_iso_file);
-
- my @members = grep(/^iso15924.*\.txt/,$zip->memberNames());
- if (@members != 1) {
- die "ERROR [iso]: zip file changed format\n";
- }
- my($file) = @members;
- $zip->extractMember($file,$script_iso_tmp);
-
- #
- # The zip file contains a series of lines in the form:
- # alpha;numeric;english;...
- # The data is in UTF-8.
- #
- # Every line has an unprintable character at the end.
- #
-
- open(my $in,'<:encoding(utf8)',$script_iso_tmp);
- my @in = <$in>;
- close($in);
- chomp(@in);
- chop(@in);
-
- foreach my $line (@in) {
- next if (! $line || $line =~ /^\043/);
- my($alpha,$num,$script) = split(/;/,$line);
- $alpha = ucfirst(lc($alpha));
- next if (exists $script_iso_ignore{$alpha});
-
- if (exists $script_iso_orig{$script}) {
- $script = $script_iso_orig{$script};
- }
- my $scriptID = $ScriptID++;
-
- $Script{$scriptID} = [ $script ];
- $ScriptAlias{lc($script)} = [ $scriptID, 0 ];
-
- $Code2ScriptID{"alpha"}{$alpha} = [ $scriptID, 0 ];
- $ScriptID2Code{"alpha"}{$scriptID} = $alpha;
-
- $Code2ScriptID{"num"}{$num} = [ $scriptID, 0 ];
- $ScriptID2Code{"num"}{$scriptID} = $num;
- }
-}
-
-############################################################################
-# PRINT_TABLE
-############################################################################
-
-sub _type_hashes {
- my($caller) = @_;
-
- my ($type_alias,$TypeAlias,$Type,$TypeID2Code,$Code2TypeID);
-
- if ($caller eq "country") {
- $type_alias = \%country_alias;
- $TypeAlias = \%CountryAlias;
- $Type = \%Country;
- $TypeID2Code = \%CountryID2Code;
- $Code2TypeID = \%Code2CountryID;
-
- } elsif ($caller eq "language") {
- $type_alias = \%language_alias;
- $TypeAlias = \%LanguageAlias;
- $Type = \%Language;
- $TypeID2Code = \%LanguageID2Code;
- $Code2TypeID = \%Code2LanguageID;
-
- } elsif ($caller eq "currency") {
- $type_alias = \%currency_alias;
- $TypeAlias = \%CurrencyAlias;
- $Type = \%Currency;
- $TypeID2Code = \%CurrencyID2Code;
- $Code2TypeID = \%Code2CurrencyID;
-
- } elsif ($caller eq "script") {
- $type_alias = \%script_alias;
- $TypeAlias = \%ScriptAlias;
- $Type = \%Script;
- $TypeID2Code = \%ScriptID2Code;
- $Code2TypeID = \%Code2ScriptID;
- }
-
- return($type_alias,$TypeAlias,$Type,$TypeID2Code,$Code2TypeID);
-}
-
-sub print_table {
- my($caller) = @_;
-
- my ($type_alias,$TypeAlias,$Type,$TypeID2Code,$Code2TypeID)
- = _type_hashes($caller);
-
- foreach my $typeID (sort keys %$Type) {
- my @type = @{ $$Type{$typeID} };
- my $i = 0;
-
- my $type = shift(@type);
- if (length($type) < 40) {
- $type .= " "x(40-length($type));
- } else {
- $type = substr($type,0,40);
- }
-
- print "${typeID}[$i] = $type ";
- foreach my $codeset (keys %$Code2TypeID) {
- my $field = "";
- if (exists $$TypeID2Code{$codeset}{$typeID}) {
- my $code = $$TypeID2Code{$codeset}{$typeID};
- my($code_id,$code_i) = @{ $$Code2TypeID{$codeset}{$code} };
- $field = "$code [$code_id,$code_i]";
- $field .= " ERR" if ($code_id != $typeID);
- }
- $field = $field . " "x(18-length($field));
- print $field;
- }
- print "\n";
- foreach $type (@type) {
- $i++;
- if (length($type) > 40) {
- $type = substr($type,0,40);
- }
- print " [$i] = $type\n";
- }
- }
-}
-
-############################################################################
-# DO_ALIASES
-############################################################################
-
-sub do_aliases {
- my($caller) = @_;
-
- my ($type_alias,$TypeAlias,$Type,$TypeID2Code,$Code2TypeID)
- = _type_hashes($caller);
-
- # Add remaining aliases.
-
- foreach my $alias (keys %$type_alias) {
- my $type = $$type_alias{$alias};
-
- next if (exists $$TypeAlias{lc($type)} &&
- exists $$TypeAlias{lc($alias)});
-
- if (! exists $$TypeAlias{lc($type)} &&
- ! exists $$TypeAlias{lc($alias)}) {
- print "WARNING: unused type in alias list: $type\n";
- print "WARNING: unused type in alias list: $alias\n";
- next;
- }
-
- my ($typeID);
- if (exists $$TypeAlias{lc($type)}) {
- $typeID = $$TypeAlias{lc($type)}[0];
- $type = $alias;
- } else {
- $typeID = $$TypeAlias{lc($alias)}[0];
- }
-
- push @{ $$Type{$typeID} },$type;
- my $i = $#{ $$Type{$typeID} };
- $$TypeAlias{lc($type)} = [ $typeID, $i ];
- }
-}
-
-############################################################################
-# WRITE_MODULE
-############################################################################
-
-sub write_module {
- my($type) = @_;
-
- my($module,%hashes,$id);
-
- if ($type eq "country") {
- $module = $CountryModule;
- %hashes = ("id2names" => "Country",
- "alias2id" => "CountryAlias",
- "code2id" => "Code2CountryID",
- "id2code" => "CountryID2Code");
- $id = $CountryID;
- } elsif ($type eq "language") {
- $module = $LanguageModule;
- %hashes = ("id2names" => "Language",
- "alias2id" => "LanguageAlias",
- "code2id" => "Code2LanguageID",
- "id2code" => "LanguageID2Code");
- $id = $LanguageID;
- } elsif ($type eq "currency") {
- $module = $CurrencyModule;
- %hashes = ("id2names" => "Currency",
- "alias2id" => "CurrencyAlias",
- "code2id" => "Code2CurrencyID",
- "id2code" => "CurrencyID2Code");
- $id = $CurrencyID;
- } elsif ($type eq "script") {
- $module = $ScriptModule;
- %hashes = ("id2names" => "Script",
- "alias2id" => "ScriptAlias",
- "code2id" => "Code2ScriptID",
- "id2code" => "ScriptID2Code");
- $id = $ScriptID;
- }
-
- my $file = "$ModDir/$module.pm";
- if (-f $file) {
- system("mv $file $file.bak");
- }
-
- my $out = new IO::File;
- $out->open(">$file");
- my $timestamp = `date`;
- chomp($timestamp);
-
- my $podstr = '=pod'; # so the CPAN indexer won't treat this as a POD file
-
- print $out "package Locale::Codes::$module;
-
-# This file was automatically generated. Any changes to this file will
-# be lost the next time 'get_codes' is run.
-# Generated on: $timestamp
-
-$podstr
-
-=head1 NAME
-
-Locale::Codes::$module - $type codes for the Locale::$module module
-
-=head1 SYNOPSIS
-
-This module contains data used by the Locale::$module module. It is
-not intended to be used directly, and contains no calleable routines.
-
-=head1 AUTHOR
-
-See Locale::Codes for full author history.
-
-Currently maintained by Sullivan Beck (sbeck\@cpan.org).
-
-=head1 COPYRIGHT
-
- Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).
- Copyright (c) 2001-2010 Neil Bowers
- Copyright (c) 2010-2010 Sullivan Beck
-
-This module is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
-
-use strict;
-use warnings;
-require 5.002;
-
-use vars qw(\$VERSION);
-\$VERSION='3.12';
-
-\$Locale::Codes::Data{'$type'}{'id'} = '$id';
-
-";
-
- foreach my $h qw(id2names alias2id code2id id2code) {
- my $hash = $hashes{$h};
- print $out "\$Locale::Codes::Data{'$type'}{'$h'} = {\n";
- _write_hash($out,$hash);
-
- print $out "};\n\n";
- }
-
- print $out "1;\n";
-
- $out->close();
-}
-
-sub _write_hash {
- my($out,$hashname) = @_;
-
- no strict 'refs';
- my %hash = %$hashname;
- use strict 'refs';
- _write_subhash($out,3,\%hash);
-}
-
-sub _write_subhash {
- my($out,$indent,$hashref) = @_;
-
- my %hash = %$hashref;
- my $ind = " "x$indent;
-
- foreach my $key (sort keys %hash) {
- my $val = $hash{$key};
- if (ref($val) eq "HASH") {
- print $out "${ind}q($key) => {\n";
- _write_subhash($out,$indent+3,$val);
- print $out "${ind} },\n";
- } elsif (ref($val) eq "ARRAY") {
- print $out "${ind}q($key) => [\n";
- _write_sublist($out,$indent+3,$val);
- print $out "${ind} ],\n";
- } else {
- print $out "${ind}q($key) => q($val),\n";
- }
- }
-}
-
-sub _write_sublist {
- my($out,$indent,$listref) = @_;
-
- my @list = @$listref;
- my $ind = " "x$indent;
-
- foreach my $val (@list) {
- if (ref($val) eq "HASH") {
- print $out "${ind}{\n";
- _write_subhash($out,$indent+3,$val);
- print $out "${ind}},\n";
- } elsif (ref($val) eq "ARRAY") {
- print $out "${ind}[\n";
- _write_sublist($out,$indent+3,$val);
- print $out "${ind}],\n";
- } else {
- print $out "${ind}q($val),\n";
- }
- }
-}
-
-############################################################################
-# DO_CLEAN
-############################################################################
-
-sub do_clean {
- print "Cleaning...\n";
-
- system("rm -f $country_iso_file");
- system("rm -f $country_un_file");
- system("rm -f $country_nga_file");
- system("rm -f $country_cia_file");
- system("rm -f $country_iana_file");
- system("rm -f $language_iso_file");
- system("rm -f $currency_iso_file");
- system("rm -f $script_iso_file");
- system("rm -f $script_iso_tmp");
- system("rm -rf __MACOSX");
-}
-
-############################################################################
-# HTML SCRAPING
-############################################################################
-
-sub get_row {
- my($type,$inref) = @_;
-
- return () if ($$inref !~ m,^\s*<tr,);
-
- if ($$inref !~ s,^(.*?)</tr[^>]*>,,) {
- die "ERROR [$type]: malformed HTML\n";
- }
- my $row = $1;
-
- if ($row =~ m,<table,) {
- die "ERROR [$type]: embedded table\n";
- }
-
- my @row;
- while ($row =~ s,(?:.*?)<(td|th)[^>]*>\s*(.*?)\s*</\1[^>]*>,,) {
- my $val = $2;
- push(@row,$val);
- }
-
- return @row;
-}
-
-sub jump_to_row {
- my($inref,$header) = @_;
-
- if ($$inref =~ s,(.*?)\Q$header\E(.*?)</tr[^>]*>(.*?)(?=<tr),,) {
- return 1;
- } else {
- return 0;
- }
-}
-
-sub jump_to_entry {
- my($inref,$value) = @_;
-
- if ($$inref =~ s,(.*?)(?=<(?:td|th)[^>]*>\s*\Q$value\E\s*),,) {
- return 1;
- } else {
- return 0;
- }
-}
-
-sub jump_to_table {
- my($inref) = @_;
-
- if ($$inref =~ s,(.*?)(?=<table),,) {
- return 1;
- } else {
- return 0;
- }
-}
-
-sub get_entry {
- my($inref) = @_;
-
- if ($$inref =~ s,.*?<td[^>]*>\s*(.*?)\s*</td[^>]*>,,) {
- return $1;
- }
- return "";
-}
-
-sub strip_tags {
- my($inref,$close,@tags) = @_;
-
- foreach my $tag (@tags) {
- if ($close) {
- $$inref =~ s,</?$tag[^>]*>, ,g;
- } else {
- $$inref =~ s,<$tag[^>]*>, ,g;
- }
- }
-}
-
-# Local Variables:
-# mode: cperl
-# indent-tabs-mode: nil
-# cperl-indent-level: 3
-# cperl-continued-statement-offset: 2
-# cperl-continued-brace-offset: 0
-# cperl-brace-offset: 0
-# cperl-brace-imaginary-offset: 0
-# cperl-label-offset: -2
-# End: