More EBCDIC stuff:
[p5sagit/p5-mst-13.2.git] / lib / Locale / Country.pm
CommitLineData
47a334e9 1#-----------------------------------------------------------------------
2
3=head1 NAME
4
5Locale::Country - ISO codes for country identification (ISO 3166)
6
7=head1 SYNOPSIS
8
9 use Locale::Country;
88c28ceb 10
47a334e9 11 $country = code2country('jp'); # $country gets 'Japan'
12 $code = country2code('Norway'); # $code gets 'no'
88c28ceb 13
47a334e9 14 @codes = all_country_codes();
15 @names = all_country_names();
88c28ceb 16
47a334e9 17 # add "uk" as a pseudo country code for United Kingdom
18 Locale::Country::_alias_code('uk' => 'gb');
19
20=cut
21
22#-----------------------------------------------------------------------
23
24package Locale::Country;
25use strict;
26require 5.002;
27
28#-----------------------------------------------------------------------
29
30=head1 DESCRIPTION
31
32The C<Locale::Country> module provides access to the ISO
33codes for identifying countries, as defined in ISO 3166.
34You can either access the codes via the L<conversion routines>
35(described below), or with the two functions which return lists
36of all country codes or all country names.
37
38There are three different code sets you can use for identifying
39countries:
40
41=over 4
42
43=item B<alpha-2>
44
45Two letter codes, such as 'tv' for Tuvalu.
46This code set is identified with the symbol C<LOCALE_CODE_ALPHA_2>.
47
48=item B<alpha-3>
49
50Three letter codes, such as 'brb' for Barbados.
51This code set is identified with the symbol C<LOCALE_CODE_ALPHA_3>.
52
53=item B<numeric>
54
55Numeric codes, such as 064 for Bhutan.
56This code set is identified with the symbol C<LOCALE_CODE_NUMERIC>.
57
58=back
59
60All of the routines take an optional additional argument
61which specifies the code set to use.
62If not specified, it defaults to the two-letter codes.
63This is partly for backwards compatibility (previous versions
64of this module only supported the alpha-2 codes), and
65partly because they are the most widely used codes.
66
67The alpha-2 and alpha-3 codes are not case-dependent,
68so you can use 'BO', 'Bo', 'bO' or 'bo' for Bolivia.
69When a code is returned by one of the functions in
70this module, it will always be lower-case.
71
72=cut
73
74#-----------------------------------------------------------------------
75
76require Exporter;
77use Carp;
78use Locale::Constants;
79
80
81#-----------------------------------------------------------------------
82# Public Global Variables
83#-----------------------------------------------------------------------
84use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
85$VERSION = sprintf("%d.%02d", q$Revision: 1.7 $ =~ /(\d+)\.(\d+)/);
86@ISA = qw(Exporter);
87@EXPORT = qw(code2country country2code
88 all_country_codes all_country_names
89 country_code2code
90 LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);
91
92#-----------------------------------------------------------------------
93# Private Global Variables
94#-----------------------------------------------------------------------
# $CODES->[CODESET]->{code} holds the country name for that code.
# Filled from the DATA table by the initialisation block at load time;
# _alias_code() may add further entries.
95my $CODES = [];
# $COUNTRIES->[CODESET]->{lower-cased country name} holds the matching code.
# Filled alongside $CODES; _alias_code() overwrites the entry for the
# aliased country.
96my $COUNTRIES = [];
97
98
99#=======================================================================
100
101=head1 CONVERSION ROUTINES
102
103There are three conversion routines: C<code2country()>, C<country2code()>,
104and C<country_code2code()>.
105
106=over 8
107
108=item code2country( CODE, [ CODESET ] )
109
110This function takes a country code and returns a string
111which contains the name of the country identified.
112If the code is not a valid country code, as defined by ISO 3166,
113then C<undef> will be returned:
114
115 $country = code2country('fi');
116
117=item country2code( STRING, [ CODESET ] )
118
119This function takes a country name and returns the corresponding
120country code, if such exists.
121If the argument could not be identified as a country name,
122then C<undef> will be returned:
123
124 $code = country2code('Norway', LOCALE_CODE_ALPHA_3);
125 # $code will now be 'nor'
126
127The case of the country name is not important.
128See the section L<KNOWN BUGS AND LIMITATIONS> below.
129
130=item country_code2code( CODE, CODESET, CODESET )
131
132This function takes a country code from one code set,
133and returns the corresponding code from another code set.
134
135 $alpha2 = country_code2code('fin',
136 LOCALE_CODE_ALPHA_3 => LOCALE_CODE_ALPHA_2);
137 # $alpha2 will now be 'fi'
138
139If the code passed is not a valid country code in
140the first code set, or if there isn't a code for the
141corresponding country in the second code set,
142then C<undef> will be returned.
143
144=back
145
146=cut
147
148#=======================================================================
sub code2country
{
    #-------------------------------------------------------------------
    # code2country( CODE, [ CODESET ] )
    # Returns the country name registered for CODE in CODESET
    # (LOCALE_CODE_DEFAULT when CODESET is omitted), or undef when
    # CODE is undefined or is not a known code in that code set.
    #-------------------------------------------------------------------
    my ($code, @rest) = @_;
    my $codeset = @rest ? $rest[0] : LOCALE_CODE_DEFAULT;

    defined $code or return undef;

    #-------------------------------------------------------------------
    # Bring the code into the form used for the hash keys.
    # Alpha codes are stored lower-case; numeric codes are stored as
    # 3 digits with leading 0's (eg 052 for Barbados), hence the sprintf.
    #-------------------------------------------------------------------
    my $key;
    if ($codeset != LOCALE_CODE_NUMERIC)
    {
        $key = lc $code;
    }
    else
    {
        # anything containing a non-digit cannot be a numeric code
        $code =~ /\D/ and return undef;
        $key = sprintf("%.3d", $code);
    }

    # undef signals "no such country code"
    return exists $CODES->[$codeset]->{$key}
         ? $CODES->[$codeset]->{$key}
         : undef;
}
185
sub country2code
{
    #-------------------------------------------------------------------
    # country2code( STRING, [ CODESET ] )
    # Returns the code registered for the country name STRING in
    # CODESET (LOCALE_CODE_DEFAULT when CODESET is omitted), or undef
    # when STRING is undefined or is not a known country name.
    # Names are matched case-insensitively: the lookup table is keyed
    # on the lower-cased name.
    #-------------------------------------------------------------------
    my ($country, @rest) = @_;
    my $codeset = @rest ? $rest[0] : LOCALE_CODE_DEFAULT;

    defined $country or return undef;

    my $name = lc $country;

    # undef signals "no such country"
    return exists $COUNTRIES->[$codeset]->{$name}
         ? $COUNTRIES->[$codeset]->{$name}
         : undef;
}
206
sub country_code2code
{
    #-------------------------------------------------------------------
    # country_code2code( CODE, INSET, OUTSET )
    # Translates CODE from code set INSET to the corresponding code in
    # code set OUTSET by going via the country name.
    # Croaks unless called with exactly 3 arguments.
    # Returns undef when INSET and OUTSET are the same code set, when
    # CODE is not known in INSET, or when the country has no code in
    # OUTSET.
    #-------------------------------------------------------------------
    (@_ == 3) or croak "country_code2code() takes 3 arguments!";

    # Exactly three arguments are guaranteed at this point, so unpack
    # them in one go (there is no fourth argument to shift).
    my ($code, $inset, $outset) = @_;

    # Converting within a single code set yields undef, not the code.
    return undef if $inset == $outset;

    my $country = code2country($code, $inset);
    return undef if not defined $country;

    my $outcode = country2code($country, $outset);
    return $outcode;
}
224
225#=======================================================================
226
227=head1 QUERY ROUTINES
228
229There are two functions which can be used to obtain a list of all codes,
230or all country names:
231
232=over 8
233
234=item C<all_country_codes( [ CODESET ] )>
235
236Returns a list of all country codes in the specified code set.
237The codes are guaranteed to be all lower-case,
238and not in any particular order.
239
240=item C<all_country_names( [ CODESET ] )>
241
242Returns a list of all country names for which there is a corresponding
243country code in the specified code set.
244The names are capitalised, and not returned in any particular order.
245
246Not all countries have alpha-3 and numeric codes -
247some just have an alpha-2 code,
248so you'll get a different number of countries
249depending on which code set you specify.
250
251=back
252
253=cut
254
255#=======================================================================
sub all_country_codes
{
    #-------------------------------------------------------------------
    # all_country_codes( [ CODESET ] )
    # Returns every code held for CODESET (LOCALE_CODE_DEFAULT when
    # CODESET is omitted), in no particular order.
    #-------------------------------------------------------------------
    my $codeset = LOCALE_CODE_DEFAULT;
    $codeset = shift if @_ > 0;

    return keys %{ $CODES->[$codeset] };
}
262
sub all_country_names
{
    #-------------------------------------------------------------------
    # all_country_names( [ CODESET ] )
    # Returns the name of every country which has a code in CODESET
    # (LOCALE_CODE_DEFAULT when CODESET is omitted), in no particular
    # order.  Each name is returned once.
    #-------------------------------------------------------------------
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    # _alias_code() adds a second code for an existing country, so the
    # same name can appear more than once among the values; filter out
    # the repeats.
    my %seen;
    return grep { !$seen{$_}++ } values %{ $CODES->[$codeset] };
}
269
270#-----------------------------------------------------------------------
271
272=head1 CODE ALIASING
273
274This module supports a semi-private routine for specifying two letter
275code aliases.
276
277 Locale::Country::_alias_code( ALIAS => CODE [, CODESET ] )
278
279This feature was added as a mechanism for handling
280a "uk" code. The ISO standard says that the two-letter code for
281"United Kingdom" is "gb", whereas domain names are all .uk.
282
283By default the module does not understand "uk", since it is implementing
284an ISO standard. If you would like 'uk' to work as the two-letter
285code for United Kingdom, use the following:
286
287 use Locale::Country;
88c28ceb 288
47a334e9 289 Locale::Country::_alias_code('uk' => 'gb');
290
291With this code, both "uk" and "gb" are valid codes for United Kingdom,
292with the reverse lookup returning "uk" rather than the usual "gb".
293
294=cut
295
296#-----------------------------------------------------------------------
297
sub _alias_code
{
    #-------------------------------------------------------------------
    # _alias_code( ALIAS => CODE [, CODESET ] )
    # Registers ALIAS as an additional code for the country currently
    # identified by CODE in CODESET (LOCALE_CODE_DEFAULT when CODESET
    # is omitted).  Afterwards the reverse lookup for that country
    # returns ALIAS.
    # Returns the alias as stored, or undef (after a carp) when CODE is
    # not a known code in CODESET.
    #-------------------------------------------------------------------
    my $alias   = shift;
    my $real    = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    # code2country() lower-cases alpha codes before looking them up, so
    # store (and look up) alpha codes in lower-case too; otherwise an
    # alias given as 'UK' could never be found.  Numeric codes are used
    # exactly as given.
    if ($codeset != LOCALE_CODE_NUMERIC)
    {
        $alias = lc($alias) if defined $alias;
        $real  = lc($real)  if defined $real;
    }

    if (not exists $CODES->[$codeset]->{$real})
    {
        carp "attempt to alias \"$alias\" to unknown country code \"$real\"\n";
        return undef;
    }

    my $country = $CODES->[$codeset]->{$real};
    $CODES->[$codeset]->{$alias} = $country;
    $COUNTRIES->[$codeset]->{"\L$country"} = $alias;

    return $alias;
}
318
319#-----------------------------------------------------------------------
320
321=head1 EXAMPLES
322
323The following example illustrates use of the C<code2country()> function.
324The user is prompted for a country code, and then told the corresponding
325country name:
326
327 $| = 1; # turn off buffering
88c28ceb 328
47a334e9 329 print "Enter country code: ";
330 chop($code = <STDIN>);
331 $country = code2country($code, LOCALE_CODE_ALPHA_2);
332 if (defined $country)
333 {
334 print "$code = $country\n";
335 }
336 else
337 {
338 print "'$code' is not a valid country code!\n";
339 }
340
341=head1 DOMAIN NAMES
342
343Most top-level domain names are based on these codes,
344but there are certain codes which aren't.
345If you are using this module to identify country from hostname,
346your best bet is to preprocess the country code.
347
348For example, B<edu>, B<com>, B<gov> and friends would map to B<us>;
349B<uk> would map to B<gb>. Any others?
350
351=head1 KNOWN BUGS AND LIMITATIONS
352
353=over 4
354
355=item *
356
357When using C<country2code()>, the country name must currently appear
358exactly as it does in the source of the module. For example,
359
360 country2code('United States')
361
362will return B<us>, as expected. But the following will all return C<undef>:
363
364 country2code('United States of America')
365 country2code('Great Britain')
366 country2code('U.S.A.')
367
368If there's need for it, a future version could have variants
369for country names.
370
371=item *
372
373In the current implementation, all data is read in when the
374module is loaded, and then held in memory.
375A lazy implementation would be more memory friendly.
376
377=back
378
379=head1 SEE ALSO
380
381=over 4
382
383=item Locale::Language
384
385ISO two letter codes for identification of language (ISO 639).
386
387=item Locale::Currency
388
389ISO three letter codes for identification of currencies
390and funds (ISO 4217).
391
392=item ISO 3166
393
394The ISO standard which defines these codes.
395
396=item http://www.din.de/gremien/nas/nabd/iso3166ma/
397
398Official home page for ISO 3166
399
400=item http://www.egt.ie/standards/iso3166/iso3166-1-en.html
401
402Another useful, but not official, home page.
403
404=item http://www.cia.gov/cia/publications/factbook/docs/app-f.html
405
406An appendix in the CIA world fact book which lists country codes
407as defined by ISO 3166, FIPS 10-4, and internet domain names.
408
409=back
410
411
412=head1 AUTHOR
413
414Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>
415
416=head1 COPYRIGHT
417
418Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).
419
420This module is free software; you can redistribute it and/or
421modify it under the same terms as Perl itself.
422
423=cut
424
425#-----------------------------------------------------------------------
426
#=======================================================================
# initialisation code - load the DATA table into the lookup hashes
# ($CODES and $COUNTRIES) for each of the three code sets
#=======================================================================
{
    my ($alpha2, $alpha3, $numeric);
    my $country;

    # while (<DATA>) assigns to the global $_; localise it so that
    # loading this module cannot clobber a $_ in use by the caller.
    local $_;

    while (<DATA>)
    {
        next unless /\S/;

        # chomp rather than chop: only strip a trailing newline, so a
        # final line without one does not lose its last character.
        chomp;

        # each record is alpha2:alpha3:numeric:country name
        ($alpha2, $alpha3, $numeric, $country) = split(/:/, $_, 4);

        $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $country;
        $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$country"} = $alpha2;

        # not every country has an alpha-3 code
        if ($alpha3)
        {
            $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $country;
            $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$country"} = $alpha3;
        }

        # not every country has a numeric code
        if ($numeric)
        {
            $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $country;
            $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$country"} = $numeric;
        }
    }

    # everything has been read; release the filehandle
    close(DATA);
}
458
4591;
460
461__DATA__
462ad:and:020:Andorra
463ae:are:784:United Arab Emirates
464af:afg:004:Afghanistan
465ag:atg:028:Antigua and Barbuda
466ai:aia:660:Anguilla
467al:alb:008:Albania
468am:arm:051:Armenia
469an:ant:530:Netherlands Antilles
470ao:ago:024:Angola
471aq:::Antarctica
472ar:arg:032:Argentina
473as:asm:016:American Samoa
474at:aut:040:Austria
475au:aus:036:Australia
476aw:abw:533:Aruba
477az:aze:031:Azerbaijan
478ba:bih:070:Bosnia and Herzegovina
479bb:brb:052:Barbados
480bd:bgd:050:Bangladesh
481be:bel:056:Belgium
482bf:bfa:854:Burkina Faso
483bg:bgr:100:Bulgaria
484bh:bhr:048:Bahrain
485bi:bdi:108:Burundi
486bj:ben:204:Benin
487bm:bmu:060:Bermuda
488bn:brn:096:Brunei Darussalam
489bo:bol:068:Bolivia
490br:bra:076:Brazil
491bs:bhs:044:Bahamas
492bt:btn:064:Bhutan
493bv:::Bouvet Island
494bw:bwa:072:Botswana
495by:blr:112:Belarus
496bz:blz:084:Belize
497ca:can:124:Canada
498cc:::Cocos (Keeling) Islands
499cd:cod:180:Congo, The Democratic Republic of the
500cf:caf:140:Central African Republic
501cg:cog:178:Congo
502ch:che:756:Switzerland
503ci:civ:384:Cote D'Ivoire
504ck:cok:184:Cook Islands
505cl:chl:152:Chile
506cm:cmr:120:Cameroon
507cn:chn:156:China
508co:col:170:Colombia
509cr:cri:188:Costa Rica
510cu:cub:192:Cuba
511cv:cpv:132:Cape Verde
512cx:::Christmas Island
513cy:cyp:196:Cyprus
514cz:cze:203:Czech Republic
515de:deu:276:Germany
516dj:dji:262:Djibouti
517dk:dnk:208:Denmark
518dm:dma:212:Dominica
519do:dom:214:Dominican Republic
520dz:dza:012:Algeria
521ec:ecu:218:Ecuador
522ee:est:233:Estonia
523eg:egy:818:Egypt
524eh:esh:732:Western Sahara
525er:eri:232:Eritrea
526es:esp:724:Spain
527et:eth:231:Ethiopia
528fi:fin:246:Finland
529fj:fji:242:Fiji
530fk:flk:238:Falkland Islands (Malvinas)
531fm:fsm:583:Micronesia, Federated States of
532fo:fro:234:Faroe Islands
533fr:fra:250:France
534fx:::France, Metropolitan
535ga:gab:266:Gabon
536gb:gbr:826:United Kingdom
537gd:grd:308:Grenada
538ge:geo:268:Georgia
539gf:guf:254:French Guiana
540gh:gha:288:Ghana
541gi:gib:292:Gibraltar
542gl:grl:304:Greenland
543gm:gmb:270:Gambia
544gn:gin:324:Guinea
545gp:glp:312:Guadeloupe
546gq:gnq:226:Equatorial Guinea
547gr:grc:300:Greece
548gs:::South Georgia and the South Sandwich Islands
549gt:gtm:320:Guatemala
550gu:gum:316:Guam
551gw:gnb:624:Guinea-Bissau
552gy:guy:328:Guyana
553hk:hkg:344:Hong Kong
554hm:::Heard Island and McDonald Islands
555hn:hnd:340:Honduras
556hr:hrv:191:Croatia
557ht:hti:332:Haiti
558hu:hun:348:Hungary
559id:idn:360:Indonesia
560ie:irl:372:Ireland
561il:isr:376:Israel
562in:ind:356:India
563io:::British Indian Ocean Territory
564iq:irq:368:Iraq
565ir:irn:364:Iran, Islamic Republic of
566is:isl:352:Iceland
567it:ita:380:Italy
568jm:jam:388:Jamaica
569jo:jor:400:Jordan
570jp:jpn:392:Japan
571ke:ken:404:Kenya
572kg:kgz:417:Kyrgyzstan
573kh:khm:116:Cambodia
574ki:kir:296:Kiribati
575km:com:174:Comoros
576kn:kna:659:Saint Kitts and Nevis
577kp:prk:408:Korea, Democratic People's Republic of
578kr:kor:410:Korea, Republic of
579kw:kwt:414:Kuwait
580ky:cym:136:Cayman Islands
581kz:kaz:398:Kazakstan
582la:lao:418:Lao People's Democratic Republic
583lb:lbn:422:Lebanon
584lc:lca:662:Saint Lucia
585li:lie:438:Liechtenstein
586lk:lka:144:Sri Lanka
587lr:lbr:430:Liberia
588ls:lso:426:Lesotho
589lt:ltu:440:Lithuania
590lu:lux:442:Luxembourg
591lv:lva:428:Latvia
592ly:lby:434:Libyan Arab Jamahiriya
593ma:mar:504:Morocco
594mc:mco:492:Monaco
595md:mda:498:Moldova, Republic of
596mg:mdg:450:Madagascar
597mh:mhl:584:Marshall Islands
598mk:mkd:807:Macedonia, the Former Yugoslav Republic of
599ml:mli:466:Mali
600mm:mmr:104:Myanmar
601mn:mng:496:Mongolia
602mo:mac:446:Macau
603mp:mnp:580:Northern Mariana Islands
604mq:mtq:474:Martinique
605mr:mrt:478:Mauritania
606ms:msr:500:Montserrat
607mt:mlt:470:Malta
608mu:mus:480:Mauritius
609mv:mdv:462:Maldives
610mw:mwi:454:Malawi
611mx:mex:484:Mexico
612my:mys:458:Malaysia
613mz:moz:508:Mozambique
614na:nam:516:Namibia
615nc:ncl:540:New Caledonia
616ne:ner:562:Niger
617nf:nfk:574:Norfolk Island
618ng:nga:566:Nigeria
619ni:nic:558:Nicaragua
620nl:nld:528:Netherlands
621no:nor:578:Norway
622np:npl:524:Nepal
623nr:nru:520:Nauru
624nu:niu:570:Niue
625nz:nzl:554:New Zealand
626om:omn:512:Oman
627pa:pan:591:Panama
628pe:per:604:Peru
629pf:pyf:258:French Polynesia
630pg:png:598:Papua New Guinea
631ph:phl:608:Philippines
632pk:pak:586:Pakistan
633pl:pol:616:Poland
634pm:spm:666:Saint Pierre and Miquelon
635pn:pcn:612:Pitcairn
636pr:pri:630:Puerto Rico
637ps:pse:275:Palestinian Territory, Occupied
638pt:prt:620:Portugal
639pw:plw:585:Palau
640py:pry:600:Paraguay
641qa:qat:634:Qatar
642re:reu:638:Reunion
643ro:rom:642:Romania
644ru:rus:643:Russian Federation
645rw:rwa:646:Rwanda
646sa:sau:682:Saudi Arabia
647sb:slb:090:Solomon Islands
648sc:syc:690:Seychelles
649sd:sdn:736:Sudan
650se:swe:752:Sweden
651sg:sgp:702:Singapore
652sh:shn:654:Saint Helena
653si:svn:705:Slovenia
654sj:sjm:744:Svalbard and Jan Mayen
655sk:svk:703:Slovakia
656sl:sle:694:Sierra Leone
657sm:smr:674:San Marino
658sn:sen:686:Senegal
659so:som:706:Somalia
660sr:sur:740:Suriname
661st:stp:678:Sao Tome and Principe
662sv:slv:222:El Salvador
663sy:syr:760:Syrian Arab Republic
664sz:swz:748:Swaziland
665tc:tca:796:Turks and Caicos Islands
666td:tcd:148:Chad
667tf:::French Southern Territories
668tg:tgo:768:Togo
669th:tha:764:Thailand
670tj:tjk:762:Tajikistan
671tk:tkl:772:Tokelau
672tm:tkm:795:Turkmenistan
673tn:tun:788:Tunisia
674to:ton:776:Tonga
675tp:tmp:626:East Timor
676tr:tur:792:Turkey
677tt:tto:780:Trinidad and Tobago
678tv:tuv:798:Tuvalu
679tw:twn:158:Taiwan, Province of China
680tz:tza:834:Tanzania, United Republic of
681ua:ukr:804:Ukraine
682ug:uga:800:Uganda
683um:::United States Minor Outlying Islands
684us:usa:840:United States
685uy:ury:858:Uruguay
686uz:uzb:860:Uzbekistan
687va:vat:336:Holy See (Vatican City State)
688vc:vct:670:Saint Vincent and the Grenadines
689ve:ven:862:Venezuela
690vg:vgb:092:Virgin Islands, British
691vi:vir:850:Virgin Islands, U.S.
692vn:vnm:704:Vietnam
693vu:vut:548:Vanuatu
694wf:wlf:876:Wallis and Futuna
695ws:wsm:882:Samoa
696ye:yem:887:Yemen
697yt:::Mayotte
698yu:yug:891:Yugoslavia
699za:zaf:710:South Africa
700zm:zmb:894:Zambia
701zr:::Zaire
702zw:zwe:716:Zimbabwe