Commit | Line | Data |
47a334e9 |
1 | #----------------------------------------------------------------------- |
2 | |
3 | =head1 NAME |
4 | |
5 | Locale::Language - ISO two letter codes for language identification (ISO 639) |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Locale::Language; |
88c28ceb |
10 | |
47a334e9 |
11 | $lang = code2language('en'); # $lang gets 'English' |
12 | $code = language2code('French'); # $code gets 'fr' |
88c28ceb |
13 | |
47a334e9 |
14 | @codes = all_language_codes(); |
15 | @names = all_language_names(); |
16 | |
17 | =cut |
18 | |
19 | #----------------------------------------------------------------------- |
20 | |
21 | package Locale::Language; |
22 | use strict; |
23 | require 5.002; |
24 | |
25 | #----------------------------------------------------------------------- |
26 | |
27 | =head1 DESCRIPTION |
28 | |
29 | The C<Locale::Language> module provides access to the ISO two-letter |
30 | codes for identifying languages, as defined in ISO 639. You can either |
31 | access the codes via the L<conversion routines> (described below), |
32 | or with the two functions which return lists of all language codes or |
33 | all language names. |
34 | |
35 | =cut |
36 | |
37 | #----------------------------------------------------------------------- |
38 | |
39 | require Exporter; |
40 | |
41 | #----------------------------------------------------------------------- |
42 | # Public Global Variables |
43 | #----------------------------------------------------------------------- |
use vars qw($VERSION @ISA @EXPORT);

# The version string is built from the RCS Revision keyword, with the
# minor number zero-padded to two digits.
$VERSION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
@ISA     = qw(Exporter);
@EXPORT  = qw(
    &code2language
    &language2code
    &all_language_codes
    &all_language_names
);

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
# %CODES     : language code            => language name
# %LANGUAGES : lower-cased language name => language code
# Both are filled by the initialisation block that reads <DATA>.
my (%CODES, %LANGUAGES);
55 | |
56 | |
57 | #======================================================================= |
58 | |
59 | =head1 CONVERSION ROUTINES |
60 | |
61 | There are two conversion routines: C<code2language()> and C<language2code()>. |
62 | |
63 | =over 8 |
64 | |
65 | =item code2language() |
66 | |
67 | This function takes a two letter language code and returns a string |
68 | which contains the name of the language identified. If the code is |
69 | not a valid language code, as defined by ISO 639, then C<undef> |
70 | will be returned. |
71 | |
72 | $lang = code2language($code); |
73 | |
74 | =item language2code() |
75 | |
76 | This function takes a language name and returns the corresponding |
77 | two letter language code, if such exists. |
78 | If the argument could not be identified as a language name, |
79 | then C<undef> will be returned. |
80 | |
81 | $code = language2code('French'); |
82 | |
83 | The case of the language name is not important. |
84 | See the section L<KNOWN BUGS AND LIMITATIONS> below. |
85 | |
86 | =back |
87 | |
88 | =cut |
89 | |
90 | #======================================================================= |
#-----------------------------------------------------------------------
# code2language( CODE )
#
# Returns the language name stored in %CODES for CODE.  CODE is
# lower-cased before the lookup, so the case of the argument does not
# matter.  Returns undef if CODE is undefined or is not a known code.
#-----------------------------------------------------------------------
sub code2language
{
    my ($code) = @_;

    return undef unless defined $code;

    my $key = lc($code);

    # no such language code!
    return undef unless exists $CODES{$key};

    return $CODES{$key};
}
110 | |
#-----------------------------------------------------------------------
# language2code( NAME )
#
# Returns the language code stored in %LANGUAGES for NAME.  NAME is
# lower-cased before the lookup, so the case of the argument does not
# matter.  Returns undef if NAME is undefined or is not a known name.
#-----------------------------------------------------------------------
sub language2code
{
    my ($lang) = @_;

    return undef unless defined $lang;

    my $key = lc($lang);

    # no such language!
    return undef unless exists $LANGUAGES{$key};

    return $LANGUAGES{$key};
}
130 | |
131 | #======================================================================= |
132 | |
133 | =head1 QUERY ROUTINES |
134 | |
135 | There are two functions which can be used to obtain a list of all |
136 | language codes, or all language names: |
137 | |
138 | =over 8 |
139 | |
140 | =item C<all_language_codes()> |
141 | |
142 | Returns a list of all two-letter language codes. |
143 | The codes are guaranteed to be all lower-case, |
144 | and are not returned in any particular order. |
145 | |
146 | =item C<all_language_names()> |
147 | |
148 | Returns a list of all language names for which there is a corresponding |
149 | two-letter language code. The names are capitalised, and not returned |
150 | in any particular order. |
151 | |
152 | =back |
153 | |
154 | =cut |
155 | |
156 | #======================================================================= |
#-----------------------------------------------------------------------
# all_language_codes()
#
# Returns every key of %CODES (the language codes), in hash order.
#-----------------------------------------------------------------------
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}
161 | |
#-----------------------------------------------------------------------
# all_language_names()
#
# Returns every value of %CODES (the language names), in hash order.
#-----------------------------------------------------------------------
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}
166 | |
167 | #----------------------------------------------------------------------- |
168 | |
169 | =head1 EXAMPLES |
170 | |
171 | The following example illustrates use of the C<code2language()> function. |
172 | The user is prompted for a language code, and then told the corresponding |
173 | language name: |
174 | |
175 | $| = 1; # turn off buffering |
88c28ceb |
176 | |
47a334e9 |
177 | print "Enter language code: "; |
178 | chomp($code = <STDIN>); |
179 | $lang = code2language($code); |
180 | if (defined $lang) |
181 | { |
182 | print "$code = $lang\n"; |
183 | } |
184 | else |
185 | { |
186 | print "'$code' is not a valid language code!\n"; |
187 | } |
188 | |
189 | =head1 KNOWN BUGS AND LIMITATIONS |
190 | |
191 | =over 4 |
192 | |
193 | =item * |
194 | |
195 | In the current implementation, all data is read in when the |
196 | module is loaded, and then held in memory. |
197 | A lazy implementation would be more memory friendly. |
198 | |
199 | =item * |
200 | |
201 | Currently just supports the two letter language codes - |
202 | there are also three-letter codes, and numbers. |
203 | Would these be of any use to anyone? |
204 | |
205 | =back |
206 | |
207 | =head1 SEE ALSO |
208 | |
209 | =over 4 |
210 | |
211 | =item Locale::Country |
212 | |
213 | ISO codes for identification of country (ISO 3166). |
214 | Supports 2-letter, 3-letter, and numeric country codes. |
215 | |
216 | =item Locale::Currency |
217 | |
218 | ISO three letter codes for identification of currencies and funds (ISO 4217). |
219 | |
220 | =item ISO 639:1988 (E/F) |
221 | |
222 | Code for the representation of names of languages. |
223 | |
224 | =item http://lcweb.loc.gov/standards/iso639-2/langhome.html |
225 | |
226 | Home page for ISO 639-2 |
227 | |
228 | =back |
229 | |
230 | |
231 | =head1 AUTHOR |
232 | |
233 | Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> |
234 | |
235 | =head1 COPYRIGHT |
236 | |
237 | Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). |
238 | |
239 | This module is free software; you can redistribute it and/or |
240 | modify it under the same terms as Perl itself. |
241 | |
242 | =cut |
243 | |
244 | #----------------------------------------------------------------------- |
245 | |
246 | #======================================================================= |
247 | # initialisation code - stuff the DATA into the CODES hash |
248 | #======================================================================= |
#-----------------------------------------------------------------------
# Read the "code:Language Name" records from the __DATA__ section and
# fill both lookup tables:
#     %CODES     : code                  => language name
#     %LANGUAGES : lower-cased name      => code
# Blank lines (used to group the records) are ignored.
#-----------------------------------------------------------------------
{
    no utf8;    # __DATA__ contains Latin-1

    while (my $line = <DATA>)
    {
        next unless $line =~ /\S/;

        # chomp rather than chop: chop removes the final character
        # unconditionally, which would truncate the last record if the
        # file does not end with a newline.
        chomp($line);

        # Limit of 2 so that a name is kept whole even if it were to
        # contain a colon itself.
        my ($code, $language) = split(/:/, $line, 2);

        # Skip a malformed record (no colon) instead of storing an
        # undefined name and lower-casing undef.
        next unless defined $language;

        $CODES{$code}              = $language;
        $LANGUAGES{"\L$language"}  = $code;
    }

    # Everything has been read; release the filehandle.
    close(DATA);
}
265 | |
266 | 1; |
267 | |
268 | __DATA__ |
269 | aa:Afar |
270 | ab:Abkhazian |
271 | ae:Avestan |
272 | af:Afrikaans |
273 | am:Amharic |
274 | ar:Arabic |
275 | as:Assamese |
276 | ay:Aymara |
277 | az:Azerbaijani |
278 | |
279 | ba:Bashkir |
280 | be:Belarusian |
281 | bg:Bulgarian |
282 | bh:Bihari |
283 | bi:Bislama |
284 | bn:Bengali |
285 | bo:Tibetan |
286 | br:Breton |
287 | bs:Bosnian |
288 | |
289 | ca:Catalan |
290 | ce:Chechen |
291 | ch:Chamorro |
292 | co:Corsican |
293 | cs:Czech |
294 | cu:Church Slavic |
295 | cv:Chuvash |
296 | cy:Welsh |
297 | |
298 | da:Danish |
299 | de:German |
300 | dz:Dzongkha |
301 | |
302 | el:Greek |
303 | en:English |
304 | eo:Esperanto |
305 | es:Spanish |
306 | et:Estonian |
307 | eu:Basque |
308 | |
309 | fa:Persian |
310 | fi:Finnish |
311 | fj:Fijian |
312 | fo:Faeroese |
313 | fr:French |
314 | fy:Frisian |
315 | |
316 | ga:Irish |
317 | gd:Gaelic (Scots) |
318 | gl:Gallegan |
319 | gn:Guarani |
320 | gu:Gujarati |
321 | gv:Manx |
322 | |
323 | ha:Hausa |
324 | he:Hebrew |
325 | hi:Hindi |
326 | ho:Hiri Motu |
327 | hr:Croatian |
328 | hu:Hungarian |
329 | hy:Armenian |
330 | hz:Herero |
331 | |
332 | ia:Interlingua |
333 | id:Indonesian |
334 | ie:Interlingue |
335 | ik:Inupiaq |
336 | is:Icelandic |
337 | it:Italian |
338 | iu:Inuktitut |
339 | |
340 | ja:Japanese |
341 | jw:Javanese |
342 | |
343 | ka:Georgian |
344 | ki:Kikuyu |
345 | kj:Kuanyama |
346 | kk:Kazakh |
347 | kl:Kalaallisut |
348 | km:Khmer |
349 | kn:Kannada |
350 | ko:Korean |
351 | ks:Kashmiri |
352 | ku:Kurdish |
353 | kv:Komi |
354 | kw:Cornish |
355 | ky:Kirghiz |
356 | |
357 | la:Latin |
358 | lb:Letzeburgesch |
359 | ln:Lingala |
360 | lo:Lao |
361 | lt:Lithuanian |
362 | lv:Latvian |
363 | |
364 | mg:Malagasy |
365 | mh:Marshall |
366 | mi:Maori |
367 | mk:Macedonian |
368 | ml:Malayalam |
369 | mn:Mongolian |
370 | mo:Moldavian |
371 | mr:Marathi |
372 | ms:Malay |
373 | mt:Maltese |
374 | my:Burmese |
375 | |
376 | na:Nauru |
377 | nb:Norwegian Bokmål |
378 | nd:Ndebele, North |
379 | ne:Nepali |
380 | ng:Ndonga |
381 | nl:Dutch |
382 | nn:Norwegian Nynorsk |
383 | no:Norwegian |
384 | nr:Ndebele, South |
385 | nv:Navajo |
386 | ny:Chichewa; Nyanja |
387 | |
388 | oc:Occitan (post 1500) |
389 | om:Oromo |
390 | or:Oriya |
391 | os:Ossetian; Ossetic |
392 | |
393 | pa:Panjabi |
394 | pi:Pali |
395 | pl:Polish |
396 | ps:Pushto |
397 | pt:Portuguese |
398 | |
399 | qu:Quechua |
400 | |
401 | rm:Rhaeto-Romance |
402 | rn:Rundi |
403 | ro:Romanian |
404 | ru:Russian |
405 | rw:Kinyarwanda |
406 | |
407 | sa:Sanskrit |
408 | sc:Sardinian |
409 | sd:Sindhi |
410 | se:Sami |
411 | sg:Sango |
412 | si:Sinhalese |
413 | sk:Slovak |
414 | sl:Slovenian |
415 | sm:Samoan |
416 | sn:Shona |
417 | so:Somali |
418 | sq:Albanian |
419 | sr:Serbian |
420 | ss:Swati |
421 | st:Sotho |
422 | su:Sundanese |
423 | sv:Swedish |
424 | sw:Swahili |
425 | |
426 | ta:Tamil |
427 | te:Telugu |
428 | tg:Tajik |
429 | th:Thai |
430 | ti:Tigrinya |
431 | tk:Turkmen |
432 | tl:Tagalog |
433 | tn:Tswana |
434 | to:Tonga |
435 | tr:Turkish |
436 | ts:Tsonga |
437 | tt:Tatar |
438 | tw:Twi |
439 | |
440 | ug:Uighur |
441 | uk:Ukrainian |
442 | ur:Urdu |
443 | uz:Uzbek |
444 | |
445 | vi:Vietnamese |
446 | vo:Volapük |
447 | |
448 | wo:Wolof |
449 | |
450 | xh:Xhosa |
451 | |
452 | yi:Yiddish |
453 | yo:Yoruba |
454 | |
455 | za:Zhuang |
456 | zh:Chinese |
457 | zu:Zulu |