Commit | Line | Data |
47a334e9 |
1 | #----------------------------------------------------------------------- |
2 | |
3 | =head1 NAME |
4 | |
5 | Locale::Language - ISO two letter codes for language identification (ISO 639) |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Locale::Language; |
88c28ceb |
10 | |
47a334e9 |
11 | $lang = code2language('en'); # $lang gets 'English' |
12 | $code = language2code('French'); # $code gets 'fr' |
88c28ceb |
13 | |
47a334e9 |
14 | @codes = all_language_codes(); |
15 | @names = all_language_names(); |
16 | |
17 | =cut |
18 | |
19 | #----------------------------------------------------------------------- |
20 | |
21 | package Locale::Language; |
22 | use strict; |
23 | require 5.002; |
24 | |
25 | #----------------------------------------------------------------------- |
26 | |
27 | =head1 DESCRIPTION |
28 | |
29 | The C<Locale::Language> module provides access to the ISO two-letter |
30 | codes for identifying languages, as defined in ISO 639. You can either |
31 | access the codes via the L<conversion routines|/"CONVERSION ROUTINES"> (described below), |
6b6e008c |
32 | or via the two functions which return lists of all language codes or |
47a334e9 |
33 | all language names. |
34 | |
35 | =cut |
36 | |
37 | #----------------------------------------------------------------------- |
38 | |
39 | require Exporter; |
40 | |
41 | #----------------------------------------------------------------------- |
42 | # Public Global Variables |
43 | #----------------------------------------------------------------------- |
use vars qw($VERSION @ISA @EXPORT);

# $VERSION is built from the two numeric fields of the RCS "Revision"
# keyword below, formatted as major.minor with a two-digit minor part.
$VERSION = sprintf('%d.%02d', q$Revision: 2.0 $ =~ /(\d+)\.(\d+)/);

# The module inherits from Exporter and lists its four public functions
# in @EXPORT.
@ISA    = ('Exporter');
@EXPORT = (
    '&code2language',
    '&language2code',
    '&all_language_codes',
    '&all_language_names',
);

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------

# %CODES     : language code         => language name
# %LANGUAGES : lower-cased language name => language code
# Both are filled by the initialisation code that reads the DATA section.
my (%CODES, %LANGUAGES);
55 | |
56 | |
57 | #======================================================================= |
58 | |
59 | =head1 CONVERSION ROUTINES |
60 | |
61 | There are two conversion routines: C<code2language()> and C<language2code()>. |
62 | |
63 | =over 8 |
64 | |
65 | =item code2language() |
66 | |
67 | This function takes a two letter language code and returns a string |
68 | which contains the name of the language identified. If the code is |
69 | not a valid language code, as defined by ISO 639, then C<undef> |
70 | will be returned. |
71 | |
72 | $lang = code2language($code); |
73 | |
74 | =item language2code() |
75 | |
76 | This function takes a language name and returns the corresponding |
77 | two letter language code, if such exists. |
78 | If the argument could not be identified as a language name, |
79 | then C<undef> will be returned. |
80 | |
81 | $code = language2code('French'); |
82 | |
83 | The case of the language name is not important. |
84 | See the section L</"KNOWN BUGS AND LIMITATIONS"> below. |
85 | |
86 | =back |
87 | |
88 | =cut |
89 | |
90 | #======================================================================= |
# code2language( CODE )
#
# Returns the language name stored in %CODES for CODE.  The lookup is
# done on the lower-cased code, so 'EN' and 'en' are equivalent.
# Returns undef when CODE is undefined or is not a key of %CODES.
sub code2language
{
    my ($wanted) = @_;

    return undef unless defined $wanted;

    my $key = lc $wanted;

    # Unknown codes fall through to undef.
    return exists $CODES{$key} ? $CODES{$key} : undef;
}
110 | |
# language2code( NAME )
#
# Returns the language code stored in %LANGUAGES for NAME.  The name is
# lower-cased before the lookup, so its case does not matter.
# Returns undef when NAME is undefined or is not a key of %LANGUAGES.
sub language2code
{
    my ($wanted) = @_;

    return undef unless defined $wanted;

    my $key = lc $wanted;

    # Unknown names fall through to undef.
    return exists $LANGUAGES{$key} ? $LANGUAGES{$key} : undef;
}
130 | |
131 | #======================================================================= |
132 | |
133 | =head1 QUERY ROUTINES |
134 | |
135 | There are two functions which can be used to obtain a list of all |
136 | language codes, or all language names: |
137 | |
138 | =over 8 |
139 | |
140 | =item C<all_language_codes()> |
141 | |
142 | Returns a list of all two-letter language codes. |
143 | The codes are guaranteed to be all lower-case, |
144 | but are returned in no particular order. |
145 | |
146 | =item C<all_language_names()> |
147 | |
148 | Returns a list of all language names for which there is a corresponding |
149 | two-letter language code. The names are capitalised, and not returned |
150 | in any particular order. |
151 | |
152 | =back |
153 | |
154 | =cut |
155 | |
156 | #======================================================================= |
# all_language_codes()
#
# Returns the keys of %CODES, i.e. every known language code, in
# whatever order the hash yields them.  In scalar context the result
# is the number of codes.
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}
161 | |
# all_language_names()
#
# Returns the values of %CODES, i.e. every known language name, in
# whatever order the hash yields them.  In scalar context the result
# is the number of names.
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}
166 | |
167 | #----------------------------------------------------------------------- |
168 | |
169 | =head1 EXAMPLES |
170 | |
171 | The following example illustrates use of the C<code2language()> function. |
172 | The user is prompted for a language code, and then told the corresponding |
173 | language name: |
174 | |
175 | $| = 1; # turn off buffering |
88c28ceb |
176 | |
47a334e9 |
177 | print "Enter language code: "; |
178 | chomp($code = <STDIN>); |
179 | $lang = code2language($code); |
180 | if (defined $lang) |
181 | { |
182 | print "$code = $lang\n"; |
183 | } |
184 | else |
185 | { |
186 | print "'$code' is not a valid language code!\n"; |
187 | } |
188 | |
189 | =head1 KNOWN BUGS AND LIMITATIONS |
190 | |
191 | =over 4 |
192 | |
193 | =item * |
194 | |
195 | In the current implementation, all data is read in when the |
196 | module is loaded, and then held in memory. |
197 | A lazy implementation would be more memory friendly. |
198 | |
199 | =item * |
200 | |
201 | Currently just supports the two letter language codes - |
202 | there are also three-letter codes, and numbers. |
203 | Would these be of any use to anyone? |
204 | |
205 | =back |
206 | |
207 | =head1 SEE ALSO |
208 | |
209 | =over 4 |
210 | |
211 | =item Locale::Country |
212 | |
213 | ISO codes for identification of country (ISO 3166). |
214 | Supports 2-letter, 3-letter, and numeric country codes. |
215 | |
6b6e008c |
216 | =item Locale::Script |
217 | |
218 | ISO codes for identification of written scripts (ISO 15924). |
219 | |
47a334e9 |
220 | =item Locale::Currency |
221 | |
222 | ISO three letter codes for identification of currencies and funds (ISO 4217). |
223 | |
224 | =item ISO 639:1988 (E/F) |
225 | |
226 | Code for the representation of names of languages. |
227 | |
228 | =item http://lcweb.loc.gov/standards/iso639-2/langhome.html |
229 | |
6b6e008c |
230 | Home page for ISO 639-2. |
47a334e9 |
231 | |
232 | =back |
233 | |
234 | |
235 | =head1 AUTHOR |
236 | |
6b6e008c |
237 | Neil Bowers E<lt>neil@bowers.comE<gt> |
47a334e9 |
238 | |
239 | =head1 COPYRIGHT |
240 | |
6b6e008c |
241 | Copyright (C) 2002, Neil Bowers. |
242 | |
47a334e9 |
243 | Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). |
244 | |
245 | This module is free software; you can redistribute it and/or |
246 | modify it under the same terms as Perl itself. |
247 | |
248 | =cut |
249 | |
250 | #----------------------------------------------------------------------- |
251 | |
252 | #======================================================================= |
253 | # initialisation code - stuff the DATA into the CODES hash |
254 | #======================================================================= |
# Load the "code:Language name" records from the DATA section into the
# two lookup hashes:
#   %CODES     maps code => language name (as written in DATA)
#   %LANGUAGES maps lower-cased language name => code
{
    my $line;
    my $code;
    my $language;

    # Read into a lexical rather than $_, so that loading this module
    # does not clobber the $_ of the code that required it.
    while (defined($line = <DATA>))
    {
        # chomp only removes a trailing newline; chop would eat the
        # last letter of a final line that has no newline.
        chomp($line);
        $line =~ s/\r$//;               # tolerate CRLF line endings

        next unless $line =~ /\S/;      # blank separator lines

        ($code, $language) = split(/:/, $line, 2);

        # A line without a ':' has no language name; skip it instead
        # of storing an undef entry.
        next unless defined $language;

        $CODES{$code} = $language;
        $LANGUAGES{"\L$language"} = $code;
    }

    # Everything has been read; release the filehandle.
    close(DATA);
}
269 | |
270 | 1; |
271 | |
272 | __DATA__ |
273 | aa:Afar |
274 | ab:Abkhazian |
275 | ae:Avestan |
276 | af:Afrikaans |
277 | am:Amharic |
278 | ar:Arabic |
279 | as:Assamese |
280 | ay:Aymara |
281 | az:Azerbaijani |
282 | |
283 | ba:Bashkir |
284 | be:Belarusian |
285 | bg:Bulgarian |
286 | bh:Bihari |
287 | bi:Bislama |
288 | bn:Bengali |
289 | bo:Tibetan |
290 | br:Breton |
291 | bs:Bosnian |
292 | |
293 | ca:Catalan |
294 | ce:Chechen |
295 | ch:Chamorro |
296 | co:Corsican |
297 | cs:Czech |
298 | cu:Church Slavic |
299 | cv:Chuvash |
300 | cy:Welsh |
301 | |
302 | da:Danish |
303 | de:German |
304 | dz:Dzongkha |
305 | |
306 | el:Greek |
307 | en:English |
308 | eo:Esperanto |
309 | es:Spanish |
310 | et:Estonian |
311 | eu:Basque |
312 | |
313 | fa:Persian |
314 | fi:Finnish |
315 | fj:Fijian |
316 | fo:Faeroese |
317 | fr:French |
318 | fy:Frisian |
319 | |
320 | ga:Irish |
321 | gd:Gaelic (Scots) |
322 | gl:Gallegan |
323 | gn:Guarani |
324 | gu:Gujarati |
325 | gv:Manx |
326 | |
327 | ha:Hausa |
328 | he:Hebrew |
329 | hi:Hindi |
330 | ho:Hiri Motu |
331 | hr:Croatian |
332 | hu:Hungarian |
333 | hy:Armenian |
334 | hz:Herero |
335 | |
336 | ia:Interlingua |
337 | id:Indonesian |
338 | ie:Interlingue |
339 | ik:Inupiaq |
340 | is:Icelandic |
341 | it:Italian |
342 | iu:Inuktitut |
343 | |
344 | ja:Japanese |
345 | jw:Javanese |
346 | |
347 | ka:Georgian |
348 | ki:Kikuyu |
349 | kj:Kuanyama |
350 | kk:Kazakh |
351 | kl:Kalaallisut |
352 | km:Khmer |
353 | kn:Kannada |
354 | ko:Korean |
355 | ks:Kashmiri |
356 | ku:Kurdish |
357 | kv:Komi |
358 | kw:Cornish |
359 | ky:Kirghiz |
360 | |
361 | la:Latin |
362 | lb:Letzeburgesch |
363 | ln:Lingala |
364 | lo:Lao |
365 | lt:Lithuanian |
366 | lv:Latvian |
367 | |
368 | mg:Malagasy |
369 | mh:Marshall |
370 | mi:Maori |
371 | mk:Macedonian |
372 | ml:Malayalam |
373 | mn:Mongolian |
374 | mo:Moldavian |
375 | mr:Marathi |
376 | ms:Malay |
377 | mt:Maltese |
378 | my:Burmese |
379 | |
380 | na:Nauru |
381 | nb:Norwegian Bokmål |
382 | nd:Ndebele, North |
383 | ne:Nepali |
384 | ng:Ndonga |
385 | nl:Dutch |
386 | nn:Norwegian Nynorsk |
387 | no:Norwegian |
388 | nr:Ndebele, South |
389 | nv:Navajo |
390 | ny:Chichewa; Nyanja |
391 | |
392 | oc:Occitan (post 1500) |
393 | om:Oromo |
394 | or:Oriya |
395 | os:Ossetian; Ossetic |
396 | |
397 | pa:Panjabi |
398 | pi:Pali |
399 | pl:Polish |
400 | ps:Pushto |
401 | pt:Portuguese |
402 | |
403 | qu:Quechua |
404 | |
405 | rm:Rhaeto-Romance |
406 | rn:Rundi |
407 | ro:Romanian |
408 | ru:Russian |
409 | rw:Kinyarwanda |
410 | |
411 | sa:Sanskrit |
412 | sc:Sardinian |
413 | sd:Sindhi |
414 | se:Sami |
415 | sg:Sango |
416 | si:Sinhalese |
417 | sk:Slovak |
418 | sl:Slovenian |
419 | sm:Samoan |
420 | sn:Shona |
421 | so:Somali |
422 | sq:Albanian |
423 | sr:Serbian |
424 | ss:Swati |
425 | st:Sotho |
426 | su:Sundanese |
427 | sv:Swedish |
428 | sw:Swahili |
429 | |
430 | ta:Tamil |
431 | te:Telugu |
432 | tg:Tajik |
433 | th:Thai |
434 | ti:Tigrinya |
435 | tk:Turkmen |
436 | tl:Tagalog |
437 | tn:Tswana |
438 | to:Tonga |
439 | tr:Turkish |
440 | ts:Tsonga |
441 | tt:Tatar |
442 | tw:Twi |
443 | |
444 | ug:Uighur |
445 | uk:Ukrainian |
446 | ur:Urdu |
447 | uz:Uzbek |
448 | |
449 | vi:Vietnamese |
450 | vo:Volapük |
451 | |
452 | wo:Wolof |
453 | |
454 | xh:Xhosa |
455 | |
456 | yi:Yiddish |
457 | yo:Yoruba |
458 | |
459 | za:Zhuang |
460 | zh:Chinese |
461 | zu:Zulu |