Commit | Line | Data |
47a334e9 |
1 | #----------------------------------------------------------------------- |
2 | |
3 | =head1 NAME |
4 | |
5 | Locale::Language - ISO two letter codes for language identification (ISO 639) |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Locale::Language; |
88c28ceb |
10 | |
47a334e9 |
11 | $lang = code2language('en'); # $lang gets 'English' |
12 | $code = language2code('French'); # $code gets 'fr' |
88c28ceb |
13 | |
47a334e9 |
14 | @codes = all_language_codes(); |
15 | @names = all_language_names(); |
16 | |
17 | =cut |
18 | |
19 | #----------------------------------------------------------------------- |
20 | |
21 | package Locale::Language; |
22 | use strict; |
23 | require 5.002; |
24 | |
25 | #----------------------------------------------------------------------- |
26 | |
27 | =head1 DESCRIPTION |
28 | |
29 | The C<Locale::Language> module provides access to the ISO two-letter |
30 | codes for identifying languages, as defined in ISO 639. You can either |
31 | access the codes via the L<conversion routines|/"CONVERSION ROUTINES"> (described below), |
32 | or with the two functions which return lists of all language codes or |
33 | all language names. |
34 | |
35 | =cut |
36 | |
37 | #----------------------------------------------------------------------- |
38 | |
39 | require Exporter; |
40 | |
#-----------------------------------------------------------------------
# Public Global Variables
#
#   $VERSION  module version, derived from the RCS revision keyword
#   @ISA      inherit import() from Exporter
#   @EXPORT   the four functions exported by default
#
# "use vars" is kept (rather than "our") because this file only
# requires perl 5.002.
#-----------------------------------------------------------------------
use vars qw($VERSION @ISA @EXPORT);

# Kept on a single line so that tools which evaluate only the $VERSION
# line can still extract the version.  Revision "1.6" becomes "1.06".
$VERSION = do { my @rev = q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/; sprintf('%d.%02d', @rev) };

@ISA    = ('Exporter');
@EXPORT = (
    '&code2language',
    '&language2code',
    '&all_language_codes',
    '&all_language_names',
);

#-----------------------------------------------------------------------
# Private Global Variables
#
#   %CODES      language code            => language name
#   %LANGUAGES  lower-cased language name => language code
#
# Both tables start empty and are filled by the initialisation code
# at the bottom of the file.
#-----------------------------------------------------------------------
my (%CODES, %LANGUAGES);
55 | |
56 | |
57 | #======================================================================= |
58 | |
59 | =head1 CONVERSION ROUTINES |
60 | |
61 | There are two conversion routines: C<code2language()> and C<language2code()>. |
62 | |
63 | =over 8 |
64 | |
65 | =item code2language() |
66 | |
67 | This function takes a two letter language code and returns a string |
68 | which contains the name of the language identified. If the code is |
69 | not a valid language code, as defined by ISO 639, then C<undef> |
70 | will be returned. |
71 | |
72 | $lang = code2language($code); |
73 | |
74 | =item language2code() |
75 | |
76 | This function takes a language name and returns the corresponding |
77 | two letter language code, if such exists. |
78 | If the argument could not be identified as a language name, |
79 | then C<undef> will be returned. |
80 | |
81 | $code = language2code('French'); |
82 | |
83 | The case of the language name is not important. |
84 | See the section L</"KNOWN BUGS AND LIMITATIONS"> below. |
85 | |
86 | =back |
87 | |
88 | =cut |
89 | |
90 | #======================================================================= |
#-----------------------------------------------------------------------
# code2language( CODE )
#
# Look up a language code in %CODES and return the language name
# stored for it.  The code is lower-cased before the lookup, so the
# case of the argument does not matter.
#
# Returns an explicit undef (a one-element list in list context) when
# CODE is undefined or is not a key of %CODES.
#-----------------------------------------------------------------------
sub code2language
{
    my ($code) = @_;

    return undef unless defined $code;

    my $key = lc $code;

    return exists $CODES{$key} ? $CODES{$key} : undef;
}
110 | |
#-----------------------------------------------------------------------
# language2code( NAME )
#
# Look up a language name in %LANGUAGES and return the code stored
# for it.  The name is lower-cased before the lookup, which matches
# the lower-cased keys of %LANGUAGES, so the case of the argument
# does not matter.
#
# Returns an explicit undef (a one-element list in list context) when
# NAME is undefined or is not a key of %LANGUAGES.
#-----------------------------------------------------------------------
sub language2code
{
    my ($lang) = @_;

    return undef unless defined $lang;

    my $key = lc $lang;

    # no such language!
    return undef unless exists $LANGUAGES{$key};

    return $LANGUAGES{$key};
}
130 | |
131 | #======================================================================= |
132 | |
133 | =head1 QUERY ROUTINES |
134 | |
135 | There are two functions which can be used to obtain a list of all |
136 | language codes, or all language names: |
137 | |
138 | =over 8 |
139 | |
140 | =item C<all_language_codes()> |
141 | |
142 | Returns a list of all two-letter language codes. |
143 | The codes are guaranteed to be all lower-case, |
144 | and not in any particular order. |
145 | |
146 | =item C<all_language_names()> |
147 | |
148 | Returns a list of all language names for which there is a corresponding |
149 | two-letter language code. The names are capitalised, and not returned |
150 | in any particular order. |
151 | |
152 | =back |
153 | |
154 | =cut |
155 | |
156 | #======================================================================= |
#-----------------------------------------------------------------------
# all_language_codes()
#
# Return every key of %CODES, i.e. every known language code, in
# hash order.  In scalar context the number of codes is returned.
#-----------------------------------------------------------------------
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}
161 | |
#-----------------------------------------------------------------------
# all_language_names()
#
# Return every value of %CODES, i.e. every known language name as it
# was stored, in hash order.  In scalar context the number of names
# is returned.
#-----------------------------------------------------------------------
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}
166 | |
167 | #----------------------------------------------------------------------- |
168 | |
169 | =head1 EXAMPLES |
170 | |
171 | The following example illustrates use of the C<code2language()> function. |
172 | The user is prompted for a language code, and then told the corresponding |
173 | language name: |
174 | |
175 | $| = 1; # turn off buffering |
88c28ceb |
176 | |
47a334e9 |
177 | print "Enter language code: "; |
178 | chop($code = <STDIN>); |
179 | $lang = code2language($code); |
180 | if (defined $lang) |
181 | { |
182 | print "$code = $lang\n"; |
183 | } |
184 | else |
185 | { |
186 | print "'$code' is not a valid language code!\n"; |
187 | } |
188 | |
189 | =head1 KNOWN BUGS AND LIMITATIONS |
190 | |
191 | =over 4 |
192 | |
193 | =item * |
194 | |
195 | In the current implementation, all data is read in when the |
196 | module is loaded, and then held in memory. |
197 | A lazy implementation would be more memory friendly. |
198 | |
199 | =item * |
200 | |
201 | Currently just supports the two letter language codes - |
202 | there are also three-letter codes, and numbers. |
203 | Would these be of any use to anyone? |
204 | |
205 | =back |
206 | |
207 | =head1 SEE ALSO |
208 | |
209 | =over 4 |
210 | |
211 | =item Locale::Country |
212 | |
213 | ISO codes for identification of country (ISO 3166). |
214 | Supports 2-letter, 3-letter, and numeric country codes. |
215 | |
216 | =item Locale::Currency |
217 | |
218 | ISO three letter codes for identification of currencies and funds (ISO 4217). |
219 | |
220 | =item ISO 639:1988 (E/F) |
221 | |
222 | Code for the representation of names of languages. |
223 | |
224 | =item http://lcweb.loc.gov/standards/iso639-2/langhome.html |
225 | |
226 | Home page for ISO 639-2 |
227 | |
228 | =back |
229 | |
230 | |
231 | =head1 AUTHOR |
232 | |
233 | Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> |
234 | |
235 | =head1 COPYRIGHT |
236 | |
237 | Copyright (c) 1997-2001 Canon Research Centre Europe (CRE). |
238 | |
239 | This module is free software; you can redistribute it and/or |
240 | modify it under the same terms as Perl itself. |
241 | |
242 | =cut |
243 | |
244 | #----------------------------------------------------------------------- |
245 | |
#=======================================================================
# initialisation code - load the "code:Language name" records that
# follow __DATA__ into the two lookup tables:
#
#   %CODES      code                     => language name (as written)
#   %LANGUAGES  lower-cased language name => code
#
# Runs once, when the module is loaded.
#=======================================================================
{
    # Read line by line even if the caller changed the input record
    # separator before loading this module.
    local $/ = "\n";

    # A lexical is used instead of the implicit $_: "while (<DATA>)"
    # assigns to the global $_ without localising it, which would
    # clobber the caller's $_ if the module is loaded at run time
    # (for example by a require inside a foreach loop).
    my $line;

    while (defined($line = <DATA>))
    {
        # chomp rather than chop: chop would remove the last letter of
        # the final record if the data does not end with a newline.
        chomp($line);
        $line =~ s/\r$//;               # tolerate CRLF line endings

        # blank lines only separate the alphabetical groups
        next unless $line =~ /\S/;

        my ($code, $language) = split(/:/, $line, 2);

        # skip anything that is not a "code:name" record, rather than
        # storing an undefined language name
        next unless defined $language;

        $CODES{$code}            = $language;
        $LANGUAGES{lc $language} = $code;
    }

    close(DATA);
}
263 | |
264 | 1; |
265 | |
266 | __DATA__ |
267 | aa:Afar |
268 | ab:Abkhazian |
269 | ae:Avestan |
270 | af:Afrikaans |
271 | am:Amharic |
272 | ar:Arabic |
273 | as:Assamese |
274 | ay:Aymara |
275 | az:Azerbaijani |
276 | |
277 | ba:Bashkir |
278 | be:Belarusian |
279 | bg:Bulgarian |
280 | bh:Bihari |
281 | bi:Bislama |
282 | bn:Bengali |
283 | bo:Tibetan |
284 | br:Breton |
285 | bs:Bosnian |
286 | |
287 | ca:Catalan |
288 | ce:Chechen |
289 | ch:Chamorro |
290 | co:Corsican |
291 | cs:Czech |
292 | cu:Church Slavic |
293 | cv:Chuvash |
294 | cy:Welsh |
295 | |
296 | da:Danish |
297 | de:German |
298 | dz:Dzongkha |
299 | |
300 | el:Greek |
301 | en:English |
302 | eo:Esperanto |
303 | es:Spanish |
304 | et:Estonian |
305 | eu:Basque |
306 | |
307 | fa:Persian |
308 | fi:Finnish |
309 | fj:Fijian |
310 | fo:Faeroese |
311 | fr:French |
312 | fy:Frisian |
313 | |
314 | ga:Irish |
315 | gd:Gaelic (Scots) |
316 | gl:Gallegan |
317 | gn:Guarani |
318 | gu:Gujarati |
319 | gv:Manx |
320 | |
321 | ha:Hausa |
322 | he:Hebrew |
323 | hi:Hindi |
324 | ho:Hiri Motu |
325 | hr:Croatian |
326 | hu:Hungarian |
327 | hy:Armenian |
328 | hz:Herero |
329 | |
330 | ia:Interlingua |
331 | id:Indonesian |
332 | ie:Interlingue |
333 | ik:Inupiaq |
334 | is:Icelandic |
335 | it:Italian |
336 | iu:Inuktitut |
337 | |
338 | ja:Japanese |
339 | jw:Javanese |
340 | |
341 | ka:Georgian |
342 | ki:Kikuyu |
343 | kj:Kuanyama |
344 | kk:Kazakh |
345 | kl:Kalaallisut |
346 | km:Khmer |
347 | kn:Kannada |
348 | ko:Korean |
349 | ks:Kashmiri |
350 | ku:Kurdish |
351 | kv:Komi |
352 | kw:Cornish |
353 | ky:Kirghiz |
354 | |
355 | la:Latin |
356 | lb:Letzeburgesch |
357 | ln:Lingala |
358 | lo:Lao |
359 | lt:Lithuanian |
360 | lv:Latvian |
361 | |
362 | mg:Malagasy |
363 | mh:Marshall |
364 | mi:Maori |
365 | mk:Macedonian |
366 | ml:Malayalam |
367 | mn:Mongolian |
368 | mo:Moldavian |
369 | mr:Marathi |
370 | ms:Malay |
371 | mt:Maltese |
372 | my:Burmese |
373 | |
374 | na:Nauru |
375 | nb:Norwegian Bokmål |
376 | nd:Ndebele, North |
377 | ne:Nepali |
378 | ng:Ndonga |
379 | nl:Dutch |
380 | nn:Norwegian Nynorsk |
381 | no:Norwegian |
382 | nr:Ndebele, South |
383 | nv:Navajo |
384 | ny:Chichewa; Nyanja |
385 | |
386 | oc:Occitan (post 1500) |
387 | om:Oromo |
388 | or:Oriya |
389 | os:Ossetian; Ossetic |
390 | |
391 | pa:Panjabi |
392 | pi:Pali |
393 | pl:Polish |
394 | ps:Pushto |
395 | pt:Portuguese |
396 | |
397 | qu:Quechua |
398 | |
399 | rm:Rhaeto-Romance |
400 | rn:Rundi |
401 | ro:Romanian |
402 | ru:Russian |
403 | rw:Kinyarwanda |
404 | |
405 | sa:Sanskrit |
406 | sc:Sardinian |
407 | sd:Sindhi |
408 | se:Sami |
409 | sg:Sango |
410 | si:Sinhalese |
411 | sk:Slovak |
412 | sl:Slovenian |
413 | sm:Samoan |
414 | sn:Shona |
415 | so:Somali |
416 | sq:Albanian |
417 | sr:Serbian |
418 | ss:Swati |
419 | st:Sotho |
420 | su:Sundanese |
421 | sv:Swedish |
422 | sw:Swahili |
423 | |
424 | ta:Tamil |
425 | te:Telugu |
426 | tg:Tajik |
427 | th:Thai |
428 | ti:Tigrinya |
429 | tk:Turkmen |
430 | tl:Tagalog |
431 | tn:Tswana |
432 | to:Tonga |
433 | tr:Turkish |
434 | ts:Tsonga |
435 | tt:Tatar |
436 | tw:Twi |
437 | |
438 | ug:Uighur |
439 | uk:Ukrainian |
440 | ur:Urdu |
441 | uz:Uzbek |
442 | |
443 | vi:Vietnamese |
444 | vo:Volapük |
445 | |
446 | wo:Wolof |
447 | |
448 | xh:Xhosa |
449 | |
450 | yi:Yiddish |
451 | yo:Yoruba |
452 | |
453 | za:Zhuang |
454 | zh:Chinese |
455 | zu:Zulu |