[p5sagit/p5-mst-13.2.git] / lib / Locale / Language.pm

#-----------------------------------------------------------------------

=head1 NAME

Locale::Language - ISO two letter codes for language identification (ISO 639)

=head1 SYNOPSIS

    use Locale::Language;
    
    $lang = code2language('en');        # $lang gets 'English'
    $code = language2code('French');    # $code gets 'fr'
    
    @codes   = all_language_codes();
    @names   = all_language_names();

=cut

#-----------------------------------------------------------------------

package Locale::Language;
use strict;
require 5.002;

#-----------------------------------------------------------------------

=head1 DESCRIPTION

The C<Locale::Language> module provides access to the ISO two-letter
codes for identifying languages, as defined in ISO 639. You can either
access the codes via the L</"CONVERSION ROUTINES"> (described below),
or with the two functions which return lists of all language codes or
all language names.

=cut

#-----------------------------------------------------------------------

require Exporter;

#-----------------------------------------------------------------------
#	Public Global Variables
#-----------------------------------------------------------------------
# Package globals are declared with "use vars" so that they remain
# visible to Exporter and to callers under "use strict".
use vars qw($VERSION @ISA @EXPORT);

# The version number is derived from the revision-control keyword string:
# the major and minor parts are captured by the regex and reformatted
# with a two-digit minor part.
$VERSION      = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);

# Inherit import() from Exporter.
@ISA          = qw(Exporter);

# The four public functions, exported by default.
@EXPORT       = qw(&code2language &language2code
                   &all_language_codes &all_language_names );

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
# %CODES maps a language code to its language name, exactly as written
# in the DATA section.
my %CODES     = ();

# %LANGUAGES is the reverse table: lower-cased language name => code.
my %LANGUAGES = ();


#=======================================================================

=head1 CONVERSION ROUTINES

There are two conversion routines: C<code2language()> and C<language2code()>.

=over 8

=item code2language()

This function takes a two letter language code and returns a string
which contains the name of the language identified. If the code is
not a valid language code, as defined by ISO 639, then C<undef>
will be returned.

    $lang = code2language($code);

=item language2code()

This function takes a language name and returns the corresponding
two letter language code, if such exists.
If the argument could not be identified as a language name,
then C<undef> will be returned.

    $code = language2code('French');

The case of the language name is not important.
See the section L</"KNOWN BUGS AND LIMITATIONS"> below.

=back

=cut

#=======================================================================
#-----------------------------------------------------------------------
# code2language( CODE )
#
# Look up a language code and return the language name stored for it
# in %CODES. The lookup is case-insensitive: the argument is folded
# to lower case before it is used as a key.
#
# Returns undef (explicitly, even in list context) when the argument
# is undefined or when there is no entry for the code.
#-----------------------------------------------------------------------
sub code2language
{
    my $wanted = shift @_;

    # an undefined argument can never identify a language
    defined $wanted or return undef;

    my $key = lc $wanted;

    # test with exists() so that a miss never autovivifies or depends
    # on the stored value
    return exists($CODES{$key}) ? $CODES{$key} : undef;
}

#-----------------------------------------------------------------------
# language2code( NAME )
#
# Look up a language name and return the code stored for it in
# %LANGUAGES. The lookup is case-insensitive: the argument is folded
# to lower case before it is used as a key.
#
# Returns undef (explicitly, even in list context) when the argument
# is undefined or when the name is not known.
#-----------------------------------------------------------------------
sub language2code
{
    my $name = shift @_;

    # an undefined argument can never identify a language
    defined $name or return undef;

    my $key = lc $name;

    return exists($LANGUAGES{$key}) ? $LANGUAGES{$key} : undef;
}

#=======================================================================

=head1 QUERY ROUTINES

There are two functions which can be used to obtain a list of all
language codes, or all language names:

=over 8

=item C<all_language_codes()>

Returns a list of all two-letter language codes.
The codes are guaranteed to be all lower-case,
and not in any particular order.

=item C<all_language_names()>

Returns a list of all language names for which there is a corresponding
two-letter language code. The names are capitalised, and not returned
in any particular order.

=back

=cut

#=======================================================================
#-----------------------------------------------------------------------
# all_language_codes()
#
# Returns every key of %CODES, i.e. every known language code, in
# whatever order the hash yields them. In scalar context the caller
# gets the number of codes.
#-----------------------------------------------------------------------
sub all_language_codes
{
    my @codes = keys %CODES;

    return @codes;
}

#-----------------------------------------------------------------------
# all_language_names()
#
# Returns every value of %CODES, i.e. every known language name, in
# whatever order the hash yields them. In scalar context the caller
# gets the number of names.
#-----------------------------------------------------------------------
sub all_language_names
{
    my @names = values %CODES;

    return @names;
}

#-----------------------------------------------------------------------

=head1 EXAMPLES

The following example illustrates use of the C<code2language()> function.
The user is prompted for a language code, and then told the corresponding
language name:

    $| = 1;    # turn off buffering
    
    print "Enter language code: ";
    chomp($code = <STDIN>);
    $lang = code2language($code);
    if (defined $lang)
    {
        print "$code = $lang\n";
    }
    else
    {
        print "'$code' is not a valid language code!\n";
    }

=head1 KNOWN BUGS AND LIMITATIONS

=over 4

=item *

In the current implementation, all data is read in when the
module is loaded, and then held in memory.
A lazy implementation would be more memory friendly.

=item *

Currently just supports the two letter language codes -
there are also three-letter codes, and numbers.
Would these be of any use to anyone?

=back

=head1 SEE ALSO

=over 4

=item Locale::Country

ISO codes for identification of country (ISO 3166).
Supports 2-letter, 3-letter, and numeric country codes.

=item Locale::Currency

ISO three letter codes for identification of currencies and funds (ISO 4217).

=item ISO 639:1988 (E/F)

Code for the representation of names of languages.

=item http://lcweb.loc.gov/standards/iso639-2/langhome.html

Home page for ISO 639-2

=back


=head1 AUTHOR

Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>

=head1 COPYRIGHT

Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

#-----------------------------------------------------------------------

#=======================================================================
# initialisation code - stuff the DATA into the CODES hash
#
# Runs once, when the module is loaded. Each non-blank line of the
# DATA section has the form "code:Language Name". For every such line
# two entries are made:
#
#     %CODES      code                      => language name
#     %LANGUAGES  lower-cased language name => code
#
# Blank lines are skipped, as are lines with no ':' separator.
#=======================================================================
{
    my $line;
    my $code;
    my $language;


    #-------------------------------------------------------------------
    # Read into a lexical rather than $_: while (<DATA>) assigns to the
    # global $_ without localising it, which would clobber the caller's
    # $_ if the module is loaded at run time (e.g. inside a foreach).
    #-------------------------------------------------------------------
    while (defined($line = <DATA>))
    {
        next unless $line =~ /\S/;

        #---------------------------------------------------------------
        # chomp, not chop: chop removes the last character whatever it
        # is, so a final line without a trailing newline would lose the
        # last letter of its language name.
        #---------------------------------------------------------------
        chomp($line);
        $line =~ s/\r$//;       # tolerate CRLF line endings

        # limit of 2, so a ':' inside a language name is preserved
        ($code, $language) = split(/:/, $line, 2);

        # a line with no ':' has no language name - ignore it
        next unless defined $language;

        $CODES{$code} = $language;
        $LANGUAGES{"\L$language"} = $code;
    }

    # everything has been read; release the filehandle
    close(DATA);
}

1;

__DATA__
aa:Afar
ab:Abkhazian
ae:Avestan
af:Afrikaans
am:Amharic
ar:Arabic
as:Assamese
ay:Aymara
az:Azerbaijani

ba:Bashkir
be:Belarusian
bg:Bulgarian
bh:Bihari
bi:Bislama
bn:Bengali
bo:Tibetan
br:Breton
bs:Bosnian

ca:Catalan
ce:Chechen
ch:Chamorro
co:Corsican
cs:Czech
cu:Church Slavic
cv:Chuvash
cy:Welsh

da:Danish
de:German
dz:Dzongkha

el:Greek
en:English
eo:Esperanto
es:Spanish
et:Estonian
eu:Basque

fa:Persian
fi:Finnish
fj:Fijian
fo:Faeroese
fr:French
fy:Frisian

ga:Irish
gd:Gaelic (Scots)
gl:Gallegan
gn:Guarani
gu:Gujarati
gv:Manx

ha:Hausa
he:Hebrew
hi:Hindi
ho:Hiri Motu
hr:Croatian
hu:Hungarian
hy:Armenian
hz:Herero

ia:Interlingua
id:Indonesian
ie:Interlingue
ik:Inupiaq
is:Icelandic
it:Italian
iu:Inuktitut

ja:Japanese
jw:Javanese

ka:Georgian
ki:Kikuyu
kj:Kuanyama
kk:Kazakh
kl:Kalaallisut
km:Khmer
kn:Kannada
ko:Korean
ks:Kashmiri
ku:Kurdish
kv:Komi
kw:Cornish
ky:Kirghiz

la:Latin
lb:Letzeburgesch
ln:Lingala
lo:Lao
lt:Lithuanian
lv:Latvian

mg:Malagasy
mh:Marshall
mi:Maori
mk:Macedonian
ml:Malayalam
mn:Mongolian
mo:Moldavian
mr:Marathi
ms:Malay
mt:Maltese
my:Burmese

na:Nauru
nb:Norwegian Bokmål
nd:Ndebele, North
ne:Nepali
ng:Ndonga
nl:Dutch
nn:Norwegian Nynorsk
no:Norwegian
nr:Ndebele, South
nv:Navajo
ny:Chichewa; Nyanja

oc:Occitan (post 1500)
om:Oromo
or:Oriya
os:Ossetian; Ossetic

pa:Panjabi
pi:Pali
pl:Polish
ps:Pushto
pt:Portuguese

qu:Quechua

rm:Rhaeto-Romance
rn:Rundi
ro:Romanian
ru:Russian
rw:Kinyarwanda

sa:Sanskrit
sc:Sardinian
sd:Sindhi
se:Sami
sg:Sango
si:Sinhalese
sk:Slovak
sl:Slovenian
sm:Samoan
sn:Shona
so:Somali
sq:Albanian
sr:Serbian
ss:Swati
st:Sotho
su:Sundanese
sv:Swedish
sw:Swahili

ta:Tamil
te:Telugu
tg:Tajik
th:Thai
ti:Tigrinya
tk:Turkmen
tl:Tagalog
tn:Tswana
to:Tonga
tr:Turkish
ts:Tsonga
tt:Tatar
tw:Twi

ug:Uighur
uk:Ukrainian
ur:Urdu
uz:Uzbek

vi:Vietnamese
vo:Volapük

wo:Wolof

xh:Xhosa

yi:Yiddish
yo:Yoruba

za:Zhuang
zh:Chinese
zu:Zulu
Commit	Line	Data
47a334e9	1	#-----------------------------------------------------------------------
	2
	3	=head1 NAME
	4
	5	Locale::Language - ISO two letter codes for language identification (ISO 639)
	6
	7	=head1 SYNOPSIS
	8
	9	use Locale::Language;
88c28ceb	10
47a334e9	11	$lang = code2language('en'); # $lang gets 'English'
47a334e9	12	$code = language2code('French'); # $code gets 'fr'
88c28ceb	13
47a334e9	14	@codes = all_language_codes();
	15	@names = all_language_names();
	16
	17	=cut
	18
	19	#-----------------------------------------------------------------------
	20
	21	package Locale::Language;
	22	use strict;
	23	require 5.002;
	24
	25	#-----------------------------------------------------------------------
	26
	27	=head1 DESCRIPTION
	28
	29	The C<Locale::Language> module provides access to the ISO two-letter
	30	codes for identifying languages, as defined in ISO 639. You can either
	31	access the codes via the L<conversion routines> (described below),
	32	or with the two functions which return lists of all language codes or
	33	all language names.
	34
	35	=cut
	36
	37	#-----------------------------------------------------------------------
	38
	39	require Exporter;
	40
	41	#-----------------------------------------------------------------------
	42	# Public Global Variables
	43	#-----------------------------------------------------------------------
	44	use vars qw($VERSION @ISA @EXPORT);
	45	$VERSION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
	46	@ISA = qw(Exporter);
	47	@EXPORT = qw(&code2language &language2code
	48	&all_language_codes &all_language_names );
	49
	50	#-----------------------------------------------------------------------
	51	# Private Global Variables
	52	#-----------------------------------------------------------------------
	53	my %CODES = ();
	54	my %LANGUAGES = ();
	55
	56
	57	#=======================================================================
	58
	59	=head1 CONVERSION ROUTINES
	60
	61	There are two conversion routines: C<code2language()> and C<language2code()>.
	62
	63	=over 8
	64
	65	=item code2language()
	66
	67	This function takes a two letter language code and returns a string
	68	which contains the name of the language identified. If the code is
	69	not a valid language code, as defined by ISO 639, then C<undef>
	70	will be returned.
	71
	72	$lang = code2language($code);
	73
	74	=item language2code()
	75
	76	This function takes a language name and returns the corresponding
	77	two letter language code, if such exists.
78	If the argument could not be identified as a language name,
79	then C<undef> will be returned.
80
81	$code = language2code('French');
82
83	The case of the language name is not important.
84	See the section L<KNOWN BUGS AND LIMITATIONS> below.
85
86	=back
87
88	=cut
89
90	#=======================================================================
91	sub code2language
92	{
93	my $code = shift;
94
95
96	return undef unless defined $code;
97	$code = lc($code);
98	if (exists $CODES{$code})
99	{
100	return $CODES{$code};
101	}
102	else
103	{
104	#---------------------------------------------------------------
105	# no such language code!
106	#---------------------------------------------------------------
107	return undef;
108	}
109	}
110
111	sub language2code
112	{
113	my $lang = shift;
114
115
116	return undef unless defined $lang;
117	$lang = lc($lang);
118	if (exists $LANGUAGES{$lang})
119	{
120	return $LANGUAGES{$lang};
121	}
122	else
123	{
124	#---------------------------------------------------------------
125	# no such language!
126	#---------------------------------------------------------------
127	return undef;
128	}
129	}
130
131	#=======================================================================
132
133	=head1 QUERY ROUTINES
134
135	There are two function which can be used to obtain a list of all
136	language codes, or all language names:
137
138	=over 8
139
140	=item C<all_language_codes()>
141
142	Returns a list of all two-letter language codes.
143	The codes are guaranteed to be all lower-case,
144	and not in any particular order.
145
146	=item C<all_language_names()>
147
148	Returns a list of all language names for which there is a corresponding
149	two-letter language code. The names are capitalised, and not returned
150	in any particular order.
151
152	=back
153
154	=cut
155
156	#=======================================================================
157	sub all_language_codes
158	{
159	return keys %CODES;
160	}
161
162	sub all_language_names
163	{
164	return values %CODES;
165	}
166
167	#-----------------------------------------------------------------------
168
169	=head1 EXAMPLES
170
171	The following example illustrates use of the C<code2language()> function.
172	The user is prompted for a language code, and then told the corresponding
173	language name:
174
175	$\| = 1; # turn off buffering
88c28ceb	176
47a334e9	177	print "Enter language code: ";
	178	chop($code = <STDIN>);
	179	$lang = code2language($code);
	180	if (defined $lang)
	181	{
	182	print "$code = $lang\n";
	183	}
	184	else
	185	{
	186	print "'$code' is not a valid language code!\n";
	187	}
	188
	189	=head1 KNOWN BUGS AND LIMITATIONS
	190
	191	=over 4
	192
	193	=item *
	194
	195	In the current implementation, all data is read in when the
	196	module is loaded, and then held in memory.
	197	A lazy implementation would be more memory friendly.
	198
	199	=item *
	200
	201	Currently just supports the two letter language codes -
	202	there are also three-letter codes, and numbers.
	203	Would these be of any use to anyone?
	204
	205	=back
	206
	207	=head1 SEE ALSO
	208
	209	=over 4
	210
	211	=item Locale::Country
	212
	213	ISO codes for identification of country (ISO 3166).
	214	Supports 2-letter, 3-letter, and numeric country codes.
	215
	216	=item Locale::Currency
	217
	218	ISO three letter codes for identification of currencies and funds (ISO 4217).
	219
	220	=item ISO 639:1988 (E/F)
	221
	222	Code for the representation of names of languages.
	223
	224	=item http://lcweb.loc.gov/standards/iso639-2/langhome.html
	225
	226	Home page for ISO 639-2
	227
	228	=back
	229
	230
	231	=head1 AUTHOR
	232
	233	Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>
	234
	235	=head1 COPYRIGHT
	236
	237	Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).
	238
	239	This module is free software; you can redistribute it and/or
	240	modify it under the same terms as Perl itself.
241
242	=cut
243
244	#-----------------------------------------------------------------------
245
246	#=======================================================================
247	# initialisation code - stuff the DATA into the CODES hash
248	#=======================================================================
249	{
250	my $code;
251	my $language;
252
253
254	while (<DATA>)
255	{
256	next unless /\S/;
257	chop;
258	($code, $language) = split(/:/, $_, 2);
259	$CODES{$code} = $language;
260	$LANGUAGES{"\L$language"} = $code;
261	}
262	}
263
264	1;
265
266	__DATA__
267	aa:Afar
268	ab:Abkhazian
269	ae:Avestan
270	af:Afrikaans
271	am:Amharic
272	ar:Arabic
273	as:Assamese
274	ay:Aymara
275	az:Azerbaijani
276
277	ba:Bashkir
278	be:Belarusian
279	bg:Bulgarian
280	bh:Bihari
281	bi:Bislama
282	bn:Bengali
283	bo:Tibetan
284	br:Breton
285	bs:Bosnian
286
287	ca:Catalan
288	ce:Chechen
289	ch:Chamorro
290	co:Corsican
291	cs:Czech
292	cu:Church Slavic
293	cv:Chuvash
294	cy:Welsh
295
296	da:Danish
297	de:German
298	dz:Dzongkha
299
300	el:Greek
301	en:English
302	eo:Esperanto
303	es:Spanish
304	et:Estonian
305	eu:Basque
306
307	fa:Persian
308	fi:Finnish
309	fj:Fijian
310	fo:Faeroese
311	fr:French
312	fy:Frisian
313
314	ga:Irish
315	gd:Gaelic (Scots)
316	gl:Gallegan
317	gn:Guarani
318	gu:Gujarati
319	gv:Manx
320
321	ha:Hausa
322	he:Hebrew
323	hi:Hindi
324	ho:Hiri Motu
325	hr:Croatian
326	hu:Hungarian
327	hy:Armenian
328	hz:Herero
329
330	ia:Interlingua
331	id:Indonesian
332	ie:Interlingue
333	ik:Inupiaq
334	is:Icelandic
335	it:Italian
336	iu:Inuktitut
337
338	ja:Japanese
339	jw:Javanese
340
341	ka:Georgian
342	ki:Kikuyu
343	kj:Kuanyama
344	kk:Kazakh
345	kl:Kalaallisut
346	km:Khmer
347	kn:Kannada
348	ko:Korean
349	ks:Kashmiri
350	ku:Kurdish
351	kv:Komi
352	kw:Cornish
353	ky:Kirghiz
354
355	la:Latin
356	lb:Letzeburgesch
357	ln:Lingala
358	lo:Lao
359	lt:Lithuanian
360	lv:Latvian
361
362	mg:Malagasy
363	mh:Marshall
364	mi:Maori
365	mk:Macedonian
366	ml:Malayalam
367	mn:Mongolian
368	mo:Moldavian
369	mr:Marathi
370	ms:Malay
371	mt:Maltese
372	my:Burmese
373
374	na:Nauru
375	nb:Norwegian Bokmål
376	nd:Ndebele, North
377	ne:Nepali
378	ng:Ndonga
379	nl:Dutch
380	nn:Norwegian Nynorsk
381	no:Norwegian
382	nr:Ndebele, South
383	nv:Navajo
384	ny:Chichewa; Nyanja
385
386	oc:Occitan (post 1500)
387	om:Oromo
388	or:Oriya
389	os:Ossetian; Ossetic
390
391	pa:Panjabi
392	pi:Pali
393	pl:Polish
394	ps:Pushto
395	pt:Portuguese
396
397	qu:Quechua
398
399	rm:Rhaeto-Romance
400	rn:Rundi
401	ro:Romanian
402	ru:Russian
403	rw:Kinyarwanda
404
405	sa:Sanskrit
406	sc:Sardinian
407	sd:Sindhi
408	se:Sami
409	sg:Sango
410	si:Sinhalese
411	sk:Slovak
412	sl:Slovenian
413	sm:Samoan
414	sn:Shona
415	so:Somali
416	sq:Albanian
417	sr:Serbian
418	ss:Swati
419	st:Sotho
420	su:Sundanese
421	sv:Swedish
422	sw:Swahili
423
424	ta:Tamil
425	te:Telugu
426	tg:Tajik
427	th:Thai
428	ti:Tigrinya
429	tk:Turkmen
430	tl:Tagalog
431	tn:Tswana
432	to:Tonga
433	tr:Turkish
434	ts:Tsonga
435	tt:Tatar
436	tw:Twi
437
438	ug:Uighur
439	uk:Ukrainian
440	ur:Urdu
441	uz:Uzbek
442
443	vi:Vietnamese
444	vo:Volapük
445
446	wo:Wolof
447
448	xh:Xhosa
449
450	yi:Yiddish
451	yo:Yoruba
452
453	za:Zhuang
454	zh:Chinese
455	zu:Zulu