Commit | Line | Data |
6b14ceb7 |
1 | # |
2 | # Locale::Script - ISO codes for script identification (ISO 15924) |
3 | # |
3a6b268d |
4 | # $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $ |
6b14ceb7 |
5 | # |
6b6e008c |
6 | |
7 | package Locale::Script; |
8 | use strict; |
9 | require 5.002; |
10 | |
6b6e008c |
11 | require Exporter; |
12 | use Carp; |
13 | use Locale::Constants; |
14 | |
15 | |
16 | #----------------------------------------------------------------------- |
17 | # Public Global Variables |
18 | #----------------------------------------------------------------------- |
19 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); |
f93bb41e |
20 | $VERSION = sprintf("%d.%02d", q$Revision: 2.21 $ =~ /(\d+)\.(\d+)/); |
6b6e008c |
21 | @ISA = qw(Exporter); |
22 | @EXPORT = qw(code2script script2code |
23 | all_script_codes all_script_names |
24 | script_code2code |
25 | LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); |
26 | |
27 | #----------------------------------------------------------------------- |
28 | # Private Global Variables |
29 | #----------------------------------------------------------------------- |
30 | my $CODES = []; |
31 | my $COUNTRIES = []; |
32 | |
33 | |
34 | #======================================================================= |
6b14ceb7 |
35 | # |
36 | # code2script ( CODE [, CODESET ] ) |
37 | # |
6b6e008c |
38 | #======================================================================= |
39 | sub code2script |
40 | { |
41 | my $code = shift; |
42 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
43 | |
44 | |
45 | return undef unless defined $code; |
46 | |
47 | #------------------------------------------------------------------- |
48 | # Make sure the code is in the right form before we use it |
49 | # to look up the corresponding script. |
50 | # We have to sprintf because the codes are given as 3-digits, |
51 | # with leading 0's. Eg 070 for Egyptian demotic. |
52 | #------------------------------------------------------------------- |
53 | if ($codeset == LOCALE_CODE_NUMERIC) |
54 | { |
55 | return undef if ($code =~ /\D/); |
56 | $code = sprintf("%.3d", $code); |
57 | } |
58 | else |
59 | { |
60 | $code = lc($code); |
61 | } |
62 | |
63 | if (exists $CODES->[$codeset]->{$code}) |
64 | { |
65 | return $CODES->[$codeset]->{$code}; |
66 | } |
67 | else |
68 | { |
69 | #--------------------------------------------------------------- |
70 | # no such script code! |
71 | #--------------------------------------------------------------- |
72 | return undef; |
73 | } |
74 | } |
75 | |
6b14ceb7 |
76 | |
77 | #======================================================================= |
78 | # |
79 | # script2code ( SCRIPT [, CODESET ] ) |
80 | # |
81 | #======================================================================= |
6b6e008c |
82 | sub script2code |
83 | { |
84 | my $script = shift; |
85 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
86 | |
87 | |
88 | return undef unless defined $script; |
89 | $script = lc($script); |
90 | if (exists $COUNTRIES->[$codeset]->{$script}) |
91 | { |
92 | return $COUNTRIES->[$codeset]->{$script}; |
93 | } |
94 | else |
95 | { |
96 | #--------------------------------------------------------------- |
97 | # no such script! |
98 | #--------------------------------------------------------------- |
99 | return undef; |
100 | } |
101 | } |
102 | |
6b14ceb7 |
103 | |
104 | #======================================================================= |
105 | # |
106 | # script_code2code ( CODE, IN-CODESET, OUT-CODESET ) |
107 | # |
108 | #======================================================================= |
6b6e008c |
109 | sub script_code2code |
110 | { |
111 | (@_ == 3) or croak "script_code2code() takes 3 arguments!"; |
112 | |
113 | my $code = shift; |
114 | my $inset = shift; |
115 | my $outset = shift; |
6b14ceb7 |
116 | my $outcode; |
6b6e008c |
117 | my $script; |
118 | |
119 | |
120 | return undef if $inset == $outset; |
121 | $script = code2script($code, $inset); |
122 | return undef if not defined $script; |
123 | $outcode = script2code($script, $outset); |
124 | return $outcode; |
125 | } |
126 | |
6b6e008c |
127 | |
128 | #======================================================================= |
6b14ceb7 |
129 | # |
130 | # all_script_codes() |
131 | # |
132 | #======================================================================= |
6b6e008c |
133 | sub all_script_codes |
134 | { |
135 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
136 | |
137 | return keys %{ $CODES->[$codeset] }; |
138 | } |
139 | |
6b14ceb7 |
140 | |
141 | #======================================================================= |
142 | # |
143 | # all_script_names() |
144 | # |
145 | #======================================================================= |
6b6e008c |
146 | sub all_script_names |
147 | { |
148 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; |
149 | |
150 | return values %{ $CODES->[$codeset] }; |
151 | } |
152 | |
153 | |
6b6e008c |
154 | #======================================================================= |
6b14ceb7 |
155 | # |
6b6e008c |
156 | # initialisation code - stuff the DATA into the ALPHA2 hash |
6b14ceb7 |
157 | # |
6b6e008c |
158 | #======================================================================= |
159 | { |
160 | my ($alpha2, $alpha3, $numeric); |
161 | my $script; |
162 | |
77b047e7 |
163 | local $_; |
6b6e008c |
164 | |
165 | while (<DATA>) |
166 | { |
167 | next unless /\S/; |
168 | chop; |
169 | ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); |
170 | |
171 | $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; |
172 | $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; |
173 | |
174 | if ($alpha3) |
175 | { |
176 | $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; |
177 | $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; |
178 | } |
179 | |
180 | if ($numeric) |
181 | { |
182 | $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; |
183 | $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; |
184 | } |
185 | |
186 | } |
3a6b268d |
187 | |
188 | close(DATA); |
6b6e008c |
189 | } |
190 | |
191 | 1; |
192 | |
193 | __DATA__ |
194 | am:ama:130:Aramaic |
195 | ar:ara:160:Arabic |
196 | av:ave:151:Avestan |
197 | bh:bhm:300:Brahmi (Ashoka) |
198 | bi:bid:372:Buhid |
199 | bn:ben:325:Bengali |
200 | bo:bod:330:Tibetan |
201 | bp:bpm:285:Bopomofo |
202 | br:brl:570:Braille |
203 | bt:btk:365:Batak |
204 | bu:bug:367:Buginese (Makassar) |
205 | by:bys:550:Blissymbols |
206 | ca:cam:358:Cham |
207 | ch:chu:221:Old Church Slavonic |
208 | ci:cir:291:Cirth |
209 | cm:cmn:402:Cypro-Minoan |
210 | co:cop:205:Coptic |
211 | cp:cpr:403:Cypriote syllabary |
212 | cy:cyr:220:Cyrillic |
213 | ds:dsr:250:Deserel (Mormon) |
214 | dv:dvn:315:Devanagari (Nagari) |
215 | ed:egd:070:Egyptian demotic |
216 | eg:egy:050:Egyptian hieroglyphs |
217 | eh:egh:060:Egyptian hieratic |
218 | el:ell:200:Greek |
219 | eo:eos:210:Etruscan and Oscan |
220 | et:eth:430:Ethiopic |
221 | gl:glg:225:Glagolitic |
222 | gm:gmu:310:Gurmukhi |
223 | gt:gth:206:Gothic |
224 | gu:guj:320:Gujarati |
225 | ha:han:500:Han ideographs |
226 | he:heb:125:Hebrew |
227 | hg:hgl:420:Hangul |
228 | hm:hmo:450:Pahawh Hmong |
229 | ho:hoo:371:Hanunoo |
230 | hr:hrg:410:Hiragana |
231 | hu:hun:176:Old Hungarian runic |
232 | hv:hvn:175:Kok Turki runic |
233 | hy:hye:230:Armenian |
234 | iv:ivl:610:Indus Valley |
235 | ja:jap:930:(alias for Han + Hiragana + Katakana) |
236 | jl:jlg:445:Cherokee syllabary |
237 | jw:jwi:360:Javanese |
238 | ka:kam:241:Georgian (Mxedruli) |
239 | kh:khn:931:(alias for Hangul + Han) |
240 | kk:kkn:411:Katakana |
241 | km:khm:354:Khmer |
242 | kn:kan:345:Kannada |
243 | kr:krn:357:Karenni (Kayah Li) |
244 | ks:kst:305:Kharoshthi |
245 | kx:kax:240:Georgian (Xucuri) |
246 | la:lat:217:Latin |
247 | lf:laf:215:Latin (Fraktur variant) |
248 | lg:lag:216:Latin (Gaelic variant) |
249 | lo:lao:356:Lao |
250 | lp:lpc:335:Lepcha (Rong) |
251 | md:mda:140:Mandaean |
252 | me:mer:100:Meroitic |
253 | mh:may:090:Mayan hieroglyphs |
254 | ml:mlm:347:Malayalam |
255 | mn:mon:145:Mongolian |
256 | my:mya:350:Burmese |
257 | na:naa:400:Linear A |
258 | nb:nbb:401:Linear B |
259 | og:ogm:212:Ogham |
260 | or:ory:327:Oriya |
261 | os:osm:260:Osmanya |
262 | ph:phx:115:Phoenician |
263 | ph:pah:150:Pahlavi |
264 | pl:pld:282:Pollard Phonetic |
265 | pq:pqd:295:Klingon plQaD |
266 | pr:prm:227:Old Permic |
267 | ps:pst:600:Phaistos Disk |
268 | rn:rnr:211:Runic (Germanic) |
269 | rr:rro:620:Rongo-rongo |
270 | sa:sar:110:South Arabian |
271 | si:sin:348:Sinhala |
272 | sj:syj:137:Syriac (Jacobite variant) |
273 | sl:slb:440:Unified Canadian Aboriginal Syllabics |
274 | sn:syn:136:Syriac (Nestorian variant) |
275 | sw:sww:281:Shavian (Shaw) |
276 | sy:syr:135:Syriac (Estrangelo) |
277 | ta:tam:346:Tamil |
278 | tb:tbw:373:Tagbanwa |
279 | te:tel:340:Telugu |
280 | tf:tfn:120:Tifnagh |
281 | tg:tag:370:Tagalog |
282 | th:tha:352:Thai |
283 | tn:tna:170:Thaana |
284 | tw:twr:290:Tengwar |
285 | va:vai:470:Vai |
286 | vs:vsp:280:Visible Speech |
287 | xa:xas:000:Cuneiform, Sumero-Akkadian |
288 | xf:xfa:105:Cuneiform, Old Persian |
289 | xk:xkn:412:(alias for Hiragana + Katakana) |
290 | xu:xug:106:Cuneiform, Ugaritic |
291 | yi:yii:460:Yi |
292 | zx:zxx:997:Unwritten language |
293 | zy:zyy:998:Undetermined script |
294 | zz:zzz:999:Uncoded script |