Commit | Line | Data |
a0d0e21e |
1 | package I18N::Collate; |
2 | |
f06db76b |
3 | =head1 NAME |
4 | |
69b19ea2 |
5 | I18N::Collate - compare 8-bit scalar data according to the current locale |
f06db76b |
6 | |
4413da2b |
7 | *** |
8 | |
9 | WARNING: starting from the Perl version 5.003_06 |
10 | the I18N::Collate interface for comparing 8-bit scalar data |
11 | according to the current locale |
12 | |
13 | HAS BEEN DEPRECATED |
14 | |
15 | That is, please do not use it anymore for any new applications |
16 | and please migrate the old applications away from it because its |
17 | functionality was integrated into the Perl core language in the |
18 | release 5.003_06. |
19 | |
20 | See the perllocale manual page for further information. |
21 | |
22 | *** |
23 | |
f06db76b |
24 | =head1 SYNOPSIS |
25 | |
69b19ea2 |
26 | use I18N::Collate; |
f06db76b |
27 | setlocale(LC_COLLATE, 'locale-of-your-choice'); |
69b19ea2 |
28 | $s1 = new I18N::Collate "scalar_data_1"; |
29 | $s2 = new I18N::Collate "scalar_data_2"; |
f06db76b |
30 | |
31 | =head1 DESCRIPTION |
32 | |
33 | This module provides you with objects that will collate |
69b19ea2 |
34 | according to your national character set, provided that the |
35 | POSIX setlocale() function is supported on your system. |
f06db76b |
36 | |
37 | You can compare $s1 and $s2 above with |
38 | |
39 | $s1 le $s2 |
40 | |
41 | to extract the data itself, you'll need a dereference: $$s1 |
42 | |
6158a1ac |
43 | This module uses POSIX::setlocale(). The basic collation conversion is |
44 | done by strxfrm() which terminates at NUL characters being a decent C |
45 | routine. collate_xfrm() handles embedded NUL characters gracefully. |
c2960299 |
46 | |
6158a1ac |
47 | The available locales depend on your operating system; try whether |
48 | C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the |
49 | direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or |
50 | C<ls /usr/lib/locale>. Not all the locales that your vendor supports |
51 | are necessarily installed: please consult your operating system's |
52 | documentation and possibly your local system administration. The |
53 | locale names are probably something like C<xx_XX.(ISO)?8859-N> or |
54 | C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH) |
55 | variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western |
56 | European character set. |
f06db76b |
57 | |
58 | =cut |
59 | |
69b19ea2 |
60 | # I18N::Collate.pm |
a0d0e21e |
61 | # |
5aabfad6 |
62 | # Author: Jarkko Hietaniemi <F<jhi@iki.fi>> |
a0d0e21e |
63 | # Helsinki University of Technology, Finland |
64 | # |
5aabfad6 |
65 | # Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood |
a0d0e21e |
66 | # overloading magic much deeper than I and told |
67 | # how to cut the size of this code by more than half. |
68 | # (my first version did overload all of lt gt eq le ge cmp) |
69 | # |
70 | # Purpose: compare 8-bit scalar data according to the current locale |
71 | # |
72 | # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() |
73 | # |
74 | # Exports: setlocale 1) |
75 | # collate_xfrm 2) |
76 | # |
77 | # Overloads: cmp # 3) |
78 | # |
69b19ea2 |
79 | # Usage: use I18N::Collate; |
c2960299 |
80 | # setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) |
69b19ea2 |
81 | # $s1 = new I18N::Collate "scalar_data_1"; |
82 | # $s2 = new I18N::Collate "scalar_data_2"; |
a0d0e21e |
83 | # |
84 | # now you can compare $s1 and $s2: $s1 le $s2 |
85 | # to extract the data itself, you need to deref: $$s1 |
86 | # |
87 | # Notes: |
88 | # 1) this uses POSIX::setlocale |
89 | # 2) the basic collation conversion is done by strxfrm() which |
90 | # terminates at NUL characters being a decent C routine. |
91 | # collate_xfrm handles embedded NUL characters gracefully. |
92 | # 3) due to cmp and overload magic, lt le eq ge gt work also |
93 | # 4) the available locales depend on your operating system; |
c2960299 |
94 | # try whether "locale -a" shows them or man pages for |
95 | # "locale" or "nlsinfo" work or the more direct |
a0d0e21e |
96 | # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". |
c2960299 |
97 | # Not all the locales that your vendor supports |
98 | # are necessarily installed: please consult your |
99 | # operating system's documentation. |
a0d0e21e |
100 | # The locale names are probably something like |
c2960299 |
101 | # 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', |
102 | # for example 'fr_CH.ISO8859-1' is the Swiss (CH) |
103 | # variant of French (fr), ISO Latin (8859) 1 (-1) |
104 | # which is the Western European character set. |
a0d0e21e |
105 | # |
6b48aaa4 |
106 | # Updated: 19961005 |
a0d0e21e |
107 | # |
108 | # --- |
109 | |
110 | use POSIX qw(strxfrm LC_COLLATE); |
d3a7d8c7 |
111 | use warnings::register; |
a0d0e21e |
112 | |
113 | require Exporter; |
114 | |
115 | @ISA = qw(Exporter); |
116 | @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); |
117 | @EXPORT_OK = qw(); |
118 | |
a5f75d66 |
119 | use overload qw( |
a0d0e21e |
120 | fallback 1 |
121 | cmp collate_cmp |
122 | ); |
123 | |
6b48aaa4 |
124 | sub new { |
125 | my $new = $_[1]; |
126 | |
d3a7d8c7 |
127 | if (warnings::enabled() && $] >= 5.003_06) { |
6b48aaa4 |
128 | unless ($please_use_I18N_Collate_even_if_deprecated) { |
d3a7d8c7 |
129 | warnings::warn <<___EOD___; |
6b48aaa4 |
130 | *** |
131 | |
4413da2b |
132 | WARNING: starting from the Perl version 5.003_06 |
133 | the I18N::Collate interface for comparing 8-bit scalar data |
134 | according to the current locale |
6b48aaa4 |
135 | |
136 | HAS BEEN DEPRECATED |
137 | |
4413da2b |
138 | That is, please do not use it anymore for any new applications |
139 | and please migrate the old applications away from it because its |
140 | functionality was integrated into the Perl core language in the |
141 | release 5.003_06. |
6b48aaa4 |
142 | |
4413da2b |
143 | See the perllocale manual page for further information. |
6b48aaa4 |
144 | |
145 | *** |
146 | ___EOD___ |
147 | $please_use_I18N_Collate_even_if_deprecated++; |
148 | } |
149 | } |
150 | |
151 | bless \$new; |
152 | } |
a0d0e21e |
153 | |
154 | sub setlocale { |
155 | my ($category, $locale) = @_[0,1]; |
156 | |
157 | POSIX::setlocale($category, $locale) if (defined $category); |
158 | # the current $LOCALE |
159 | $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; |
160 | } |
161 | |
162 | sub C { |
163 | my $s = ${$_[0]}; |
164 | |
165 | $C->{$LOCALE}->{$s} = collate_xfrm($s) |
166 | unless (defined $C->{$LOCALE}->{$s}); # cache when met |
167 | |
168 | $C->{$LOCALE}->{$s}; |
169 | } |
170 | |
171 | sub collate_xfrm { |
172 | my $s = $_[0]; |
173 | my $x = ''; |
174 | |
175 | for (split(/(\000+)/, $s)) { |
176 | $x .= (/^\000/) ? $_ : strxfrm("$_\000"); |
177 | } |
178 | |
179 | $x; |
180 | } |
181 | |
182 | sub collate_cmp { |
183 | &C($_[0]) cmp &C($_[1]); |
184 | } |
185 | |
186 | # init $LOCALE |
187 | |
188 | &I18N::Collate::setlocale(); |
189 | |
190 | 1; # keep require happy |