additional tests for utf8.t
[p5sagit/p5-mst-13.2.git] / lib / I18N / Collate.pm
CommitLineData
a0d0e21e 1package I18N::Collate;
2
f06db76b 3=head1 NAME
4
69b19ea2 5I18N::Collate - compare 8-bit scalar data according to the current locale
f06db76b 6
4413da2b 7 ***
8
9 WARNING: starting from the Perl version 5.003_06
10 the I18N::Collate interface for comparing 8-bit scalar data
11 according to the current locale
12
13 HAS BEEN DEPRECATED
14
15 That is, please do not use it anymore for any new applications
16 and please migrate the old applications away from it because its
17 functionality was integrated into the Perl core language in the
18 release 5.003_06.
19
20 See the perllocale manual page for further information.
21
22 ***
23
f06db76b 24=head1 SYNOPSIS
25
69b19ea2 26 use I18N::Collate;
f06db76b 27 setlocale(LC_COLLATE, 'locale-of-your-choice');
69b19ea2 28 $s1 = new I18N::Collate "scalar_data_1";
29 $s2 = new I18N::Collate "scalar_data_2";
f06db76b 30
31=head1 DESCRIPTION
32
33This module provides you with objects that will collate
69b19ea2 34according to your national character set, provided that the
35POSIX setlocale() function is supported on your system.
f06db76b 36
37You can compare $s1 and $s2 above with
38
39 $s1 le $s2
40
41to extract the data itself, you'll need a dereference: $$s1
42
6158a1ac 43This module uses POSIX::setlocale(). The basic collation conversion is
44done by strxfrm() which terminates at NUL characters being a decent C
45routine. collate_xfrm() handles embedded NUL characters gracefully.
c2960299 46
6158a1ac 47The available locales depend on your operating system; try whether
48C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
49direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
50C<ls /usr/lib/locale>. Not all the locales that your vendor supports
51are necessarily installed: please consult your operating system's
52documentation and possibly your local system administration. The
53locale names are probably something like C<xx_XX.(ISO)?8859-N> or
54C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
55variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
56European character set.
f06db76b 57
58=cut
59
69b19ea2 60# I18N::Collate.pm
a0d0e21e 61#
5aabfad6 62# Author: Jarkko Hietaniemi <F<jhi@iki.fi>>
a0d0e21e 63# Helsinki University of Technology, Finland
64#
5aabfad6 65# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood
a0d0e21e 66# overloading magic much deeper than I and told
67# how to cut the size of this code by more than half.
68# (my first version did overload all of lt gt eq le ge cmp)
69#
70# Purpose: compare 8-bit scalar data according to the current locale
71#
72# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
73#
74# Exports: setlocale 1)
75# collate_xfrm 2)
76#
77# Overloads: cmp # 3)
78#
69b19ea2 79# Usage: use I18N::Collate;
c2960299 80# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
69b19ea2 81# $s1 = new I18N::Collate "scalar_data_1";
82# $s2 = new I18N::Collate "scalar_data_2";
a0d0e21e 83#
84# now you can compare $s1 and $s2: $s1 le $s2
85# to extract the data itself, you need to deref: $$s1
86#
87# Notes:
88# 1) this uses POSIX::setlocale
89# 2) the basic collation conversion is done by strxfrm() which
90# terminates at NUL characters being a decent C routine.
91# collate_xfrm handles embedded NUL characters gracefully.
92# 3) due to cmp and overload magic, lt le eq ge gt work also
93# 4) the available locales depend on your operating system;
c2960299 94# try whether "locale -a" shows them or man pages for
95# "locale" or "nlsinfo" work or the more direct
a0d0e21e 96# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
c2960299 97# Not all the locales that your vendor supports
98# are necessarily installed: please consult your
99# operating system's documentation.
a0d0e21e 100# The locale names are probably something like
c2960299 101# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
102# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
103# variant of French (fr), ISO Latin (8859) 1 (-1)
104# which is the Western European character set.
a0d0e21e 105#
6b48aaa4 106# Updated: 19961005
a0d0e21e 107#
108# ---
109
110use POSIX qw(strxfrm LC_COLLATE);
d3a7d8c7 111use warnings::register;
a0d0e21e 112
113require Exporter;
114
115@ISA = qw(Exporter);
116@EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
117@EXPORT_OK = qw();
118
a5f75d66 119use overload qw(
a0d0e21e 120fallback 1
121cmp collate_cmp
122);
123
6b48aaa4 124sub new {
125 my $new = $_[1];
126
d3a7d8c7 127 if (warnings::enabled() && $] >= 5.003_06) {
6b48aaa4 128 unless ($please_use_I18N_Collate_even_if_deprecated) {
d3a7d8c7 129 warnings::warn <<___EOD___;
6b48aaa4 130***
131
4413da2b 132 WARNING: starting from the Perl version 5.003_06
133 the I18N::Collate interface for comparing 8-bit scalar data
134 according to the current locale
6b48aaa4 135
136 HAS BEEN DEPRECATED
137
4413da2b 138 That is, please do not use it anymore for any new applications
139 and please migrate the old applications away from it because its
140 functionality was integrated into the Perl core language in the
141 release 5.003_06.
6b48aaa4 142
4413da2b 143 See the perllocale manual page for further information.
6b48aaa4 144
145***
146___EOD___
147 $please_use_I18N_Collate_even_if_deprecated++;
148 }
149 }
150
151 bless \$new;
152}
a0d0e21e 153
154sub setlocale {
155 my ($category, $locale) = @_[0,1];
156
157 POSIX::setlocale($category, $locale) if (defined $category);
158 # the current $LOCALE
159 $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
160}
161
162sub C {
163 my $s = ${$_[0]};
164
165 $C->{$LOCALE}->{$s} = collate_xfrm($s)
166 unless (defined $C->{$LOCALE}->{$s}); # cache when met
167
168 $C->{$LOCALE}->{$s};
169}
170
171sub collate_xfrm {
172 my $s = $_[0];
173 my $x = '';
174
175 for (split(/(\000+)/, $s)) {
176 $x .= (/^\000/) ? $_ : strxfrm("$_\000");
177 }
178
179 $x;
180}
181
182sub collate_cmp {
183 &C($_[0]) cmp &C($_[1]);
184}
185
186# init $LOCALE
187
188&I18N::Collate::setlocale();
189
1901; # keep require happy