LC_COLLATE.
[p5sagit/p5-mst-13.2.git] / lib / I18N / Collate.pm
CommitLineData
6b48aaa4 1#
2# NOTE! This module is deprecated (obsolete) after the Perl release
3# 5.003_06 as the functionality has been integrated into the Perl core.
4#
5
a0d0e21e 6package I18N::Collate;
7
f06db76b 8=head1 NAME
9
69b19ea2 10I18N::Collate - compare 8-bit scalar data according to the current locale
f06db76b 11
12=head1 SYNOPSIS
13
69b19ea2 14 use I18N::Collate;
f06db76b 15 setlocale(LC_COLLATE, 'locale-of-your-choice');
69b19ea2 16 $s1 = new I18N::Collate "scalar_data_1";
17 $s2 = new I18N::Collate "scalar_data_2";
f06db76b 18
19=head1 DESCRIPTION
20
21This module provides you with objects that will collate
69b19ea2 22according to your national character set, provided that the
23POSIX setlocale() function is supported on your system.
f06db76b 24
25You can compare $s1 and $s2 above with
26
27 $s1 le $s2
28
To extract the data itself, you'll need a dereference: $$s1
30
This uses POSIX::setlocale(). The basic collation conversion is done by
strxfrm() which, being a decent C routine, terminates at NUL characters.
collate_xfrm() handles embedded NUL characters gracefully. Due to C<cmp>
and overload magic, C<lt>, C<le>, C<eq>, C<ge>, and C<gt> work also. The
available locales depend on your operating system; check whether C<locale
-a> lists them, consult the man pages for "locale" or "nlsinfo", or take
the direct approach with C<ls /usr/lib/nls/loc> or C<ls
/usr/lib/nls>. Not all the locales that your vendor supports
are necessarily installed: please consult your operating system's
documentation and possibly your local system administration.
c2960299 41
42The locale names are probably something like
43C<"xx_XX.(ISO)?8859-N"> or C<"xx_XX.(ISO)?8859N">, for example
44C<"fr_CH.ISO8859-1"> is the Swiss (CH) variant of French (fr),
45ISO Latin (8859) 1 (-1) which is the Western European character set.
f06db76b 46
47=cut
48
69b19ea2 49# I18N::Collate.pm
a0d0e21e 50#
51# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
52# Helsinki University of Technology, Finland
53#
54# Acks: Guy Decoux <decoux@moulon.inra.fr> understood
55# overloading magic much deeper than I and told
56# how to cut the size of this code by more than half.
57# (my first version did overload all of lt gt eq le ge cmp)
58#
59# Purpose: compare 8-bit scalar data according to the current locale
60#
61# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
62#
63# Exports: setlocale 1)
64# collate_xfrm 2)
65#
66# Overloads: cmp # 3)
67#
69b19ea2 68# Usage: use I18N::Collate;
c2960299 69# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
69b19ea2 70# $s1 = new I18N::Collate "scalar_data_1";
71# $s2 = new I18N::Collate "scalar_data_2";
a0d0e21e 72#
73# now you can compare $s1 and $s2: $s1 le $s2
74# to extract the data itself, you need to deref: $$s1
75#
76# Notes:
77# 1) this uses POSIX::setlocale
78# 2) the basic collation conversion is done by strxfrm() which
79# terminates at NUL characters being a decent C routine.
80# collate_xfrm handles embedded NUL characters gracefully.
81# 3) due to cmp and overload magic, lt le eq ge gt work also
82# 4) the available locales depend on your operating system;
c2960299 83# try whether "locale -a" shows them or man pages for
84# "locale" or "nlsinfo" work or the more direct
a0d0e21e 85# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
c2960299 86# Not all the locales that your vendor supports
87# are necessarily installed: please consult your
88# operating system's documentation.
a0d0e21e 89# The locale names are probably something like
c2960299 90# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
91# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
92# variant of French (fr), ISO Latin (8859) 1 (-1)
93# which is the Western European character set.
a0d0e21e 94#
6b48aaa4 95# Updated: 19961005
a0d0e21e 96#
97# ---
98
# strxfrm() and the LC_COLLATE constant come from POSIX; Exporter publishes
# this module's public names.
use POSIX qw(strxfrm LC_COLLATE);

require Exporter;

# All public names are exported by default; nothing is export-on-request.
@ISA       = ('Exporter');
@EXPORT    = ('collate_xfrm', 'setlocale', 'LC_COLLATE');
@EXPORT_OK = ();

# Only "cmp" is overloaded, with its handler given by name (collate_cmp);
# lt, le, eq, ge and gt are derived from it by the overload machinery.
use overload
    'fallback' => 1,
    'cmp'      => 'collate_cmp';
111
# Constructor: wraps a copy of the given string in a blessed scalar reference
# so that the overloaded "cmp" (and the operators derived from it) can collate
# it according to the current locale.
#
#   $_[0]  class name, or an existing object whose class is reused;
#          falls back to I18N::Collate when empty
#   $_[1]  the scalar data to wrap
#
# Returns the new object; the wrapped string is available as $$object.
#
# When warnings are enabled ($^W) on Perl 5.003_06 or later, a deprecation
# notice is printed once.  Setting the package variable
# $please_use_I18N_Collate_even_if_deprecated to a true value beforehand
# suppresses it.
sub new {
    my $class = ref($_[0]) || $_[0] || 'I18N::Collate';
    my $new   = $_[1];

    if ($^W && $] >= 5.003_06) {
        unless ($please_use_I18N_Collate_even_if_deprecated) {
            warn <<___EOD___;
***

 WARNING: starting from the Perl version 5.003_06 the I18N::Collate
 interface for comparing 8-bit scalar data according to the current locale

 HAS BEEN DEPRECATED

 (that is, please do not use it anymore for any new applications and please
 migrate the old applications away from it) because its functionality
 was integrated into the Perl core language in the release 5.003_06.

 All scalar data is now collated according to the current locale setting.
 Also, Perl does automatically the setlocale(LC_COLLATE, "") for you.

 To convert: forget I18N::Collate completely and use scalar data in
 a completely normal way.

***
___EOD___
            # Remember that the notice has been shown so it appears only once.
            $please_use_I18N_Collate_even_if_deprecated++;
        }
    }

    # Two-argument bless: a subclass (or an object used as invocant) gets an
    # object of its own class rather than always one of this package.
    bless \$new, $class;
}
a0d0e21e 143
# Sets the locale (when a category is supplied) and records the locale name
# in the package variable $LOCALE, which C() uses as its cache key.
#
#   $_[0]  locale category (e.g. LC_COLLATE); when undefined,
#          POSIX::setlocale() is not called and only $LOCALE is updated
#   $_[1]  locale name; when empty or missing, the name is taken from the
#          environment
#
# Returns the recorded locale name (possibly the empty string).
sub setlocale {
    my ($category, $locale) = @_[0,1];

    POSIX::setlocale($category, $locale) if (defined $category);

    # Follow the POSIX precedence for environment-selected locales:
    # LC_ALL overrides LC_COLLATE, and LANG is the last resort.
    $LOCALE = $locale
           || $ENV{'LC_ALL'}
           || $ENV{'LC_COLLATE'}
           || $ENV{'LANG'}
           || '';

    return $LOCALE;
}
151
# Returns the collation key for the string referenced by $_[0], memoised per
# locale: the package-level cache $C is indexed first by the current $LOCALE
# and then by the string itself, and collate_xfrm() is only called when no
# key is stored yet.
sub C {
    my $text       = ${ $_[0] };
    my $per_locale = ($C->{$LOCALE} ||= {});

    if (!defined $per_locale->{$text}) {
        $per_locale->{$text} = collate_xfrm($text);    # cache when met
    }

    return $per_locale->{$text};
}
160
# Builds a collation key for a string that may contain NUL characters.
# strxfrm() stops at the first NUL, so the string is cut into NUL-free
# pieces and runs of NULs: each piece is transformed separately (with a
# terminating NUL appended) while the NUL runs are copied through unchanged.
#
#   $_[0]  the string to transform
#
# Returns the concatenation of the transformed pieces and the NUL runs.
sub collate_xfrm {
    my $s = $_[0];

    return join('',
                map { /^\000/ ? $_ : strxfrm("$_\000") }
                split(/(\000+)/, $s));
}
171
# Handler for the overloaded "cmp" operator: compares the cached collation
# keys (see C()) of its two operands.
#
#   $_[0]  the I18N::Collate object
#   $_[1]  the other operand: another object, or a plain string
#   $_[2]  true when the overload machinery swapped the operands, i.e. the
#          expression was written as  other cmp object
#
# Returns -1, 0 or 1 with the sign matching the operand order as written.
sub collate_cmp {
    my ($left, $right, $swapped) = @_;

    # C() dereferences its argument, so a plain string operand is wrapped in
    # a reference instead of being treated as a symbolic reference.
    my $lhs = ref($left)  ? $left  : \$left;
    my $rhs = ref($right) ? $right : \$right;

    my $order = C($lhs) cmp C($rhs);

    # Undo the operand swap so that  "str" cmp $obj  gets the right sign.
    return $swapped ? -$order : $order;
}
175
# Prime $LOCALE at load time.  Called without a category, setlocale() skips
# POSIX::setlocale() and only records the locale name.

I18N::Collate::setlocale();

# A required file has to finish by returning a true value.
1;