File::Find assumes $_ remains unchanged; bug
[p5sagit/p5-mst-13.2.git] / lib / I18N / Collate.pm
CommitLineData
a0d0e21e 1package I18N::Collate;
2
f06db76b 3=head1 NAME
4
69b19ea2 5I18N::Collate - compare 8-bit scalar data according to the current locale
f06db76b 6
7=head1 SYNOPSIS
8
69b19ea2 9 use I18N::Collate;
f06db76b 10 setlocale(LC_COLLATE, 'locale-of-your-choice');
69b19ea2 11 $s1 = new I18N::Collate "scalar_data_1";
12 $s2 = new I18N::Collate "scalar_data_2";
f06db76b 13
14=head1 DESCRIPTION
15
16This module provides you with objects that will collate
69b19ea2 17according to your national character set, provided that the
18POSIX setlocale() function is supported on your system.
f06db76b 19
20You can compare $s1 and $s2 above with
21
22 $s1 le $s2
23
24to extract the data itself, you'll need a dereference: $$s1
25
69b19ea2 26This uses POSIX::setlocale(). The basic collation conversion is done by
f06db76b 27strxfrm() which terminates at NUL characters being a decent C routine.
28collate_xfrm() handles embedded NUL characters gracefully. Due to C<cmp>
29and overload magic, C<lt>, C<le>, C<eq>, C<ge>, and C<gt> work also. The
30available locales depend on your operating system; try whether C<locale
c2960299 31-a> shows them or man pages for "locale" or "nlsinfo" or
32the direct approach C<ls /usr/lib/nls/loc> or C<ls
33/usr/lib/nls>. Not all the locales that your vendor supports
34are necessarily installed: please consult your operating system's
69b19ea2 35documentation and possibly your local system administration.
c2960299 36
37The locale names are probably something like
38C<"xx_XX.(ISO)?8859-N"> or C<"xx_XX.(ISO)?8859N">, for example
39C<"fr_CH.ISO8859-1"> is the Swiss (CH) variant of French (fr),
40ISO Latin (8859) 1 (-1) which is the Western European character set.
f06db76b 41
42=cut
43
69b19ea2 44# I18N::Collate.pm
a0d0e21e 45#
46# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
47# Helsinki University of Technology, Finland
48#
49# Acks: Guy Decoux <decoux@moulon.inra.fr> understood
50# overloading magic much deeper than I and told
51# how to cut the size of this code by more than half.
52# (my first version did overload all of lt gt eq le ge cmp)
53#
54# Purpose: compare 8-bit scalar data according to the current locale
55#
56# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
57#
58# Exports: setlocale 1)
59# collate_xfrm 2)
60#
61# Overloads: cmp # 3)
62#
69b19ea2 63# Usage: use I18N::Collate;
c2960299 64# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
69b19ea2 65# $s1 = new I18N::Collate "scalar_data_1";
66# $s2 = new I18N::Collate "scalar_data_2";
a0d0e21e 67#
68# now you can compare $s1 and $s2: $s1 le $s2
69# to extract the data itself, you need to deref: $$s1
70#
71# Notes:
72# 1) this uses POSIX::setlocale
73# 2) the basic collation conversion is done by strxfrm() which
74# terminates at NUL characters being a decent C routine.
75# collate_xfrm handles embedded NUL characters gracefully.
76# 3) due to cmp and overload magic, lt le eq ge gt work also
77# 4) the available locales depend on your operating system;
c2960299 78# try whether "locale -a" shows them or man pages for
79# "locale" or "nlsinfo" work or the more direct
a0d0e21e 80# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
c2960299 81# Not all the locales that your vendor supports
82# are necessarily installed: please consult your
83# operating system's documentation.
a0d0e21e 84# The locale names are probably something like
c2960299 85# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
86# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
87# variant of French (fr), ISO Latin (8859) 1 (-1)
88# which is the Western European character set.
a0d0e21e 89#
69b19ea2 90# Updated: 19960104 1946 GMT
a0d0e21e 91#
92# ---
93
94use POSIX qw(strxfrm LC_COLLATE);
95
96require Exporter;
97
98@ISA = qw(Exporter);
99@EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
100@EXPORT_OK = qw();
101
a5f75d66 102use overload qw(
a0d0e21e 103fallback 1
104cmp collate_cmp
105);
106
107sub new { my $new = $_[1]; bless \$new }
108
109sub setlocale {
110 my ($category, $locale) = @_[0,1];
111
112 POSIX::setlocale($category, $locale) if (defined $category);
113 # the current $LOCALE
114 $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
115}
116
117sub C {
118 my $s = ${$_[0]};
119
120 $C->{$LOCALE}->{$s} = collate_xfrm($s)
121 unless (defined $C->{$LOCALE}->{$s}); # cache when met
122
123 $C->{$LOCALE}->{$s};
124}
125
126sub collate_xfrm {
127 my $s = $_[0];
128 my $x = '';
129
130 for (split(/(\000+)/, $s)) {
131 $x .= (/^\000/) ? $_ : strxfrm("$_\000");
132 }
133
134 $x;
135}
136
137sub collate_cmp {
138 &C($_[0]) cmp &C($_[1]);
139}
140
141# init $LOCALE
142
143&I18N::Collate::setlocale();
144
1451; # keep require happy