This is my patch patch.1g for perl5.001.
[p5sagit/p5-mst-13.2.git] / lib / I18N / Collate.pm
CommitLineData
a0d0e21e 1package I18N::Collate;
2
f06db76b 3=head1 NAME
4
5Collate - compare 8-bit scalar data according to the current locale
6
7=head1 SYNOPSIS
8
9 use Collate;
10 setlocale(LC_COLLATE, 'locale-of-your-choice');
11 $s1 = new Collate "scalar_data_1";
12 $s2 = new Collate "scalar_data_2";
13
14=head1 DESCRIPTION
15
16This module provides you with objects that will collate
17according to your national character set, providing the
18POSIX setlocale() function should be supported on your system.
19
20You can compare $s1 and $s2 above with
21
22 $s1 le $s2
23
24to extract the data itself, you'll need a dereference: $$s1
25
26This uses POSIX::setlocale The basic collation conversion is done by
27strxfrm() which terminates at NUL characters being a decent C routine.
28collate_xfrm() handles embedded NUL characters gracefully. Due to C<cmp>
29and overload magic, C<lt>, C<le>, C<eq>, C<ge>, and C<gt> work also. The
30available locales depend on your operating system; try whether C<locale
31-a> shows them or the more direct approach C<ls /usr/lib/nls/loc> or C<ls
32/usr/lib/nls>. The locale names are probably something like
33"xx_XX.(ISO)?8859-N".
34
35=cut
36
a0d0e21e 37# Collate.pm
38#
39# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
40# Helsinki University of Technology, Finland
41#
42# Acks: Guy Decoux <decoux@moulon.inra.fr> understood
43# overloading magic much deeper than I and told
44# how to cut the size of this code by more than half.
45# (my first version did overload all of lt gt eq le ge cmp)
46#
47# Purpose: compare 8-bit scalar data according to the current locale
48#
49# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
50#
51# Exports: setlocale 1)
52# collate_xfrm 2)
53#
54# Overloads: cmp # 3)
55#
56# Usage: use Collate;
57# setlocale(&LC_COLLATE, 'locale-of-your-choice'); # 4)
58# $s1 = new Collate "scalar_data_1";
59# $s2 = new Collate "scalar_data_2";
60#
61# now you can compare $s1 and $s2: $s1 le $s2
62# to extract the data itself, you need to deref: $$s1
63#
64# Notes:
65# 1) this uses POSIX::setlocale
66# 2) the basic collation conversion is done by strxfrm() which
67# terminates at NUL characters being a decent C routine.
68# collate_xfrm handles embedded NUL characters gracefully.
69# 3) due to cmp and overload magic, lt le eq ge gt work also
70# 4) the available locales depend on your operating system;
71# try whether "locale -a" shows them or the more direct
72# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
73# The locale names are probably something like
74# 'xx_XX.(ISO)?8859-N'.
75#
76# Updated: 19940913 1341 GMT
77#
78# ---
79
80use POSIX qw(strxfrm LC_COLLATE);
81
82require Exporter;
83
84@ISA = qw(Exporter);
85@EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
86@EXPORT_OK = qw();
87
88%OVERLOAD = qw(
89fallback 1
90cmp collate_cmp
91);
92
93sub new { my $new = $_[1]; bless \$new }
94
95sub setlocale {
96 my ($category, $locale) = @_[0,1];
97
98 POSIX::setlocale($category, $locale) if (defined $category);
99 # the current $LOCALE
100 $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
101}
102
103sub C {
104 my $s = ${$_[0]};
105
106 $C->{$LOCALE}->{$s} = collate_xfrm($s)
107 unless (defined $C->{$LOCALE}->{$s}); # cache when met
108
109 $C->{$LOCALE}->{$s};
110}
111
112sub collate_xfrm {
113 my $s = $_[0];
114 my $x = '';
115
116 for (split(/(\000+)/, $s)) {
117 $x .= (/^\000/) ? $_ : strxfrm("$_\000");
118 }
119
120 $x;
121}
122
123sub collate_cmp {
124 &C($_[0]) cmp &C($_[1]);
125}
126
127# init $LOCALE
128
129&I18N::Collate::setlocale();
130
1311; # keep require happy