Commit | Line | Data |
a0d0e21e |
1 | package I18N::Collate; |
2 | |
f06db76b |
3 | =head1 NAME |
4 | |
5 | Collate - compare 8-bit scalar data according to the current locale |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | use Collate; |
10 | setlocale(LC_COLLATE, 'locale-of-your-choice'); |
11 | $s1 = new Collate "scalar_data_1"; |
12 | $s2 = new Collate "scalar_data_2"; |
13 | |
14 | =head1 DESCRIPTION |
15 | |
16 | This module provides you with objects that will collate |
17 | according to your national character set, providing the |
18 | POSIX setlocale() function should be supported on your system. |
19 | |
20 | You can compare $s1 and $s2 above with |
21 | |
22 | $s1 le $s2 |
23 | |
24 | to extract the data itself, you'll need a dereference: $$s1 |
25 | |
26 | This uses POSIX::setlocale The basic collation conversion is done by |
27 | strxfrm() which terminates at NUL characters being a decent C routine. |
28 | collate_xfrm() handles embedded NUL characters gracefully. Due to C<cmp> |
29 | and overload magic, C<lt>, C<le>, C<eq>, C<ge>, and C<gt> work also. The |
30 | available locales depend on your operating system; try whether C<locale |
31 | -a> shows them or the more direct approach C<ls /usr/lib/nls/loc> or C<ls |
32 | /usr/lib/nls>. The locale names are probably something like |
33 | "xx_XX.(ISO)?8859-N". |
34 | |
35 | =cut |
36 | |
a0d0e21e |
37 | # Collate.pm |
38 | # |
39 | # Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi> |
40 | # Helsinki University of Technology, Finland |
41 | # |
42 | # Acks: Guy Decoux <decoux@moulon.inra.fr> understood |
43 | # overloading magic much deeper than I and told |
44 | # how to cut the size of this code by more than half. |
45 | # (my first version did overload all of lt gt eq le ge cmp) |
46 | # |
47 | # Purpose: compare 8-bit scalar data according to the current locale |
48 | # |
49 | # Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() |
50 | # |
51 | # Exports: setlocale 1) |
52 | # collate_xfrm 2) |
53 | # |
54 | # Overloads: cmp # 3) |
55 | # |
56 | # Usage: use Collate; |
57 | # setlocale(&LC_COLLATE, 'locale-of-your-choice'); # 4) |
58 | # $s1 = new Collate "scalar_data_1"; |
59 | # $s2 = new Collate "scalar_data_2"; |
60 | # |
61 | # now you can compare $s1 and $s2: $s1 le $s2 |
62 | # to extract the data itself, you need to deref: $$s1 |
63 | # |
64 | # Notes: |
65 | # 1) this uses POSIX::setlocale |
66 | # 2) the basic collation conversion is done by strxfrm() which |
67 | # terminates at NUL characters being a decent C routine. |
68 | # collate_xfrm handles embedded NUL characters gracefully. |
69 | # 3) due to cmp and overload magic, lt le eq ge gt work also |
70 | # 4) the available locales depend on your operating system; |
71 | # try whether "locale -a" shows them or the more direct |
72 | # approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". |
73 | # The locale names are probably something like |
74 | # 'xx_XX.(ISO)?8859-N'. |
75 | # |
76 | # Updated: 19940913 1341 GMT |
77 | # |
78 | # --- |
79 | |
80 | use POSIX qw(strxfrm LC_COLLATE); |
81 | |
82 | require Exporter; |
83 | |
84 | @ISA = qw(Exporter); |
85 | @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); |
86 | @EXPORT_OK = qw(); |
87 | |
88 | %OVERLOAD = qw( |
89 | fallback 1 |
90 | cmp collate_cmp |
91 | ); |
92 | |
93 | sub new { my $new = $_[1]; bless \$new } |
94 | |
95 | sub setlocale { |
96 | my ($category, $locale) = @_[0,1]; |
97 | |
98 | POSIX::setlocale($category, $locale) if (defined $category); |
99 | # the current $LOCALE |
100 | $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; |
101 | } |
102 | |
103 | sub C { |
104 | my $s = ${$_[0]}; |
105 | |
106 | $C->{$LOCALE}->{$s} = collate_xfrm($s) |
107 | unless (defined $C->{$LOCALE}->{$s}); # cache when met |
108 | |
109 | $C->{$LOCALE}->{$s}; |
110 | } |
111 | |
112 | sub collate_xfrm { |
113 | my $s = $_[0]; |
114 | my $x = ''; |
115 | |
116 | for (split(/(\000+)/, $s)) { |
117 | $x .= (/^\000/) ? $_ : strxfrm("$_\000"); |
118 | } |
119 | |
120 | $x; |
121 | } |
122 | |
123 | sub collate_cmp { |
124 | &C($_[0]) cmp &C($_[1]); |
125 | } |
126 | |
127 | # init $LOCALE |
128 | |
129 | &I18N::Collate::setlocale(); |
130 | |
131 | 1; # keep require happy |