Commit | Line | Data |
84d4ea48 |
1 | package sort; |
2 | |
1f17861c |
3 | our $VERSION = '2.00'; |
84d4ea48 |
4 | |
7b9ef140 |
5 | # The hints for pp_sort are now stored in $^H{sort}; older versions |
6 | # of perl used the global variable $sort::hints. -- rjh 2005-12-19 |
045ac317 |
7 | |
7b9ef140 |
8 | $sort::hint_bits = 0x04020000; # HINT_LOCALIZE_HH | HINT_HH_FOR_EVAL |
84d4ea48 |
9 | |
10 | $sort::quicksort_bit = 0x00000001; |
11 | $sort::mergesort_bit = 0x00000002; |
12 | $sort::sort_bits = 0x000000FF; # allow 256 different ones |
13 | $sort::stable_bit = 0x00000100; |
84d4ea48 |
14 | |
15 | use strict; |
16 | |
17 | sub import { |
18 | shift; |
19 | if (@_ == 0) { |
20 | require Carp; |
21 | Carp::croak("sort pragma requires arguments"); |
22 | } |
84d4ea48 |
23 | local $_; |
045ac317 |
24 | no warnings 'uninitialized'; # bitops would warn |
7b9ef140 |
25 | $^H{sort} //= 0; |
84d4ea48 |
26 | while ($_ = shift(@_)) { |
c53fc8a6 |
27 | if (/^_q(?:uick)?sort$/) { |
7b9ef140 |
28 | $^H{sort} &= ~$sort::sort_bits; |
29 | $^H{sort} |= $sort::quicksort_bit; |
c53fc8a6 |
30 | } elsif ($_ eq '_mergesort') { |
7b9ef140 |
31 | $^H{sort} &= ~$sort::sort_bits; |
32 | $^H{sort} |= $sort::mergesort_bit; |
c53fc8a6 |
33 | } elsif ($_ eq 'stable') { |
7b9ef140 |
34 | $^H{sort} |= $sort::stable_bit; |
7a8ff2dd |
35 | } elsif ($_ eq 'defaults') { |
7b9ef140 |
36 | $^H{sort} = 0; |
7a8ff2dd |
37 | } else { |
38 | require Carp; |
39 | Carp::croak("sort: unknown subpragma '$_'"); |
40 | } |
41 | } |
7b9ef140 |
42 | $^H |= $sort::hint_bits; |
7a8ff2dd |
43 | } |
44 | |
45 | sub unimport { |
46 | shift; |
47 | if (@_ == 0) { |
48 | require Carp; |
49 | Carp::croak("sort pragma requires arguments"); |
50 | } |
51 | local $_; |
52 | no warnings 'uninitialized'; # bitops would warn |
53 | while ($_ = shift(@_)) { |
54 | if (/^_q(?:uick)?sort$/) { |
7b9ef140 |
55 | $^H{sort} &= ~$sort::sort_bits; |
7a8ff2dd |
56 | } elsif ($_ eq '_mergesort') { |
7b9ef140 |
57 | $^H{sort} &= ~$sort::sort_bits; |
7a8ff2dd |
58 | } elsif ($_ eq 'stable') { |
7b9ef140 |
59 | $^H{sort} &= ~$sort::stable_bit; |
84d4ea48 |
60 | } else { |
61 | require Carp; |
71c4de84 |
62 | Carp::croak("sort: unknown subpragma '$_'"); |
84d4ea48 |
63 | } |
64 | } |
65 | } |
66 | |
67 | sub current { |
68 | my @sort; |
7b9ef140 |
69 | if ($^H{sort}) { |
70 | push @sort, 'quicksort' if $^H{sort} & $sort::quicksort_bit; |
71 | push @sort, 'mergesort' if $^H{sort} & $sort::mergesort_bit; |
72 | push @sort, 'stable' if $^H{sort} & $sort::stable_bit; |
84d4ea48 |
73 | } |
74 | push @sort, 'mergesort' unless @sort; |
75 | join(' ', @sort); |
76 | } |
77 | |
78 | 1; |
79 | __END__ |
80 | |
81 | =head1 NAME |
82 | |
83 | sort - perl pragma to control sort() behaviour |
84 | |
85 | =head1 SYNOPSIS |
86 | |
c53fc8a6 |
87 | use sort 'stable'; # guarantee stability |
88 | use sort '_quicksort'; # use a quicksort algorithm |
89 | use sort '_mergesort'; # use a mergesort algorithm |
7a8ff2dd |
90 | use sort 'defaults'; # revert to default behavior |
91 | no sort 'stable'; # stability not important |
84d4ea48 |
92 | |
c53fc8a6 |
93 | use sort '_qsort'; # alias for quicksort |
84d4ea48 |
94 | |
7b9ef140 |
95 | my $current; |
96 | BEGIN { |
97 | $current = sort::current(); # identify prevailing algorithm |
98 | } |
84d4ea48 |
99 | |
100 | =head1 DESCRIPTION |
101 | |
7a8ff2dd |
102 | With the C<sort> pragma you can control the behaviour of the builtin |
103 | C<sort()> function. |
84d4ea48 |
104 | |
105 | In Perl versions 5.6 and earlier the quicksort algorithm was used to |
7a8ff2dd |
106 | implement C<sort()>, but in Perl 5.8 a mergesort algorithm was also made |
c53fc8a6 |
107 | available, mainly to guarantee worst case O(N log N) behaviour: |
108 | the worst case of quicksort is O(N**2). In Perl 5.8 and later, |
109 | quicksort defends against quadratic behaviour by shuffling large |
110 | arrays before sorting. |
111 | |
112 | A stable sort means that for records that compare equal, the original |
b0ae2885 |
113 | input ordering is preserved. Mergesort is stable, quicksort is not. |
c53fc8a6 |
114 | Stability will matter only if elements that compare equal can be |
115 | distinguished in some other way. That means that simple numerical |
116 | and lexical sorts do not profit from stability, since equal elements |
117 | are indistinguishable. However, with a comparison such as |
118 | |
119 | { substr($a, 0, 3) cmp substr($b, 0, 3) } |
120 | |
121 | stability might matter because elements that compare equal on the |
122 | first 3 characters may be distinguished based on subsequent characters. |
123 | In Perl 5.8 and later, quicksort can be stabilized, but doing so will |
124 | add overhead, so it should only be done if it matters. |
125 | |
126 | The best algorithm depends on many things. On average, mergesort |
127 | does fewer comparisons than quicksort, so it may be better when |
128 | complicated comparison routines are used. Mergesort also takes |
129 | advantage of pre-existing order, so it would be favored for using |
7a8ff2dd |
130 | C<sort()> to merge several sorted arrays. On the other hand, quicksort |
131 | is often faster for small arrays, and on arrays of a few distinct |
132 | values, repeated many times. You can force the |
c53fc8a6 |
133 | choice of algorithm with this pragma, but this feels heavy-handed, |
134 | so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. |
7a8ff2dd |
135 | The default algorithm is mergesort, which will be stable even if |
136 | you do not explicitly demand it. |
137 | But the stability of the default sort is a side-effect that could |
138 | change in later versions. If stability is important, be sure to |
139 | say so with a |
140 | |
141 | use sort 'stable'; |
142 | |
143 | The C<no sort> pragma doesn't |
144 | I<forbid> what follows, it just leaves the choice open. Thus, after |
145 | |
146 | no sort qw(_mergesort stable); |
147 | |
148 | a mergesort, which happens to be stable, will be employed anyway. |
149 | Note that |
150 | |
151 | no sort "_quicksort"; |
152 | no sort "_mergesort"; |
153 | |
154 | have exactly the same effect, leaving the choice of sort algorithm open. |
84d4ea48 |
155 | |
0e59b7c6 |
156 | =head1 CAVEATS |
157 | |
7b9ef140 |
158 | As of Perl 5.10, this pragma is lexically scoped and takes effect |
159 | at compile time. In earlier versions its effect was global and took |
160 | effect at run-time; the documentation suggested using C<eval()> to |
161 | change the behaviour: |
7a8ff2dd |
162 | |
7b9ef140 |
163 | { eval 'use sort qw(defaults _quicksort)'; # force quicksort |
164 | eval 'no sort "stable"'; # stability not wanted |
7a8ff2dd |
165 | print sort::current . "\n"; |
166 | @a = sort @b; |
7b9ef140 |
167 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd |
168 | } |
7b9ef140 |
169 | { eval 'use sort qw(defaults stable)'; # force stability |
7a8ff2dd |
170 | print sort::current . "\n"; |
171 | @c = sort @d; |
7b9ef140 |
172 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd |
173 | } |
7a8ff2dd |
174 | |
7b9ef140 |
175 | Such code no longer has the desired effect, for two reasons. |
176 | Firstly, the use of C<eval()> means that the sorting algorithm |
177 | is not changed until runtime, by which time it's too late to |
178 | have any effect. Secondly, C<sort::current> is also called at |
179 | run-time, when in fact the compile-time value of C<sort::current> |
180 | is the one that matters. |
7a8ff2dd |
181 | |
7b9ef140 |
182 | So now this code would be written: |
7a8ff2dd |
183 | |
7b9ef140 |
184 | { use sort qw(defaults _quicksort); # force quicksort |
185 | no sort "stable"; # stability not wanted |
186 | my $current; |
187 | BEGIN { $current = print sort::current; } |
188 | print "$current\n"; |
7a8ff2dd |
189 | @a = sort @b; |
7b9ef140 |
190 | # Pragmas go out of scope at the end of the block |
7a8ff2dd |
191 | } |
7b9ef140 |
192 | { use sort qw(defaults stable); # force stability |
193 | my $current; |
194 | BEGIN { $current = print sort::current; } |
195 | print "$current\n"; |
7a8ff2dd |
196 | @c = sort @d; |
7a8ff2dd |
197 | } |
0e59b7c6 |
198 | |
84d4ea48 |
199 | =cut |
200 | |