Commit | Line | Data |
84d4ea48 |
1 | package sort; |
2 | |
1f17861c |
3 | our $VERSION = '2.00'; |
84d4ea48 |
4 | |
7b9ef140 |
5 | # The hints for pp_sort are now stored in $^H{sort}; older versions |
6 | # of perl used the global variable $sort::hints. -- rjh 2005-12-19 |
045ac317 |
7 | |
84d4ea48 |
8 | $sort::quicksort_bit = 0x00000001; |
9 | $sort::mergesort_bit = 0x00000002; |
10 | $sort::sort_bits = 0x000000FF; # allow 256 different ones |
11 | $sort::stable_bit = 0x00000100; |
84d4ea48 |
12 | |
13 | use strict; |
14 | |
15 | sub import { |
16 | shift; |
17 | if (@_ == 0) { |
18 | require Carp; |
19 | Carp::croak("sort pragma requires arguments"); |
20 | } |
84d4ea48 |
21 | local $_; |
045ac317 |
22 | no warnings 'uninitialized'; # bitops would warn |
7b9ef140 |
23 | $^H{sort} //= 0; |
84d4ea48 |
24 | while ($_ = shift(@_)) { |
c53fc8a6 |
25 | if (/^_q(?:uick)?sort$/) { |
7b9ef140 |
26 | $^H{sort} &= ~$sort::sort_bits; |
27 | $^H{sort} |= $sort::quicksort_bit; |
c53fc8a6 |
28 | } elsif ($_ eq '_mergesort') { |
7b9ef140 |
29 | $^H{sort} &= ~$sort::sort_bits; |
30 | $^H{sort} |= $sort::mergesort_bit; |
c53fc8a6 |
31 | } elsif ($_ eq 'stable') { |
7b9ef140 |
32 | $^H{sort} |= $sort::stable_bit; |
7a8ff2dd |
33 | } elsif ($_ eq 'defaults') { |
7b9ef140 |
34 | $^H{sort} = 0; |
7a8ff2dd |
35 | } else { |
36 | require Carp; |
37 | Carp::croak("sort: unknown subpragma '$_'"); |
38 | } |
39 | } |
7b9ef140 |
40 | $^H |= $sort::hint_bits; |
7a8ff2dd |
41 | } |
42 | |
43 | sub unimport { |
44 | shift; |
45 | if (@_ == 0) { |
46 | require Carp; |
47 | Carp::croak("sort pragma requires arguments"); |
48 | } |
49 | local $_; |
50 | no warnings 'uninitialized'; # bitops would warn |
51 | while ($_ = shift(@_)) { |
52 | if (/^_q(?:uick)?sort$/) { |
7b9ef140 |
53 | $^H{sort} &= ~$sort::sort_bits; |
7a8ff2dd |
54 | } elsif ($_ eq '_mergesort') { |
7b9ef140 |
55 | $^H{sort} &= ~$sort::sort_bits; |
7a8ff2dd |
56 | } elsif ($_ eq 'stable') { |
7b9ef140 |
57 | $^H{sort} &= ~$sort::stable_bit; |
84d4ea48 |
58 | } else { |
59 | require Carp; |
71c4de84 |
60 | Carp::croak("sort: unknown subpragma '$_'"); |
84d4ea48 |
61 | } |
62 | } |
63 | } |
64 | |
65 | sub current { |
66 | my @sort; |
7b9ef140 |
67 | if ($^H{sort}) { |
68 | push @sort, 'quicksort' if $^H{sort} & $sort::quicksort_bit; |
69 | push @sort, 'mergesort' if $^H{sort} & $sort::mergesort_bit; |
70 | push @sort, 'stable' if $^H{sort} & $sort::stable_bit; |
84d4ea48 |
71 | } |
72 | push @sort, 'mergesort' unless @sort; |
73 | join(' ', @sort); |
74 | } |
75 | |
76 | 1; |
77 | __END__ |
78 | |
79 | =head1 NAME |
80 | |
81 | sort - perl pragma to control sort() behaviour |
82 | |
83 | =head1 SYNOPSIS |
84 | |
c53fc8a6 |
85 | use sort 'stable'; # guarantee stability |
86 | use sort '_quicksort'; # use a quicksort algorithm |
87 | use sort '_mergesort'; # use a mergesort algorithm |
7a8ff2dd |
88 | use sort 'defaults'; # revert to default behavior |
89 | no sort 'stable'; # stability not important |
84d4ea48 |
90 | |
c53fc8a6 |
91 | use sort '_qsort'; # alias for quicksort |
84d4ea48 |
92 | |
7b9ef140 |
93 | my $current; |
94 | BEGIN { |
95 | $current = sort::current(); # identify prevailing algorithm |
96 | } |
84d4ea48 |
97 | |
98 | =head1 DESCRIPTION |
99 | |
7a8ff2dd |
100 | With the C<sort> pragma you can control the behaviour of the builtin |
101 | C<sort()> function. |
84d4ea48 |
102 | |
103 | In Perl versions 5.6 and earlier the quicksort algorithm was used to |
7a8ff2dd |
104 | implement C<sort()>, but in Perl 5.8 a mergesort algorithm was also made |
c53fc8a6 |
105 | available, mainly to guarantee worst case O(N log N) behaviour: |
106 | the worst case of quicksort is O(N**2). In Perl 5.8 and later, |
107 | quicksort defends against quadratic behaviour by shuffling large |
108 | arrays before sorting. |
109 | |
110 | A stable sort means that for records that compare equal, the original |
b0ae2885 |
111 | input ordering is preserved. Mergesort is stable, quicksort is not. |
c53fc8a6 |
112 | Stability will matter only if elements that compare equal can be |
113 | distinguished in some other way. That means that simple numerical |
114 | and lexical sorts do not profit from stability, since equal elements |
115 | are indistinguishable. However, with a comparison such as |
116 | |
117 | { substr($a, 0, 3) cmp substr($b, 0, 3) } |
118 | |
119 | stability might matter because elements that compare equal on the |
120 | first 3 characters may be distinguished based on subsequent characters. |
121 | In Perl 5.8 and later, quicksort can be stabilized, but doing so will |
122 | add overhead, so it should only be done if it matters. |
123 | |
124 | The best algorithm depends on many things. On average, mergesort |
125 | does fewer comparisons than quicksort, so it may be better when |
126 | complicated comparison routines are used. Mergesort also takes |
127 | advantage of pre-existing order, so it would be favored for using |
7a8ff2dd |
128 | C<sort()> to merge several sorted arrays. On the other hand, quicksort |
129 | is often faster for small arrays, and on arrays of a few distinct |
130 | values, repeated many times. You can force the |
c53fc8a6 |
131 | choice of algorithm with this pragma, but this feels heavy-handed, |
132 | so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. |
7a8ff2dd |
133 | The default algorithm is mergesort, which will be stable even if |
134 | you do not explicitly demand it. |
135 | But the stability of the default sort is a side-effect that could |
136 | change in later versions. If stability is important, be sure to |
137 | say so with a |
138 | |
139 | use sort 'stable'; |
140 | |
141 | The C<no sort> pragma doesn't |
142 | I<forbid> what follows, it just leaves the choice open. Thus, after |
143 | |
144 | no sort qw(_mergesort stable); |
145 | |
146 | a mergesort, which happens to be stable, will be employed anyway. |
147 | Note that |
148 | |
149 | no sort "_quicksort"; |
150 | no sort "_mergesort"; |
151 | |
152 | have exactly the same effect, leaving the choice of sort algorithm open. |
84d4ea48 |
153 | |
0e59b7c6 |
154 | =head1 CAVEATS |
155 | |
7b9ef140 |
156 | As of Perl 5.10, this pragma is lexically scoped and takes effect |
157 | at compile time. In earlier versions its effect was global and took |
158 | effect at run-time; the documentation suggested using C<eval()> to |
159 | change the behaviour: |
7a8ff2dd |
160 | |
7b9ef140 |
161 | { eval 'use sort qw(defaults _quicksort)'; # force quicksort |
162 | eval 'no sort "stable"'; # stability not wanted |
7a8ff2dd |
163 | print sort::current . "\n"; |
164 | @a = sort @b; |
7b9ef140 |
165 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd |
166 | } |
7b9ef140 |
167 | { eval 'use sort qw(defaults stable)'; # force stability |
7a8ff2dd |
168 | print sort::current . "\n"; |
169 | @c = sort @d; |
7b9ef140 |
170 | eval 'use sort "defaults"'; # clean up, for others |
7a8ff2dd |
171 | } |
7a8ff2dd |
172 | |
7b9ef140 |
173 | Such code no longer has the desired effect, for two reasons. |
174 | Firstly, the use of C<eval()> means that the sorting algorithm |
175 | is not changed until runtime, by which time it's too late to |
176 | have any effect. Secondly, C<sort::current> is also called at |
177 | run-time, when in fact the compile-time value of C<sort::current> |
178 | is the one that matters. |
7a8ff2dd |
179 | |
7b9ef140 |
180 | So now this code would be written: |
7a8ff2dd |
181 | |
7b9ef140 |
182 | { use sort qw(defaults _quicksort); # force quicksort |
183 | no sort "stable"; # stability not wanted |
184 | my $current; |
185 | BEGIN { $current = print sort::current; } |
186 | print "$current\n"; |
7a8ff2dd |
187 | @a = sort @b; |
7b9ef140 |
188 | # Pragmas go out of scope at the end of the block |
7a8ff2dd |
189 | } |
7b9ef140 |
190 | { use sort qw(defaults stable); # force stability |
191 | my $current; |
192 | BEGIN { $current = print sort::current; } |
193 | print "$current\n"; |
7a8ff2dd |
194 | @c = sort @d; |
7a8ff2dd |
195 | } |
0e59b7c6 |
196 | |
84d4ea48 |
197 | =cut |
198 | |