X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2Fsort.pm;h=922f82b46915b1e39bae61558d594afb5da182e2;hb=b9ad30b40cf004f5ea6fd7a945a950cf873aed7b;hp=5256a5f16a4fee02c7e118c464416d59f81f7533;hpb=84d4ea48280f6b54fdc70fe4c8b9494e3331071e;p=p5sagit%2Fp5-mst-13.2.git diff --git a/lib/sort.pm b/lib/sort.pm index 5256a5f..922f82b 100644 --- a/lib/sort.pm +++ b/lib/sort.pm @@ -1,16 +1,14 @@ package sort; -our $VERSION = '1.00'; +our $VERSION = '2.01'; -$sort::hint_bits = 0x00020000; # HINT_LOCALIZE_HH, really... +# The hints for pp_sort are now stored in $^H{sort}; older versions +# of perl used the global variable $sort::hints. -- rjh 2005-12-19 $sort::quicksort_bit = 0x00000001; $sort::mergesort_bit = 0x00000002; $sort::sort_bits = 0x000000FF; # allow 256 different ones $sort::stable_bit = 0x00000100; -$sort::insensitive_bit = 0x00000200; -$sort::safe_bits = 0x00000300; -$sort::fast_bit = 0x00000400; use strict; @@ -20,41 +18,54 @@ sub import { require Carp; Carp::croak("sort pragma requires arguments"); } - $^H |= $sort::hint_bits; local $_; + $^H{sort} //= 0; while ($_ = shift(@_)) { - if (/^q(?:uick)?sort$/) { - $^H{SORT} &= ~$sort::sort_bits; - $^H{SORT} |= $sort::quicksort_bit; - return; - } elsif ($_ eq 'mergesort') { - $^H{SORT} &= ~$sort::sort_bits; - $^H{SORT} |= $sort::mergesort_bit; - return; - } elsif ($_ eq 'safe') { - $^H{SORT} &= ~$sort::fast_bit; - $^H{SORT} |= $sort::safe_bits; - $_ = 'mergesort'; - redo; - } elsif ($_ eq 'fast') { - $^H{SORT} &= ~$sort::safe_bits; - $^H{SORT} |= $sort::fast_bit; - $_ = 'quicksort'; - redo; + if (/^_q(?:uick)?sort$/) { + $^H{sort} &= ~$sort::sort_bits; + $^H{sort} |= $sort::quicksort_bit; + } elsif ($_ eq '_mergesort') { + $^H{sort} &= ~$sort::sort_bits; + $^H{sort} |= $sort::mergesort_bit; + } elsif ($_ eq 'stable') { + $^H{sort} |= $sort::stable_bit; + } elsif ($_ eq 'defaults') { + $^H{sort} = 0; } else { require Carp; - Carp::croak("sort: unknown subpragma '@_'"); + Carp::croak("sort: unknown subpragma '$_'"); + } + } +} + +sub unimport { + shift; + if (@_ == 0) { + require Carp; + Carp::croak("sort pragma requires arguments"); + } + local $_; + no warnings 'uninitialized'; # bitops would warn + while ($_ = shift(@_)) { + if (/^_q(?:uick)?sort$/) { + $^H{sort} &= ~$sort::sort_bits; + } elsif ($_ eq '_mergesort') { + $^H{sort} &= ~$sort::sort_bits; + } elsif ($_ eq 'stable') { + $^H{sort} &= ~$sort::stable_bit; + } else { + require Carp; + Carp::croak("sort: unknown subpragma '$_'"); } } } sub current { my @sort; - if ($^H{SORT}) { - push @sort, 'quicksort' if $^H{SORT} & $sort::quicksort_bit; - push @sort, 'mergesort' if $^H{SORT} & $sort::mergesort_bit; - push @sort, 'safe' if $^H{SORT} & $sort::safe_bits; - push @sort, 'fast' if $^H{SORT} & $sort::fast_bit; + if ($^H{sort}) { + push @sort, 'quicksort' if $^H{sort} & $sort::quicksort_bit; + push @sort, 'mergesort' if $^H{sort} & $sort::mergesort_bit; + push @sort, 'stable' if $^H{sort} & $sort::stable_bit; } push @sort, 'mergesort' unless @sort; join(' ', @sort); @@ -69,43 +80,117 @@ sort - perl pragma to control sort() behaviour =head1 SYNOPSIS - use sort 'quicksort'; - use sort 'mergesort'; - - use sort 'qsort'; # alias for quicksort - - # alias for mergesort: insenstive and stable - use sort 'safe'; + use sort 'stable'; # guarantee stability + use sort '_quicksort'; # use a quicksort algorithm + use sort '_mergesort'; # use a mergesort algorithm + use sort 'defaults'; # revert to default behavior + no sort 'stable'; # stability not important - # alias for raw quicksort: sensitive and nonstable - use sort 'fast'; + use sort '_qsort'; # alias for quicksort - my $current = sort::current(); + my $current; + BEGIN { + $current = sort::current(); # identify prevailing algorithm + } =head1 DESCRIPTION -With the sort pragma you can control the behaviour of the builtin -sort() function. +With the C pragma you can control the behaviour of the builtin +C function. In Perl versions 5.6 and earlier the quicksort algorithm was used to -implement sort(), but in Perl 5.8 the algorithm was changed to mergesort, -mainly to guarantee insensitiveness to sort input: the worst case of -quicksort is O(N**2), while mergesort is always O(N log N). - -On the other hand, for same cases (especially for shorter inputs) -quicksort is faster. - -In Perl 5.8 and later by default quicksort is wrapped into a -stabilizing layer. A stable sort means that for records that compare -equal, the original input ordering is preserved. Mergesort is stable; -quicksort is not. - -The metapragmas 'fast' and 'safe' select quicksort without the -stabilizing layer and mergesort, respectively. In other words, -'safe' is the default. - -Finally, the sort performance is also dependent on the platform -(smaller CPU caches favour quicksort). +implement C, but in Perl 5.8 a mergesort algorithm was also made +available, mainly to guarantee worst case O(N log N) behaviour: +the worst case of quicksort is O(N**2). In Perl 5.8 and later, +quicksort defends against quadratic behaviour by shuffling large +arrays before sorting. + +A stable sort means that for records that compare equal, the original +input ordering is preserved. Mergesort is stable, quicksort is not. +Stability will matter only if elements that compare equal can be +distinguished in some other way. That means that simple numerical +and lexical sorts do not profit from stability, since equal elements +are indistinguishable. However, with a comparison such as + + { substr($a, 0, 3) cmp substr($b, 0, 3) } + +stability might matter because elements that compare equal on the +first 3 characters may be distinguished based on subsequent characters. +In Perl 5.8 and later, quicksort can be stabilized, but doing so will +add overhead, so it should only be done if it matters. + +The best algorithm depends on many things. On average, mergesort +does fewer comparisons than quicksort, so it may be better when +complicated comparison routines are used. Mergesort also takes +advantage of pre-existing order, so it would be favored for using +C to merge several sorted arrays. On the other hand, quicksort +is often faster for small arrays, and on arrays of a few distinct +values, repeated many times. You can force the +choice of algorithm with this pragma, but this feels heavy-handed, +so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. +The default algorithm is mergesort, which will be stable even if +you do not explicitly demand it. +But the stability of the default sort is a side-effect that could +change in later versions. If stability is important, be sure to +say so with a + + use sort 'stable'; + +The C pragma doesn't +I what follows, it just leaves the choice open. Thus, after + + no sort qw(_mergesort stable); + +a mergesort, which happens to be stable, will be employed anyway. +Note that + + no sort "_quicksort"; + no sort "_mergesort"; + +have exactly the same effect, leaving the choice of sort algorithm open. + +=head1 CAVEATS + +As of Perl 5.10, this pragma is lexically scoped and takes effect +at compile time. In earlier versions its effect was global and took +effect at run-time; the documentation suggested using C to +change the behaviour: + + { eval 'use sort qw(defaults _quicksort)'; # force quicksort + eval 'no sort "stable"'; # stability not wanted + print sort::current . "\n"; + @a = sort @b; + eval 'use sort "defaults"'; # clean up, for others + } + { eval 'use sort qw(defaults stable)'; # force stability + print sort::current . "\n"; + @c = sort @d; + eval 'use sort "defaults"'; # clean up, for others + } + +Such code no longer has the desired effect, for two reasons. +Firstly, the use of C means that the sorting algorithm +is not changed until runtime, by which time it's too late to +have any effect. Secondly, C is also called at +run-time, when in fact the compile-time value of C +is the one that matters. + +So now this code would be written: + + { use sort qw(defaults _quicksort); # force quicksort + no sort "stable"; # stability not wanted + my $current; + BEGIN { $current = print sort::current; } + print "$current\n"; + @a = sort @b; + # Pragmas go out of scope at the end of the block + } + { use sort qw(defaults stable); # force stability + my $current; + BEGIN { $current = print sort::current; } + print "$current\n"; + @c = sort @d; + } =cut