From: Jarkko Hietaniemi Date: Tue, 27 Nov 2001 00:24:36 +0000 (+0000) Subject: sort tweaks from John P. Linderman. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c53fc8a620e539470713c5fc9ecf3b649176ff4a;p=p5sagit%2Fp5-mst-13.2.git sort tweaks from John P. Linderman. p4raw-id: //depot/perl@13292 --- diff --git a/lib/sort.pm b/lib/sort.pm index c1ea71c..c9ef3fa 100644 --- a/lib/sort.pm +++ b/lib/sort.pm @@ -8,9 +8,6 @@ $sort::quicksort_bit = 0x00000001; $sort::mergesort_bit = 0x00000002; $sort::sort_bits = 0x000000FF; # allow 256 different ones $sort::stable_bit = 0x00000100; -$sort::insensitive_bit = 0x00000200; -$sort::safe_bits = 0x00000300; -$sort::fast_bit = 0x00000400; use strict; @@ -24,24 +21,14 @@ sub import { local $_; no warnings 'uninitialized'; # $^H{SORT} bitops would warn while ($_ = shift(@_)) { - if (/^q(?:uick)?sort$/) { + if (/^_q(?:uick)?sort$/) { $^H{SORT} &= ~$sort::sort_bits; $^H{SORT} |= $sort::quicksort_bit; - return; - } elsif ($_ eq 'mergesort') { + } elsif ($_ eq '_mergesort') { $^H{SORT} &= ~$sort::sort_bits; $^H{SORT} |= $sort::mergesort_bit; - return; - } elsif ($_ eq 'safe') { - $^H{SORT} &= ~$sort::fast_bit; - $^H{SORT} |= $sort::safe_bits; - $_ = 'mergesort'; - redo; - } elsif ($_ eq 'fast') { - $^H{SORT} &= ~$sort::safe_bits; - $^H{SORT} |= $sort::fast_bit; - $_ = 'quicksort'; - redo; + } elsif ($_ eq 'stable') { + $^H{SORT} |= $sort::stable_bit; } else { require Carp; Carp::croak("sort: unknown subpragma '@_'"); @@ -54,8 +41,7 @@ sub current { if ($^H{SORT}) { push @sort, 'quicksort' if $^H{SORT} & $sort::quicksort_bit; push @sort, 'mergesort' if $^H{SORT} & $sort::mergesort_bit; - push @sort, 'safe' if $^H{SORT} & $sort::safe_bits; - push @sort, 'fast' if $^H{SORT} & $sort::fast_bit; + push @sort, 'stable' if $^H{SORT} & $sort::stable_bit; } push @sort, 'mergesort' unless @sort; join(' ', @sort); @@ -70,18 +56,13 @@ sort - perl pragma to control sort() behaviour =head1 SYNOPSIS - use sort 'quicksort'; - use sort 'mergesort'; + use sort 'stable'; # guarantee stability + use sort '_quicksort'; # use a quicksort algorithm + use sort '_mergesort'; # use a mergesort algorithm - use sort 'qsort'; # alias for quicksort + use sort '_qsort'; # alias for quicksort - # alias for mergesort: insensitive and stable - use sort 'safe'; - - # alias for raw quicksort: sensitive and nonstable - use sort 'fast'; - - my $current = sort::current(); + my $current = sort::current(); # identify prevailing algorithm =head1 DESCRIPTION @@ -89,24 +70,35 @@ With the sort pragma you can control the behaviour of the builtin sort() function. In Perl versions 5.6 and earlier the quicksort algorithm was used to -implement sort(), but in Perl 5.8 the algorithm was changed to mergesort, -mainly to guarantee insensitiveness to sort input: the worst case of -quicksort is O(N**2), while mergesort is always O(N log N). - -On the other hand, for same cases (especially for shorter inputs) -quicksort is faster. - -In Perl 5.8 and later by default quicksort is wrapped into a -stabilizing layer. A stable sort means that for records that compare -equal, the original input ordering is preserved. Mergesort is stable; -quicksort is not. - -The metapragmas 'fast' and 'safe' select quicksort without the -stabilizing layer and mergesort, respectively. In other words, -'safe' is the default. - -Finally, the sort performance is also dependent on the platform -(smaller CPU caches favour quicksort). +implement sort(), but in Perl 5.8 a mergesort algorithm was also made +available, mainly to guarantee worst case O(N log N) behaviour: +the worst case of quicksort is O(N**2). In Perl 5.8 and later, +quicksort defends against quadratic behaviour by shuffling large +arrays before sorting. + +A stable sort means that for records that compare equal, the original +input ordering is preserved. Perl's mergesort is stable, quicksort is not. +Stability will matter only if elements that compare equal can be +distinguished in some other way. That means that simple numerical +and lexical sorts do not profit from stability, since equal elements +are indistinguishable. However, with a comparison such as + + { substr($a, 0, 3) cmp substr($b, 0, 3) } + +stability might matter because elements that compare equal on the +first 3 characters may be distinguished based on subsequent characters. +In Perl 5.8 and later, quicksort can be stabilized, but doing so will +add overhead, so it should only be done if it matters. + +The best algorithm depends on many things. On average, mergesort +does fewer comparisons than quicksort, so it may be better when +complicated comparison routines are used. Mergesort also takes +advantage of pre-existing order, so it would be favored for using +sort to merge several sorted arrays. On the other hand, quicksort +is often faster for small arrays, and on platforms with small memory +caches that are much faster than main memory. You can force the +choice of algorithm with this pragma, but this feels heavy-handed, +so the subpragmas beginning with a C<_> may not persist beyond Perl 5.8. =cut diff --git a/lib/sort.t b/lib/sort.t index 6b60716..44aaf8f 100644 --- a/lib/sort.t +++ b/lib/sort.t @@ -12,6 +12,6 @@ BEGIN { ok(sort::current() eq 'mergesort'); } -use sort 'fast'; -ok(sort::current() eq 'quicksort fast'); +use sort qw( stable _qsort ); +ok(sort::current() eq 'quicksort stable'); diff --git a/perl.h b/perl.h index 7c51e93..c9d359c 100644 --- a/perl.h +++ b/perl.h @@ -3103,10 +3103,7 @@ enum { /* pass one of these to get_vtbl */ #define HINT_SORT_SORT_BITS 0x000000FF /* allow 256 different ones */ #define HINT_SORT_QUICKSORT 0x00000001 #define HINT_SORT_MERGESORT 0x00000002 -#define HINT_SORT_STABLE 0x00000100 /* sort styles */ -#define HINT_SORT_INSENSITIVE 0x00000200 -#define HINT_SORT_SAFE 0x00000300 /* stable and insensitive */ -#define HINT_SORT_FAST 0x00000400 /* damn the icebergs */ +#define HINT_SORT_STABLE 0x00000100 /* sort styles (currently one) */ /* Various states of the input record separator SV (rs) */ #define RsSNARF(sv) (! SvOK(sv)) diff --git a/pp_sort.c b/pp_sort.c index 844c0e3..797cb22 100644 --- a/pp_sort.c +++ b/pp_sort.c @@ -34,6 +34,10 @@ static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b); (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \ (I32)SvIV(*hintsvp) : 0) +#ifndef SMALLSORT +#define SMALLSORT (200) +#endif + /* * The mergesort implementation is by Peter M. Mcilroy . * @@ -281,9 +285,11 @@ S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) gptr *aux, *list2, *p2, *last; gptr *base = list1; gptr *p1; + gptr small[SMALLSORT]; if (nmemb <= 1) return; /* sorted trivially */ - New(799,list2,nmemb,gptr); /* allocate auxilliary array */ + if (nmemb <= SMALLSORT) list2 = small; /* use stack for aux array */ + else { New(799,list2,nmemb,gptr); } /* allocate auxilliary array */ aux = list2; dynprep(aTHX_ list1, list2, nmemb, cmp); last = PINDEX(list2, nmemb); @@ -395,7 +401,7 @@ S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) last = PINDEX(list1, nmemb); FROMTOUPTO(list1, list2, last); } - Safefree(aux); + if (aux != small) Safefree(aux); /* free iff allocated */ return; } @@ -1101,10 +1107,6 @@ S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare) /* Believe it or not, the array is sorted at this point! */ } -#ifndef SMALLSORT -#define SMALLSORT (200) -#endif - /* Stabilize what is, presumably, an otherwise unstable sort method. * We do that by allocating (or having on hand) an array of pointers * that is the same size as the original array of elements to be sorted. @@ -1175,9 +1177,7 @@ S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) { SV **hintsvp; - if (SORTHINTS(hintsvp) & HINT_SORT_FAST) - S_qsortsvu(aTHX_ list1, nmemb, cmp); - else { + if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) { register gptr **pp, *q; register size_t n, j, i; gptr *small[SMALLSORT], **indir, tmp; @@ -1238,6 +1238,8 @@ S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp) if (indir != small) { Safefree(indir); } /* restore prevailing comparison routine */ RealCmp = savecmp; + } else { + S_qsortsvu(aTHX_ list1, nmemb, cmp); } }