pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (c) 1991-2001, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  *   ...they shuffled back towards the rear of the line. 'No, not at the
  12  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  13  */
  14
  15 #include "EXTERN.h"
  16 #define PERL_IN_PP_SORT_C
  17 #include "perl.h"
  18
  19 static I32 sortcv(pTHX_ SV *a, SV *b);
  20 static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
  21 static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
  22 static I32 sv_ncmp(pTHX_ SV *a, SV *b);
  23 static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
  24 static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
  25 static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
  26 static I32 amagic_cmp(pTHX_ SV *a, SV *b);
  27 static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
  28
  29 #define sv_cmp_static Perl_sv_cmp
  30 #define sv_cmp_locale_static Perl_sv_cmp_locale
  31
  32 #define SORTHINTS(hintsvp) \
  33      ((PL_hintgv &&     \
  34       (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
  35           (I32)SvIV(*hintsvp) : 0)
  36
  37 /*
  38  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  39  *
  40  * The original code was written in conjunction with BSD Computer Software
  41  * Research Group at University of California, Berkeley.
  42  *
  43  * See also: "Optimistic Merge Sort" (SODA '92)
  44  *
  45  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  46  *
  47  * The code can be distributed under the same terms as Perl itself.
  48  *
  49  */
  50
  51 #ifdef  TESTHARNESS
  52 #include <sys/types.h>
  53 typedef void SV;
  54 #define pTHX_
  55 #define STATIC
  56 #define New(ID,VAR,N,TYPE) VAR=(TYPE *)malloc((N)*sizeof(TYPE))
  57 #define Safefree(VAR) free(VAR)
  58 typedef int  (*SVCOMPARE_t) (pTHX_ SV*, SV*);
  59 #endif  /* TESTHARNESS */
  60
  61 typedef char * aptr;            /* pointer for arithmetic on sizes */
  62 typedef SV * gptr;              /* pointers in our lists */
  63
  64 /* Binary merge internal sort, with a few special mods
  65 ** for the special perl environment it now finds itself in.
  66 **
  67 ** Things that were once options have been hotwired
  68 ** to values suitable for this use.  In particular, we'll always
  69 ** initialize looking for natural runs, we'll always produce stable
  70 ** output, and we'll always do Peter McIlroy's binary merge.
  71 */
  72
  73 /* Pointer types for arithmetic and storage and convenience casts */
  74
  75 #define APTR(P) ((aptr)(P))
  76 #define GPTP(P) ((gptr *)(P))
  77 #define GPPP(P) ((gptr **)(P))
  78
  79
  80 /* byte offset from pointer P to (larger) pointer Q */
  81 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  82
  83 #define PSIZE sizeof(gptr)
  84
  85 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  86
  87 #ifdef  PSHIFT
  88 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  89 #define PNBYTE(N)       ((N) << (PSHIFT))
  90 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  91 #else
  92 /* Leave optimization to compiler */
  93 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  94 #define PNBYTE(N)       ((N) * (PSIZE))
  95 #define PINDEX(P, N)    (GPTP(P) + (N))
  96 #endif
  97
  98 /* Pointer into other corresponding to pointer into this */
  99 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 100
 101 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 102
 103
 104 /* Runs are identified by a pointer in the auxilliary list.
 105 ** The pointer is at the start of the list,
 106 ** and it points to the start of the next list.
 107 ** NEXT is used as an lvalue, too.
 108 */
 109
 110 #define NEXT(P)         (*GPPP(P))
 111
 112
 113 /* PTHRESH is the minimum number of pairs with the same sense to justify
 114 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 115 ** not just elements, so PTHRESH == 8 means a run of 16.
 116 */
 117
 118 #define PTHRESH (8)
 119
 120 /* RTHRESH is the number of elements in a run that must compare low
 121 ** to the low element from the opposing run before we justify
 122 ** doing a binary rampup instead of single stepping.
 123 ** In random input, N in a row low should only happen with
 124 ** probability 2^(1-N), so we can risk that we are dealing
 125 ** with orderly input without paying much when we aren't.
 126 */
 127
 128 #define RTHRESH (6)
 129
 130
 131 /*
 132 ** Overview of algorithm and variables.
 133 ** The array of elements at list1 will be organized into runs of length 2,
 134 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 135 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 136 **
 137 ** Unless otherwise specified, pair pointers address the first of two elements.
 138 **
 139 ** b and b+1 are a pair that compare with sense ``sense''.
 140 ** b is the ``bottom'' of adjacent pairs that might form a longer run.
 141 **
 142 ** p2 parallels b in the list2 array, where runs are defined by
 143 ** a pointer chain.
 144 **
 145 ** t represents the ``top'' of the adjacent pairs that might extend
 146 ** the run beginning at b.  Usually, t addresses a pair
 147 ** that compares with opposite sense from (b,b+1).
 148 ** However, it may also address a singleton element at the end of list1,
 149 ** or it may be equal to ``last'', the first element beyond list1.
 150 **
 151 ** r addresses the Nth pair following b.  If this would be beyond t,
 152 ** we back it off to t.  Only when r is less than t do we consider the
 153 ** run long enough to consider checking.
 154 **
 155 ** q addresses a pair such that the pairs at b through q already form a run.
 156 ** Often, q will equal b, indicating we only are sure of the pair itself.
 157 ** However, a search on the previous cycle may have revealed a longer run,
 158 ** so q may be greater than b.
 159 **
 160 ** p is used to work back from a candidate r, trying to reach q,
 161 ** which would mean b through r would be a run.  If we discover such a run,
 162 ** we start q at r and try to push it further towards t.
 163 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 164 ** In any event, after the check (if any), we have two main cases.
 165 **
 166 ** 1) Short run.  b <= q < p <= r <= t.
 167 **      b through q is a run (perhaps trivial)
 168 **      q through p are uninteresting pairs
 169 **      p through r is a run
 170 **
 171 ** 2) Long run.  b < r <= q < t.
 172 **      b through q is a run (of length >= 2 * PTHRESH)
 173 **
 174 ** Note that degenerate cases are not only possible, but likely.
 175 ** For example, if the pair following b compares with opposite sense,
 176 ** then b == q < p == r == t.
 177 */
 178
 179
 180 static void
 181 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 182 {
 183     int sense;
 184     register gptr *b, *p, *q, *t, *p2;
 185     register gptr c, *last, *r;
 186     gptr *savep;
 187
 188     b = list1;
 189     last = PINDEX(b, nmemb);
 190     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 191     for (p2 = list2; b < last; ) {
 192         /* We just started, or just reversed sense.
 193         ** Set t at end of pairs with the prevailing sense.
 194         */
 195         for (p = b+2, t = p; ++p < last; t = ++p) {
 196             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 197         }
 198         q = b;
 199         /* Having laid out the playing field, look for long runs */
 200         do {
 201             p = r = b + (2 * PTHRESH);
 202             if (r >= t) p = r = t;      /* too short to care about */
 203             else {
 204                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 205                        ((p -= 2) > q));
 206                 if (p <= q) {
 207                     /* b through r is a (long) run.
 208                     ** Extend it as far as possible.
 209                     */
 210                     p = q = r;
 211                     while (((p += 2) < t) &&
 212                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 213                     r = p = q + 2;      /* no simple pairs, no after-run */
 214                 }
 215             }
 216             if (q > b) {                /* run of greater than 2 at b */
 217                 savep = p;
 218                 p = q += 2;
 219                 /* pick up singleton, if possible */
 220                 if ((p == t) &&
 221                     ((t + 1) == last) &&
 222                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 223                     savep = r = p = q = last;
 224                 p2 = NEXT(p2) = p2 + (p - b);
 225                 if (sense) while (b < --p) {
 226                     c = *b;
 227                     *b++ = *p;
 228                     *p = c;
 229                 }
 230                 p = savep;
 231             }
 232             while (q < p) {             /* simple pairs */
 233                 p2 = NEXT(p2) = p2 + 2;
 234                 if (sense) {
 235                     c = *q++;
 236                     *(q-1) = *q;
 237                     *q++ = c;
 238                 } else q += 2;
 239             }
 240             if (((b = p) == t) && ((t+1) == last)) {
 241                 NEXT(p2) = p2 + 1;
 242                 b++;
 243             }
 244             q = r;
 245         } while (b < t);
 246         sense = !sense;
 247     }
 248     return;
 249 }
 250
 251
 252 /* Overview of bmerge variables:
 253 **
 254 ** list1 and list2 address the main and auxiliary arrays.
 255 ** They swap identities after each merge pass.
 256 ** Base points to the original list1, so we can tell if
 257 ** the pointers ended up where they belonged (or must be copied).
 258 **
 259 ** When we are merging two lists, f1 and f2 are the next elements
 260 ** on the respective lists.  l1 and l2 mark the end of the lists.
 261 ** tp2 is the current location in the merged list.
 262 **
 263 ** p1 records where f1 started.
 264 ** After the merge, a new descriptor is built there.
 265 **
 266 ** p2 is a ``parallel'' pointer in (what starts as) descriptor space.
 267 ** It is used to identify and delimit the runs.
 268 **
 269 ** In the heat of determining where q, the greater of the f1/f2 elements,
 270 ** belongs in the other list, b, t and p, represent bottom, top and probe
 271 ** locations, respectively, in the other list.
 272 ** They make convenient temporary pointers in other places.
 273 */
 274
 275 STATIC void
 276 S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
 277 {
 278     int i, run;
 279     int sense;
 280     register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
 281     gptr *aux, *list2, *p2, *last;
 282     gptr *base = list1;
 283     gptr *p1;
 284
 285     if (nmemb <= 1) return;     /* sorted trivially */
 286     New(799,list2,nmemb,gptr);  /* allocate auxilliary array */
 287     aux = list2;
 288     dynprep(aTHX_ list1, list2, nmemb, cmp);
 289     last = PINDEX(list2, nmemb);
 290     while (NEXT(list2) != last) {
 291         /* More than one run remains.  Do some merging to reduce runs. */
 292         l2 = p1 = list1;
 293         for (tp2 = p2 = list2; p2 != last;) {
 294             /* The new first run begins where the old second list ended.
 295             ** Use the p2 ``parallel'' pointer to identify the end of the run.
 296             */
 297             f1 = l2;
 298             t = NEXT(p2);
 299             f2 = l1 = POTHER(t, list2, list1);
 300             if (t != last) t = NEXT(t);
 301             l2 = POTHER(t, list2, list1);
 302             p2 = t;
 303             while (f1 < l1 && f2 < l2) {
 304                 /* If head 1 is larger than head 2, find ALL the elements
 305                 ** in list 2 strictly less than head1, write them all,
 306                 ** then head 1.  Then compare the new heads, and repeat,
 307                 ** until one or both lists are exhausted.
 308                 **
 309                 ** In all comparisons (after establishing
 310                 ** which head to merge) the item to merge
 311                 ** (at pointer q) is the first operand of
 312                 ** the comparison.  When we want to know
 313                 ** if ``q is strictly less than the other'',
 314                 ** we can't just do
 315                 **    cmp(q, other) < 0
 316                 ** because stability demands that we treat equality
 317                 ** as high when q comes from l2, and as low when
 318                 ** q was from l1.  So we ask the question by doing
 319                 **    cmp(q, other) <= sense
 320                 ** and make sense == 0 when equality should look low,
 321                 ** and -1 when equality should look high.
 322                 */
 323
 324
 325                 if (cmp(aTHX_ *f1, *f2) <= 0) {
 326                     q = f2; b = f1; t = l1;
 327                     sense = -1;
 328                 } else {
 329                     q = f1; b = f2; t = l2;
 330                     sense = 0;
 331                 }
 332
 333
 334                 /* ramp up
 335                 **
 336                 ** Leave t at something strictly
 337                 ** greater than q (or at the end of the list),
 338                 ** and b at something strictly less than q.
 339                 */
 340                 for (i = 1, run = 0 ;;) {
 341                     if ((p = PINDEX(b, i)) >= t) {
 342                         /* off the end */
 343                         if (((p = PINDEX(t, -1)) > b) &&
 344                             (cmp(aTHX_ *q, *p) <= sense))
 345                              t = p;
 346                         else b = p;
 347                         break;
 348                     } else if (cmp(aTHX_ *q, *p) <= sense) {
 349                         t = p;
 350                         break;
 351                     } else b = p;
 352                     if (++run >= RTHRESH) i += i;
 353                 }
 354
 355
 356                 /* q is known to follow b and must be inserted before t.
 357                 ** Increment b, so the range of possibilities is [b,t).
 358                 ** Round binary split down, to favor early appearance.
 359                 ** Adjust b and t until q belongs just before t.
 360                 */
 361
 362                 b++;
 363                 while (b < t) {
 364                     p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 365                     if (cmp(aTHX_ *q, *p) <= sense) {
 366                         t = p;
 367                     } else b = p + 1;
 368                 }
 369
 370
 371                 /* Copy all the strictly low elements */
 372
 373                 if (q == f1) {
 374                     FROMTOUPTO(f2, tp2, t);
 375                     *tp2++ = *f1++;
 376                 } else {
 377                     FROMTOUPTO(f1, tp2, t);
 378                     *tp2++ = *f2++;
 379                 }
 380             }
 381
 382
 383             /* Run out remaining list */
 384             if (f1 == l1) {
 385                    if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 386             } else              FROMTOUPTO(f1, tp2, l1);
 387             p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 388         }
 389         t = list1;
 390         list1 = list2;
 391         list2 = t;
 392         last = PINDEX(list2, nmemb);
 393     }
 394     if (base == list2) {
 395         last = PINDEX(list1, nmemb);
 396         FROMTOUPTO(list1, list2, last);
 397     }
 398     Safefree(aux);
 399     return;
 400 }
 401
 402 /*
 403  * The quicksort implementation was derived from source code contributed
 404  * by Tom Horsley.
 405  *
 406  * NOTE: this code was derived from Tom Horsley's qsort replacement
 407  * and should not be confused with the original code.
 408  */
 409
 410 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 411
 412    Permission granted to distribute under the same terms as perl which are
 413    (briefly):
 414
 415     This program is free software; you can redistribute it and/or modify
 416     it under the terms of either:
 417
 418         a) the GNU General Public License as published by the Free
 419         Software Foundation; either version 1, or (at your option) any
 420         later version, or
 421
 422         b) the "Artistic License" which comes with this Kit.
 423
 424    Details on the perl license can be found in the perl source code which
 425    may be located via the www.perl.com web page.
 426
 427    This is the most wonderfulest possible qsort I can come up with (and
 428    still be mostly portable) My (limited) tests indicate it consistently
 429    does about 20% fewer calls to compare than does the qsort in the Visual
 430    C++ library, other vendors may vary.
 431
 432    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 433    others I invented myself (or more likely re-invented since they seemed
 434    pretty obvious once I watched the algorithm operate for a while).
 435
 436    Most of this code was written while watching the Marlins sweep the Giants
 437    in the 1997 National League Playoffs - no Braves fans allowed to use this
 438    code (just kidding :-).
 439
 440    I realize that if I wanted to be true to the perl tradition, the only
 441    comment in this file would be something like:
 442
 443    ...they shuffled back towards the rear of the line. 'No, not at the
 444    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 445
 446    However, I really needed to violate that tradition just so I could keep
 447    track of what happens myself, not to mention some poor fool trying to
 448    understand this years from now :-).
 449 */
 450
 451 /* ********************************************************** Configuration */
 452
 453 #ifndef QSORT_ORDER_GUESS
 454 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 455 #endif
 456
 457 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 458    future processing - a good max upper bound is log base 2 of memory size
 459    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 460    safely be smaller than that since the program is taking up some space and
 461    most operating systems only let you grab some subset of contiguous
 462    memory (not to mention that you are normally sorting data larger than
 463    1 byte element size :-).
 464 */
 465 #ifndef QSORT_MAX_STACK
 466 #define QSORT_MAX_STACK 32
 467 #endif
 468
 469 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 470    Anything bigger and we use qsort. If you make this too small, the qsort
 471    will probably break (or become less efficient), because it doesn't expect
 472    the middle element of a partition to be the same as the right or left -
 473    you have been warned).
 474 */
 475 #ifndef QSORT_BREAK_EVEN
 476 #define QSORT_BREAK_EVEN 6
 477 #endif
 478
 479 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 480    to go quadratic on.  We innoculate larger partitions against
 481    quadratic behavior by shuffling them before sorting.  This is not
 482    an absolute guarantee of non-quadratic behavior, but it would take
 483    staggeringly bad luck to pick extreme elements as the pivot
 484    from randomized data.
 485 */
 486 #ifndef QSORT_PLAY_SAFE
 487 #define QSORT_PLAY_SAFE 255
 488 #endif
 489
 490 /* ************************************************************* Data Types */
 491
 492 /* hold left and right index values of a partition waiting to be sorted (the
 493    partition includes both left and right - right is NOT one past the end or
 494    anything like that).
 495 */
 496 struct partition_stack_entry {
 497    int left;
 498    int right;
 499 #ifdef QSORT_ORDER_GUESS
 500    int qsort_break_even;
 501 #endif
 502 };
 503
 504 /* ******************************************************* Shorthand Macros */
 505
 506 /* Note that these macros will be used from inside the qsort function where
 507    we happen to know that the variable 'elt_size' contains the size of an
 508    array element and the variable 'temp' points to enough space to hold a
 509    temp element and the variable 'array' points to the array being sorted
 510    and 'compare' is the pointer to the compare routine.
 511
 512    Also note that there are very many highly architecture specific ways
 513    these might be sped up, but this is simply the most generally portable
 514    code I could think of.
 515 */
 516
 517 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 518 */
 519 #define qsort_cmp(elt1, elt2) \
 520    ((*compare)(aTHX_ array[elt1], array[elt2]))
 521
 522 #ifdef QSORT_ORDER_GUESS
 523 #define QSORT_NOTICE_SWAP swapped++;
 524 #else
 525 #define QSORT_NOTICE_SWAP
 526 #endif
 527
 528 /* swaps contents of array elements elt1, elt2.
 529 */
 530 #define qsort_swap(elt1, elt2) \
 531    STMT_START { \
 532       QSORT_NOTICE_SWAP \
 533       temp = array[elt1]; \
 534       array[elt1] = array[elt2]; \
 535       array[elt2] = temp; \
 536    } STMT_END
 537
 538 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 539    elt3 and elt3 gets elt1.
 540 */
 541 #define qsort_rotate(elt1, elt2, elt3) \
 542    STMT_START { \
 543       QSORT_NOTICE_SWAP \
 544       temp = array[elt1]; \
 545       array[elt1] = array[elt2]; \
 546       array[elt2] = array[elt3]; \
 547       array[elt3] = temp; \
 548    } STMT_END
 549
 550 /* ************************************************************ Debug stuff */
 551
 552 #ifdef QSORT_DEBUG
 553
 554 static void
 555 break_here()
 556 {
 557    return; /* good place to set a breakpoint */
 558 }
 559
 560 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 561
 562 static void
 563 doqsort_all_asserts(
 564    void * array,
 565    size_t num_elts,
 566    size_t elt_size,
 567    int (*compare)(const void * elt1, const void * elt2),
 568    int pc_left, int pc_right, int u_left, int u_right)
 569 {
 570    int i;
 571
 572    qsort_assert(pc_left <= pc_right);
 573    qsort_assert(u_right < pc_left);
 574    qsort_assert(pc_right < u_left);
 575    for (i = u_right + 1; i < pc_left; ++i) {
 576       qsort_assert(qsort_cmp(i, pc_left) < 0);
 577    }
 578    for (i = pc_left; i < pc_right; ++i) {
 579       qsort_assert(qsort_cmp(i, pc_right) == 0);
 580    }
 581    for (i = pc_right + 1; i < u_left; ++i) {
 582       qsort_assert(qsort_cmp(pc_right, i) < 0);
 583    }
 584 }
 585
 586 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 587    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 588                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 589
 590 #else
 591
 592 #define qsort_assert(t) ((void)0)
 593
 594 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 595
 596 #endif
 597
 598 /* ****************************************************************** qsort */
 599
 600 STATIC void /* the standard unstable (u) quicksort (qsort) */
 601 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 602 {
 603    register SV * temp;
 604
 605    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 606    int next_stack_entry = 0;
 607
 608    int part_left;
 609    int part_right;
 610 #ifdef QSORT_ORDER_GUESS
 611    int qsort_break_even;
 612    int swapped;
 613 #endif
 614
 615    /* Make sure we actually have work to do.
 616    */
 617    if (num_elts <= 1) {
 618       return;
 619    }
 620
 621    /* Innoculate large partitions against quadratic behavior */
 622    if (num_elts > QSORT_PLAY_SAFE) {
 623       register size_t n, j;
 624       register SV **q;
 625       for (n = num_elts, q = array; n > 1; ) {
 626          j = n-- * Drand01();
 627          temp = q[j];
 628          q[j] = q[n];
 629          q[n] = temp;
 630       }
 631    }
 632
 633    /* Setup the initial partition definition and fall into the sorting loop
 634    */
 635    part_left = 0;
 636    part_right = (int)(num_elts - 1);
 637 #ifdef QSORT_ORDER_GUESS
 638    qsort_break_even = QSORT_BREAK_EVEN;
 639 #else
 640 #define qsort_break_even QSORT_BREAK_EVEN
 641 #endif
 642    for ( ; ; ) {
 643       if ((part_right - part_left) >= qsort_break_even) {
 644          /* OK, this is gonna get hairy, so lets try to document all the
 645             concepts and abbreviations and variables and what they keep
 646             track of:
 647
 648             pc: pivot chunk - the set of array elements we accumulate in the
 649                 middle of the partition, all equal in value to the original
 650                 pivot element selected. The pc is defined by:
 651
 652                 pc_left - the leftmost array index of the pc
 653                 pc_right - the rightmost array index of the pc
 654
 655                 we start with pc_left == pc_right and only one element
 656                 in the pivot chunk (but it can grow during the scan).
 657
 658             u:  uncompared elements - the set of elements in the partition
 659                 we have not yet compared to the pivot value. There are two
 660                 uncompared sets during the scan - one to the left of the pc
 661                 and one to the right.
 662
 663                 u_right - the rightmost index of the left side's uncompared set
 664                 u_left - the leftmost index of the right side's uncompared set
 665
 666                 The leftmost index of the left sides's uncompared set
 667                 doesn't need its own variable because it is always defined
 668                 by the leftmost edge of the whole partition (part_left). The
 669                 same goes for the rightmost edge of the right partition
 670                 (part_right).
 671
 672                 We know there are no uncompared elements on the left once we
 673                 get u_right < part_left and no uncompared elements on the
 674                 right once u_left > part_right. When both these conditions
 675                 are met, we have completed the scan of the partition.
 676
 677                 Any elements which are between the pivot chunk and the
 678                 uncompared elements should be less than the pivot value on
 679                 the left side and greater than the pivot value on the right
 680                 side (in fact, the goal of the whole algorithm is to arrange
 681                 for that to be true and make the groups of less-than and
 682                 greater-then elements into new partitions to sort again).
 683
 684             As you marvel at the complexity of the code and wonder why it
 685             has to be so confusing. Consider some of the things this level
 686             of confusion brings:
 687
 688             Once I do a compare, I squeeze every ounce of juice out of it. I
 689             never do compare calls I don't have to do, and I certainly never
 690             do redundant calls.
 691
 692             I also never swap any elements unless I can prove there is a
 693             good reason. Many sort algorithms will swap a known value with
 694             an uncompared value just to get things in the right place (or
 695             avoid complexity :-), but that uncompared value, once it gets
 696             compared, may then have to be swapped again. A lot of the
 697             complexity of this code is due to the fact that it never swaps
 698             anything except compared values, and it only swaps them when the
 699             compare shows they are out of position.
 700          */
 701          int pc_left, pc_right;
 702          int u_right, u_left;
 703
 704          int s;
 705
 706          pc_left = ((part_left + part_right) / 2);
 707          pc_right = pc_left;
 708          u_right = pc_left - 1;
 709          u_left = pc_right + 1;
 710
 711          /* Qsort works best when the pivot value is also the median value
 712             in the partition (unfortunately you can't find the median value
 713             without first sorting :-), so to give the algorithm a helping
 714             hand, we pick 3 elements and sort them and use the median value
 715             of that tiny set as the pivot value.
 716
 717             Some versions of qsort like to use the left middle and right as
 718             the 3 elements to sort so they can insure the ends of the
 719             partition will contain values which will stop the scan in the
 720             compare loop, but when you have to call an arbitrarily complex
 721             routine to do a compare, its really better to just keep track of
 722             array index values to know when you hit the edge of the
 723             partition and avoid the extra compare. An even better reason to
 724             avoid using a compare call is the fact that you can drop off the
 725             edge of the array if someone foolishly provides you with an
 726             unstable compare function that doesn't always provide consistent
 727             results.
 728
 729             So, since it is simpler for us to compare the three adjacent
 730             elements in the middle of the partition, those are the ones we
 731             pick here (conveniently pointed at by u_right, pc_left, and
 732             u_left). The values of the left, center, and right elements
 733             are refered to as l c and r in the following comments.
 734          */
 735
 736 #ifdef QSORT_ORDER_GUESS
 737          swapped = 0;
 738 #endif
 739          s = qsort_cmp(u_right, pc_left);
 740          if (s < 0) {
 741             /* l < c */
 742             s = qsort_cmp(pc_left, u_left);
 743             /* if l < c, c < r - already in order - nothing to do */
 744             if (s == 0) {
 745                /* l < c, c == r - already in order, pc grows */
 746                ++pc_right;
 747                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 748             } else if (s > 0) {
 749                /* l < c, c > r - need to know more */
 750                s = qsort_cmp(u_right, u_left);
 751                if (s < 0) {
 752                   /* l < c, c > r, l < r - swap c & r to get ordered */
 753                   qsort_swap(pc_left, u_left);
 754                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 755                } else if (s == 0) {
 756                   /* l < c, c > r, l == r - swap c&r, grow pc */
 757                   qsort_swap(pc_left, u_left);
 758                   --pc_left;
 759                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 760                } else {
 761                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 762                   qsort_rotate(pc_left, u_right, u_left);
 763                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 764                }
 765             }
 766          } else if (s == 0) {
 767             /* l == c */
 768             s = qsort_cmp(pc_left, u_left);
 769             if (s < 0) {
 770                /* l == c, c < r - already in order, grow pc */
 771                --pc_left;
 772                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 773             } else if (s == 0) {
 774                /* l == c, c == r - already in order, grow pc both ways */
 775                --pc_left;
 776                ++pc_right;
 777                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 778             } else {
 779                /* l == c, c > r - swap l & r, grow pc */
 780                qsort_swap(u_right, u_left);
 781                ++pc_right;
 782                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 783             }
 784          } else {
 785             /* l > c */
 786             s = qsort_cmp(pc_left, u_left);
 787             if (s < 0) {
 788                /* l > c, c < r - need to know more */
 789                s = qsort_cmp(u_right, u_left);
 790                if (s < 0) {
 791                   /* l > c, c < r, l < r - swap l & c to get ordered */
 792                   qsort_swap(u_right, pc_left);
 793                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 794                } else if (s == 0) {
 795                   /* l > c, c < r, l == r - swap l & c, grow pc */
 796                   qsort_swap(u_right, pc_left);
 797                   ++pc_right;
 798                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 799                } else {
 800                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 801                   qsort_rotate(u_right, pc_left, u_left);
 802                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 803                }
 804             } else if (s == 0) {
 805                /* l > c, c == r - swap ends, grow pc */
 806                qsort_swap(u_right, u_left);
 807                --pc_left;
 808                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 809             } else {
 810                /* l > c, c > r - swap ends to get in order */
 811                qsort_swap(u_right, u_left);
 812                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 813             }
 814          }
 815          /* We now know the 3 middle elements have been compared and
 816             arranged in the desired order, so we can shrink the uncompared
 817             sets on both sides
 818          */
 819          --u_right;
 820          ++u_left;
 821          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 822
 823          /* The above massive nested if was the simple part :-). We now have
 824             the middle 3 elements ordered and we need to scan through the
 825             uncompared sets on either side, swapping elements that are on
 826             the wrong side or simply shuffling equal elements around to get
 827             all equal elements into the pivot chunk.
 828          */
 829
 830          for ( ; ; ) {
 831             int still_work_on_left;
 832             int still_work_on_right;
 833
 834             /* Scan the uncompared values on the left. If I find a value
 835                equal to the pivot value, move it over so it is adjacent to
 836                the pivot chunk and expand the pivot chunk. If I find a value
 837                less than the pivot value, then just leave it - its already
 838                on the correct side of the partition. If I find a greater
 839                value, then stop the scan.
 840             */
 841             while ((still_work_on_left = (u_right >= part_left))) {
 842                s = qsort_cmp(u_right, pc_left);
 843                if (s < 0) {
 844                   --u_right;
 845                } else if (s == 0) {
 846                   --pc_left;
 847                   if (pc_left != u_right) {
 848                      qsort_swap(u_right, pc_left);
 849                   }
 850                   --u_right;
 851                } else {
 852                   break;
 853                }
 854                qsort_assert(u_right < pc_left);
 855                qsort_assert(pc_left <= pc_right);
 856                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
 857                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
 858             }
 859
 860             /* Do a mirror image scan of uncompared values on the right
 861             */
 862             while ((still_work_on_right = (u_left <= part_right))) {
 863                s = qsort_cmp(pc_right, u_left);
 864                if (s < 0) {
 865                   ++u_left;
 866                } else if (s == 0) {
 867                   ++pc_right;
 868                   if (pc_right != u_left) {
 869                      qsort_swap(pc_right, u_left);
 870                   }
 871                   ++u_left;
 872                } else {
 873                   break;
 874                }
 875                qsort_assert(u_left > pc_right);
 876                qsort_assert(pc_left <= pc_right);
 877                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
 878                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
 879             }
 880
 881             if (still_work_on_left) {
 882                /* I know I have a value on the left side which needs to be
 883                   on the right side, but I need to know more to decide
 884                   exactly the best thing to do with it.
 885                */
 886                if (still_work_on_right) {
 887                   /* I know I have values on both side which are out of
 888                      position. This is a big win because I kill two birds
 889                      with one swap (so to speak). I can advance the
 890                      uncompared pointers on both sides after swapping both
 891                      of them into the right place.
 892                   */
 893                   qsort_swap(u_right, u_left);
 894                   --u_right;
 895                   ++u_left;
 896                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 897                } else {
 898                   /* I have an out of position value on the left, but the
 899                      right is fully scanned, so I "slide" the pivot chunk
 900                      and any less-than values left one to make room for the
 901                      greater value over on the right. If the out of position
 902                      value is immediately adjacent to the pivot chunk (there
 903                      are no less-than values), I can do that with a swap,
 904                      otherwise, I have to rotate one of the less than values
 905                      into the former position of the out of position value
 906                      and the right end of the pivot chunk into the left end
 907                      (got all that?).
 908                   */
 909                   --pc_left;
 910                   if (pc_left == u_right) {
 911                      qsort_swap(u_right, pc_right);
 912                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
 913                   } else {
 914                      qsort_rotate(u_right, pc_left, pc_right);
 915                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
 916                   }
 917                   --pc_right;
 918                   --u_right;
 919                }
 920             } else if (still_work_on_right) {
 921                /* Mirror image of complex case above: I have an out of
 922                   position value on the right, but the left is fully
 923                   scanned, so I need to shuffle things around to make room
 924                   for the right value on the left.
 925                */
 926                ++pc_right;
 927                if (pc_right == u_left) {
 928                   qsort_swap(u_left, pc_left);
 929                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
 930                } else {
 931                   qsort_rotate(pc_right, pc_left, u_left);
 932                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
 933                }
 934                ++pc_left;
 935                ++u_left;
 936             } else {
 937                /* No more scanning required on either side of partition,
 938                   break out of loop and figure out next set of partitions
 939                */
 940                break;
 941             }
 942          }
 943
 944          /* The elements in the pivot chunk are now in the right place. They
 945             will never move or be compared again. All I have to do is decide
 946             what to do with the stuff to the left and right of the pivot
 947             chunk.
 948
 949             Notes on the QSORT_ORDER_GUESS ifdef code:
 950
 951             1. If I just built these partitions without swapping any (or
 952                very many) elements, there is a chance that the elements are
 953                already ordered properly (being properly ordered will
 954                certainly result in no swapping, but the converse can't be
 955                proved :-).
 956
 957             2. A (properly written) insertion sort will run faster on
 958                already ordered data than qsort will.
 959
 960             3. Perhaps there is some way to make a good guess about
 961                switching to an insertion sort earlier than partition size 6
 962                (for instance - we could save the partition size on the stack
 963                and increase the size each time we find we didn't swap, thus
 964                switching to insertion sort earlier for partitions with a
 965                history of not swapping).
 966
 967             4. Naturally, if I just switch right away, it will make
 968                artificial benchmarks with pure ascending (or descending)
 969                data look really good, but is that a good reason in general?
 970                Hard to say...
 971          */
 972
 973 #ifdef QSORT_ORDER_GUESS
 974          if (swapped < 3) {
 975 #if QSORT_ORDER_GUESS == 1
 976             qsort_break_even = (part_right - part_left) + 1;
 977 #endif
 978 #if QSORT_ORDER_GUESS == 2
 979             qsort_break_even *= 2;
 980 #endif
 981 #if QSORT_ORDER_GUESS == 3
 982             int prev_break = qsort_break_even;
 983             qsort_break_even *= qsort_break_even;
 984             if (qsort_break_even < prev_break) {
 985                qsort_break_even = (part_right - part_left) + 1;
 986             }
 987 #endif
 988          } else {
 989             qsort_break_even = QSORT_BREAK_EVEN;
 990          }
 991 #endif
 992
 993          if (part_left < pc_left) {
 994             /* There are elements on the left which need more processing.
 995                Check the right as well before deciding what to do.
 996             */
 997             if (pc_right < part_right) {
 998                /* We have two partitions to be sorted. Stack the biggest one
 999                   and process the smallest one on the next iteration. This
1000                   minimizes the stack height by insuring that any additional
1001                   stack entries must come from the smallest partition which
1002                   (because it is smallest) will have the fewest
1003                   opportunities to generate additional stack entries.
1004                */
1005                if ((part_right - pc_right) > (pc_left - part_left)) {
1006                   /* stack the right partition, process the left */
1007                   partition_stack[next_stack_entry].left = pc_right + 1;
1008                   partition_stack[next_stack_entry].right = part_right;
1009 #ifdef QSORT_ORDER_GUESS
1010                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1011 #endif
1012                   part_right = pc_left - 1;
1013                } else {
1014                   /* stack the left partition, process the right */
1015                   partition_stack[next_stack_entry].left = part_left;
1016                   partition_stack[next_stack_entry].right = pc_left - 1;
1017 #ifdef QSORT_ORDER_GUESS
1018                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1019 #endif
1020                   part_left = pc_right + 1;
1021                }
1022                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1023                ++next_stack_entry;
1024             } else {
1025                /* The elements on the left are the only remaining elements
1026                   that need sorting, arrange for them to be processed as the
1027                   next partition.
1028                */
1029                part_right = pc_left - 1;
1030             }
1031          } else if (pc_right < part_right) {
1032             /* There is only one chunk on the right to be sorted, make it
1033                the new partition and loop back around.
1034             */
1035             part_left = pc_right + 1;
1036          } else {
1037             /* This whole partition wound up in the pivot chunk, so
1038                we need to get a new partition off the stack.
1039             */
1040             if (next_stack_entry == 0) {
1041                /* the stack is empty - we are done */
1042                break;
1043             }
1044             --next_stack_entry;
1045             part_left = partition_stack[next_stack_entry].left;
1046             part_right = partition_stack[next_stack_entry].right;
1047 #ifdef QSORT_ORDER_GUESS
1048             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1049 #endif
1050          }
1051       } else {
1052          /* This partition is too small to fool with qsort complexity, just
1053             do an ordinary insertion sort to minimize overhead.
1054          */
1055          int i;
1056          /* Assume 1st element is in right place already, and start checking
1057             at 2nd element to see where it should be inserted.
1058          */
1059          for (i = part_left + 1; i <= part_right; ++i) {
1060             int j;
1061             /* Scan (backwards - just in case 'i' is already in right place)
1062                through the elements already sorted to see if the ith element
1063                belongs ahead of one of them.
1064             */
1065             for (j = i - 1; j >= part_left; --j) {
1066                if (qsort_cmp(i, j) >= 0) {
1067                   /* i belongs right after j
1068                   */
1069                   break;
1070                }
1071             }
1072             ++j;
1073             if (j != i) {
1074                /* Looks like we really need to move some things
1075                */
1076                int k;
1077                temp = array[i];
1078                for (k = i - 1; k >= j; --k)
1079                   array[k + 1] = array[k];
1080                array[j] = temp;
1081             }
1082          }
1083
1084          /* That partition is now sorted, grab the next one, or get out
1085             of the loop if there aren't any more.
1086          */
1087
1088          if (next_stack_entry == 0) {
1089             /* the stack is empty - we are done */
1090             break;
1091          }
1092          --next_stack_entry;
1093          part_left = partition_stack[next_stack_entry].left;
1094          part_right = partition_stack[next_stack_entry].right;
1095 #ifdef QSORT_ORDER_GUESS
1096          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1097 #endif
1098       }
1099    }
1100
1101    /* Believe it or not, the array is sorted at this point! */
1102 }
1103
1104 #ifndef SMALLSORT
1105 #define SMALLSORT (200)
1106 #endif
1107
1108 /* Stabilize what is, presumably, an otherwise unstable sort method.
1109  * We do that by allocating (or having on hand) an array of pointers
1110  * that is the same size as the original array of elements to be sorted.
1111  * We initialize this parallel array with the addresses of the original
1112  * array elements.  This indirection can make you crazy.
1113  * Some pictures can help.  After initializing, we have
1114  *
1115  *  indir                  list1
1116  * +----+                 +----+
1117  * |    | --------------> |    | ------> first element to be sorted
1118  * +----+                 +----+
1119  * |    | --------------> |    | ------> second element to be sorted
1120  * +----+                 +----+
1121  * |    | --------------> |    | ------> third element to be sorted
1122  * +----+                 +----+
1123  *  ...
1124  * +----+                 +----+
1125  * |    | --------------> |    | ------> n-1st element to be sorted
1126  * +----+                 +----+
1127  * |    | --------------> |    | ------> n-th element to be sorted
1128  * +----+                 +----+
1129  *
1130  * During the sort phase, we leave the elements of list1 where they are,
1131  * and sort the pointers in the indirect array in the same order determined
1132  * by the original comparison routine on the elements pointed to.
1133  * Because we don't move the elements of list1 around through
1134  * this phase, we can break ties on elements that compare equal
1135  * using their address in the list1 array, ensuring stabilty.
1136  * This leaves us with something looking like
1137  *
1138  *  indir                  list1
1139  * +----+                 +----+
1140  * |    | --+       +---> |    | ------> first element to be sorted
1141  * +----+   |       |     +----+
1142  * |    | --|-------|---> |    | ------> second element to be sorted
1143  * +----+   |       |     +----+
1144  * |    | --|-------+ +-> |    | ------> third element to be sorted
1145  * +----+   |         |   +----+
1146  *  ...
1147  * +----+    | |   | |    +----+
1148  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1149  * +----+    |     |      +----+
1150  * |    | ---+     +----> |    | ------> n-th element to be sorted
1151  * +----+                 +----+
1152  *
1153  * where the i-th element of the indirect array points to the element
1154  * that should be i-th in the sorted array.  After the sort phase,
1155  * we have to put the elements of list1 into the places
1156  * dictated by the indirect array.
1157  */
1158
1159 static SVCOMPARE_t RealCmp;
1160
1161 static I32
1162 cmpindir(pTHX_ gptr a, gptr b)
1163 {
1164     I32 sense;
1165     gptr *ap = (gptr *)a;
1166     gptr *bp = (gptr *)b;
1167
1168     if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
1169          sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1170     return sense;
1171 }
1172
1173 STATIC void
1174 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
1175 {
1176     SV **hintsvp;
1177
1178     if (SORTHINTS(hintsvp) & HINT_SORT_FAST)
1179          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1180     else {
1181          register gptr **pp, *q;
1182          register size_t n, j, i;
1183          gptr *small[SMALLSORT], **indir, tmp;
1184          SVCOMPARE_t savecmp;
1185          if (nmemb <= 1) return;     /* sorted trivially */
1186
1187          /* Small arrays can use the stack, big ones must be allocated */
1188          if (nmemb <= SMALLSORT) indir = small;
1189          else { New(1799, indir, nmemb, gptr *); }
1190
1191          /* Copy pointers to original array elements into indirect array */
1192          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1193
1194          savecmp = RealCmp;     /* Save current comparison routine, if any */
1195          RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1196
1197          /* sort, with indirection */
1198          S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
1199
1200          pp = indir;
1201          q = list1;
1202          for (n = nmemb; n--; ) {
1203               /* Assert A: all elements of q with index > n are already
1204                * in place.  This is vacuosly true at the start, and we
1205                * put element n where it belongs below (if it wasn't
1206                * already where it belonged). Assert B: we only move
1207                * elements that aren't where they belong,
1208                * so, by A, we never tamper with elements above n.
1209                */
1210               j = pp[n] - q;            /* This sets j so that q[j] is
1211                                          * at pp[n].  *pp[j] belongs in
1212                                          * q[j], by construction.
1213                                          */
1214               if (n != j) {             /* all's well if n == j */
1215                    tmp = q[j];          /* save what's in q[j] */
1216                    do {
1217                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1218                         i = pp[j] - q;  /* the index in q of the element
1219                                          * just moved */
1220                         pp[j] = q + j;  /* this is ok now */
1221                    } while ((j = i) != n);
1222                    /* There are only finitely many (nmemb) addresses
1223                     * in the pp array.
1224                     * So we must eventually revisit an index we saw before.
1225                     * Suppose the first revisited index is k != n.
1226                     * An index is visited because something else belongs there.
1227                     * If we visit k twice, then two different elements must
1228                     * belong in the same place, which cannot be.
1229                     * So j must get back to n, the loop terminates,
1230                     * and we put the saved element where it belongs.
1231                     */
1232                    q[n] = tmp;          /* put what belongs into
1233                                          * the n-th element */
1234               }
1235          }
1236
1237         /* free iff allocated */
1238          if (indir != small) { Safefree(indir); }
1239          /* restore prevailing comparison routine */
1240          RealCmp = savecmp;
1241     }
1242 }
1243
1244 /*
1245 =for apidoc sortsv
1246
1247 Sort an array. Here is an example:
1248
1249     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1250
1251 =cut
1252 */
1253
1254 void
1255 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1256 {
1257     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
1258         S_mergesortsv;
1259     SV **hintsvp;
1260     I32 hints;
1261
1262     if ((hints = SORTHINTS(hintsvp))) {
1263          if (hints & HINT_SORT_QUICKSORT)
1264               sortsvp = S_qsortsv;
1265          else {
1266               if (hints & HINT_SORT_MERGESORT)
1267                    sortsvp = S_mergesortsv;
1268               else
1269                    sortsvp = S_mergesortsv;
1270          }
1271     }
1272
1273     sortsvp(aTHX_ array, nmemb, cmp);
1274 }
1275
1276 PP(pp_sort)
1277 {
1278     dSP; dMARK; dORIGMARK;
1279     register SV **up;
1280     SV **myorigmark = ORIGMARK;
1281     register I32 max;
1282     HV *stash;
1283     GV *gv;
1284     CV *cv = 0;
1285     I32 gimme = GIMME;
1286     OP* nextop = PL_op->op_next;
1287     I32 overloading = 0;
1288     bool hasargs = FALSE;
1289     I32 is_xsub = 0;
1290
1291     if (gimme != G_ARRAY) {
1292         SP = MARK;
1293         RETPUSHUNDEF;
1294     }
1295
1296     ENTER;
1297     SAVEVPTR(PL_sortcop);
1298     if (PL_op->op_flags & OPf_STACKED) {
1299         if (PL_op->op_flags & OPf_SPECIAL) {
1300             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1301             kid = kUNOP->op_first;                      /* pass rv2gv */
1302             kid = kUNOP->op_first;                      /* pass leave */
1303             PL_sortcop = kid->op_next;
1304             stash = CopSTASH(PL_curcop);
1305         }
1306         else {
1307             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1308             if (cv && SvPOK(cv)) {
1309                 STRLEN n_a;
1310                 char *proto = SvPV((SV*)cv, n_a);
1311                 if (proto && strEQ(proto, "$$")) {
1312                     hasargs = TRUE;
1313                 }
1314             }
1315             if (!(cv && CvROOT(cv))) {
1316                 if (cv && CvXSUB(cv)) {
1317                     is_xsub = 1;
1318                 }
1319                 else if (gv) {
1320                     SV *tmpstr = sv_newmortal();
1321                     gv_efullname3(tmpstr, gv, Nullch);
1322                     DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
1323                         SvPVX(tmpstr));
1324                 }
1325                 else {
1326                     DIE(aTHX_ "Undefined subroutine in sort");
1327                 }
1328             }
1329
1330             if (is_xsub)
1331                 PL_sortcop = (OP*)cv;
1332             else {
1333                 PL_sortcop = CvSTART(cv);
1334                 SAVEVPTR(CvROOT(cv)->op_ppaddr);
1335                 CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
1336
1337                 SAVEVPTR(PL_curpad);
1338                 PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
1339             }
1340         }
1341     }
1342     else {
1343         PL_sortcop = Nullop;
1344         stash = CopSTASH(PL_curcop);
1345     }
1346
1347     up = myorigmark + 1;
1348     while (MARK < SP) { /* This may or may not shift down one here. */
1349         /*SUPPRESS 560*/
1350         if ((*up = *++MARK)) {                  /* Weed out nulls. */
1351             SvTEMP_off(*up);
1352             if (!PL_sortcop && !SvPOK(*up)) {
1353                 STRLEN n_a;
1354                 if (SvAMAGIC(*up))
1355                     overloading = 1;
1356                 else
1357                     (void)sv_2pv(*up, &n_a);
1358             }
1359             up++;
1360         }
1361     }
1362     max = --up - myorigmark;
1363     if (PL_sortcop) {
1364         if (max > 1) {
1365             PERL_CONTEXT *cx;
1366             SV** newsp;
1367             bool oldcatch = CATCH_GET;
1368
1369             SAVETMPS;
1370             SAVEOP();
1371
1372             CATCH_SET(TRUE);
1373             PUSHSTACKi(PERLSI_SORT);
1374             if (!hasargs && !is_xsub) {
1375                 if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
1376                     SAVESPTR(PL_firstgv);
1377                     SAVESPTR(PL_secondgv);
1378                     PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1379                     PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1380                     PL_sortstash = stash;
1381                 }
1382 #ifdef USE_5005THREADS
1383                 sv_lock((SV *)PL_firstgv);
1384                 sv_lock((SV *)PL_secondgv);
1385 #endif
1386                 SAVESPTR(GvSV(PL_firstgv));
1387                 SAVESPTR(GvSV(PL_secondgv));
1388             }
1389
1390             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1391             if (!(PL_op->op_flags & OPf_SPECIAL)) {
1392                 cx->cx_type = CXt_SUB;
1393                 cx->blk_gimme = G_SCALAR;
1394                 PUSHSUB(cx);
1395                 if (!CvDEPTH(cv))
1396                     (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
1397             }
1398             PL_sortcxix = cxstack_ix;
1399
1400             if (hasargs && !is_xsub) {
1401                 /* This is mostly copied from pp_entersub */
1402                 AV *av = (AV*)PL_curpad[0];
1403
1404 #ifndef USE_5005THREADS
1405                 cx->blk_sub.savearray = GvAV(PL_defgv);
1406                 GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1407 #endif /* USE_5005THREADS */
1408                 cx->blk_sub.oldcurpad = PL_curpad;
1409                 cx->blk_sub.argarray = av;
1410             }
1411            sortsv((myorigmark+1), max,
1412                   is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
1413
1414             POPBLOCK(cx,PL_curpm);
1415             PL_stack_sp = newsp;
1416             POPSTACK;
1417             CATCH_SET(oldcatch);
1418         }
1419     }
1420     else {
1421         if (max > 1) {
1422             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1423             sortsv(ORIGMARK+1, max,
1424                   (PL_op->op_private & OPpSORT_NUMERIC)
1425                         ? ( (PL_op->op_private & OPpSORT_INTEGER)
1426                             ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
1427                             : ( overloading ? amagic_ncmp : sv_ncmp))
1428                         : ( IN_LOCALE_RUNTIME
1429                             ? ( overloading
1430                                 ? amagic_cmp_locale
1431                                 : sv_cmp_locale_static)
1432                             : ( overloading ? amagic_cmp : sv_cmp_static)));
1433             if (PL_op->op_private & OPpSORT_REVERSE) {
1434                 SV **p = ORIGMARK+1;
1435                 SV **q = ORIGMARK+max;
1436                 while (p < q) {
1437                     SV *tmp = *p;
1438                     *p++ = *q;
1439                     *q-- = tmp;
1440                 }
1441             }
1442         }
1443     }
1444     LEAVE;
1445     PL_stack_sp = ORIGMARK + max;
1446     return nextop;
1447 }
1448
1449 static I32
1450 sortcv(pTHX_ SV *a, SV *b)
1451 {
1452     I32 oldsaveix = PL_savestack_ix;
1453     I32 oldscopeix = PL_scopestack_ix;
1454     I32 result;
1455     GvSV(PL_firstgv) = a;
1456     GvSV(PL_secondgv) = b;
1457     PL_stack_sp = PL_stack_base;
1458     PL_op = PL_sortcop;
1459     CALLRUNOPS(aTHX);
1460     if (PL_stack_sp != PL_stack_base + 1)
1461         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1462     if (!SvNIOKp(*PL_stack_sp))
1463         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1464     result = SvIV(*PL_stack_sp);
1465     while (PL_scopestack_ix > oldscopeix) {
1466         LEAVE;
1467     }
1468     leave_scope(oldsaveix);
1469     return result;
1470 }
1471
1472 static I32
1473 sortcv_stacked(pTHX_ SV *a, SV *b)
1474 {
1475     I32 oldsaveix = PL_savestack_ix;
1476     I32 oldscopeix = PL_scopestack_ix;
1477     I32 result;
1478     AV *av;
1479
1480 #ifdef USE_5005THREADS
1481     av = (AV*)PL_curpad[0];
1482 #else
1483     av = GvAV(PL_defgv);
1484 #endif
1485
1486     if (AvMAX(av) < 1) {
1487         SV** ary = AvALLOC(av);
1488         if (AvARRAY(av) != ary) {
1489             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1490             SvPVX(av) = (char*)ary;
1491         }
1492         if (AvMAX(av) < 1) {
1493             AvMAX(av) = 1;
1494             Renew(ary,2,SV*);
1495             SvPVX(av) = (char*)ary;
1496         }
1497     }
1498     AvFILLp(av) = 1;
1499
1500     AvARRAY(av)[0] = a;
1501     AvARRAY(av)[1] = b;
1502     PL_stack_sp = PL_stack_base;
1503     PL_op = PL_sortcop;
1504     CALLRUNOPS(aTHX);
1505     if (PL_stack_sp != PL_stack_base + 1)
1506         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1507     if (!SvNIOKp(*PL_stack_sp))
1508         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1509     result = SvIV(*PL_stack_sp);
1510     while (PL_scopestack_ix > oldscopeix) {
1511         LEAVE;
1512     }
1513     leave_scope(oldsaveix);
1514     return result;
1515 }
1516
1517 static I32
1518 sortcv_xsub(pTHX_ SV *a, SV *b)
1519 {
1520     dSP;
1521     I32 oldsaveix = PL_savestack_ix;
1522     I32 oldscopeix = PL_scopestack_ix;
1523     I32 result;
1524     CV *cv=(CV*)PL_sortcop;
1525
1526     SP = PL_stack_base;
1527     PUSHMARK(SP);
1528     EXTEND(SP, 2);
1529     *++SP = a;
1530     *++SP = b;
1531     PUTBACK;
1532     (void)(*CvXSUB(cv))(aTHX_ cv);
1533     if (PL_stack_sp != PL_stack_base + 1)
1534         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1535     if (!SvNIOKp(*PL_stack_sp))
1536         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1537     result = SvIV(*PL_stack_sp);
1538     while (PL_scopestack_ix > oldscopeix) {
1539         LEAVE;
1540     }
1541     leave_scope(oldsaveix);
1542     return result;
1543 }
1544
1545
1546 static I32
1547 sv_ncmp(pTHX_ SV *a, SV *b)
1548 {
1549     NV nv1 = SvNV(a);
1550     NV nv2 = SvNV(b);
1551     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1552 }
1553
1554 static I32
1555 sv_i_ncmp(pTHX_ SV *a, SV *b)
1556 {
1557     IV iv1 = SvIV(a);
1558     IV iv2 = SvIV(b);
1559     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1560 }
1561 #define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
1562           *svp = Nullsv;                                \
1563           if (PL_amagic_generation) { \
1564             if (SvAMAGIC(left)||SvAMAGIC(right))\
1565                 *svp = amagic_call(left, \
1566                                    right, \
1567                                    CAT2(meth,_amg), \
1568                                    0); \
1569           } \
1570         } STMT_END
1571
1572 static I32
1573 amagic_ncmp(pTHX_ register SV *a, register SV *b)
1574 {
1575     SV *tmpsv;
1576     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1577     if (tmpsv) {
1578         NV d;
1579
1580         if (SvIOK(tmpsv)) {
1581             I32 i = SvIVX(tmpsv);
1582             if (i > 0)
1583                return 1;
1584             return i? -1 : 0;
1585         }
1586         d = SvNV(tmpsv);
1587         if (d > 0)
1588            return 1;
1589         return d? -1 : 0;
1590      }
1591      return sv_ncmp(aTHX_ a, b);
1592 }
1593
1594 static I32
1595 amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1596 {
1597     SV *tmpsv;
1598     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1599     if (tmpsv) {
1600         NV d;
1601
1602         if (SvIOK(tmpsv)) {
1603             I32 i = SvIVX(tmpsv);
1604             if (i > 0)
1605                return 1;
1606             return i? -1 : 0;
1607         }
1608         d = SvNV(tmpsv);
1609         if (d > 0)
1610            return 1;
1611         return d? -1 : 0;
1612     }
1613     return sv_i_ncmp(aTHX_ a, b);
1614 }
1615
1616 static I32
1617 amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1618 {
1619     SV *tmpsv;
1620     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1621     if (tmpsv) {
1622         NV d;
1623
1624         if (SvIOK(tmpsv)) {
1625             I32 i = SvIVX(tmpsv);
1626             if (i > 0)
1627                return 1;
1628             return i? -1 : 0;
1629         }
1630         d = SvNV(tmpsv);
1631         if (d > 0)
1632            return 1;
1633         return d? -1 : 0;
1634     }
1635     return sv_cmp(str1, str2);
1636 }
1637
1638 static I32
1639 amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1640 {
1641     SV *tmpsv;
1642     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1643     if (tmpsv) {
1644         NV d;
1645
1646         if (SvIOK(tmpsv)) {
1647             I32 i = SvIVX(tmpsv);
1648             if (i > 0)
1649                return 1;
1650             return i? -1 : 0;
1651         }
1652         d = SvNV(tmpsv);
1653         if (d > 0)
1654            return 1;
1655         return d? -1 : 0;
1656     }
1657     return sv_cmp_locale(str1, str2);
1658 }
1659
1660