pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (c) 1991-2001, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  *   ...they shuffled back towards the rear of the line. 'No, not at the
  12  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  13  */
  14
  15 #include "EXTERN.h"
  16 #define PERL_IN_PP_SORT_C
  17 #include "perl.h"
  18
  19 static I32 sortcv(pTHX_ SV *a, SV *b);
  20 static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
  21 static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
  22 static I32 sv_ncmp(pTHX_ SV *a, SV *b);
  23 static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
  24 static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
  25 static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
  26 static I32 amagic_cmp(pTHX_ SV *a, SV *b);
  27 static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
  28
  29 #define sv_cmp_static Perl_sv_cmp
  30 #define sv_cmp_locale_static Perl_sv_cmp_locale
  31
  32 #define SORTHINTS(hintsvp) \
  33      ((PL_hintgv &&     \
  34       (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
  35           (I32)SvIV(*hintsvp) : 0)
  36
  37 /*
  38  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  39  *
  40  * The original code was written in conjunction with BSD Computer Software
  41  * Research Group at University of California, Berkeley.
  42  *
  43  * See also: "Optimistic Merge Sort" (SODA '92)
  44  *
  45  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  46  *
  47  * The code can be distributed under the same terms as Perl itself.
  48  *
  49  */
  50
  51 #ifdef  TESTHARNESS
  52 #include <sys/types.h>
  53 typedef void SV;
  54 #define pTHX_
  55 #define STATIC
  56 #define New(ID,VAR,N,TYPE) VAR=(TYPE *)malloc((N)*sizeof(TYPE))
  57 #define Safefree(VAR) free(VAR)
  58 typedef int  (*SVCOMPARE_t) (pTHX_ SV*, SV*);
  59 #endif  /* TESTHARNESS */
  60
  61 typedef char * aptr;            /* pointer for arithmetic on sizes */
  62 typedef SV * gptr;              /* pointers in our lists */
  63
  64 /* Binary merge internal sort, with a few special mods
  65 ** for the special perl environment it now finds itself in.
  66 **
  67 ** Things that were once options have been hotwired
  68 ** to values suitable for this use.  In particular, we'll always
  69 ** initialize looking for natural runs, we'll always produce stable
  70 ** output, and we'll always do Peter McIlroy's binary merge.
  71 */
  72
  73 /* Pointer types for arithmetic and storage and convenience casts */
  74
  75 #define APTR(P) ((aptr)(P))
  76 #define GPTP(P) ((gptr *)(P))
  77 #define GPPP(P) ((gptr **)(P))
  78
  79
  80 /* byte offset from pointer P to (larger) pointer Q */
  81 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  82
  83 #define PSIZE sizeof(gptr)
  84
  85 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  86
  87 #ifdef  PSHIFT
  88 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  89 #define PNBYTE(N)       ((N) << (PSHIFT))
  90 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  91 #else
  92 /* Leave optimization to compiler */
  93 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  94 #define PNBYTE(N)       ((N) * (PSIZE))
  95 #define PINDEX(P, N)    (GPTP(P) + (N))
  96 #endif
  97
  98 /* Pointer into other corresponding to pointer into this */
  99 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 100
 101 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 102
 103
 104 /* Runs are identified by a pointer in the auxilliary list.
 105 ** The pointer is at the start of the list,
 106 ** and it points to the start of the next list.
 107 ** NEXT is used as an lvalue, too.
 108 */
 109
 110 #define NEXT(P)         (*GPPP(P))
 111
 112
 113 /* PTHRESH is the minimum number of pairs with the same sense to justify
 114 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 115 ** not just elements, so PTHRESH == 8 means a run of 16.
 116 */
 117
 118 #define PTHRESH (8)
 119
 120 /* RTHRESH is the number of elements in a run that must compare low
 121 ** to the low element from the opposing run before we justify
 122 ** doing a binary rampup instead of single stepping.
 123 ** In random input, N in a row low should only happen with
 124 ** probability 2^(1-N), so we can risk that we are dealing
 125 ** with orderly input without paying much when we aren't.
 126 */
 127
 128 #define RTHRESH (6)
 129
 130
 131 /*
 132 ** Overview of algorithm and variables.
 133 ** The array of elements at list1 will be organized into runs of length 2,
 134 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 135 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 136 **
 137 ** Unless otherwise specified, pair pointers address the first of two elements.
 138 **
 139 ** b and b+1 are a pair that compare with sense ``sense''.
 140 ** b is the ``bottom'' of adjacent pairs that might form a longer run.
 141 **
 142 ** p2 parallels b in the list2 array, where runs are defined by
 143 ** a pointer chain.
 144 **
 145 ** t represents the ``top'' of the adjacent pairs that might extend
 146 ** the run beginning at b.  Usually, t addresses a pair
 147 ** that compares with opposite sense from (b,b+1).
 148 ** However, it may also address a singleton element at the end of list1,
 149 ** or it may be equal to ``last'', the first element beyond list1.
 150 **
 151 ** r addresses the Nth pair following b.  If this would be beyond t,
 152 ** we back it off to t.  Only when r is less than t do we consider the
 153 ** run long enough to consider checking.
 154 **
 155 ** q addresses a pair such that the pairs at b through q already form a run.
 156 ** Often, q will equal b, indicating we only are sure of the pair itself.
 157 ** However, a search on the previous cycle may have revealed a longer run,
 158 ** so q may be greater than b.
 159 **
 160 ** p is used to work back from a candidate r, trying to reach q,
 161 ** which would mean b through r would be a run.  If we discover such a run,
 162 ** we start q at r and try to push it further towards t.
 163 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 164 ** In any event, after the check (if any), we have two main cases.
 165 **
 166 ** 1) Short run.  b <= q < p <= r <= t.
 167 **      b through q is a run (perhaps trivial)
 168 **      q through p are uninteresting pairs
 169 **      p through r is a run
 170 **
 171 ** 2) Long run.  b < r <= q < t.
 172 **      b through q is a run (of length >= 2 * PTHRESH)
 173 **
 174 ** Note that degenerate cases are not only possible, but likely.
 175 ** For example, if the pair following b compares with opposite sense,
 176 ** then b == q < p == r == t.
 177 */
 178
 179
 180 static void
 181 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 182 {
 183     int sense;
 184     register gptr *b, *p, *q, *t, *p2;
 185     register gptr c, *last, *r;
 186     gptr *savep;
 187
 188     b = list1;
 189     last = PINDEX(b, nmemb);
 190     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 191     for (p2 = list2; b < last; ) {
 192         /* We just started, or just reversed sense.
 193         ** Set t at end of pairs with the prevailing sense.
 194         */
 195         for (p = b+2, t = p; ++p < last; t = ++p) {
 196             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 197         }
 198         q = b;
 199         /* Having laid out the playing field, look for long runs */
 200         do {
 201             p = r = b + (2 * PTHRESH);
 202             if (r >= t) p = r = t;      /* too short to care about */
 203             else {
 204                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 205                        ((p -= 2) > q));
 206                 if (p <= q) {
 207                     /* b through r is a (long) run.
 208                     ** Extend it as far as possible.
 209                     */
 210                     p = q = r;
 211                     while (((p += 2) < t) &&
 212                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 213                     r = p = q + 2;      /* no simple pairs, no after-run */
 214                 }
 215             }
 216             if (q > b) {                /* run of greater than 2 at b */
 217                 savep = p;
 218                 p = q += 2;
 219                 /* pick up singleton, if possible */
 220                 if ((p == t) &&
 221                     ((t + 1) == last) &&
 222                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 223                     savep = r = p = q = last;
 224                 p2 = NEXT(p2) = p2 + (p - b);
 225                 if (sense) while (b < --p) {
 226                     c = *b;
 227                     *b++ = *p;
 228                     *p = c;
 229                 }
 230                 p = savep;
 231             }
 232             while (q < p) {             /* simple pairs */
 233                 p2 = NEXT(p2) = p2 + 2;
 234                 if (sense) {
 235                     c = *q++;
 236                     *(q-1) = *q;
 237                     *q++ = c;
 238                 } else q += 2;
 239             }
 240             if (((b = p) == t) && ((t+1) == last)) {
 241                 NEXT(p2) = p2 + 1;
 242                 b++;
 243             }
 244             q = r;
 245         } while (b < t);
 246         sense = !sense;
 247     }
 248     return;
 249 }
 250
 251
 252 /* Overview of bmerge variables:
 253 **
 254 ** list1 and list2 address the main and auxiliary arrays.
 255 ** They swap identities after each merge pass.
 256 ** Base points to the original list1, so we can tell if
 257 ** the pointers ended up where they belonged (or must be copied).
 258 **
 259 ** When we are merging two lists, f1 and f2 are the next elements
 260 ** on the respective lists.  l1 and l2 mark the end of the lists.
 261 ** tp2 is the current location in the merged list.
 262 **
 263 ** p1 records where f1 started.
 264 ** After the merge, a new descriptor is built there.
 265 **
 266 ** p2 is a ``parallel'' pointer in (what starts as) descriptor space.
 267 ** It is used to identify and delimit the runs.
 268 **
 269 ** In the heat of determining where q, the greater of the f1/f2 elements,
 270 ** belongs in the other list, b, t and p, represent bottom, top and probe
 271 ** locations, respectively, in the other list.
 272 ** They make convenient temporary pointers in other places.
 273 */
 274
 275 STATIC void
 276 S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
 277 {
 278     int i, run;
 279     int sense;
 280     register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
 281     gptr *aux, *list2, *p2, *last;
 282     gptr *base = list1;
 283     gptr *p1;
 284
 285     if (nmemb <= 1) return;     /* sorted trivially */
 286     New(799,list2,nmemb,gptr);  /* allocate auxilliary array */
 287     aux = list2;
 288     dynprep(aTHX_ list1, list2, nmemb, cmp);
 289     last = PINDEX(list2, nmemb);
 290     while (NEXT(list2) != last) {
 291         /* More than one run remains.  Do some merging to reduce runs. */
 292         l2 = p1 = list1;
 293         for (tp2 = p2 = list2; p2 != last;) {
 294             /* The new first run begins where the old second list ended.
 295             ** Use the p2 ``parallel'' pointer to identify the end of the run.
 296             */
 297             f1 = l2;
 298             t = NEXT(p2);
 299             f2 = l1 = POTHER(t, list2, list1);
 300             if (t != last) t = NEXT(t);
 301             l2 = POTHER(t, list2, list1);
 302             p2 = t;
 303             while (f1 < l1 && f2 < l2) {
 304                 /* If head 1 is larger than head 2, find ALL the elements
 305                 ** in list 2 strictly less than head1, write them all,
 306                 ** then head 1.  Then compare the new heads, and repeat,
 307                 ** until one or both lists are exhausted.
 308                 **
 309                 ** In all comparisons (after establishing
 310                 ** which head to merge) the item to merge
 311                 ** (at pointer q) is the first operand of
 312                 ** the comparison.  When we want to know
 313                 ** if ``q is strictly less than the other'',
 314                 ** we can't just do
 315                 **    cmp(q, other) < 0
 316                 ** because stability demands that we treat equality
 317                 ** as high when q comes from l2, and as low when
 318                 ** q was from l1.  So we ask the question by doing
 319                 **    cmp(q, other) <= sense
 320                 ** and make sense == 0 when equality should look low,
 321                 ** and -1 when equality should look high.
 322                 */
 323
 324
 325                 if (cmp(aTHX_ *f1, *f2) <= 0) {
 326                     q = f2; b = f1; t = l1;
 327                     sense = -1;
 328                 } else {
 329                     q = f1; b = f2; t = l2;
 330                     sense = 0;
 331                 }
 332
 333
 334                 /* ramp up
 335                 **
 336                 ** Leave t at something strictly
 337                 ** greater than q (or at the end of the list),
 338                 ** and b at something strictly less than q.
 339                 */
 340                 for (i = 1, run = 0 ;;) {
 341                     if ((p = PINDEX(b, i)) >= t) {
 342                         /* off the end */
 343                         if (((p = PINDEX(t, -1)) > b) &&
 344                             (cmp(aTHX_ *q, *p) <= sense))
 345                              t = p;
 346                         else b = p;
 347                         break;
 348                     } else if (cmp(aTHX_ *q, *p) <= sense) {
 349                         t = p;
 350                         break;
 351                     } else b = p;
 352                     if (++run >= RTHRESH) i += i;
 353                 }
 354
 355
 356                 /* q is known to follow b and must be inserted before t.
 357                 ** Increment b, so the range of possibilities is [b,t).
 358                 ** Round binary split down, to favor early appearance.
 359                 ** Adjust b and t until q belongs just before t.
 360                 */
 361
 362                 b++;
 363                 while (b < t) {
 364                     p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 365                     if (cmp(aTHX_ *q, *p) <= sense) {
 366                         t = p;
 367                     } else b = p + 1;
 368                 }
 369
 370
 371                 /* Copy all the strictly low elements */
 372
 373                 if (q == f1) {
 374                     FROMTOUPTO(f2, tp2, t);
 375                     *tp2++ = *f1++;
 376                 } else {
 377                     FROMTOUPTO(f1, tp2, t);
 378                     *tp2++ = *f2++;
 379                 }
 380             }
 381
 382
 383             /* Run out remaining list */
 384             if (f1 == l1) {
 385                    if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 386             } else              FROMTOUPTO(f1, tp2, l1);
 387             p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 388         }
 389         t = list1;
 390         list1 = list2;
 391         list2 = t;
 392         last = PINDEX(list2, nmemb);
 393     }
 394     if (base == list2) {
 395         last = PINDEX(list1, nmemb);
 396         FROMTOUPTO(list1, list2, last);
 397     }
 398     Safefree(aux);
 399     return;
 400 }
 401
 402 /*
 403  * The quicksort implementation was derived from source code contributed
 404  * by Tom Horsley.
 405  *
 406  * NOTE: this code was derived from Tom Horsley's qsort replacement
 407  * and should not be confused with the original code.
 408  */
 409
 410 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 411
 412    Permission granted to distribute under the same terms as perl which are
 413    (briefly):
 414
 415     This program is free software; you can redistribute it and/or modify
 416     it under the terms of either:
 417
 418         a) the GNU General Public License as published by the Free
 419         Software Foundation; either version 1, or (at your option) any
 420         later version, or
 421
 422         b) the "Artistic License" which comes with this Kit.
 423
 424    Details on the perl license can be found in the perl source code which
 425    may be located via the www.perl.com web page.
 426
 427    This is the most wonderfulest possible qsort I can come up with (and
 428    still be mostly portable) My (limited) tests indicate it consistently
 429    does about 20% fewer calls to compare than does the qsort in the Visual
 430    C++ library, other vendors may vary.
 431
 432    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 433    others I invented myself (or more likely re-invented since they seemed
 434    pretty obvious once I watched the algorithm operate for a while).
 435
 436    Most of this code was written while watching the Marlins sweep the Giants
 437    in the 1997 National League Playoffs - no Braves fans allowed to use this
 438    code (just kidding :-).
 439
 440    I realize that if I wanted to be true to the perl tradition, the only
 441    comment in this file would be something like:
 442
 443    ...they shuffled back towards the rear of the line. 'No, not at the
 444    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 445
 446    However, I really needed to violate that tradition just so I could keep
 447    track of what happens myself, not to mention some poor fool trying to
 448    understand this years from now :-).
 449 */
 450
 451 /* ********************************************************** Configuration */
 452
 453 #ifndef QSORT_ORDER_GUESS
 454 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 455 #endif
 456
 457 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 458    future processing - a good max upper bound is log base 2 of memory size
 459    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 460    safely be smaller than that since the program is taking up some space and
 461    most operating systems only let you grab some subset of contiguous
 462    memory (not to mention that you are normally sorting data larger than
 463    1 byte element size :-).
 464 */
 465 #ifndef QSORT_MAX_STACK
 466 #define QSORT_MAX_STACK 32
 467 #endif
 468
 469 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 470    Anything bigger and we use qsort. If you make this too small, the qsort
 471    will probably break (or become less efficient), because it doesn't expect
 472    the middle element of a partition to be the same as the right or left -
 473    you have been warned).
 474 */
 475 #ifndef QSORT_BREAK_EVEN
 476 #define QSORT_BREAK_EVEN 6
 477 #endif
 478
 479 /* ************************************************************* Data Types */
 480
 481 /* hold left and right index values of a partition waiting to be sorted (the
 482    partition includes both left and right - right is NOT one past the end or
 483    anything like that).
 484 */
 485 struct partition_stack_entry {
 486    int left;
 487    int right;
 488 #ifdef QSORT_ORDER_GUESS
 489    int qsort_break_even;
 490 #endif
 491 };
 492
 493 /* ******************************************************* Shorthand Macros */
 494
 495 /* Note that these macros will be used from inside the qsort function where
 496    we happen to know that the variable 'elt_size' contains the size of an
 497    array element and the variable 'temp' points to enough space to hold a
 498    temp element and the variable 'array' points to the array being sorted
 499    and 'compare' is the pointer to the compare routine.
 500
 501    Also note that there are very many highly architecture specific ways
 502    these might be sped up, but this is simply the most generally portable
 503    code I could think of.
 504 */
 505
 506 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 507 */
 508 #define qsort_cmp(elt1, elt2) \
 509    ((*compare)(aTHX_ array[elt1], array[elt2]))
 510
 511 #ifdef QSORT_ORDER_GUESS
 512 #define QSORT_NOTICE_SWAP swapped++;
 513 #else
 514 #define QSORT_NOTICE_SWAP
 515 #endif
 516
 517 /* swaps contents of array elements elt1, elt2.
 518 */
 519 #define qsort_swap(elt1, elt2) \
 520    STMT_START { \
 521       QSORT_NOTICE_SWAP \
 522       temp = array[elt1]; \
 523       array[elt1] = array[elt2]; \
 524       array[elt2] = temp; \
 525    } STMT_END
 526
 527 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 528    elt3 and elt3 gets elt1.
 529 */
 530 #define qsort_rotate(elt1, elt2, elt3) \
 531    STMT_START { \
 532       QSORT_NOTICE_SWAP \
 533       temp = array[elt1]; \
 534       array[elt1] = array[elt2]; \
 535       array[elt2] = array[elt3]; \
 536       array[elt3] = temp; \
 537    } STMT_END
 538
 539 /* ************************************************************ Debug stuff */
 540
 541 #ifdef QSORT_DEBUG
 542
 543 static void
 544 break_here()
 545 {
 546    return; /* good place to set a breakpoint */
 547 }
 548
 549 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 550
 551 static void
 552 doqsort_all_asserts(
 553    void * array,
 554    size_t num_elts,
 555    size_t elt_size,
 556    int (*compare)(const void * elt1, const void * elt2),
 557    int pc_left, int pc_right, int u_left, int u_right)
 558 {
 559    int i;
 560
 561    qsort_assert(pc_left <= pc_right);
 562    qsort_assert(u_right < pc_left);
 563    qsort_assert(pc_right < u_left);
 564    for (i = u_right + 1; i < pc_left; ++i) {
 565       qsort_assert(qsort_cmp(i, pc_left) < 0);
 566    }
 567    for (i = pc_left; i < pc_right; ++i) {
 568       qsort_assert(qsort_cmp(i, pc_right) == 0);
 569    }
 570    for (i = pc_right + 1; i < u_left; ++i) {
 571       qsort_assert(qsort_cmp(pc_right, i) < 0);
 572    }
 573 }
 574
 575 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 576    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 577                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 578
 579 #else
 580
 581 #define qsort_assert(t) ((void)0)
 582
 583 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 584
 585 #endif
 586
 587 /* ****************************************************************** qsort */
 588
 589 STATIC void /* the standard unstable (u) quicksort (qsort) */
 590 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 591 {
 592    register SV * temp;
 593
 594    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 595    int next_stack_entry = 0;
 596
 597    int part_left;
 598    int part_right;
 599 #ifdef QSORT_ORDER_GUESS
 600    int qsort_break_even;
 601    int swapped;
 602 #endif
 603
 604    /* Make sure we actually have work to do.
 605    */
 606    if (num_elts <= 1) {
 607       return;
 608    }
 609
 610    /* Setup the initial partition definition and fall into the sorting loop
 611    */
 612    part_left = 0;
 613    part_right = (int)(num_elts - 1);
 614 #ifdef QSORT_ORDER_GUESS
 615    qsort_break_even = QSORT_BREAK_EVEN;
 616 #else
 617 #define qsort_break_even QSORT_BREAK_EVEN
 618 #endif
 619    for ( ; ; ) {
 620       if ((part_right - part_left) >= qsort_break_even) {
 621          /* OK, this is gonna get hairy, so lets try to document all the
 622             concepts and abbreviations and variables and what they keep
 623             track of:
 624
 625             pc: pivot chunk - the set of array elements we accumulate in the
 626                 middle of the partition, all equal in value to the original
 627                 pivot element selected. The pc is defined by:
 628
 629                 pc_left - the leftmost array index of the pc
 630                 pc_right - the rightmost array index of the pc
 631
 632                 we start with pc_left == pc_right and only one element
 633                 in the pivot chunk (but it can grow during the scan).
 634
 635             u:  uncompared elements - the set of elements in the partition
 636                 we have not yet compared to the pivot value. There are two
 637                 uncompared sets during the scan - one to the left of the pc
 638                 and one to the right.
 639
 640                 u_right - the rightmost index of the left side's uncompared set
 641                 u_left - the leftmost index of the right side's uncompared set
 642
 643                 The leftmost index of the left sides's uncompared set
 644                 doesn't need its own variable because it is always defined
 645                 by the leftmost edge of the whole partition (part_left). The
 646                 same goes for the rightmost edge of the right partition
 647                 (part_right).
 648
 649                 We know there are no uncompared elements on the left once we
 650                 get u_right < part_left and no uncompared elements on the
 651                 right once u_left > part_right. When both these conditions
 652                 are met, we have completed the scan of the partition.
 653
 654                 Any elements which are between the pivot chunk and the
 655                 uncompared elements should be less than the pivot value on
 656                 the left side and greater than the pivot value on the right
 657                 side (in fact, the goal of the whole algorithm is to arrange
 658                 for that to be true and make the groups of less-than and
 659                 greater-then elements into new partitions to sort again).
 660
 661             As you marvel at the complexity of the code and wonder why it
 662             has to be so confusing. Consider some of the things this level
 663             of confusion brings:
 664
 665             Once I do a compare, I squeeze every ounce of juice out of it. I
 666             never do compare calls I don't have to do, and I certainly never
 667             do redundant calls.
 668
 669             I also never swap any elements unless I can prove there is a
 670             good reason. Many sort algorithms will swap a known value with
 671             an uncompared value just to get things in the right place (or
 672             avoid complexity :-), but that uncompared value, once it gets
 673             compared, may then have to be swapped again. A lot of the
 674             complexity of this code is due to the fact that it never swaps
 675             anything except compared values, and it only swaps them when the
 676             compare shows they are out of position.
 677          */
 678          int pc_left, pc_right;
 679          int u_right, u_left;
 680
 681          int s;
 682
 683          pc_left = ((part_left + part_right) / 2);
 684          pc_right = pc_left;
 685          u_right = pc_left - 1;
 686          u_left = pc_right + 1;
 687
 688          /* Qsort works best when the pivot value is also the median value
 689             in the partition (unfortunately you can't find the median value
 690             without first sorting :-), so to give the algorithm a helping
 691             hand, we pick 3 elements and sort them and use the median value
 692             of that tiny set as the pivot value.
 693
 694             Some versions of qsort like to use the left middle and right as
 695             the 3 elements to sort so they can insure the ends of the
 696             partition will contain values which will stop the scan in the
 697             compare loop, but when you have to call an arbitrarily complex
 698             routine to do a compare, its really better to just keep track of
 699             array index values to know when you hit the edge of the
 700             partition and avoid the extra compare. An even better reason to
 701             avoid using a compare call is the fact that you can drop off the
 702             edge of the array if someone foolishly provides you with an
 703             unstable compare function that doesn't always provide consistent
 704             results.
 705
 706             So, since it is simpler for us to compare the three adjacent
 707             elements in the middle of the partition, those are the ones we
 708             pick here (conveniently pointed at by u_right, pc_left, and
 709             u_left). The values of the left, center, and right elements
 710             are refered to as l c and r in the following comments.
 711          */
 712
 713 #ifdef QSORT_ORDER_GUESS
 714          swapped = 0;
 715 #endif
 716          s = qsort_cmp(u_right, pc_left);
 717          if (s < 0) {
 718             /* l < c */
 719             s = qsort_cmp(pc_left, u_left);
 720             /* if l < c, c < r - already in order - nothing to do */
 721             if (s == 0) {
 722                /* l < c, c == r - already in order, pc grows */
 723                ++pc_right;
 724                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 725             } else if (s > 0) {
 726                /* l < c, c > r - need to know more */
 727                s = qsort_cmp(u_right, u_left);
 728                if (s < 0) {
 729                   /* l < c, c > r, l < r - swap c & r to get ordered */
 730                   qsort_swap(pc_left, u_left);
 731                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 732                } else if (s == 0) {
 733                   /* l < c, c > r, l == r - swap c&r, grow pc */
 734                   qsort_swap(pc_left, u_left);
 735                   --pc_left;
 736                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 737                } else {
 738                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 739                   qsort_rotate(pc_left, u_right, u_left);
 740                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 741                }
 742             }
 743          } else if (s == 0) {
 744             /* l == c */
 745             s = qsort_cmp(pc_left, u_left);
 746             if (s < 0) {
 747                /* l == c, c < r - already in order, grow pc */
 748                --pc_left;
 749                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 750             } else if (s == 0) {
 751                /* l == c, c == r - already in order, grow pc both ways */
 752                --pc_left;
 753                ++pc_right;
 754                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 755             } else {
 756                /* l == c, c > r - swap l & r, grow pc */
 757                qsort_swap(u_right, u_left);
 758                ++pc_right;
 759                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 760             }
 761          } else {
 762             /* l > c */
 763             s = qsort_cmp(pc_left, u_left);
 764             if (s < 0) {
 765                /* l > c, c < r - need to know more */
 766                s = qsort_cmp(u_right, u_left);
 767                if (s < 0) {
 768                   /* l > c, c < r, l < r - swap l & c to get ordered */
 769                   qsort_swap(u_right, pc_left);
 770                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 771                } else if (s == 0) {
 772                   /* l > c, c < r, l == r - swap l & c, grow pc */
 773                   qsort_swap(u_right, pc_left);
 774                   ++pc_right;
 775                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 776                } else {
 777                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 778                   qsort_rotate(u_right, pc_left, u_left);
 779                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 780                }
 781             } else if (s == 0) {
 782                /* l > c, c == r - swap ends, grow pc */
 783                qsort_swap(u_right, u_left);
 784                --pc_left;
 785                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 786             } else {
 787                /* l > c, c > r - swap ends to get in order */
 788                qsort_swap(u_right, u_left);
 789                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 790             }
 791          }
 792          /* We now know the 3 middle elements have been compared and
 793             arranged in the desired order, so we can shrink the uncompared
 794             sets on both sides
 795          */
 796          --u_right;
 797          ++u_left;
 798          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 799
 800          /* The above massive nested if was the simple part :-). We now have
 801             the middle 3 elements ordered and we need to scan through the
 802             uncompared sets on either side, swapping elements that are on
 803             the wrong side or simply shuffling equal elements around to get
 804             all equal elements into the pivot chunk.
 805          */
 806
 807          for ( ; ; ) {
 808             int still_work_on_left;
 809             int still_work_on_right;
 810
 811             /* Scan the uncompared values on the left. If I find a value
 812                equal to the pivot value, move it over so it is adjacent to
 813                the pivot chunk and expand the pivot chunk. If I find a value
 814                less than the pivot value, then just leave it - its already
 815                on the correct side of the partition. If I find a greater
 816                value, then stop the scan.
 817             */
 818             while ((still_work_on_left = (u_right >= part_left))) {
 819                s = qsort_cmp(u_right, pc_left);
 820                if (s < 0) {
 821                   --u_right;
 822                } else if (s == 0) {
 823                   --pc_left;
 824                   if (pc_left != u_right) {
 825                      qsort_swap(u_right, pc_left);
 826                   }
 827                   --u_right;
 828                } else {
 829                   break;
 830                }
 831                qsort_assert(u_right < pc_left);
 832                qsort_assert(pc_left <= pc_right);
 833                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
 834                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
 835             }
 836
 837             /* Do a mirror image scan of uncompared values on the right
 838             */
 839             while ((still_work_on_right = (u_left <= part_right))) {
 840                s = qsort_cmp(pc_right, u_left);
 841                if (s < 0) {
 842                   ++u_left;
 843                } else if (s == 0) {
 844                   ++pc_right;
 845                   if (pc_right != u_left) {
 846                      qsort_swap(pc_right, u_left);
 847                   }
 848                   ++u_left;
 849                } else {
 850                   break;
 851                }
 852                qsort_assert(u_left > pc_right);
 853                qsort_assert(pc_left <= pc_right);
 854                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
 855                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
 856             }
 857
 858             if (still_work_on_left) {
 859                /* I know I have a value on the left side which needs to be
 860                   on the right side, but I need to know more to decide
 861                   exactly the best thing to do with it.
 862                */
 863                if (still_work_on_right) {
 864                   /* I know I have values on both side which are out of
 865                      position. This is a big win because I kill two birds
 866                      with one swap (so to speak). I can advance the
 867                      uncompared pointers on both sides after swapping both
 868                      of them into the right place.
 869                   */
 870                   qsort_swap(u_right, u_left);
 871                   --u_right;
 872                   ++u_left;
 873                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 874                } else {
 875                   /* I have an out of position value on the left, but the
 876                      right is fully scanned, so I "slide" the pivot chunk
 877                      and any less-than values left one to make room for the
 878                      greater value over on the right. If the out of position
 879                      value is immediately adjacent to the pivot chunk (there
 880                      are no less-than values), I can do that with a swap,
 881                      otherwise, I have to rotate one of the less than values
 882                      into the former position of the out of position value
 883                      and the right end of the pivot chunk into the left end
 884                      (got all that?).
 885                   */
 886                   --pc_left;
 887                   if (pc_left == u_right) {
 888                      qsort_swap(u_right, pc_right);
 889                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
 890                   } else {
 891                      qsort_rotate(u_right, pc_left, pc_right);
 892                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
 893                   }
 894                   --pc_right;
 895                   --u_right;
 896                }
 897             } else if (still_work_on_right) {
 898                /* Mirror image of complex case above: I have an out of
 899                   position value on the right, but the left is fully
 900                   scanned, so I need to shuffle things around to make room
 901                   for the right value on the left.
 902                */
 903                ++pc_right;
 904                if (pc_right == u_left) {
 905                   qsort_swap(u_left, pc_left);
 906                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
 907                } else {
 908                   qsort_rotate(pc_right, pc_left, u_left);
 909                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
 910                }
 911                ++pc_left;
 912                ++u_left;
 913             } else {
 914                /* No more scanning required on either side of partition,
 915                   break out of loop and figure out next set of partitions
 916                */
 917                break;
 918             }
 919          }
 920
 921          /* The elements in the pivot chunk are now in the right place. They
 922             will never move or be compared again. All I have to do is decide
 923             what to do with the stuff to the left and right of the pivot
 924             chunk.
 925
 926             Notes on the QSORT_ORDER_GUESS ifdef code:
 927
 928             1. If I just built these partitions without swapping any (or
 929                very many) elements, there is a chance that the elements are
 930                already ordered properly (being properly ordered will
 931                certainly result in no swapping, but the converse can't be
 932                proved :-).
 933
 934             2. A (properly written) insertion sort will run faster on
 935                already ordered data than qsort will.
 936
 937             3. Perhaps there is some way to make a good guess about
 938                switching to an insertion sort earlier than partition size 6
 939                (for instance - we could save the partition size on the stack
 940                and increase the size each time we find we didn't swap, thus
 941                switching to insertion sort earlier for partitions with a
 942                history of not swapping).
 943
 944             4. Naturally, if I just switch right away, it will make
 945                artificial benchmarks with pure ascending (or descending)
 946                data look really good, but is that a good reason in general?
 947                Hard to say...
 948          */
 949
 950 #ifdef QSORT_ORDER_GUESS
 951          if (swapped < 3) {
 952 #if QSORT_ORDER_GUESS == 1
 953             qsort_break_even = (part_right - part_left) + 1;
 954 #endif
 955 #if QSORT_ORDER_GUESS == 2
 956             qsort_break_even *= 2;
 957 #endif
 958 #if QSORT_ORDER_GUESS == 3
 959             int prev_break = qsort_break_even;
 960             qsort_break_even *= qsort_break_even;
 961             if (qsort_break_even < prev_break) {
 962                qsort_break_even = (part_right - part_left) + 1;
 963             }
 964 #endif
 965          } else {
 966             qsort_break_even = QSORT_BREAK_EVEN;
 967          }
 968 #endif
 969
 970          if (part_left < pc_left) {
 971             /* There are elements on the left which need more processing.
 972                Check the right as well before deciding what to do.
 973             */
 974             if (pc_right < part_right) {
 975                /* We have two partitions to be sorted. Stack the biggest one
 976                   and process the smallest one on the next iteration. This
 977                   minimizes the stack height by insuring that any additional
 978                   stack entries must come from the smallest partition which
 979                   (because it is smallest) will have the fewest
 980                   opportunities to generate additional stack entries.
 981                */
 982                if ((part_right - pc_right) > (pc_left - part_left)) {
 983                   /* stack the right partition, process the left */
 984                   partition_stack[next_stack_entry].left = pc_right + 1;
 985                   partition_stack[next_stack_entry].right = part_right;
 986 #ifdef QSORT_ORDER_GUESS
 987                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
 988 #endif
 989                   part_right = pc_left - 1;
 990                } else {
 991                   /* stack the left partition, process the right */
 992                   partition_stack[next_stack_entry].left = part_left;
 993                   partition_stack[next_stack_entry].right = pc_left - 1;
 994 #ifdef QSORT_ORDER_GUESS
 995                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
 996 #endif
 997                   part_left = pc_right + 1;
 998                }
 999                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1000                ++next_stack_entry;
1001             } else {
1002                /* The elements on the left are the only remaining elements
1003                   that need sorting, arrange for them to be processed as the
1004                   next partition.
1005                */
1006                part_right = pc_left - 1;
1007             }
1008          } else if (pc_right < part_right) {
1009             /* There is only one chunk on the right to be sorted, make it
1010                the new partition and loop back around.
1011             */
1012             part_left = pc_right + 1;
1013          } else {
1014             /* This whole partition wound up in the pivot chunk, so
1015                we need to get a new partition off the stack.
1016             */
1017             if (next_stack_entry == 0) {
1018                /* the stack is empty - we are done */
1019                break;
1020             }
1021             --next_stack_entry;
1022             part_left = partition_stack[next_stack_entry].left;
1023             part_right = partition_stack[next_stack_entry].right;
1024 #ifdef QSORT_ORDER_GUESS
1025             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1026 #endif
1027          }
1028       } else {
1029          /* This partition is too small to fool with qsort complexity, just
1030             do an ordinary insertion sort to minimize overhead.
1031          */
1032          int i;
1033          /* Assume 1st element is in right place already, and start checking
1034             at 2nd element to see where it should be inserted.
1035          */
1036          for (i = part_left + 1; i <= part_right; ++i) {
1037             int j;
1038             /* Scan (backwards - just in case 'i' is already in right place)
1039                through the elements already sorted to see if the ith element
1040                belongs ahead of one of them.
1041             */
1042             for (j = i - 1; j >= part_left; --j) {
1043                if (qsort_cmp(i, j) >= 0) {
1044                   /* i belongs right after j
1045                   */
1046                   break;
1047                }
1048             }
1049             ++j;
1050             if (j != i) {
1051                /* Looks like we really need to move some things
1052                */
1053                int k;
1054                temp = array[i];
1055                for (k = i - 1; k >= j; --k)
1056                   array[k + 1] = array[k];
1057                array[j] = temp;
1058             }
1059          }
1060
1061          /* That partition is now sorted, grab the next one, or get out
1062             of the loop if there aren't any more.
1063          */
1064
1065          if (next_stack_entry == 0) {
1066             /* the stack is empty - we are done */
1067             break;
1068          }
1069          --next_stack_entry;
1070          part_left = partition_stack[next_stack_entry].left;
1071          part_right = partition_stack[next_stack_entry].right;
1072 #ifdef QSORT_ORDER_GUESS
1073          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1074 #endif
1075       }
1076    }
1077
1078    /* Believe it or not, the array is sorted at this point! */
1079 }
1080
1081 #ifndef SMALLSORT
1082 #define SMALLSORT (200)
1083 #endif
1084
1085 /* Stabilize what is, presumably, an otherwise unstable sort method.
1086  * We do that by allocating (or having on hand) an array of pointers
1087  * that is the same size as the original array of elements to be sorted.
1088  * We initialize this parallel array with the addresses of the original
1089  * array elements.  This indirection can make you crazy.
1090  * Some pictures can help.  After initializing, we have
1091  *
1092  *  indir                  list1
1093  * +----+                 +----+
1094  * |    | --------------> |    | ------> first element to be sorted
1095  * +----+                 +----+
1096  * |    | --------------> |    | ------> second element to be sorted
1097  * +----+                 +----+
1098  * |    | --------------> |    | ------> third element to be sorted
1099  * +----+                 +----+
1100  *  ...
1101  * +----+                 +----+
1102  * |    | --------------> |    | ------> n-1st element to be sorted
1103  * +----+                 +----+
1104  * |    | --------------> |    | ------> n-th element to be sorted
1105  * +----+                 +----+
1106  *
1107  * During the sort phase, we leave the elements of list1 where they are,
1108  * and sort the pointers in the indirect array in the same order determined
1109  * by the original comparison routine on the elements pointed to.
1110  * Because we don't move the elements of list1 around through
1111  * this phase, we can break ties on elements that compare equal
1112  * using their address in the list1 array, ensuring stabilty.
1113  * This leaves us with something looking like
1114  *
1115  *  indir                  list1
1116  * +----+                 +----+
1117  * |    | --+       +---> |    | ------> first element to be sorted
1118  * +----+   |       |     +----+
1119  * |    | --|-------|---> |    | ------> second element to be sorted
1120  * +----+   |       |     +----+
1121  * |    | --|-------+ +-> |    | ------> third element to be sorted
1122  * +----+   |         |   +----+
1123  *  ...
1124  * +----+    | |   | |    +----+
1125  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1126  * +----+    |     |      +----+
1127  * |    | ---+     +----> |    | ------> n-th element to be sorted
1128  * +----+                 +----+
1129  *
1130  * where the i-th element of the indirect array points to the element
1131  * that should be i-th in the sorted array.  After the sort phase,
1132  * we have to put the elements of list1 into the places
1133  * dictated by the indirect array.
1134  */
1135
1136 static SVCOMPARE_t RealCmp;
1137
1138 static I32
1139 cmpindir(pTHX_ gptr a, gptr b)
1140 {
1141     I32 sense;
1142     gptr *ap = (gptr *)a;
1143     gptr *bp = (gptr *)b;
1144
1145     if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
1146          sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1147     return sense;
1148 }
1149
1150 STATIC void
1151 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
1152 {
1153     SV **hintsvp;
1154
1155     if (SORTHINTS(hintsvp) & HINT_SORT_FAST)
1156          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1157     else {
1158          register gptr **pp, *q;
1159          register size_t n, j, i;
1160          gptr *small[SMALLSORT], **indir, tmp;
1161          SVCOMPARE_t savecmp;
1162          if (nmemb <= 1) return;     /* sorted trivially */
1163
1164          /* Small arrays can use the stack, big ones must be allocated */
1165          if (nmemb <= SMALLSORT) indir = small;
1166          else { New(1799, indir, nmemb, gptr *); }
1167
1168          /* Copy pointers to original array elements into indirect array */
1169          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1170
1171          savecmp = RealCmp;     /* Save current comparison routine, if any */
1172          RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1173
1174          /* sort, with indirection */
1175          S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
1176
1177          pp = indir;
1178          q = list1;
1179          for (n = nmemb; n--; ) {
1180               /* Assert A: all elements of q with index > n are already
1181                * in place.  This is vacuosly true at the start, and we
1182                * put element n where it belongs below (if it wasn't
1183                * already where it belonged). Assert B: we only move
1184                * elements that aren't where they belong,
1185                * so, by A, we never tamper with elements above n.
1186                */
1187               j = pp[n] - q;            /* This sets j so that q[j] is
1188                                          * at pp[n].  *pp[j] belongs in
1189                                          * q[j], by construction.
1190                                          */
1191               if (n != j) {             /* all's well if n == j */
1192                    tmp = q[j];          /* save what's in q[j] */
1193                    do {
1194                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1195                         i = pp[j] - q;  /* the index in q of the element
1196                                          * just moved */
1197                         pp[j] = q + j;  /* this is ok now */
1198                    } while ((j = i) != n);
1199                    /* There are only finitely many (nmemb) addresses
1200                     * in the pp array.
1201                     * So we must eventually revisit an index we saw before.
1202                     * Suppose the first revisited index is k != n.
1203                     * An index is visited because something else belongs there.
1204                     * If we visit k twice, then two different elements must
1205                     * belong in the same place, which cannot be.
1206                     * So j must get back to n, the loop terminates,
1207                     * and we put the saved element where it belongs.
1208                     */
1209                    q[n] = tmp;          /* put what belongs into
1210                                          * the n-th element */
1211               }
1212          }
1213
1214         /* free iff allocated */
1215          if (indir != small) { Safefree(indir); }
1216          /* restore prevailing comparison routine */
1217          RealCmp = savecmp;
1218     }
1219 }
1220
1221 /*
1222 =for apidoc sortsv
1223
1224 Sort an array. Here is an example:
1225
1226     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1227
1228 =cut
1229 */
1230
1231 void
1232 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1233 {
1234     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
1235         S_mergesortsv;
1236     SV **hintsvp;
1237     I32 hints;
1238
1239     if ((hints = SORTHINTS(hintsvp))) {
1240          if (hints & HINT_SORT_QUICKSORT)
1241               sortsvp = S_qsortsv;
1242          else {
1243               if (hints & HINT_SORT_MERGESORT)
1244                    sortsvp = S_mergesortsv;
1245               else
1246                    sortsvp = S_mergesortsv;
1247          }
1248     }
1249
1250     sortsvp(aTHX_ array, nmemb, cmp);
1251 }
1252
1253 PP(pp_sort)
1254 {
1255     dSP; dMARK; dORIGMARK;
1256     register SV **up;
1257     SV **myorigmark = ORIGMARK;
1258     register I32 max;
1259     HV *stash;
1260     GV *gv;
1261     CV *cv = 0;
1262     I32 gimme = GIMME;
1263     OP* nextop = PL_op->op_next;
1264     I32 overloading = 0;
1265     bool hasargs = FALSE;
1266     I32 is_xsub = 0;
1267
1268     if (gimme != G_ARRAY) {
1269         SP = MARK;
1270         RETPUSHUNDEF;
1271     }
1272
1273     ENTER;
1274     SAVEVPTR(PL_sortcop);
1275     if (PL_op->op_flags & OPf_STACKED) {
1276         if (PL_op->op_flags & OPf_SPECIAL) {
1277             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1278             kid = kUNOP->op_first;                      /* pass rv2gv */
1279             kid = kUNOP->op_first;                      /* pass leave */
1280             PL_sortcop = kid->op_next;
1281             stash = CopSTASH(PL_curcop);
1282         }
1283         else {
1284             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1285             if (cv && SvPOK(cv)) {
1286                 STRLEN n_a;
1287                 char *proto = SvPV((SV*)cv, n_a);
1288                 if (proto && strEQ(proto, "$$")) {
1289                     hasargs = TRUE;
1290                 }
1291             }
1292             if (!(cv && CvROOT(cv))) {
1293                 if (cv && CvXSUB(cv)) {
1294                     is_xsub = 1;
1295                 }
1296                 else if (gv) {
1297                     SV *tmpstr = sv_newmortal();
1298                     gv_efullname3(tmpstr, gv, Nullch);
1299                     DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
1300                         SvPVX(tmpstr));
1301                 }
1302                 else {
1303                     DIE(aTHX_ "Undefined subroutine in sort");
1304                 }
1305             }
1306
1307             if (is_xsub)
1308                 PL_sortcop = (OP*)cv;
1309             else {
1310                 PL_sortcop = CvSTART(cv);
1311                 SAVEVPTR(CvROOT(cv)->op_ppaddr);
1312                 CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
1313
1314                 SAVEVPTR(PL_curpad);
1315                 PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
1316             }
1317         }
1318     }
1319     else {
1320         PL_sortcop = Nullop;
1321         stash = CopSTASH(PL_curcop);
1322     }
1323
1324     up = myorigmark + 1;
1325     while (MARK < SP) { /* This may or may not shift down one here. */
1326         /*SUPPRESS 560*/
1327         if ((*up = *++MARK)) {                  /* Weed out nulls. */
1328             SvTEMP_off(*up);
1329             if (!PL_sortcop && !SvPOK(*up)) {
1330                 STRLEN n_a;
1331                 if (SvAMAGIC(*up))
1332                     overloading = 1;
1333                 else
1334                     (void)sv_2pv(*up, &n_a);
1335             }
1336             up++;
1337         }
1338     }
1339     max = --up - myorigmark;
1340     if (PL_sortcop) {
1341         if (max > 1) {
1342             PERL_CONTEXT *cx;
1343             SV** newsp;
1344             bool oldcatch = CATCH_GET;
1345
1346             SAVETMPS;
1347             SAVEOP();
1348
1349             CATCH_SET(TRUE);
1350             PUSHSTACKi(PERLSI_SORT);
1351             if (!hasargs && !is_xsub) {
1352                 if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
1353                     SAVESPTR(PL_firstgv);
1354                     SAVESPTR(PL_secondgv);
1355                     PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1356                     PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1357                     PL_sortstash = stash;
1358                 }
1359 #ifdef USE_5005THREADS
1360                 sv_lock((SV *)PL_firstgv);
1361                 sv_lock((SV *)PL_secondgv);
1362 #endif
1363                 SAVESPTR(GvSV(PL_firstgv));
1364                 SAVESPTR(GvSV(PL_secondgv));
1365             }
1366
1367             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1368             if (!(PL_op->op_flags & OPf_SPECIAL)) {
1369                 cx->cx_type = CXt_SUB;
1370                 cx->blk_gimme = G_SCALAR;
1371                 PUSHSUB(cx);
1372                 if (!CvDEPTH(cv))
1373                     (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
1374             }
1375             PL_sortcxix = cxstack_ix;
1376
1377             if (hasargs && !is_xsub) {
1378                 /* This is mostly copied from pp_entersub */
1379                 AV *av = (AV*)PL_curpad[0];
1380
1381 #ifndef USE_5005THREADS
1382                 cx->blk_sub.savearray = GvAV(PL_defgv);
1383                 GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1384 #endif /* USE_5005THREADS */
1385                 cx->blk_sub.oldcurpad = PL_curpad;
1386                 cx->blk_sub.argarray = av;
1387             }
1388            sortsv((myorigmark+1), max,
1389                   is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
1390
1391             POPBLOCK(cx,PL_curpm);
1392             PL_stack_sp = newsp;
1393             POPSTACK;
1394             CATCH_SET(oldcatch);
1395         }
1396     }
1397     else {
1398         if (max > 1) {
1399             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1400             sortsv(ORIGMARK+1, max,
1401                   (PL_op->op_private & OPpSORT_NUMERIC)
1402                         ? ( (PL_op->op_private & OPpSORT_INTEGER)
1403                             ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
1404                             : ( overloading ? amagic_ncmp : sv_ncmp))
1405                         : ( IN_LOCALE_RUNTIME
1406                             ? ( overloading
1407                                 ? amagic_cmp_locale
1408                                 : sv_cmp_locale_static)
1409                             : ( overloading ? amagic_cmp : sv_cmp_static)));
1410             if (PL_op->op_private & OPpSORT_REVERSE) {
1411                 SV **p = ORIGMARK+1;
1412                 SV **q = ORIGMARK+max;
1413                 while (p < q) {
1414                     SV *tmp = *p;
1415                     *p++ = *q;
1416                     *q-- = tmp;
1417                 }
1418             }
1419         }
1420     }
1421     LEAVE;
1422     PL_stack_sp = ORIGMARK + max;
1423     return nextop;
1424 }
1425
1426 static I32
1427 sortcv(pTHX_ SV *a, SV *b)
1428 {
1429     I32 oldsaveix = PL_savestack_ix;
1430     I32 oldscopeix = PL_scopestack_ix;
1431     I32 result;
1432     GvSV(PL_firstgv) = a;
1433     GvSV(PL_secondgv) = b;
1434     PL_stack_sp = PL_stack_base;
1435     PL_op = PL_sortcop;
1436     CALLRUNOPS(aTHX);
1437     if (PL_stack_sp != PL_stack_base + 1)
1438         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1439     if (!SvNIOKp(*PL_stack_sp))
1440         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1441     result = SvIV(*PL_stack_sp);
1442     while (PL_scopestack_ix > oldscopeix) {
1443         LEAVE;
1444     }
1445     leave_scope(oldsaveix);
1446     return result;
1447 }
1448
1449 static I32
1450 sortcv_stacked(pTHX_ SV *a, SV *b)
1451 {
1452     I32 oldsaveix = PL_savestack_ix;
1453     I32 oldscopeix = PL_scopestack_ix;
1454     I32 result;
1455     AV *av;
1456
1457 #ifdef USE_5005THREADS
1458     av = (AV*)PL_curpad[0];
1459 #else
1460     av = GvAV(PL_defgv);
1461 #endif
1462
1463     if (AvMAX(av) < 1) {
1464         SV** ary = AvALLOC(av);
1465         if (AvARRAY(av) != ary) {
1466             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1467             SvPVX(av) = (char*)ary;
1468         }
1469         if (AvMAX(av) < 1) {
1470             AvMAX(av) = 1;
1471             Renew(ary,2,SV*);
1472             SvPVX(av) = (char*)ary;
1473         }
1474     }
1475     AvFILLp(av) = 1;
1476
1477     AvARRAY(av)[0] = a;
1478     AvARRAY(av)[1] = b;
1479     PL_stack_sp = PL_stack_base;
1480     PL_op = PL_sortcop;
1481     CALLRUNOPS(aTHX);
1482     if (PL_stack_sp != PL_stack_base + 1)
1483         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1484     if (!SvNIOKp(*PL_stack_sp))
1485         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1486     result = SvIV(*PL_stack_sp);
1487     while (PL_scopestack_ix > oldscopeix) {
1488         LEAVE;
1489     }
1490     leave_scope(oldsaveix);
1491     return result;
1492 }
1493
1494 static I32
1495 sortcv_xsub(pTHX_ SV *a, SV *b)
1496 {
1497     dSP;
1498     I32 oldsaveix = PL_savestack_ix;
1499     I32 oldscopeix = PL_scopestack_ix;
1500     I32 result;
1501     CV *cv=(CV*)PL_sortcop;
1502
1503     SP = PL_stack_base;
1504     PUSHMARK(SP);
1505     EXTEND(SP, 2);
1506     *++SP = a;
1507     *++SP = b;
1508     PUTBACK;
1509     (void)(*CvXSUB(cv))(aTHX_ cv);
1510     if (PL_stack_sp != PL_stack_base + 1)
1511         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1512     if (!SvNIOKp(*PL_stack_sp))
1513         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1514     result = SvIV(*PL_stack_sp);
1515     while (PL_scopestack_ix > oldscopeix) {
1516         LEAVE;
1517     }
1518     leave_scope(oldsaveix);
1519     return result;
1520 }
1521
1522
1523 static I32
1524 sv_ncmp(pTHX_ SV *a, SV *b)
1525 {
1526     NV nv1 = SvNV(a);
1527     NV nv2 = SvNV(b);
1528     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1529 }
1530
1531 static I32
1532 sv_i_ncmp(pTHX_ SV *a, SV *b)
1533 {
1534     IV iv1 = SvIV(a);
1535     IV iv2 = SvIV(b);
1536     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1537 }
1538 #define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
1539           *svp = Nullsv;                                \
1540           if (PL_amagic_generation) { \
1541             if (SvAMAGIC(left)||SvAMAGIC(right))\
1542                 *svp = amagic_call(left, \
1543                                    right, \
1544                                    CAT2(meth,_amg), \
1545                                    0); \
1546           } \
1547         } STMT_END
1548
1549 static I32
1550 amagic_ncmp(pTHX_ register SV *a, register SV *b)
1551 {
1552     SV *tmpsv;
1553     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1554     if (tmpsv) {
1555         NV d;
1556
1557         if (SvIOK(tmpsv)) {
1558             I32 i = SvIVX(tmpsv);
1559             if (i > 0)
1560                return 1;
1561             return i? -1 : 0;
1562         }
1563         d = SvNV(tmpsv);
1564         if (d > 0)
1565            return 1;
1566         return d? -1 : 0;
1567      }
1568      return sv_ncmp(aTHX_ a, b);
1569 }
1570
1571 static I32
1572 amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1573 {
1574     SV *tmpsv;
1575     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1576     if (tmpsv) {
1577         NV d;
1578
1579         if (SvIOK(tmpsv)) {
1580             I32 i = SvIVX(tmpsv);
1581             if (i > 0)
1582                return 1;
1583             return i? -1 : 0;
1584         }
1585         d = SvNV(tmpsv);
1586         if (d > 0)
1587            return 1;
1588         return d? -1 : 0;
1589     }
1590     return sv_i_ncmp(aTHX_ a, b);
1591 }
1592
1593 static I32
1594 amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1595 {
1596     SV *tmpsv;
1597     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1598     if (tmpsv) {
1599         NV d;
1600
1601         if (SvIOK(tmpsv)) {
1602             I32 i = SvIVX(tmpsv);
1603             if (i > 0)
1604                return 1;
1605             return i? -1 : 0;
1606         }
1607         d = SvNV(tmpsv);
1608         if (d > 0)
1609            return 1;
1610         return d? -1 : 0;
1611     }
1612     return sv_cmp(str1, str2);
1613 }
1614
1615 static I32
1616 amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1617 {
1618     SV *tmpsv;
1619     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1620     if (tmpsv) {
1621         NV d;
1622
1623         if (SvIOK(tmpsv)) {
1624             I32 i = SvIVX(tmpsv);
1625             if (i > 0)
1626                return 1;
1627             return i? -1 : 0;
1628         }
1629         d = SvNV(tmpsv);
1630         if (d > 0)
1631            return 1;
1632         return d? -1 : 0;
1633     }
1634     return sv_cmp_locale(str1, str2);
1635 }
1636
1637