pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* looks like 'small' is reserved word for WINCE (or somesuch)*/
  33 #define small xsmall
  34 #endif
  35
  36 #define sv_cmp_static Perl_sv_cmp
  37 #define sv_cmp_locale_static Perl_sv_cmp_locale
  38
  39 #define dSORTHINTS   SV *hintsv = GvSV(gv_fetchpv("sort::hints", GV_ADDMULTI, SVt_IV))
  40 #define SORTHINTS    (SvIOK(hintsv) ? ((I32)SvIV(hintsv)) : 0)
  41
  42 #ifndef SMALLSORT
  43 #define SMALLSORT (200)
  44 #endif
  45
  46 /*
  47  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  48  *
  49  * The original code was written in conjunction with BSD Computer Software
  50  * Research Group at University of California, Berkeley.
  51  *
  52  * See also: "Optimistic Merge Sort" (SODA '92)
  53  *
  54  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  55  *
  56  * The code can be distributed under the same terms as Perl itself.
  57  *
  58  */
  59
  60
  61 typedef char * aptr;            /* pointer for arithmetic on sizes */
  62 typedef SV * gptr;              /* pointers in our lists */
  63
  64 /* Binary merge internal sort, with a few special mods
  65 ** for the special perl environment it now finds itself in.
  66 **
  67 ** Things that were once options have been hotwired
  68 ** to values suitable for this use.  In particular, we'll always
  69 ** initialize looking for natural runs, we'll always produce stable
  70 ** output, and we'll always do Peter McIlroy's binary merge.
  71 */
  72
  73 /* Pointer types for arithmetic and storage and convenience casts */
  74
  75 #define APTR(P) ((aptr)(P))
  76 #define GPTP(P) ((gptr *)(P))
  77 #define GPPP(P) ((gptr **)(P))
  78
  79
  80 /* byte offset from pointer P to (larger) pointer Q */
  81 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  82
  83 #define PSIZE sizeof(gptr)
  84
  85 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  86
  87 #ifdef  PSHIFT
  88 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  89 #define PNBYTE(N)       ((N) << (PSHIFT))
  90 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  91 #else
  92 /* Leave optimization to compiler */
  93 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  94 #define PNBYTE(N)       ((N) * (PSIZE))
  95 #define PINDEX(P, N)    (GPTP(P) + (N))
  96 #endif
  97
  98 /* Pointer into other corresponding to pointer into this */
  99 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 100
 101 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 102
 103
 104 /* Runs are identified by a pointer in the auxilliary list.
 105 ** The pointer is at the start of the list,
 106 ** and it points to the start of the next list.
 107 ** NEXT is used as an lvalue, too.
 108 */
 109
 110 #define NEXT(P)         (*GPPP(P))
 111
 112
 113 /* PTHRESH is the minimum number of pairs with the same sense to justify
 114 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 115 ** not just elements, so PTHRESH == 8 means a run of 16.
 116 */
 117
 118 #define PTHRESH (8)
 119
 120 /* RTHRESH is the number of elements in a run that must compare low
 121 ** to the low element from the opposing run before we justify
 122 ** doing a binary rampup instead of single stepping.
 123 ** In random input, N in a row low should only happen with
 124 ** probability 2^(1-N), so we can risk that we are dealing
 125 ** with orderly input without paying much when we aren't.
 126 */
 127
 128 #define RTHRESH (6)
 129
 130
 131 /*
 132 ** Overview of algorithm and variables.
 133 ** The array of elements at list1 will be organized into runs of length 2,
 134 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 135 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 136 **
 137 ** Unless otherwise specified, pair pointers address the first of two elements.
 138 **
 139 ** b and b+1 are a pair that compare with sense "sense".
 140 ** b is the "bottom" of adjacent pairs that might form a longer run.
 141 **
 142 ** p2 parallels b in the list2 array, where runs are defined by
 143 ** a pointer chain.
 144 **
 145 ** t represents the "top" of the adjacent pairs that might extend
 146 ** the run beginning at b.  Usually, t addresses a pair
 147 ** that compares with opposite sense from (b,b+1).
 148 ** However, it may also address a singleton element at the end of list1,
 149 ** or it may be equal to "last", the first element beyond list1.
 150 **
 151 ** r addresses the Nth pair following b.  If this would be beyond t,
 152 ** we back it off to t.  Only when r is less than t do we consider the
 153 ** run long enough to consider checking.
 154 **
 155 ** q addresses a pair such that the pairs at b through q already form a run.
 156 ** Often, q will equal b, indicating we only are sure of the pair itself.
 157 ** However, a search on the previous cycle may have revealed a longer run,
 158 ** so q may be greater than b.
 159 **
 160 ** p is used to work back from a candidate r, trying to reach q,
 161 ** which would mean b through r would be a run.  If we discover such a run,
 162 ** we start q at r and try to push it further towards t.
 163 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 164 ** In any event, after the check (if any), we have two main cases.
 165 **
 166 ** 1) Short run.  b <= q < p <= r <= t.
 167 **      b through q is a run (perhaps trivial)
 168 **      q through p are uninteresting pairs
 169 **      p through r is a run
 170 **
 171 ** 2) Long run.  b < r <= q < t.
 172 **      b through q is a run (of length >= 2 * PTHRESH)
 173 **
 174 ** Note that degenerate cases are not only possible, but likely.
 175 ** For example, if the pair following b compares with opposite sense,
 176 ** then b == q < p == r == t.
 177 */
 178
 179
 180 static IV
 181 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 182 {
 183     I32 sense;
 184     register gptr *b, *p, *q, *t, *p2;
 185     register gptr c, *last, *r;
 186     gptr *savep;
 187     IV runs = 0;
 188
 189     b = list1;
 190     last = PINDEX(b, nmemb);
 191     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 192     for (p2 = list2; b < last; ) {
 193         /* We just started, or just reversed sense.
 194         ** Set t at end of pairs with the prevailing sense.
 195         */
 196         for (p = b+2, t = p; ++p < last; t = ++p) {
 197             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 198         }
 199         q = b;
 200         /* Having laid out the playing field, look for long runs */
 201         do {
 202             p = r = b + (2 * PTHRESH);
 203             if (r >= t) p = r = t;      /* too short to care about */
 204             else {
 205                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 206                        ((p -= 2) > q));
 207                 if (p <= q) {
 208                     /* b through r is a (long) run.
 209                     ** Extend it as far as possible.
 210                     */
 211                     p = q = r;
 212                     while (((p += 2) < t) &&
 213                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 214                     r = p = q + 2;      /* no simple pairs, no after-run */
 215                 }
 216             }
 217             if (q > b) {                /* run of greater than 2 at b */
 218                 savep = p;
 219                 p = q += 2;
 220                 /* pick up singleton, if possible */
 221                 if ((p == t) &&
 222                     ((t + 1) == last) &&
 223                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 224                     savep = r = p = q = last;
 225                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
 226                 if (sense) while (b < --p) {
 227                     c = *b;
 228                     *b++ = *p;
 229                     *p = c;
 230                 }
 231                 p = savep;
 232             }
 233             while (q < p) {             /* simple pairs */
 234                 p2 = NEXT(p2) = p2 + 2; ++runs;
 235                 if (sense) {
 236                     c = *q++;
 237                     *(q-1) = *q;
 238                     *q++ = c;
 239                 } else q += 2;
 240             }
 241             if (((b = p) == t) && ((t+1) == last)) {
 242                 NEXT(p2) = p2 + 1; ++runs;
 243                 b++;
 244             }
 245             q = r;
 246         } while (b < t);
 247         sense = !sense;
 248     }
 249     return runs;
 250 }
 251
 252
 253 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 254  * qsort on many platforms, but slower than qsort, conspicuously so,
 255  * on others.  The most likely explanation was platform-specific
 256  * differences in cache sizes and relative speeds.
 257  *
 258  * The quicksort divide-and-conquer algorithm guarantees that, as the
 259  * problem is subdivided into smaller and smaller parts, the parts
 260  * fit into smaller (and faster) caches.  So it doesn't matter how
 261  * many levels of cache exist, quicksort will "find" them, and,
 262  * as long as smaller is faster, take advantage of them.
 263  *
 264  * By contrast, consider how the original mergesort algorithm worked.
 265  * Suppose we have five runs (each typically of length 2 after dynprep).
 266  *
 267  * pass               base                        aux
 268  *  0              1 2 3 4 5
 269  *  1                                           12 34 5
 270  *  2                1234 5
 271  *  3                                            12345
 272  *  4                 12345
 273  *
 274  * Adjacent pairs are merged in "grand sweeps" through the input.
 275  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 276  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 277  * The only cache that matters is one large enough to hold *all* the input.
 278  * On some platforms, this may be many times slower than smaller caches.
 279  *
 280  * The following pseudo-code uses the same basic merge algorithm,
 281  * but in a divide-and-conquer way.
 282  *
 283  * # merge $runs runs at offset $offset of list $list1 into $list2.
 284  * # all unmerged runs ($runs == 1) originate in list $base.
 285  * sub mgsort2 {
 286  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 287  *
 288  *     if ($runs == 1) {
 289  *         if ($list1 is $base) copy run to $list2
 290  *         return offset of end of list (or copy)
 291  *     } else {
 292  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 293  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 294  *         merge the adjacent runs at $offset of $list1 into $list2
 295  *         return the offset of the end of the merged runs
 296  *     }
 297  * }
 298  * mgsort2(0, $runs, $base, $aux, $base);
 299  *
 300  * For our 5 runs, the tree of calls looks like
 301  *
 302  *           5
 303  *      3        2
 304  *   2     1   1   1
 305  * 1   1
 306  *
 307  * 1   2   3   4   5
 308  *
 309  * and the corresponding activity looks like
 310  *
 311  * copy runs 1 and 2 from base to aux
 312  * merge runs 1 and 2 from aux to base
 313  * (run 3 is where it belongs, no copy needed)
 314  * merge runs 12 and 3 from base to aux
 315  * (runs 4 and 5 are where they belong, no copy needed)
 316  * merge runs 4 and 5 from base to aux
 317  * merge runs 123 and 45 from aux to base
 318  *
 319  * Note that we merge runs 1 and 2 immediately after copying them,
 320  * while they are still likely to be in fast cache.  Similarly,
 321  * run 3 is merged with run 12 while it still may be lingering in cache.
 322  * This implementation should therefore enjoy much of the cache-friendly
 323  * behavior that quicksort does.  In addition, it does less copying
 324  * than the original mergesort implementation (only runs 1 and 2 are copied)
 325  * and the "balancing" of merges is better (merged runs comprise more nearly
 326  * equal numbers of original runs).
 327  *
 328  * The actual cache-friendly implementation will use a pseudo-stack
 329  * to avoid recursion, and will unroll processing of runs of length 2,
 330  * but it is otherwise similar to the recursive implementation.
 331  */
 332
 333 typedef struct {
 334     IV  offset;         /* offset of 1st of 2 runs at this level */
 335     IV  runs;           /* how many runs must be combined into 1 */
 336 } off_runs;             /* pseudo-stack element */
 337
 338
 339 static I32
 340 cmp_desc(pTHX_ gptr a, gptr b)
 341 {
 342     return -PL_sort_RealCmp(aTHX_ a, b);
 343 }
 344
 345 STATIC void
 346 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 347 {
 348     IV i, run, offset;
 349     I32 sense, level;
 350     register gptr *f1, *f2, *t, *b, *p;
 351     int iwhich;
 352     gptr *aux;
 353     gptr *p1;
 354     gptr small[SMALLSORT];
 355     gptr *which[3];
 356     off_runs stack[60], *stackp;
 357     SVCOMPARE_t savecmp = 0;
 358
 359     if (nmemb <= 1) return;                     /* sorted trivially */
 360
 361     if (flags) {
 362         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 363         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 364         cmp = cmp_desc;
 365     }
 366
 367     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 368     else { Newx(aux,nmemb,gptr); }              /* allocate auxilliary array */
 369     level = 0;
 370     stackp = stack;
 371     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 372     stackp->offset = offset = 0;
 373     which[0] = which[2] = base;
 374     which[1] = aux;
 375     for (;;) {
 376         /* On levels where both runs have be constructed (stackp->runs == 0),
 377          * merge them, and note the offset of their end, in case the offset
 378          * is needed at the next level up.  Hop up a level, and,
 379          * as long as stackp->runs is 0, keep merging.
 380          */
 381         IV runs = stackp->runs;
 382         if (runs == 0) {
 383             gptr *list1, *list2;
 384             iwhich = level & 1;
 385             list1 = which[iwhich];              /* area where runs are now */
 386             list2 = which[++iwhich];            /* area for merged runs */
 387             do {
 388                 register gptr *l1, *l2, *tp2;
 389                 offset = stackp->offset;
 390                 f1 = p1 = list1 + offset;               /* start of first run */
 391                 p = tp2 = list2 + offset;       /* where merged run will go */
 392                 t = NEXT(p);                    /* where first run ends */
 393                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 394                 t = NEXT(t);                    /* where second runs ends */
 395                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 396                 offset = PNELEM(list2, t);
 397                 while (f1 < l1 && f2 < l2) {
 398                     /* If head 1 is larger than head 2, find ALL the elements
 399                     ** in list 2 strictly less than head1, write them all,
 400                     ** then head 1.  Then compare the new heads, and repeat,
 401                     ** until one or both lists are exhausted.
 402                     **
 403                     ** In all comparisons (after establishing
 404                     ** which head to merge) the item to merge
 405                     ** (at pointer q) is the first operand of
 406                     ** the comparison.  When we want to know
 407                     ** if "q is strictly less than the other",
 408                     ** we can't just do
 409                     **    cmp(q, other) < 0
 410                     ** because stability demands that we treat equality
 411                     ** as high when q comes from l2, and as low when
 412                     ** q was from l1.  So we ask the question by doing
 413                     **    cmp(q, other) <= sense
 414                     ** and make sense == 0 when equality should look low,
 415                     ** and -1 when equality should look high.
 416                     */
 417
 418                     register gptr *q;
 419                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 420                         q = f2; b = f1; t = l1;
 421                         sense = -1;
 422                     } else {
 423                         q = f1; b = f2; t = l2;
 424                         sense = 0;
 425                     }
 426
 427
 428                     /* ramp up
 429                     **
 430                     ** Leave t at something strictly
 431                     ** greater than q (or at the end of the list),
 432                     ** and b at something strictly less than q.
 433                     */
 434                     for (i = 1, run = 0 ;;) {
 435                         if ((p = PINDEX(b, i)) >= t) {
 436                             /* off the end */
 437                             if (((p = PINDEX(t, -1)) > b) &&
 438                                 (cmp(aTHX_ *q, *p) <= sense))
 439                                  t = p;
 440                             else b = p;
 441                             break;
 442                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 443                             t = p;
 444                             break;
 445                         } else b = p;
 446                         if (++run >= RTHRESH) i += i;
 447                     }
 448
 449
 450                     /* q is known to follow b and must be inserted before t.
 451                     ** Increment b, so the range of possibilities is [b,t).
 452                     ** Round binary split down, to favor early appearance.
 453                     ** Adjust b and t until q belongs just before t.
 454                     */
 455
 456                     b++;
 457                     while (b < t) {
 458                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 459                         if (cmp(aTHX_ *q, *p) <= sense) {
 460                             t = p;
 461                         } else b = p + 1;
 462                     }
 463
 464
 465                     /* Copy all the strictly low elements */
 466
 467                     if (q == f1) {
 468                         FROMTOUPTO(f2, tp2, t);
 469                         *tp2++ = *f1++;
 470                     } else {
 471                         FROMTOUPTO(f1, tp2, t);
 472                         *tp2++ = *f2++;
 473                     }
 474                 }
 475
 476
 477                 /* Run out remaining list */
 478                 if (f1 == l1) {
 479                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 480                 } else              FROMTOUPTO(f1, tp2, l1);
 481                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 482
 483                 if (--level == 0) goto done;
 484                 --stackp;
 485                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 486             } while ((runs = stackp->runs) == 0);
 487         }
 488
 489
 490         stackp->runs = 0;               /* current run will finish level */
 491         /* While there are more than 2 runs remaining,
 492          * turn them into exactly 2 runs (at the "other" level),
 493          * each made up of approximately half the runs.
 494          * Stack the second half for later processing,
 495          * and set about producing the first half now.
 496          */
 497         while (runs > 2) {
 498             ++level;
 499             ++stackp;
 500             stackp->offset = offset;
 501             runs -= stackp->runs = runs / 2;
 502         }
 503         /* We must construct a single run from 1 or 2 runs.
 504          * All the original runs are in which[0] == base.
 505          * The run we construct must end up in which[level&1].
 506          */
 507         iwhich = level & 1;
 508         if (runs == 1) {
 509             /* Constructing a single run from a single run.
 510              * If it's where it belongs already, there's nothing to do.
 511              * Otherwise, copy it to where it belongs.
 512              * A run of 1 is either a singleton at level 0,
 513              * or the second half of a split 3.  In neither event
 514              * is it necessary to set offset.  It will be set by the merge
 515              * that immediately follows.
 516              */
 517             if (iwhich) {       /* Belongs in aux, currently in base */
 518                 f1 = b = PINDEX(base, offset);  /* where list starts */
 519                 f2 = PINDEX(aux, offset);       /* where list goes */
 520                 t = NEXT(f2);                   /* where list will end */
 521                 offset = PNELEM(aux, t);        /* offset thereof */
 522                 t = PINDEX(base, offset);       /* where it currently ends */
 523                 FROMTOUPTO(f1, f2, t);          /* copy */
 524                 NEXT(b) = t;                    /* set up parallel pointer */
 525             } else if (level == 0) goto done;   /* single run at level 0 */
 526         } else {
 527             /* Constructing a single run from two runs.
 528              * The merge code at the top will do that.
 529              * We need only make sure the two runs are in the "other" array,
 530              * so they'll end up in the correct array after the merge.
 531              */
 532             ++level;
 533             ++stackp;
 534             stackp->offset = offset;
 535             stackp->runs = 0;   /* take care of both runs, trigger merge */
 536             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 537                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 538                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 539                 t = NEXT(f2);                   /* where first run will end */
 540                 offset = PNELEM(aux, t);        /* offset thereof */
 541                 p = PINDEX(base, offset);       /* end of first run */
 542                 t = NEXT(t);                    /* where second run will end */
 543                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 544                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 545                 NEXT(b) = p;                    /* paralled pointer for 1st */
 546                 NEXT(p) = t;                    /* ... and for second */
 547             }
 548         }
 549     }
 550 done:
 551     if (aux != small) Safefree(aux);    /* free iff allocated */
 552     if (flags) {
 553          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 554     }
 555     return;
 556 }
 557
 558 /*
 559  * The quicksort implementation was derived from source code contributed
 560  * by Tom Horsley.
 561  *
 562  * NOTE: this code was derived from Tom Horsley's qsort replacement
 563  * and should not be confused with the original code.
 564  */
 565
 566 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 567
 568    Permission granted to distribute under the same terms as perl which are
 569    (briefly):
 570
 571     This program is free software; you can redistribute it and/or modify
 572     it under the terms of either:
 573
 574         a) the GNU General Public License as published by the Free
 575         Software Foundation; either version 1, or (at your option) any
 576         later version, or
 577
 578         b) the "Artistic License" which comes with this Kit.
 579
 580    Details on the perl license can be found in the perl source code which
 581    may be located via the www.perl.com web page.
 582
 583    This is the most wonderfulest possible qsort I can come up with (and
 584    still be mostly portable) My (limited) tests indicate it consistently
 585    does about 20% fewer calls to compare than does the qsort in the Visual
 586    C++ library, other vendors may vary.
 587
 588    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 589    others I invented myself (or more likely re-invented since they seemed
 590    pretty obvious once I watched the algorithm operate for a while).
 591
 592    Most of this code was written while watching the Marlins sweep the Giants
 593    in the 1997 National League Playoffs - no Braves fans allowed to use this
 594    code (just kidding :-).
 595
 596    I realize that if I wanted to be true to the perl tradition, the only
 597    comment in this file would be something like:
 598
 599    ...they shuffled back towards the rear of the line. 'No, not at the
 600    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 601
 602    However, I really needed to violate that tradition just so I could keep
 603    track of what happens myself, not to mention some poor fool trying to
 604    understand this years from now :-).
 605 */
 606
 607 /* ********************************************************** Configuration */
 608
 609 #ifndef QSORT_ORDER_GUESS
 610 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 611 #endif
 612
 613 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 614    future processing - a good max upper bound is log base 2 of memory size
 615    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality can
 616    safely be smaller than that since the program is taking up some space and
 617    most operating systems only let you grab some subset of contiguous
 618    memory (not to mention that you are normally sorting data larger than
 619    1 byte element size :-).
 620 */
 621 #ifndef QSORT_MAX_STACK
 622 #define QSORT_MAX_STACK 32
 623 #endif
 624
 625 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 626    Anything bigger and we use qsort. If you make this too small, the qsort
 627    will probably break (or become less efficient), because it doesn't expect
 628    the middle element of a partition to be the same as the right or left -
 629    you have been warned).
 630 */
 631 #ifndef QSORT_BREAK_EVEN
 632 #define QSORT_BREAK_EVEN 6
 633 #endif
 634
 635 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 636    to go quadratic on.  We innoculate larger partitions against
 637    quadratic behavior by shuffling them before sorting.  This is not
 638    an absolute guarantee of non-quadratic behavior, but it would take
 639    staggeringly bad luck to pick extreme elements as the pivot
 640    from randomized data.
 641 */
 642 #ifndef QSORT_PLAY_SAFE
 643 #define QSORT_PLAY_SAFE 255
 644 #endif
 645
 646 /* ************************************************************* Data Types */
 647
 648 /* hold left and right index values of a partition waiting to be sorted (the
 649    partition includes both left and right - right is NOT one past the end or
 650    anything like that).
 651 */
 652 struct partition_stack_entry {
 653    int left;
 654    int right;
 655 #ifdef QSORT_ORDER_GUESS
 656    int qsort_break_even;
 657 #endif
 658 };
 659
 660 /* ******************************************************* Shorthand Macros */
 661
 662 /* Note that these macros will be used from inside the qsort function where
 663    we happen to know that the variable 'elt_size' contains the size of an
 664    array element and the variable 'temp' points to enough space to hold a
 665    temp element and the variable 'array' points to the array being sorted
 666    and 'compare' is the pointer to the compare routine.
 667
 668    Also note that there are very many highly architecture specific ways
 669    these might be sped up, but this is simply the most generally portable
 670    code I could think of.
 671 */
 672
 673 /* Return < 0 == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2
 674 */
 675 #define qsort_cmp(elt1, elt2) \
 676    ((*compare)(aTHX_ array[elt1], array[elt2]))
 677
 678 #ifdef QSORT_ORDER_GUESS
 679 #define QSORT_NOTICE_SWAP swapped++;
 680 #else
 681 #define QSORT_NOTICE_SWAP
 682 #endif
 683
 684 /* swaps contents of array elements elt1, elt2.
 685 */
 686 #define qsort_swap(elt1, elt2) \
 687    STMT_START { \
 688       QSORT_NOTICE_SWAP \
 689       temp = array[elt1]; \
 690       array[elt1] = array[elt2]; \
 691       array[elt2] = temp; \
 692    } STMT_END
 693
 694 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 695    elt3 and elt3 gets elt1.
 696 */
 697 #define qsort_rotate(elt1, elt2, elt3) \
 698    STMT_START { \
 699       QSORT_NOTICE_SWAP \
 700       temp = array[elt1]; \
 701       array[elt1] = array[elt2]; \
 702       array[elt2] = array[elt3]; \
 703       array[elt3] = temp; \
 704    } STMT_END
 705
 706 /* ************************************************************ Debug stuff */
 707
 708 #ifdef QSORT_DEBUG
 709
 710 static void
 711 break_here()
 712 {
 713    return; /* good place to set a breakpoint */
 714 }
 715
 716 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 717
 718 static void
 719 doqsort_all_asserts(
 720    void * array,
 721    size_t num_elts,
 722    size_t elt_size,
 723    int (*compare)(const void * elt1, const void * elt2),
 724    int pc_left, int pc_right, int u_left, int u_right)
 725 {
 726    int i;
 727
 728    qsort_assert(pc_left <= pc_right);
 729    qsort_assert(u_right < pc_left);
 730    qsort_assert(pc_right < u_left);
 731    for (i = u_right + 1; i < pc_left; ++i) {
 732       qsort_assert(qsort_cmp(i, pc_left) < 0);
 733    }
 734    for (i = pc_left; i < pc_right; ++i) {
 735       qsort_assert(qsort_cmp(i, pc_right) == 0);
 736    }
 737    for (i = pc_right + 1; i < u_left; ++i) {
 738       qsort_assert(qsort_cmp(pc_right, i) < 0);
 739    }
 740 }
 741
 742 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 743    doqsort_all_asserts(array, num_elts, elt_size, compare, \
 744                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 745
 746 #else
 747
 748 #define qsort_assert(t) ((void)0)
 749
 750 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 751
 752 #endif
 753
 754 /* ****************************************************************** qsort */
 755
 756 STATIC void /* the standard unstable (u) quicksort (qsort) */
 757 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 758 {
 759    register SV * temp;
 760
 761    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 762    int next_stack_entry = 0;
 763
 764    int part_left;
 765    int part_right;
 766 #ifdef QSORT_ORDER_GUESS
 767    int qsort_break_even;
 768    int swapped;
 769 #endif
 770
 771    /* Make sure we actually have work to do.
 772    */
 773    if (num_elts <= 1) {
 774       return;
 775    }
 776
 777    /* Innoculate large partitions against quadratic behavior */
 778    if (num_elts > QSORT_PLAY_SAFE) {
 779       register size_t n;
 780       register SV ** const q = array;
 781       for (n = num_elts; n > 1; ) {
 782          register const size_t j = (size_t)(n-- * Drand01());
 783          temp = q[j];
 784          q[j] = q[n];
 785          q[n] = temp;
 786       }
 787    }
 788
 789    /* Setup the initial partition definition and fall into the sorting loop
 790    */
 791    part_left = 0;
 792    part_right = (int)(num_elts - 1);
 793 #ifdef QSORT_ORDER_GUESS
 794    qsort_break_even = QSORT_BREAK_EVEN;
 795 #else
 796 #define qsort_break_even QSORT_BREAK_EVEN
 797 #endif
 798    for ( ; ; ) {
 799       if ((part_right - part_left) >= qsort_break_even) {
 800          /* OK, this is gonna get hairy, so lets try to document all the
 801             concepts and abbreviations and variables and what they keep
 802             track of:
 803
 804             pc: pivot chunk - the set of array elements we accumulate in the
 805                 middle of the partition, all equal in value to the original
 806                 pivot element selected. The pc is defined by:
 807
 808                 pc_left - the leftmost array index of the pc
 809                 pc_right - the rightmost array index of the pc
 810
 811                 we start with pc_left == pc_right and only one element
 812                 in the pivot chunk (but it can grow during the scan).
 813
 814             u:  uncompared elements - the set of elements in the partition
 815                 we have not yet compared to the pivot value. There are two
 816                 uncompared sets during the scan - one to the left of the pc
 817                 and one to the right.
 818
 819                 u_right - the rightmost index of the left side's uncompared set
 820                 u_left - the leftmost index of the right side's uncompared set
 821
 822                 The leftmost index of the left sides's uncompared set
 823                 doesn't need its own variable because it is always defined
 824                 by the leftmost edge of the whole partition (part_left). The
 825                 same goes for the rightmost edge of the right partition
 826                 (part_right).
 827
 828                 We know there are no uncompared elements on the left once we
 829                 get u_right < part_left and no uncompared elements on the
 830                 right once u_left > part_right. When both these conditions
 831                 are met, we have completed the scan of the partition.
 832
 833                 Any elements which are between the pivot chunk and the
 834                 uncompared elements should be less than the pivot value on
 835                 the left side and greater than the pivot value on the right
 836                 side (in fact, the goal of the whole algorithm is to arrange
 837                 for that to be true and make the groups of less-than and
 838                 greater-then elements into new partitions to sort again).
 839
 840             As you marvel at the complexity of the code and wonder why it
 841             has to be so confusing. Consider some of the things this level
 842             of confusion brings:
 843
 844             Once I do a compare, I squeeze every ounce of juice out of it. I
 845             never do compare calls I don't have to do, and I certainly never
 846             do redundant calls.
 847
 848             I also never swap any elements unless I can prove there is a
 849             good reason. Many sort algorithms will swap a known value with
 850             an uncompared value just to get things in the right place (or
 851             avoid complexity :-), but that uncompared value, once it gets
 852             compared, may then have to be swapped again. A lot of the
 853             complexity of this code is due to the fact that it never swaps
 854             anything except compared values, and it only swaps them when the
 855             compare shows they are out of position.
 856          */
 857          int pc_left, pc_right;
 858          int u_right, u_left;
 859
 860          int s;
 861
 862          pc_left = ((part_left + part_right) / 2);
 863          pc_right = pc_left;
 864          u_right = pc_left - 1;
 865          u_left = pc_right + 1;
 866
 867          /* Qsort works best when the pivot value is also the median value
 868             in the partition (unfortunately you can't find the median value
 869             without first sorting :-), so to give the algorithm a helping
 870             hand, we pick 3 elements and sort them and use the median value
 871             of that tiny set as the pivot value.
 872
 873             Some versions of qsort like to use the left middle and right as
 874             the 3 elements to sort so they can insure the ends of the
 875             partition will contain values which will stop the scan in the
 876             compare loop, but when you have to call an arbitrarily complex
 877             routine to do a compare, its really better to just keep track of
 878             array index values to know when you hit the edge of the
 879             partition and avoid the extra compare. An even better reason to
 880             avoid using a compare call is the fact that you can drop off the
 881             edge of the array if someone foolishly provides you with an
 882             unstable compare function that doesn't always provide consistent
 883             results.
 884
 885             So, since it is simpler for us to compare the three adjacent
 886             elements in the middle of the partition, those are the ones we
 887             pick here (conveniently pointed at by u_right, pc_left, and
 888             u_left). The values of the left, center, and right elements
 889             are refered to as l c and r in the following comments.
 890          */
 891
 892 #ifdef QSORT_ORDER_GUESS
 893          swapped = 0;
 894 #endif
 895          s = qsort_cmp(u_right, pc_left);
 896          if (s < 0) {
 897             /* l < c */
 898             s = qsort_cmp(pc_left, u_left);
 899             /* if l < c, c < r - already in order - nothing to do */
 900             if (s == 0) {
 901                /* l < c, c == r - already in order, pc grows */
 902                ++pc_right;
 903                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 904             } else if (s > 0) {
 905                /* l < c, c > r - need to know more */
 906                s = qsort_cmp(u_right, u_left);
 907                if (s < 0) {
 908                   /* l < c, c > r, l < r - swap c & r to get ordered */
 909                   qsort_swap(pc_left, u_left);
 910                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 911                } else if (s == 0) {
 912                   /* l < c, c > r, l == r - swap c&r, grow pc */
 913                   qsort_swap(pc_left, u_left);
 914                   --pc_left;
 915                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 916                } else {
 917                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
 918                   qsort_rotate(pc_left, u_right, u_left);
 919                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 920                }
 921             }
 922          } else if (s == 0) {
 923             /* l == c */
 924             s = qsort_cmp(pc_left, u_left);
 925             if (s < 0) {
 926                /* l == c, c < r - already in order, grow pc */
 927                --pc_left;
 928                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 929             } else if (s == 0) {
 930                /* l == c, c == r - already in order, grow pc both ways */
 931                --pc_left;
 932                ++pc_right;
 933                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 934             } else {
 935                /* l == c, c > r - swap l & r, grow pc */
 936                qsort_swap(u_right, u_left);
 937                ++pc_right;
 938                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 939             }
 940          } else {
 941             /* l > c */
 942             s = qsort_cmp(pc_left, u_left);
 943             if (s < 0) {
 944                /* l > c, c < r - need to know more */
 945                s = qsort_cmp(u_right, u_left);
 946                if (s < 0) {
 947                   /* l > c, c < r, l < r - swap l & c to get ordered */
 948                   qsort_swap(u_right, pc_left);
 949                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 950                } else if (s == 0) {
 951                   /* l > c, c < r, l == r - swap l & c, grow pc */
 952                   qsort_swap(u_right, pc_left);
 953                   ++pc_right;
 954                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 955                } else {
 956                   /* l > c, c < r, l > r - rotate lcr into crl to order */
 957                   qsort_rotate(u_right, pc_left, u_left);
 958                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 959                }
 960             } else if (s == 0) {
 961                /* l > c, c == r - swap ends, grow pc */
 962                qsort_swap(u_right, u_left);
 963                --pc_left;
 964                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 965             } else {
 966                /* l > c, c > r - swap ends to get in order */
 967                qsort_swap(u_right, u_left);
 968                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
 969             }
 970          }
 971          /* We now know the 3 middle elements have been compared and
 972             arranged in the desired order, so we can shrink the uncompared
 973             sets on both sides
 974          */
 975          --u_right;
 976          ++u_left;
 977          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
 978
 979          /* The above massive nested if was the simple part :-). We now have
 980             the middle 3 elements ordered and we need to scan through the
 981             uncompared sets on either side, swapping elements that are on
 982             the wrong side or simply shuffling equal elements around to get
 983             all equal elements into the pivot chunk.
 984          */
 985
 986          for ( ; ; ) {
 987             int still_work_on_left;
 988             int still_work_on_right;
 989
 990             /* Scan the uncompared values on the left. If I find a value
 991                equal to the pivot value, move it over so it is adjacent to
 992                the pivot chunk and expand the pivot chunk. If I find a value
 993                less than the pivot value, then just leave it - its already
 994                on the correct side of the partition. If I find a greater
 995                value, then stop the scan.
 996             */
 997             while ((still_work_on_left = (u_right >= part_left))) {
 998                s = qsort_cmp(u_right, pc_left);
 999                if (s < 0) {
1000                   --u_right;
1001                } else if (s == 0) {
1002                   --pc_left;
1003                   if (pc_left != u_right) {
1004                      qsort_swap(u_right, pc_left);
1005                   }
1006                   --u_right;
1007                } else {
1008                   break;
1009                }
1010                qsort_assert(u_right < pc_left);
1011                qsort_assert(pc_left <= pc_right);
1012                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1013                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1014             }
1015
1016             /* Do a mirror image scan of uncompared values on the right
1017             */
1018             while ((still_work_on_right = (u_left <= part_right))) {
1019                s = qsort_cmp(pc_right, u_left);
1020                if (s < 0) {
1021                   ++u_left;
1022                } else if (s == 0) {
1023                   ++pc_right;
1024                   if (pc_right != u_left) {
1025                      qsort_swap(pc_right, u_left);
1026                   }
1027                   ++u_left;
1028                } else {
1029                   break;
1030                }
1031                qsort_assert(u_left > pc_right);
1032                qsort_assert(pc_left <= pc_right);
1033                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1034                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1035             }
1036
1037             if (still_work_on_left) {
1038                /* I know I have a value on the left side which needs to be
1039                   on the right side, but I need to know more to decide
1040                   exactly the best thing to do with it.
1041                */
1042                if (still_work_on_right) {
1043                   /* I know I have values on both side which are out of
1044                      position. This is a big win because I kill two birds
1045                      with one swap (so to speak). I can advance the
1046                      uncompared pointers on both sides after swapping both
1047                      of them into the right place.
1048                   */
1049                   qsort_swap(u_right, u_left);
1050                   --u_right;
1051                   ++u_left;
1052                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1053                } else {
1054                   /* I have an out of position value on the left, but the
1055                      right is fully scanned, so I "slide" the pivot chunk
1056                      and any less-than values left one to make room for the
1057                      greater value over on the right. If the out of position
1058                      value is immediately adjacent to the pivot chunk (there
1059                      are no less-than values), I can do that with a swap,
1060                      otherwise, I have to rotate one of the less than values
1061                      into the former position of the out of position value
1062                      and the right end of the pivot chunk into the left end
1063                      (got all that?).
1064                   */
1065                   --pc_left;
1066                   if (pc_left == u_right) {
1067                      qsort_swap(u_right, pc_right);
1068                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1069                   } else {
1070                      qsort_rotate(u_right, pc_left, pc_right);
1071                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1072                   }
1073                   --pc_right;
1074                   --u_right;
1075                }
1076             } else if (still_work_on_right) {
1077                /* Mirror image of complex case above: I have an out of
1078                   position value on the right, but the left is fully
1079                   scanned, so I need to shuffle things around to make room
1080                   for the right value on the left.
1081                */
1082                ++pc_right;
1083                if (pc_right == u_left) {
1084                   qsort_swap(u_left, pc_left);
1085                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1086                } else {
1087                   qsort_rotate(pc_right, pc_left, u_left);
1088                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1089                }
1090                ++pc_left;
1091                ++u_left;
1092             } else {
1093                /* No more scanning required on either side of partition,
1094                   break out of loop and figure out next set of partitions
1095                */
1096                break;
1097             }
1098          }
1099
1100          /* The elements in the pivot chunk are now in the right place. They
1101             will never move or be compared again. All I have to do is decide
1102             what to do with the stuff to the left and right of the pivot
1103             chunk.
1104
1105             Notes on the QSORT_ORDER_GUESS ifdef code:
1106
1107             1. If I just built these partitions without swapping any (or
1108                very many) elements, there is a chance that the elements are
1109                already ordered properly (being properly ordered will
1110                certainly result in no swapping, but the converse can't be
1111                proved :-).
1112
1113             2. A (properly written) insertion sort will run faster on
1114                already ordered data than qsort will.
1115
1116             3. Perhaps there is some way to make a good guess about
1117                switching to an insertion sort earlier than partition size 6
1118                (for instance - we could save the partition size on the stack
1119                and increase the size each time we find we didn't swap, thus
1120                switching to insertion sort earlier for partitions with a
1121                history of not swapping).
1122
1123             4. Naturally, if I just switch right away, it will make
1124                artificial benchmarks with pure ascending (or descending)
1125                data look really good, but is that a good reason in general?
1126                Hard to say...
1127          */
1128
1129 #ifdef QSORT_ORDER_GUESS
1130          if (swapped < 3) {
1131 #if QSORT_ORDER_GUESS == 1
1132             qsort_break_even = (part_right - part_left) + 1;
1133 #endif
1134 #if QSORT_ORDER_GUESS == 2
1135             qsort_break_even *= 2;
1136 #endif
1137 #if QSORT_ORDER_GUESS == 3
1138             const int prev_break = qsort_break_even;
1139             qsort_break_even *= qsort_break_even;
1140             if (qsort_break_even < prev_break) {
1141                qsort_break_even = (part_right - part_left) + 1;
1142             }
1143 #endif
1144          } else {
1145             qsort_break_even = QSORT_BREAK_EVEN;
1146          }
1147 #endif
1148
1149          if (part_left < pc_left) {
1150             /* There are elements on the left which need more processing.
1151                Check the right as well before deciding what to do.
1152             */
1153             if (pc_right < part_right) {
1154                /* We have two partitions to be sorted. Stack the biggest one
1155                   and process the smallest one on the next iteration. This
1156                   minimizes the stack height by insuring that any additional
1157                   stack entries must come from the smallest partition which
1158                   (because it is smallest) will have the fewest
1159                   opportunities to generate additional stack entries.
1160                */
1161                if ((part_right - pc_right) > (pc_left - part_left)) {
1162                   /* stack the right partition, process the left */
1163                   partition_stack[next_stack_entry].left = pc_right + 1;
1164                   partition_stack[next_stack_entry].right = part_right;
1165 #ifdef QSORT_ORDER_GUESS
1166                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1167 #endif
1168                   part_right = pc_left - 1;
1169                } else {
1170                   /* stack the left partition, process the right */
1171                   partition_stack[next_stack_entry].left = part_left;
1172                   partition_stack[next_stack_entry].right = pc_left - 1;
1173 #ifdef QSORT_ORDER_GUESS
1174                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1175 #endif
1176                   part_left = pc_right + 1;
1177                }
1178                qsort_assert(next_stack_entry < QSORT_MAX_STACK);
1179                ++next_stack_entry;
1180             } else {
1181                /* The elements on the left are the only remaining elements
1182                   that need sorting, arrange for them to be processed as the
1183                   next partition.
1184                */
1185                part_right = pc_left - 1;
1186             }
1187          } else if (pc_right < part_right) {
1188             /* There is only one chunk on the right to be sorted, make it
1189                the new partition and loop back around.
1190             */
1191             part_left = pc_right + 1;
1192          } else {
1193             /* This whole partition wound up in the pivot chunk, so
1194                we need to get a new partition off the stack.
1195             */
1196             if (next_stack_entry == 0) {
1197                /* the stack is empty - we are done */
1198                break;
1199             }
1200             --next_stack_entry;
1201             part_left = partition_stack[next_stack_entry].left;
1202             part_right = partition_stack[next_stack_entry].right;
1203 #ifdef QSORT_ORDER_GUESS
1204             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1205 #endif
1206          }
1207       } else {
1208          /* This partition is too small to fool with qsort complexity, just
1209             do an ordinary insertion sort to minimize overhead.
1210          */
1211          int i;
1212          /* Assume 1st element is in right place already, and start checking
1213             at 2nd element to see where it should be inserted.
1214          */
1215          for (i = part_left + 1; i <= part_right; ++i) {
1216             int j;
1217             /* Scan (backwards - just in case 'i' is already in right place)
1218                through the elements already sorted to see if the ith element
1219                belongs ahead of one of them.
1220             */
1221             for (j = i - 1; j >= part_left; --j) {
1222                if (qsort_cmp(i, j) >= 0) {
1223                   /* i belongs right after j
1224                   */
1225                   break;
1226                }
1227             }
1228             ++j;
1229             if (j != i) {
1230                /* Looks like we really need to move some things
1231                */
1232                int k;
1233                temp = array[i];
1234                for (k = i - 1; k >= j; --k)
1235                   array[k + 1] = array[k];
1236                array[j] = temp;
1237             }
1238          }
1239
1240          /* That partition is now sorted, grab the next one, or get out
1241             of the loop if there aren't any more.
1242          */
1243
1244          if (next_stack_entry == 0) {
1245             /* the stack is empty - we are done */
1246             break;
1247          }
1248          --next_stack_entry;
1249          part_left = partition_stack[next_stack_entry].left;
1250          part_right = partition_stack[next_stack_entry].right;
1251 #ifdef QSORT_ORDER_GUESS
1252          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1253 #endif
1254       }
1255    }
1256
1257    /* Believe it or not, the array is sorted at this point! */
1258 }
1259
1260 /* Stabilize what is, presumably, an otherwise unstable sort method.
1261  * We do that by allocating (or having on hand) an array of pointers
1262  * that is the same size as the original array of elements to be sorted.
1263  * We initialize this parallel array with the addresses of the original
1264  * array elements.  This indirection can make you crazy.
1265  * Some pictures can help.  After initializing, we have
1266  *
1267  *  indir                  list1
1268  * +----+                 +----+
1269  * |    | --------------> |    | ------> first element to be sorted
1270  * +----+                 +----+
1271  * |    | --------------> |    | ------> second element to be sorted
1272  * +----+                 +----+
1273  * |    | --------------> |    | ------> third element to be sorted
1274  * +----+                 +----+
1275  *  ...
1276  * +----+                 +----+
1277  * |    | --------------> |    | ------> n-1st element to be sorted
1278  * +----+                 +----+
1279  * |    | --------------> |    | ------> n-th element to be sorted
1280  * +----+                 +----+
1281  *
1282  * During the sort phase, we leave the elements of list1 where they are,
1283  * and sort the pointers in the indirect array in the same order determined
1284  * by the original comparison routine on the elements pointed to.
1285  * Because we don't move the elements of list1 around through
1286  * this phase, we can break ties on elements that compare equal
1287  * using their address in the list1 array, ensuring stabilty.
1288  * This leaves us with something looking like
1289  *
1290  *  indir                  list1
1291  * +----+                 +----+
1292  * |    | --+       +---> |    | ------> first element to be sorted
1293  * +----+   |       |     +----+
1294  * |    | --|-------|---> |    | ------> second element to be sorted
1295  * +----+   |       |     +----+
1296  * |    | --|-------+ +-> |    | ------> third element to be sorted
1297  * +----+   |         |   +----+
1298  *  ...
1299  * +----+    | |   | |    +----+
1300  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1301  * +----+    |     |      +----+
1302  * |    | ---+     +----> |    | ------> n-th element to be sorted
1303  * +----+                 +----+
1304  *
1305  * where the i-th element of the indirect array points to the element
1306  * that should be i-th in the sorted array.  After the sort phase,
1307  * we have to put the elements of list1 into the places
1308  * dictated by the indirect array.
1309  */
1310
1311
1312 static I32
1313 cmpindir(pTHX_ gptr a, gptr b)
1314 {
1315     gptr * const ap = (gptr *)a;
1316     gptr * const bp = (gptr *)b;
1317     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1318
1319     if (sense)
1320         return sense;
1321     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1322 }
1323
1324 static I32
1325 cmpindir_desc(pTHX_ gptr a, gptr b)
1326 {
1327     gptr * const ap = (gptr *)a;
1328     gptr * const bp = (gptr *)b;
1329     const I32 sense = PL_sort_RealCmp(aTHX_ *ap, *bp);
1330
1331     /* Reverse the default */
1332     if (sense)
1333         return -sense;
1334     /* But don't reverse the stability test.  */
1335     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1336
1337 }
1338
1339 STATIC void
1340 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1341 {
1342
1343     dSORTHINTS;
1344
1345     if (SORTHINTS & HINT_SORT_STABLE) {
1346          register gptr **pp, *q;
1347          register size_t n, j, i;
1348          gptr *small[SMALLSORT], **indir, tmp;
1349          SVCOMPARE_t savecmp;
1350          if (nmemb <= 1) return;     /* sorted trivially */
1351
1352          /* Small arrays can use the stack, big ones must be allocated */
1353          if (nmemb <= SMALLSORT) indir = small;
1354          else { Newx(indir, nmemb, gptr *); }
1355
1356          /* Copy pointers to original array elements into indirect array */
1357          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1358
1359          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1360          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1361
1362          /* sort, with indirection */
1363          S_qsortsvu(aTHX_ (gptr *)indir, nmemb,
1364                     flags ? cmpindir_desc : cmpindir);
1365
1366          pp = indir;
1367          q = list1;
1368          for (n = nmemb; n--; ) {
1369               /* Assert A: all elements of q with index > n are already
1370                * in place.  This is vacuosly true at the start, and we
1371                * put element n where it belongs below (if it wasn't
1372                * already where it belonged). Assert B: we only move
1373                * elements that aren't where they belong,
1374                * so, by A, we never tamper with elements above n.
1375                */
1376               j = pp[n] - q;            /* This sets j so that q[j] is
1377                                          * at pp[n].  *pp[j] belongs in
1378                                          * q[j], by construction.
1379                                          */
1380               if (n != j) {             /* all's well if n == j */
1381                    tmp = q[j];          /* save what's in q[j] */
1382                    do {
1383                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1384                         i = pp[j] - q;  /* the index in q of the element
1385                                          * just moved */
1386                         pp[j] = q + j;  /* this is ok now */
1387                    } while ((j = i) != n);
1388                    /* There are only finitely many (nmemb) addresses
1389                     * in the pp array.
1390                     * So we must eventually revisit an index we saw before.
1391                     * Suppose the first revisited index is k != n.
1392                     * An index is visited because something else belongs there.
1393                     * If we visit k twice, then two different elements must
1394                     * belong in the same place, which cannot be.
1395                     * So j must get back to n, the loop terminates,
1396                     * and we put the saved element where it belongs.
1397                     */
1398                    q[n] = tmp;          /* put what belongs into
1399                                          * the n-th element */
1400               }
1401          }
1402
1403         /* free iff allocated */
1404          if (indir != small) { Safefree(indir); }
1405          /* restore prevailing comparison routine */
1406          PL_sort_RealCmp = savecmp;
1407     } else if (flags) {
1408          SVCOMPARE_t savecmp = PL_sort_RealCmp; /* Save current comparison routine, if any */
1409          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1410          cmp = cmp_desc;
1411          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1412          /* restore prevailing comparison routine */
1413          PL_sort_RealCmp = savecmp;
1414     } else {
1415          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1416     }
1417 }
1418
1419 /*
1420 =head1 Array Manipulation Functions
1421
1422 =for apidoc sortsv
1423
1424 Sort an array. Here is an example:
1425
1426     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1427
1428 See lib/sort.pm for details about controlling the sorting algorithm.
1429
1430 =cut
1431 */
1432
1433 void
1434 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1435 {
1436     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1437       = S_mergesortsv;
1438     dSORTHINTS;
1439     const I32 hints = SORTHINTS;
1440     if (hints & HINT_SORT_QUICKSORT) {
1441         sortsvp = S_qsortsv;
1442     }
1443     else {
1444         /* The default as of 5.8.0 is mergesort */
1445         sortsvp = S_mergesortsv;
1446     }
1447
1448     sortsvp(aTHX_ array, nmemb, cmp, 0);
1449 }
1450
1451
1452 static void
1453 S_sortsv_desc(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1454 {
1455     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1456       = S_mergesortsv;
1457     dSORTHINTS;
1458     const I32 hints = SORTHINTS;
1459     if (hints & HINT_SORT_QUICKSORT) {
1460         sortsvp = S_qsortsv;
1461     }
1462     else {
1463         /* The default as of 5.8.0 is mergesort */
1464         sortsvp = S_mergesortsv;
1465     }
1466
1467     sortsvp(aTHX_ array, nmemb, cmp, 1);
1468 }
1469
1470 #define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK))
1471 #define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
1472 #define SvNSIV(sv) ( SvNOK(sv) ? SvNVX(sv) : ( SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv) ) )
1473
1474 PP(pp_sort)
1475 {
1476     dVAR; dSP; dMARK; dORIGMARK;
1477     register SV **p1 = ORIGMARK+1, **p2;
1478     register I32 max, i;
1479     AV* av = Nullav;
1480     HV *stash;
1481     GV *gv;
1482     CV *cv = 0;
1483     I32 gimme = GIMME;
1484     OP* const nextop = PL_op->op_next;
1485     I32 overloading = 0;
1486     bool hasargs = FALSE;
1487     I32 is_xsub = 0;
1488     I32 sorting_av = 0;
1489     const U8 priv = PL_op->op_private;
1490     const U8 flags = PL_op->op_flags;
1491     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1492       = Perl_sortsv;
1493     I32 all_SIVs = 1;
1494
1495     if (gimme != G_ARRAY) {
1496         SP = MARK;
1497         EXTEND(SP,1);
1498         RETPUSHUNDEF;
1499     }
1500
1501     ENTER;
1502     SAVEVPTR(PL_sortcop);
1503     if (flags & OPf_STACKED) {
1504         if (flags & OPf_SPECIAL) {
1505             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1506             kid = kUNOP->op_first;                      /* pass rv2gv */
1507             kid = kUNOP->op_first;                      /* pass leave */
1508             PL_sortcop = kid->op_next;
1509             stash = CopSTASH(PL_curcop);
1510         }
1511         else {
1512             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1513             if (cv && SvPOK(cv)) {
1514                 const char * const proto = SvPV_nolen_const((SV*)cv);
1515                 if (proto && strEQ(proto, "$$")) {
1516                     hasargs = TRUE;
1517                 }
1518             }
1519             if (!(cv && CvROOT(cv))) {
1520                 if (cv && CvXSUB(cv)) {
1521                     is_xsub = 1;
1522                 }
1523                 else if (gv) {
1524                     SV *tmpstr = sv_newmortal();
1525                     gv_efullname3(tmpstr, gv, Nullch);
1526                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1527                         tmpstr);
1528                 }
1529                 else {
1530                     DIE(aTHX_ "Undefined subroutine in sort");
1531                 }
1532             }
1533
1534             if (is_xsub)
1535                 PL_sortcop = (OP*)cv;
1536             else
1537                 PL_sortcop = CvSTART(cv);
1538         }
1539     }
1540     else {
1541         PL_sortcop = Nullop;
1542         stash = CopSTASH(PL_curcop);
1543     }
1544
1545     /* optimiser converts "@a = sort @a" to "sort \@a";
1546      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1547      * result back to @a at the end of this function */
1548     if (priv & OPpSORT_INPLACE) {
1549         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1550         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1551         av = (AV*)(*SP);
1552         max = AvFILL(av) + 1;
1553         if (SvMAGICAL(av)) {
1554             MEXTEND(SP, max);
1555             p2 = SP;
1556             for (i=0; i < max; i++) {
1557                 SV **svp = av_fetch(av, i, FALSE);
1558                 *SP++ = (svp) ? *svp : Nullsv;
1559             }
1560         }
1561         else {
1562             if (SvREADONLY(av))
1563                 Perl_croak(aTHX_ PL_no_modify);
1564             else
1565                 SvREADONLY_on(av);
1566             p1 = p2 = AvARRAY(av);
1567             sorting_av = 1;
1568         }
1569     }
1570     else {
1571         p2 = MARK+1;
1572         max = SP - MARK;
1573    }
1574
1575     if (priv & OPpSORT_DESCEND) {
1576         sortsvp = S_sortsv_desc;
1577     }
1578
1579     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1580      * any nulls; also stringify or converting to integer or number as
1581      * required any args */
1582     for (i=max; i > 0 ; i--) {
1583         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1584             SvTEMP_off(*p1);
1585             if (!PL_sortcop) {
1586                 if (priv & OPpSORT_NUMERIC) {
1587                     if (priv & OPpSORT_INTEGER) {
1588                         if (!SvIOK(*p1)) {
1589                             if (SvAMAGIC(*p1))
1590                                 overloading = 1;
1591                             else
1592                                 (void)sv_2iv(*p1);
1593                         }
1594                     }
1595                     else {
1596                         if (!SvNSIOK(*p1)) {
1597                             if (SvAMAGIC(*p1))
1598                                 overloading = 1;
1599                             else
1600                                 (void)sv_2nv(*p1);
1601                         }
1602                         if (all_SIVs && !SvSIOK(*p1))
1603                             all_SIVs = 0;
1604                     }
1605                 }
1606                 else {
1607                     if (!SvPOK(*p1)) {
1608                         if (SvAMAGIC(*p1))
1609                             overloading = 1;
1610                         else
1611                             (void)sv_2pv_flags(*p1, 0,
1612                                                SV_GMAGIC|SV_CONST_RETURN);
1613                     }
1614                 }
1615             }
1616             p1++;
1617         }
1618         else
1619             max--;
1620     }
1621     if (sorting_av)
1622         AvFILLp(av) = max-1;
1623
1624     if (max > 1) {
1625         SV **start;
1626         if (PL_sortcop) {
1627             PERL_CONTEXT *cx;
1628             SV** newsp;
1629             const bool oldcatch = CATCH_GET;
1630
1631             SAVETMPS;
1632             SAVEOP();
1633
1634             CATCH_SET(TRUE);
1635             PUSHSTACKi(PERLSI_SORT);
1636             if (!hasargs && !is_xsub) {
1637                 SAVESPTR(PL_firstgv);
1638                 SAVESPTR(PL_secondgv);
1639                 SAVESPTR(PL_sortstash);
1640                 PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1641                 PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1642                 PL_sortstash = stash;
1643                 SAVESPTR(GvSV(PL_firstgv));
1644                 SAVESPTR(GvSV(PL_secondgv));
1645             }
1646
1647             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1648             if (!(flags & OPf_SPECIAL)) {
1649                 cx->cx_type = CXt_SUB;
1650                 cx->blk_gimme = G_SCALAR;
1651                 PUSHSUB(cx);
1652                 if (!is_xsub) {
1653                     AV* const padlist = CvPADLIST(cv);
1654
1655                     if (++CvDEPTH(cv) >= 2) {
1656                         PERL_STACK_OVERFLOW_CHECK();
1657                         pad_push(padlist, CvDEPTH(cv));
1658                     }
1659                     SAVECOMPPAD();
1660                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1661
1662                     if (hasargs) {
1663                         /* This is mostly copied from pp_entersub */
1664                         AV *av = (AV*)PAD_SVl(0);
1665
1666                         cx->blk_sub.savearray = GvAV(PL_defgv);
1667                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1668                         CX_CURPAD_SAVE(cx->blk_sub);
1669                         cx->blk_sub.argarray = av;
1670                     }
1671
1672                 }
1673             }
1674             cx->cx_type |= CXp_MULTICALL;
1675
1676             start = p1 - max;
1677             sortsvp(aTHX_ start, max,
1678                     is_xsub ? S_sortcv_xsub : hasargs ? S_sortcv_stacked : S_sortcv);
1679
1680             if (!(flags & OPf_SPECIAL)) {
1681                 LEAVESUB(cv);
1682                 if (!is_xsub)
1683                     CvDEPTH(cv)--;
1684             }
1685             POPBLOCK(cx,PL_curpm);
1686             PL_stack_sp = newsp;
1687             POPSTACK;
1688             CATCH_SET(oldcatch);
1689         }
1690         else {
1691             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1692             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1693             sortsvp(aTHX_ start, max,
1694                     (priv & OPpSORT_NUMERIC)
1695                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1696                             ? ( overloading ? S_amagic_i_ncmp : S_sv_i_ncmp)
1697                             : ( overloading ? S_amagic_ncmp : S_sv_ncmp ) )
1698                         : ( IN_LOCALE_RUNTIME
1699                             ? ( overloading
1700                                 ? S_amagic_cmp_locale
1701                                 : sv_cmp_locale_static)
1702                             : ( overloading ? S_amagic_cmp : sv_cmp_static)));
1703         }
1704         if (priv & OPpSORT_REVERSE) {
1705             SV **q = start+max-1;
1706             while (start < q) {
1707                 SV * const tmp = *start;
1708                 *start++ = *q;
1709                 *q-- = tmp;
1710             }
1711         }
1712     }
1713     if (sorting_av)
1714         SvREADONLY_off(av);
1715     else if (av && !sorting_av) {
1716         /* simulate pp_aassign of tied AV */
1717         SV** const base = ORIGMARK+1;
1718         for (i=0; i < max; i++) {
1719             base[i] = newSVsv(base[i]);
1720         }
1721         av_clear(av);
1722         av_extend(av, max);
1723         for (i=0; i < max; i++) {
1724             SV * const sv = base[i];
1725             SV ** const didstore = av_store(av, i, sv);
1726             if (SvSMAGICAL(sv))
1727                 mg_set(sv);
1728             if (!didstore)
1729                 sv_2mortal(sv);
1730         }
1731     }
1732     LEAVE;
1733     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1734     return nextop;
1735 }
1736
1737 static I32
1738 S_sortcv(pTHX_ SV *a, SV *b)
1739 {
1740     dVAR;
1741     const I32 oldsaveix = PL_savestack_ix;
1742     const I32 oldscopeix = PL_scopestack_ix;
1743     I32 result;
1744     GvSV(PL_firstgv) = a;
1745     GvSV(PL_secondgv) = b;
1746     PL_stack_sp = PL_stack_base;
1747     PL_op = PL_sortcop;
1748     CALLRUNOPS(aTHX);
1749     if (PL_stack_sp != PL_stack_base + 1)
1750         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1751     if (!SvNIOKp(*PL_stack_sp))
1752         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1753     result = SvIV(*PL_stack_sp);
1754     while (PL_scopestack_ix > oldscopeix) {
1755         LEAVE;
1756     }
1757     leave_scope(oldsaveix);
1758     return result;
1759 }
1760
1761 static I32
1762 S_sortcv_stacked(pTHX_ SV *a, SV *b)
1763 {
1764     dVAR;
1765     const I32 oldsaveix = PL_savestack_ix;
1766     const I32 oldscopeix = PL_scopestack_ix;
1767     I32 result;
1768     AV * const av = GvAV(PL_defgv);
1769
1770     if (AvMAX(av) < 1) {
1771         SV** ary = AvALLOC(av);
1772         if (AvARRAY(av) != ary) {
1773             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1774             SvPV_set(av, (char*)ary);
1775         }
1776         if (AvMAX(av) < 1) {
1777             AvMAX(av) = 1;
1778             Renew(ary,2,SV*);
1779             SvPV_set(av, (char*)ary);
1780         }
1781     }
1782     AvFILLp(av) = 1;
1783
1784     AvARRAY(av)[0] = a;
1785     AvARRAY(av)[1] = b;
1786     PL_stack_sp = PL_stack_base;
1787     PL_op = PL_sortcop;
1788     CALLRUNOPS(aTHX);
1789     if (PL_stack_sp != PL_stack_base + 1)
1790         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1791     if (!SvNIOKp(*PL_stack_sp))
1792         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1793     result = SvIV(*PL_stack_sp);
1794     while (PL_scopestack_ix > oldscopeix) {
1795         LEAVE;
1796     }
1797     leave_scope(oldsaveix);
1798     return result;
1799 }
1800
1801 static I32
1802 S_sortcv_xsub(pTHX_ SV *a, SV *b)
1803 {
1804     dVAR; dSP;
1805     const I32 oldsaveix = PL_savestack_ix;
1806     const I32 oldscopeix = PL_scopestack_ix;
1807     CV * const cv=(CV*)PL_sortcop;
1808     I32 result;
1809
1810     SP = PL_stack_base;
1811     PUSHMARK(SP);
1812     EXTEND(SP, 2);
1813     *++SP = a;
1814     *++SP = b;
1815     PUTBACK;
1816     (void)(*CvXSUB(cv))(aTHX_ cv);
1817     if (PL_stack_sp != PL_stack_base + 1)
1818         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1819     if (!SvNIOKp(*PL_stack_sp))
1820         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1821     result = SvIV(*PL_stack_sp);
1822     while (PL_scopestack_ix > oldscopeix) {
1823         LEAVE;
1824     }
1825     leave_scope(oldsaveix);
1826     return result;
1827 }
1828
1829
1830 static I32
1831 S_sv_ncmp(pTHX_ SV *a, SV *b)
1832 {
1833     const NV nv1 = SvNSIV(a);
1834     const NV nv2 = SvNSIV(b);
1835     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1836 }
1837
1838 static I32
1839 S_sv_i_ncmp(pTHX_ SV *a, SV *b)
1840 {
1841     const IV iv1 = SvIV(a);
1842     const IV iv2 = SvIV(b);
1843     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1844 }
1845
1846 #define tryCALL_AMAGICbin(left,right,meth) \
1847     (PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
1848         ? amagic_call(left, right, CAT2(meth,_amg), 0) \
1849         : Nullsv;
1850
1851 static I32
1852 S_amagic_ncmp(pTHX_ register SV *a, register SV *b)
1853 {
1854     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1855     if (tmpsv) {
1856         if (SvIOK(tmpsv)) {
1857             const I32 i = SvIVX(tmpsv);
1858             if (i > 0)
1859                return 1;
1860             return i? -1 : 0;
1861         }
1862         else {
1863             const NV d = SvNV(tmpsv);
1864             if (d > 0)
1865                return 1;
1866             return d ? -1 : 0;
1867         }
1868      }
1869      return S_sv_ncmp(aTHX_ a, b);
1870 }
1871
1872 static I32
1873 S_amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1874 {
1875     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1876     if (tmpsv) {
1877         if (SvIOK(tmpsv)) {
1878             const I32 i = SvIVX(tmpsv);
1879             if (i > 0)
1880                return 1;
1881             return i? -1 : 0;
1882         }
1883         else {
1884             const NV d = SvNV(tmpsv);
1885             if (d > 0)
1886                return 1;
1887             return d ? -1 : 0;
1888         }
1889     }
1890     return S_sv_i_ncmp(aTHX_ a, b);
1891 }
1892
1893 static I32
1894 S_amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1895 {
1896     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1897     if (tmpsv) {
1898         if (SvIOK(tmpsv)) {
1899             const I32 i = SvIVX(tmpsv);
1900             if (i > 0)
1901                return 1;
1902             return i? -1 : 0;
1903         }
1904         else {
1905             const NV d = SvNV(tmpsv);
1906             if (d > 0)
1907                return 1;
1908             return d? -1 : 0;
1909         }
1910     }
1911     return sv_cmp(str1, str2);
1912 }
1913
1914 static I32
1915 S_amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1916 {
1917     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1918     if (tmpsv) {
1919         if (SvIOK(tmpsv)) {
1920             const I32 i = SvIVX(tmpsv);
1921             if (i > 0)
1922                return 1;
1923             return i? -1 : 0;
1924         }
1925         else {
1926             const NV d = SvNV(tmpsv);
1927             if (d > 0)
1928                return 1;
1929             return d? -1 : 0;
1930         }
1931     }
1932     return sv_cmp_locale(str1, str2);
1933 }
1934
1935 /*
1936  * Local variables:
1937  * c-indentation-style: bsd
1938  * c-basic-offset: 4
1939  * indent-tabs-mode: t
1940  * End:
1941  *
1942  * ex: set ts=8 sts=4 sw=4 noet:
1943  */