pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *   ...they shuffled back towards the rear of the line. 'No, not at the
  13  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  14  */
  15
  16 /* This file contains pp ("push/pop") functions that
  17  * execute the opcodes that make up a perl program. A typical pp function
  18  * expects to find its arguments on the stack, and usually pushes its
  19  * results onto the stack, hence the 'pp' terminology. Each OP structure
  20  * contains a pointer to the relevant pp_foo() function.
  21  *
  22  * This particular file just contains pp_sort(), which is complex
  23  * enough to merit its own file! See the other pp*.c files for the rest of
  24  * the pp_ functions.
  25  */
  26
  27 #include "EXTERN.h"
  28 #define PERL_IN_PP_SORT_C
  29 #include "perl.h"
  30
  31 #if defined(UNDER_CE)
  32 /* 'small' appears to be a reserved word on WINCE (or some such), and this
     * file uses it as the name of a local array in S_mergesortsv, so rename
     * it out of the way on that platform. */
  33 #define small xsmall
  34 #endif
  35
     /* Forward declarations of the file-local comparison routines.  Each one
      * takes two SVs (plus the interpreter context) and returns an I32.
      * Their definitions are not in this part of the file.
      */
  36 static I32 sortcv(pTHX_ SV *a, SV *b);
  37 static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
  38 static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
  39 static I32 sv_ncmp(pTHX_ SV *a, SV *b);
  40 static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
  41 static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
  42 static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
  43 static I32 amagic_cmp(pTHX_ SV *a, SV *b);
  44 static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
  45
     /* Local aliases for the core string comparison functions. */
  46 #define sv_cmp_static Perl_sv_cmp
  47 #define sv_cmp_locale_static Perl_sv_cmp_locale
  48
     /* dSORTHINTS declares a local 'hintsv' holding the scalar of the
      * "sort::hints" glob (fetched with GV_ADDMULTI).  SORTHINTS then yields
      * that scalar's integer value as an I32 when SvIOK() is true for it,
      * and 0 otherwise.  SORTHINTS is only usable where dSORTHINTS has
      * declared 'hintsv'.
      */
  49 #define dSORTHINTS   SV *hintsv = GvSV(gv_fetchpv("sort::hints", GV_ADDMULTI, SVt_IV))
  50 #define SORTHINTS    (SvIOK(hintsv) ? ((I32)SvIV(hintsv)) : 0)
  51
     /* SMALLSORT is the element count of the on-stack auxiliary array in
      * S_mergesortsv; inputs with more elements than this get a heap-allocated
      * auxiliary array instead.  It may be overridden at compile time.
      */
  52 #ifndef SMALLSORT
  53 #define SMALLSORT (200)
  54 #endif
  55
  56 /*
  57  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  58  *
  59  * The original code was written in conjunction with BSD Computer Software
  60  * Research Group at University of California, Berkeley.
  61  *
  62  * See also: "Optimistic Merge Sort" (SODA '92)
  63  *
  64  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  65  *
  66  * The code can be distributed under the same terms as Perl itself.
  67  *
  68  */
  69
  70
  71 typedef char * aptr;            /* pointer for arithmetic on sizes */
  72 typedef SV * gptr;              /* pointers in our lists */
  73
  74 /* Binary merge internal sort, with a few special mods
  75 ** for the special perl environment it now finds itself in.
  76 **
  77 ** Things that were once options have been hotwired
  78 ** to values suitable for this use.  In particular, we'll always
  79 ** initialize looking for natural runs, we'll always produce stable
  80 ** output, and we'll always do Peter McIlroy's binary merge.
  81 */
  82
  83 /* Pointer types for arithmetic and storage and convenience casts:
     ** APTR casts to a byte pointer, GPTP to a pointer to a list element,
     ** and GPPP to a pointer to a pointer to a list element.
     */
  84
  85 #define APTR(P) ((aptr)(P))
  86 #define GPTP(P) ((gptr *)(P))
  87 #define GPPP(P) ((gptr **)(P))
  88
  89
  90 /* byte offset from pointer P to (larger) pointer Q */
  91 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  92
     /* size in bytes of one list element */
  93 #define PSIZE sizeof(gptr)
  94
  95 /* If PSIZE is a power of 2, define PSHIFT as that power, if that helps.
     **
     ** PNELEM(P, Q) is the number of elements from P up to Q,
     ** PNBYTE(N) is the number of bytes occupied by N elements, and
     ** PINDEX(P, N) is a pointer to the element N places after P.
     */
  96
  97 #ifdef  PSHIFT
  98 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  99 #define PNBYTE(N)       ((N) << (PSHIFT))
 100 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
 101 #else
 102 /* Leave optimization to compiler */
 103 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
 104 #define PNBYTE(N)       ((N) * (PSIZE))
 105 #define PINDEX(P, N)    (GPTP(P) + (N))
 106 #endif
 107
 108 /* Pointer into OTHER at the same byte offset that P has within THIS */
 109 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
 110
     /* Copy elements from src to dst, advancing both, until src reaches lim.
     ** This is a do/while, so one element is always copied, even if src is
     ** already at lim on entry.  src and dst must be modifiable lvalues;
     ** they are left pointing just past what was copied.
     */
 111 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
 112
 113
 114 /* Runs are identified by a pointer in the auxiliary list.
 115 ** The pointer is at the start of the list,
 116 ** and it points to the start of the next list.
 117 ** NEXT is used as an lvalue, too.
 118 */
 119
 120 #define NEXT(P)         (*GPPP(P))
 121
 122
 123 /* PTHRESH is the minimum number of pairs with the same sense to justify
 124 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 125 ** not just elements, so PTHRESH == 8 means a run of 16.
 126 */
 127
 128 #define PTHRESH (8)
 129
 130 /* RTHRESH is the number of elements in a run that must compare low
 131 ** to the low element from the opposing run before we justify
 132 ** doing a binary rampup instead of single stepping.
 133 ** In random input, N in a row low should only happen with
 134 ** probability 2^(1-N), so we can risk that we are dealing
 135 ** with orderly input without paying much when we aren't.
 136 */
 137
 138 #define RTHRESH (6)
 139
 140
 141 /*
 142 ** Overview of algorithm and variables.
 143 ** The array of elements at list1 will be organized into runs of length 2,
 144 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 145 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 146 **
 147 ** Unless otherwise specified, pair pointers address the first of two elements.
 148 **
 149 ** b and b+1 are a pair that compare with sense "sense".
 150 ** b is the "bottom" of adjacent pairs that might form a longer run.
 151 **
 152 ** p2 parallels b in the list2 array, where runs are defined by
 153 ** a pointer chain.
 154 **
 155 ** t represents the "top" of the adjacent pairs that might extend
 156 ** the run beginning at b.  Usually, t addresses a pair
 157 ** that compares with opposite sense from (b,b+1).
 158 ** However, it may also address a singleton element at the end of list1,
 159 ** or it may be equal to "last", the first element beyond list1.
 160 **
 161 ** r addresses the Nth pair following b.  If this would be beyond t,
 162 ** we back it off to t.  Only when r is less than t do we consider the
 163 ** run long enough to consider checking.
 164 **
 165 ** q addresses a pair such that the pairs at b through q already form a run.
 166 ** Often, q will equal b, indicating we only are sure of the pair itself.
 167 ** However, a search on the previous cycle may have revealed a longer run,
 168 ** so q may be greater than b.
 169 **
 170 ** p is used to work back from a candidate r, trying to reach q,
 171 ** which would mean b through r would be a run.  If we discover such a run,
 172 ** we start q at r and try to push it further towards t.
 173 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 174 ** In any event, after the check (if any), we have two main cases.
 175 **
 176 ** 1) Short run.  b <= q < p <= r <= t.
 177 **      b through q is a run (perhaps trivial)
 178 **      q through p are uninteresting pairs
 179 **      p through r is a run
 180 **
 181 ** 2) Long run.  b < r <= q < t.
 182 **      b through q is a run (of length >= 2 * PTHRESH)
 183 **
 184 ** Note that degenerate cases are not only possible, but likely.
 185 ** For example, if the pair following b compares with opposite sense,
 186 ** then b == q < p == r == t.
 187 */
 188
 189
 190 static IV
 191 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 192 {
         /* Split list1 into runs, in place, and describe the runs in list2.
          *
          * list1 - the nmemb elements to examine.  Pairs and longer runs that
          *         compare "greater than" (cmp > 0) are reversed in place.
          * list2 - parallel array.  For each run, NEXT() of the list2 slot
          *         matching the start of the run is set to the list2 slot
          *         matching the start of the following run.
          * nmemb - number of elements.  Must be at least 2, because the
          *         first comparison reads *(b+1) unconditionally.
          * cmp   - comparison routine; only "result > 0" is looked at here.
          *
          * Returns the number of runs recorded in list2.
          */
 193     I32 sense;
 194     register gptr *b, *p, *q, *t, *p2;
 195     register gptr c, *last, *r;
 196     gptr *savep;
 197     IV runs = 0;
 198
 199     b = list1;
 200     last = PINDEX(b, nmemb);
         /* sense is 1 when the first pair is out of order, 0 otherwise */
 201     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 202     for (p2 = list2; b < last; ) {
 203         /* We just started, or just reversed sense.
 204         ** Set t at end of pairs with the prevailing sense.
 205         */
 206         for (p = b+2, t = p; ++p < last; t = ++p) {
 207             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 208         }
 209         q = b;
 210         /* Having laid out the playing field, look for long runs */
 211         do {
 212             p = r = b + (2 * PTHRESH);
 213             if (r >= t) p = r = t;      /* too short to care about */
 214             else {
                     /* Work back from r towards q, checking the junction
                     ** between each pair and the pair before it.
                     */
 215                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 216                        ((p -= 2) > q));
 217                 if (p <= q) {
 218                     /* b through r is a (long) run.
 219                     ** Extend it as far as possible.
 220                     */
 221                     p = q = r;
 222                     while (((p += 2) < t) &&
 223                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 224                     r = p = q + 2;      /* no simple pairs, no after-run */
 225                 }
 226             }
 227             if (q > b) {                /* run of greater than 2 at b */
 228                 savep = p;
 229                 p = q += 2;
 230                 /* pick up singleton, if possible */
 231                 if ((p == t) &&
 232                     ((t + 1) == last) &&
 233                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 234                     savep = r = p = q = last;
                     /* record where this run ends, and count it */
 235                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
                     /* a run with sense set is reversed end-for-end in place */
 236                 if (sense) while (b < --p) {
 237                     c = *b;
 238                     *b++ = *p;
 239                     *p = c;
 240                 }
 241                 p = savep;
 242             }
 243             while (q < p) {             /* simple pairs */
                     /* each pair is a run of 2; swap its elements if sense is set */
 244                 p2 = NEXT(p2) = p2 + 2; ++runs;
 245                 if (sense) {
 246                     c = *q++;
 247                     *(q-1) = *q;
 248                     *q++ = c;
 249                 } else q += 2;
 250             }
                 /* a lone element left at the very end becomes a run of 1 */
 251             if (((b = p) == t) && ((t+1) == last)) {
 252                 NEXT(p2) = p2 + 1; ++runs;
 253                 b++;
 254             }
 255             q = r;
 256         } while (b < t);
 257         sense = !sense;
 258     }
 259     return runs;
 260 }
 261
 262
 263 /* The original merge sort, in use since 5.7, was as fast as, or faster than,
 264  * qsort on many platforms, but slower than qsort, conspicuously so,
 265  * on others.  The most likely explanation was platform-specific
 266  * differences in cache sizes and relative speeds.
 267  *
 268  * The quicksort divide-and-conquer algorithm guarantees that, as the
 269  * problem is subdivided into smaller and smaller parts, the parts
 270  * fit into smaller (and faster) caches.  So it doesn't matter how
 271  * many levels of cache exist, quicksort will "find" them, and,
 272  * as long as smaller is faster, take advantage of them.
 273  *
 274  * By contrast, consider how the original mergesort algorithm worked.
 275  * Suppose we have five runs (each typically of length 2 after dynprep).
 276  *
 277  * pass               base                        aux
 278  *  0              1 2 3 4 5
 279  *  1                                           12 34 5
 280  *  2                1234 5
 281  *  3                                            12345
 282  *  4                 12345
 283  *
 284  * Adjacent pairs are merged in "grand sweeps" through the input.
 285  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 286  * runs 3 and 4 are merged and the records from run 5 have been copied.
 287  * The only cache that matters is one large enough to hold *all* the input.
 288  * On some platforms, this may be many times slower than smaller caches.
 289  *
 290  * The following pseudo-code uses the same basic merge algorithm,
 291  * but in a divide-and-conquer way.
 292  *
 293  * # merge $runs runs at offset $offset of list $list1 into $list2.
 294  * # all unmerged runs ($runs == 1) originate in list $base.
 295  * sub mgsort2 {
 296  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 297  *
 298  *     if ($runs == 1) {
 299  *         if ($list1 is $base) copy run to $list2
 300  *         return offset of end of list (or copy)
 301  *     } else {
 302  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 303  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 304  *         merge the adjacent runs at $offset of $list1 into $list2
 305  *         return the offset of the end of the merged runs
 306  *     }
 307  * }
 308  * mgsort2(0, $runs, $base, $aux, $base);
 309  *
 310  * For our 5 runs, the tree of calls looks like
 311  *
 312  *           5
 313  *      3        2
 314  *   2     1   1   1
 315  * 1   1
 316  *
 317  * 1   2   3   4   5
 318  *
 319  * and the corresponding activity looks like
 320  *
 321  * copy runs 1 and 2 from base to aux
 322  * merge runs 1 and 2 from aux to base
 323  * (run 3 is where it belongs, no copy needed)
 324  * merge runs 12 and 3 from base to aux
 325  * (runs 4 and 5 are where they belong, no copy needed)
 326  * merge runs 4 and 5 from base to aux
 327  * merge runs 123 and 45 from aux to base
 328  *
 329  * Note that we merge runs 1 and 2 immediately after copying them,
 330  * while they are still likely to be in fast cache.  Similarly,
 331  * run 3 is merged with run 12 while it still may be lingering in cache.
 332  * This implementation should therefore enjoy much of the cache-friendly
 333  * behavior that quicksort does.  In addition, it does less copying
 334  * than the original mergesort implementation (only runs 1 and 2 are copied)
 335  * and the "balancing" of merges is better (merged runs comprise more nearly
 336  * equal numbers of original runs).
 337  *
 338  * The actual cache-friendly implementation will use a pseudo-stack
 339  * to avoid recursion, and will unroll processing of runs of length 2,
 340  * but it is otherwise similar to the recursive implementation.
 341  */
 342
 343 typedef struct {
         /* One entry of the pseudo-stack that S_mergesortsv uses in place of
          * recursion; an entry whose runs field is 0 is ready to be merged. */
 344     IV  offset;         /* offset of 1st of 2 runs at this level */
 345     IV  runs;           /* how many runs must be combined into 1 */
 346 } off_runs;             /* pseudo-stack element */
 347
 348
 349 static I32
 350 cmp_desc(pTHX_ gptr a, gptr b)
 351 {
         /* Reversed-order comparison: calls the routine parked in
          * PL_sort_RealCmp exactly once and returns a value of the opposite
          * sign (-1, 0 or 1).
          *
          * The result is normalised rather than simply negated, because
          * negating the most negative I32 overflows and would leave the sign
          * unchanged.  The callers visible in this file only test the sign
          * of the result.
          */
         const I32 forward = PL_sort_RealCmp(aTHX_ a, b);
 352     return (forward > 0) ? -1 : (forward < 0) ? 1 : 0;
 353 }
 354
 355 STATIC void
 356 S_mergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
 357 {
         /* Sort the nmemb pointers at base, in place, by merging the runs
          * that dynprep() finds.  A pseudo-stack of off_runs entries stands
          * in for recursion.
          *
          * base  - array of nmemb elements; holds the sorted result on return
          * nmemb - element count; 0 or 1 returns immediately
          * cmp   - comparison routine (negative / zero / positive result)
          * flags - any non-zero value reverses the order: cmp is parked in
          *         PL_sort_RealCmp and cmp_desc is used in its place; the
          *         previous PL_sort_RealCmp is restored at 'done'
          *
          * The auxiliary array of nmemb elements is the on-stack 'small'
          * when nmemb <= SMALLSORT, otherwise it is allocated with Newx()
          * and released at 'done'.
          *
          * NOTE(review): nothing visible here frees aux or restores
          * PL_sort_RealCmp if cmp leaves this function non-locally (for
          * example by dying) -- confirm whether callers arrange cleanup.
          * NOTE(review): 'stack' has a fixed 60 entries and no bounds check
          * is visible; each split below stacks half of the remaining runs.
          */
 358     IV i, run, offset;
 359     I32 sense, level;
 360     register gptr *f1, *f2, *t, *b, *p;
 361     int iwhich;
 362     gptr *aux;
 363     gptr *p1;
 364     gptr small[SMALLSORT];
 365     gptr *which[3];
 366     off_runs stack[60], *stackp;
 367     SVCOMPARE_t savecmp = 0;
 368
 369     if (nmemb <= 1) return;                     /* sorted trivially */
 370
 371     if (flags) {
 372         savecmp = PL_sort_RealCmp;      /* Save current comparison routine, if any */
 373         PL_sort_RealCmp = cmp;  /* Put comparison routine where cmp_desc can find it */
 374         cmp = cmp_desc;
 375     }
 376
 377     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 378     else { Newx(aux,nmemb,gptr); }              /* allocate auxiliary array */
 379     level = 0;
 380     stackp = stack;
 381     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 382     stackp->offset = offset = 0;
         /* which[level & 1] is where runs are now, which[(level & 1) + 1]
          * is where merged runs go; which[2] repeats base so that the
          * "+ 1" lookup works for odd levels too.
          */
 383     which[0] = which[2] = base;
 384     which[1] = aux;
 385     for (;;) {
 386         /* On levels where both runs have been constructed (stackp->runs == 0),
 387          * merge them, and note the offset of their end, in case the offset
 388          * is needed at the next level up.  Hop up a level, and,
 389          * as long as stackp->runs is 0, keep merging.
 390          */
 391         IV runs = stackp->runs;
 392         if (runs == 0) {
 393             gptr *list1, *list2;
 394             iwhich = level & 1;
 395             list1 = which[iwhich];              /* area where runs are now */
 396             list2 = which[++iwhich];            /* area for merged runs */
 397             do {
 398                 register gptr *l1, *l2, *tp2;
 399                 offset = stackp->offset;
 400                 f1 = p1 = list1 + offset;               /* start of first run */
 401                 p = tp2 = list2 + offset;       /* where merged run will go */
 402                 t = NEXT(p);                    /* where first run ends */
 403                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 404                 t = NEXT(t);                    /* where second run ends */
 405                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 406                 offset = PNELEM(list2, t);
 407                 while (f1 < l1 && f2 < l2) {
 408                     /* If head 1 is larger than head 2, find ALL the elements
 409                     ** in list 2 strictly less than head1, write them all,
 410                     ** then head 1.  Then compare the new heads, and repeat,
 411                     ** until one or both lists are exhausted.
 412                     **
 413                     ** In all comparisons (after establishing
 414                     ** which head to merge) the item to merge
 415                     ** (at pointer q) is the first operand of
 416                     ** the comparison.  When we want to know
 417                     ** if "q is strictly less than the other",
 418                     ** we can't just do
 419                     **    cmp(q, other) < 0
 420                     ** because stability demands that we treat equality
 421                     ** as high when q comes from l2, and as low when
 422                     ** q was from l1.  So we ask the question by doing
 423                     **    cmp(q, other) <= sense
 424                     ** and make sense == 0 when equality should look low,
 425                     ** and -1 when equality should look high.
 426                     */
 427
 428                     register gptr *q;
 429                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 430                         q = f2; b = f1; t = l1;
 431                         sense = -1;
 432                     } else {
 433                         q = f1; b = f2; t = l2;
 434                         sense = 0;
 435                     }
 436
 437
 438                     /* ramp up
 439                     **
 440                     ** Leave t at something strictly
 441                     ** greater than q (or at the end of the list),
 442                     ** and b at something strictly less than q.
                         **
                         ** The step i stays at 1 for the first RTHRESH
                         ** probes and doubles on every probe after that.
 443                     */
 444                     for (i = 1, run = 0 ;;) {
 445                         if ((p = PINDEX(b, i)) >= t) {
 446                             /* off the end */
 447                             if (((p = PINDEX(t, -1)) > b) &&
 448                                 (cmp(aTHX_ *q, *p) <= sense))
 449                                  t = p;
 450                             else b = p;
 451                             break;
 452                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 453                             t = p;
 454                             break;
 455                         } else b = p;
 456                         if (++run >= RTHRESH) i += i;
 457                     }
 458
 459
 460                     /* q is known to follow b and must be inserted before t.
 461                     ** Increment b, so the range of possibilities is [b,t).
 462                     ** Round binary split down, to favor early appearance.
 463                     ** Adjust b and t until q belongs just before t.
 464                     */
 465
 466                     b++;
 467                     while (b < t) {
 468                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 469                         if (cmp(aTHX_ *q, *p) <= sense) {
 470                             t = p;
 471                         } else b = p + 1;
 472                     }
 473
 474
 475                     /* Copy all the strictly low elements */
 476
 477                     if (q == f1) {
 478                         FROMTOUPTO(f2, tp2, t);
 479                         *tp2++ = *f1++;
 480                     } else {
 481                         FROMTOUPTO(f1, tp2, t);
 482                         *tp2++ = *f2++;
 483                     }
 484                 }
 485
 486
 487                 /* Run out remaining list */
 488                 if (f1 == l1) {
 489                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 490                 } else              FROMTOUPTO(f1, tp2, l1);
                     /* mark the end of the merged run in the list it came from */
 491                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 492
 493                 if (--level == 0) goto done;
 494                 --stackp;
 495                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 496             } while ((runs = stackp->runs) == 0);
 497         }
 498
 499
 500         stackp->runs = 0;               /* current run will finish level */
 501         /* While there are more than 2 runs remaining,
 502          * turn them into exactly 2 runs (at the "other" level),
 503          * each made up of approximately half the runs.
 504          * Stack the second half for later processing,
 505          * and set about producing the first half now.
 506          */
 507         while (runs > 2) {
 508             ++level;
 509             ++stackp;
 510             stackp->offset = offset;
 511             runs -= stackp->runs = runs / 2;
 512         }
 513         /* We must construct a single run from 1 or 2 runs.
 514          * All the original runs are in which[0] == base.
 515          * The run we construct must end up in which[level&1].
 516          */
 517         iwhich = level & 1;
 518         if (runs == 1) {
 519             /* Constructing a single run from a single run.
 520              * If it's where it belongs already, there's nothing to do.
 521              * Otherwise, copy it to where it belongs.
 522              * A run of 1 is either a singleton at level 0,
 523              * or the second half of a split 3.  In neither event
 524              * is it necessary to set offset.  It will be set by the merge
 525              * that immediately follows.
 526              */
 527             if (iwhich) {       /* Belongs in aux, currently in base */
 528                 f1 = b = PINDEX(base, offset);  /* where list starts */
 529                 f2 = PINDEX(aux, offset);       /* where list goes */
 530                 t = NEXT(f2);                   /* where list will end */
 531                 offset = PNELEM(aux, t);        /* offset thereof */
 532                 t = PINDEX(base, offset);       /* where it currently ends */
 533                 FROMTOUPTO(f1, f2, t);          /* copy */
 534                 NEXT(b) = t;                    /* set up parallel pointer */
 535             } else if (level == 0) goto done;   /* single run at level 0 */
 536         } else {
 537             /* Constructing a single run from two runs.
 538              * The merge code at the top will do that.
 539              * We need only make sure the two runs are in the "other" array,
 540              * so they'll end up in the correct array after the merge.
 541              */
 542             ++level;
 543             ++stackp;
 544             stackp->offset = offset;
 545             stackp->runs = 0;   /* take care of both runs, trigger merge */
 546             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 547                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 548                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 549                 t = NEXT(f2);                   /* where first run will end */
 550                 offset = PNELEM(aux, t);        /* offset thereof */
 551                 p = PINDEX(base, offset);       /* end of first run */
 552                 t = NEXT(t);                    /* where second run will end */
 553                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 554                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 555                 NEXT(b) = p;                    /* parallel pointer for 1st */
 556                 NEXT(p) = t;                    /* ... and for second */
 557             }
 558         }
 559     }
 560 done:
 561     if (aux != small) Safefree(aux);    /* free iff allocated */
 562     if (flags) {
 563          PL_sort_RealCmp = savecmp;     /* Restore current comparison routine, if any */
 564     }
 565     return;
 566 }
 567
 568 /*
 569  * The quicksort implementation was derived from source code contributed
 570  * by Tom Horsley.
 571  *
 572  * NOTE: this code was derived from Tom Horsley's qsort replacement
 573  * and should not be confused with the original code.
 574  */
 575
 576 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 577
 578    Permission granted to distribute under the same terms as perl which are
 579    (briefly):
 580
 581     This program is free software; you can redistribute it and/or modify
 582     it under the terms of either:
 583
 584         a) the GNU General Public License as published by the Free
 585         Software Foundation; either version 1, or (at your option) any
 586         later version, or
 587
 588         b) the "Artistic License" which comes with this Kit.
 589
 590    Details on the perl license can be found in the perl source code which
 591    may be located via the www.perl.com web page.
 592
 593    This is the most wonderfulest possible qsort I can come up with (and
 594    still be mostly portable) My (limited) tests indicate it consistently
 595    does about 20% fewer calls to compare than does the qsort in the Visual
 596    C++ library, other vendors may vary.
 597
 598    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 599    others I invented myself (or more likely re-invented since they seemed
 600    pretty obvious once I watched the algorithm operate for a while).
 601
 602    Most of this code was written while watching the Marlins sweep the Giants
 603    in the 1997 National League Playoffs - no Braves fans allowed to use this
 604    code (just kidding :-).
 605
 606    I realize that if I wanted to be true to the perl tradition, the only
 607    comment in this file would be something like:
 608
 609    ...they shuffled back towards the rear of the line. 'No, not at the
 610    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 611
 612    However, I really needed to violate that tradition just so I could keep
 613    track of what happens myself, not to mention some poor fool trying to
 614    understand this years from now :-).
 615 */
 616
 617 /* ********************************************************** Configuration */
 618
 619 #ifndef QSORT_ORDER_GUESS
 620 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 621 #endif
 622
 623 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 624    future processing - a good max upper bound is log base 2 of memory size
 625    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality it can
 626    safely be smaller than that since the program is taking up some space and
 627    most operating systems only let you grab some subset of contiguous
 628    memory (not to mention that you are normally sorting data larger than
 629    1 byte element size :-).
 630 */
 631 #ifndef QSORT_MAX_STACK
 632 #define QSORT_MAX_STACK 32
 633 #endif
 634
 635 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 636    Anything bigger and we use qsort. If you make this too small, the qsort
 637    will probably break (or become less efficient), because it doesn't expect
 638    the middle element of a partition to be the same as the right or left -
 639    you have been warned.
 640 */
 641 #ifndef QSORT_BREAK_EVEN
 642 #define QSORT_BREAK_EVEN 6
 643 #endif
 644
 645 /* QSORT_PLAY_SAFE is the size of the largest partition we're willing
 646    to go quadratic on.  We inoculate larger partitions against
 647    quadratic behavior by shuffling them before sorting.  This is not
 648    an absolute guarantee of non-quadratic behavior, but it would take
 649    staggeringly bad luck to pick extreme elements as the pivot
 650    from randomized data.
 651 */
 652 #ifndef QSORT_PLAY_SAFE
 653 #define QSORT_PLAY_SAFE 255
 654 #endif
 655
 656 /* ************************************************************* Data Types */
 657
 658 /* hold left and right index values of a partition waiting to be sorted (the
 659    partition includes both left and right - right is NOT one past the end or
 660    anything like that).
 661 */
 662 struct partition_stack_entry {
 663    int left;               /* index of the first element of the partition */
 664    int right;              /* index of the last element (inclusive) */
 665 #ifdef QSORT_ORDER_GUESS
        /* only present when QSORT_ORDER_GUESS is defined; presumably the
           break-even size to use for this partition -- TODO confirm against
           the code that pushes and pops entries */
 666    int qsort_break_even;
 667 #endif
 668 };
 669
 670 /* ******************************************************* Shorthand Macros */
 671
 672 /* Note that these macros will be used from inside the qsort function where
 673    we happen to know that the variable 'elt_size' contains the size of an
 674    array element and the variable 'temp' points to enough space to hold a
 675    temp element and the variable 'array' points to the array being sorted
 676    and 'compare' is the pointer to the compare routine.
 677
 678    Also note that there are very many highly architecture specific ways
 679    these might be sped up, but this is simply the most generally portable
 680    code I could think of.
 681 */
 682
 683 /* Return < 0, == 0 or > 0 as the value of elt1 is < elt2, == elt2, > elt2.
        elt1 and elt2 are indexes into 'array'; the elements are compared by
        calling the 'compare' routine that is in scope at the point of use.
 684 */
 685 #define qsort_cmp(elt1, elt2) \
 686    ((*compare)(aTHX_ array[elt1], array[elt2]))
 687
     /* When QSORT_ORDER_GUESS is defined, every swap or rotate also bumps the
        'swapped' counter that must be in scope at the point of use; otherwise
        this expands to nothing.
     */
 688 #ifdef QSORT_ORDER_GUESS
 689 #define QSORT_NOTICE_SWAP swapped++;
 690 #else
 691 #define QSORT_NOTICE_SWAP
 692 #endif
 693
 694 /* swaps contents of array elements elt1, elt2, using 'temp' as scratch.
 695 */
 696 #define qsort_swap(elt1, elt2) \
 697    STMT_START { \
 698       QSORT_NOTICE_SWAP \
 699       temp = array[elt1]; \
 700       array[elt1] = array[elt2]; \
 701       array[elt2] = temp; \
 702    } STMT_END
 703
 704 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 705    elt3 and elt3 gets elt1.  Counts as a single swap for QSORT_NOTICE_SWAP.
 706 */
 707 #define qsort_rotate(elt1, elt2, elt3) \
 708    STMT_START { \
 709       QSORT_NOTICE_SWAP \
 710       temp = array[elt1]; \
 711       array[elt1] = array[elt2]; \
 712       array[elt2] = array[elt3]; \
 713       array[elt3] = temp; \
 714    } STMT_END
 715
 716 /* ************************************************************ Debug stuff */
 717
 718 #ifdef QSORT_DEBUG
 719
     /* Does nothing; it exists so that a failed qsort_assert() has somewhere
        to stop under a debugger. */
 720 static void
 721 break_here(void)
 722 {
 723    return; /* good place to set a breakpoint */
 724 }
 725
     /* Evaluate t once; if it is false, call break_here(). */
 726 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 727
     /* Check the partitioning invariants around the pivot chunk:
          - pc_left <= pc_right, u_right < pc_left and pc_right < u_left;
          - every element strictly between u_right and pc_left compares
            less than the element at pc_left;
          - every element from pc_left up to (not including) pc_right
            compares equal to the element at pc_right;
          - the element at pc_right compares less than every element
            strictly between pc_right and u_left.

        array and compare have the same types as in S_qsortsvu, because the
        qsort_cmp macro indexes 'array' and calls 'compare' with the
        interpreter context.  num_elts is accepted but not examined.
     */
 728 static void
 729 doqsort_all_asserts(
 730    pTHX_
 731    SV ** array,
 732    size_t num_elts,
 733    SVCOMPARE_t compare,
 734    int pc_left, int pc_right, int u_left, int u_right)
 735 {
 736    int i;
 737
 738    qsort_assert(pc_left <= pc_right);
 739    qsort_assert(u_right < pc_left);
 740    qsort_assert(pc_right < u_left);
 741    for (i = u_right + 1; i < pc_left; ++i) {
 742       qsort_assert(qsort_cmp(i, pc_left) < 0);
 743    }
 744    for (i = pc_left; i < pc_right; ++i) {
 745       qsort_assert(qsort_cmp(i, pc_right) == 0);
 746    }
 747    for (i = pc_right + 1; i < u_left; ++i) {
 748       qsort_assert(qsort_cmp(pc_right, i) < 0);
 749    }
 750 }
 751
     /* Picks up 'array', 'num_elts' and 'compare' from the scope in which
        it is used. */
 752 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 753    doqsort_all_asserts(aTHX_ array, num_elts, compare, \
 754                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 755
 756 #else
 757
     /* Without QSORT_DEBUG both checks compile away to nothing. */
 758 #define qsort_assert(t) ((void)0)
 759
 760 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 761
 762 #endif
 763
 764 /* ****************************************************************** qsort */
 765
/* S_qsortsvu - sort `array` (num_elts SV pointers) in place using the
 * comparison routine `compare`.  The sort is not stable.
 *
 * Outline:
 *   - arrays of 0 or 1 elements are returned untouched;
 *   - arrays larger than QSORT_PLAY_SAFE are randomly shuffled first;
 *   - partitions spanning at least qsort_break_even index positions are
 *     split around a "pivot chunk" of equal elements, smaller ones are
 *     finished off with an insertion sort;
 *   - instead of recursing, one of the two sub-partitions is pushed on a
 *     fixed-size explicit stack (partition_stack) and the other is
 *     processed on the next trip round the outer loop.
 *
 * NOTE(review): partition indices are plain `int` while num_elts is a
 * size_t, so the cast below would truncate for arrays with more than
 * INT_MAX elements.
 */
STATIC void /* the standard unstable (u) quicksort (qsort) */
S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
{
   /* scratch slot used by the qsort_swap()/qsort_rotate() macros, the
      shuffle and the insertion sort below */
   register SV * temp;

   /* partitions still waiting to be sorted */
   struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
   int next_stack_entry = 0;

   /* inclusive bounds of the partition currently being worked on */
   int part_left;
   int part_right;
#ifdef QSORT_ORDER_GUESS
   int qsort_break_even;
   int swapped;
#endif

   /* Make sure we actually have work to do.
   */
   if (num_elts <= 1) {
      return;
   }

   /* Inoculate large partitions against quadratic behavior by shuffling
      the whole array: n walks down from num_elts and, at each step, the
      element at index n-1 is exchanged with the one at index
      j = n * Drand01() (presumably 0 <= j < n - confirm Drand01's range).
   */
   if (num_elts > QSORT_PLAY_SAFE) {
      register size_t n;
      register SV ** const q = array;
      for (n = num_elts; n > 1; ) {
         register const size_t j = (size_t)(n-- * Drand01());
         temp = q[j];
         q[j] = q[n];
         q[n] = temp;
      }
   }

   /* Setup the initial partition definition and fall into the sorting loop
   */
   part_left = 0;
   part_right = (int)(num_elts - 1);
#ifdef QSORT_ORDER_GUESS
   qsort_break_even = QSORT_BREAK_EVEN;
#else
/* Without order guessing the threshold never changes, so the name is
   simply an alias for the constant (and stays defined after this
   function). */
#define qsort_break_even QSORT_BREAK_EVEN
#endif
   for ( ; ; ) {
      if ((part_right - part_left) >= qsort_break_even) {
         /* OK, this is gonna get hairy, so let's try to document all the
            concepts and abbreviations and variables and what they keep
            track of:

            pc: pivot chunk - the set of array elements we accumulate in the
                middle of the partition, all equal in value to the original
                pivot element selected. The pc is defined by:

                pc_left - the leftmost array index of the pc
                pc_right - the rightmost array index of the pc

                we start with pc_left == pc_right and only one element
                in the pivot chunk (but it can grow during the scan).

            u:  uncompared elements - the set of elements in the partition
                we have not yet compared to the pivot value. There are two
                uncompared sets during the scan - one to the left of the pc
                and one to the right.

                u_right - the rightmost index of the left side's uncompared set
                u_left - the leftmost index of the right side's uncompared set

                The leftmost index of the left side's uncompared set
                doesn't need its own variable because it is always defined
                by the leftmost edge of the whole partition (part_left). The
                same goes for the rightmost edge of the right partition
                (part_right).

                We know there are no uncompared elements on the left once we
                get u_right < part_left and no uncompared elements on the
                right once u_left > part_right. When both these conditions
                are met, we have completed the scan of the partition.

                Any elements which are between the pivot chunk and the
                uncompared elements should be less than the pivot value on
                the left side and greater than the pivot value on the right
                side (in fact, the goal of the whole algorithm is to arrange
                for that to be true and make the groups of less-than and
                greater-than elements into new partitions to sort again).

            As you marvel at the complexity of the code and wonder why it
            has to be so confusing, consider some of the things this level
            of confusion brings:

            Once I do a compare, I squeeze every ounce of juice out of it. I
            never do compare calls I don't have to do, and I certainly never
            do redundant calls.

            I also never swap any elements unless I can prove there is a
            good reason. Many sort algorithms will swap a known value with
            an uncompared value just to get things in the right place (or
            avoid complexity :-), but that uncompared value, once it gets
            compared, may then have to be swapped again. A lot of the
            complexity of this code is due to the fact that it never swaps
            anything except compared values, and it only swaps them when the
            compare shows they are out of position.
         */
         int pc_left, pc_right;
         int u_right, u_left;

         int s;

         pc_left = ((part_left + part_right) / 2);
         pc_right = pc_left;
         u_right = pc_left - 1;
         u_left = pc_right + 1;

         /* Qsort works best when the pivot value is also the median value
            in the partition (unfortunately you can't find the median value
            without first sorting :-), so to give the algorithm a helping
            hand, we pick 3 elements and sort them and use the median value
            of that tiny set as the pivot value.

            Some versions of qsort like to use the left middle and right as
            the 3 elements to sort so they can ensure the ends of the
            partition will contain values which will stop the scan in the
            compare loop, but when you have to call an arbitrarily complex
            routine to do a compare, it's really better to just keep track of
            array index values to know when you hit the edge of the
            partition and avoid the extra compare. An even better reason to
            avoid using a compare call is the fact that you can drop off the
            edge of the array if someone foolishly provides you with an
            unstable compare function that doesn't always provide consistent
            results.

            So, since it is simpler for us to compare the three adjacent
            elements in the middle of the partition, those are the ones we
            pick here (conveniently pointed at by u_right, pc_left, and
            u_left). The values of the left, center, and right elements
            are referred to as l c and r in the following comments.
         */

#ifdef QSORT_ORDER_GUESS
         swapped = 0;
#endif
         s = qsort_cmp(u_right, pc_left);
         if (s < 0) {
            /* l < c */
            s = qsort_cmp(pc_left, u_left);
            /* if l < c, c < r - already in order - nothing to do */
            if (s == 0) {
               /* l < c, c == r - already in order, pc grows */
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else if (s > 0) {
               /* l < c, c > r - need to know more */
               s = qsort_cmp(u_right, u_left);
               if (s < 0) {
                  /* l < c, c > r, l < r - swap c & r to get ordered */
                  qsort_swap(pc_left, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else if (s == 0) {
                  /* l < c, c > r, l == r - swap c&r, grow pc */
                  qsort_swap(pc_left, u_left);
                  --pc_left;
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else {
                  /* l < c, c > r, l > r - make lcr into rlc to get ordered */
                  qsort_rotate(pc_left, u_right, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               }
            }
         } else if (s == 0) {
            /* l == c */
            s = qsort_cmp(pc_left, u_left);
            if (s < 0) {
               /* l == c, c < r - already in order, grow pc */
               --pc_left;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else if (s == 0) {
               /* l == c, c == r - already in order, grow pc both ways */
               --pc_left;
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else {
               /* l == c, c > r - swap l & r, grow pc */
               qsort_swap(u_right, u_left);
               ++pc_right;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            }
         } else {
            /* l > c */
            s = qsort_cmp(pc_left, u_left);
            if (s < 0) {
               /* l > c, c < r - need to know more */
               s = qsort_cmp(u_right, u_left);
               if (s < 0) {
                  /* l > c, c < r, l < r - swap l & c to get ordered */
                  qsort_swap(u_right, pc_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else if (s == 0) {
                  /* l > c, c < r, l == r - swap l & c, grow pc */
                  qsort_swap(u_right, pc_left);
                  ++pc_right;
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               } else {
                  /* l > c, c < r, l > r - rotate lcr into crl to order */
                  qsort_rotate(u_right, pc_left, u_left);
                  qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
               }
            } else if (s == 0) {
               /* l > c, c == r - swap ends, grow pc */
               qsort_swap(u_right, u_left);
               --pc_left;
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            } else {
               /* l > c, c > r - swap ends to get in order */
               qsort_swap(u_right, u_left);
               qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
            }
         }
         /* We now know the 3 middle elements have been compared and
            arranged in the desired order, so we can shrink the uncompared
            sets on both sides
         */
         --u_right;
         ++u_left;
         qsort_all_asserts(pc_left, pc_right, u_left, u_right);

         /* The above massive nested if was the simple part :-). We now have
            the middle 3 elements ordered and we need to scan through the
            uncompared sets on either side, swapping elements that are on
            the wrong side or simply shuffling equal elements around to get
            all equal elements into the pivot chunk.
         */

         for ( ; ; ) {
            /* Each flag is false once its side has been scanned right up
               to the partition edge, and true if the scan stopped early on
               an element that belongs on the other side. */
            int still_work_on_left;
            int still_work_on_right;

            /* Scan the uncompared values on the left. If I find a value
               equal to the pivot value, move it over so it is adjacent to
               the pivot chunk and expand the pivot chunk. If I find a value
               less than the pivot value, then just leave it - it's already
               on the correct side of the partition. If I find a greater
               value, then stop the scan.
            */
            while ((still_work_on_left = (u_right >= part_left))) {
               s = qsort_cmp(u_right, pc_left);
               if (s < 0) {
                  --u_right;
               } else if (s == 0) {
                  --pc_left;
                  if (pc_left != u_right) {
                     qsort_swap(u_right, pc_left);
                  }
                  --u_right;
               } else {
                  break;
               }
               qsort_assert(u_right < pc_left);
               qsort_assert(pc_left <= pc_right);
               qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
               qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
            }

            /* Do a mirror image scan of uncompared values on the right
            */
            while ((still_work_on_right = (u_left <= part_right))) {
               s = qsort_cmp(pc_right, u_left);
               if (s < 0) {
                  ++u_left;
               } else if (s == 0) {
                  ++pc_right;
                  if (pc_right != u_left) {
                     qsort_swap(pc_right, u_left);
                  }
                  ++u_left;
               } else {
                  break;
               }
               qsort_assert(u_left > pc_right);
               qsort_assert(pc_left <= pc_right);
               qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
               qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
            }

            if (still_work_on_left) {
               /* I know I have a value on the left side which needs to be
                  on the right side, but I need to know more to decide
                  exactly the best thing to do with it.
               */
               if (still_work_on_right) {
                  /* I know I have values on both sides which are out of
                     position. This is a big win because I kill two birds
                     with one swap (so to speak). I can advance the
                     uncompared pointers on both sides after swapping both
                     of them into the right place.
                  */
                  qsort_swap(u_right, u_left);
                  --u_right;
                  ++u_left;
                  qsort_all_asserts(pc_left, pc_right, u_left, u_right);
               } else {
                  /* I have an out of position value on the left, but the
                     right is fully scanned, so I "slide" the pivot chunk
                     and any less-than values left one to make room for the
                     greater value over on the right. If the out of position
                     value is immediately adjacent to the pivot chunk (there
                     are no less-than values), I can do that with a swap,
                     otherwise, I have to rotate one of the less than values
                     into the former position of the out of position value
                     and the right end of the pivot chunk into the left end
                     (got all that?).
                  */
                  --pc_left;
                  if (pc_left == u_right) {
                     qsort_swap(u_right, pc_right);
                     qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                  } else {
                     qsort_rotate(u_right, pc_left, pc_right);
                     qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                  }
                  --pc_right;
                  --u_right;
               }
            } else if (still_work_on_right) {
               /* Mirror image of complex case above: I have an out of
                  position value on the right, but the left is fully
                  scanned, so I need to shuffle things around to make room
                  for the right value on the left.
               */
               ++pc_right;
               if (pc_right == u_left) {
                  qsort_swap(u_left, pc_left);
                  qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
               } else {
                  qsort_rotate(pc_right, pc_left, u_left);
                  qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
               }
               ++pc_left;
               ++u_left;
            } else {
               /* No more scanning required on either side of partition,
                  break out of loop and figure out next set of partitions
               */
               break;
            }
         }

         /* The elements in the pivot chunk are now in the right place. They
            will never move or be compared again. All I have to do is decide
            what to do with the stuff to the left and right of the pivot
            chunk.

            Notes on the QSORT_ORDER_GUESS ifdef code:

            1. If I just built these partitions without swapping any (or
               very many) elements, there is a chance that the elements are
               already ordered properly (being properly ordered will
               certainly result in no swapping, but the converse can't be
               proved :-).

            2. A (properly written) insertion sort will run faster on
               already ordered data than qsort will.

            3. Perhaps there is some way to make a good guess about
               switching to an insertion sort earlier than partition size 6
               (for instance - we could save the partition size on the stack
               and increase the size each time we find we didn't swap, thus
               switching to insertion sort earlier for partitions with a
               history of not swapping).

            4. Naturally, if I just switch right away, it will make
               artificial benchmarks with pure ascending (or descending)
               data look really good, but is that a good reason in general?
               Hard to say...
         */

#ifdef QSORT_ORDER_GUESS
         if (swapped < 3) {
#if QSORT_ORDER_GUESS == 1
            qsort_break_even = (part_right - part_left) + 1;
#endif
#if QSORT_ORDER_GUESS == 2
            qsort_break_even *= 2;
#endif
#if QSORT_ORDER_GUESS == 3
            /* NOTE(review): the squaring below is signed arithmetic; the
               "< prev_break" test is evidently meant to catch wrap-around */
            const int prev_break = qsort_break_even;
            qsort_break_even *= qsort_break_even;
            if (qsort_break_even < prev_break) {
               qsort_break_even = (part_right - part_left) + 1;
            }
#endif
         } else {
            qsort_break_even = QSORT_BREAK_EVEN;
         }
#endif

         if (part_left < pc_left) {
            /* There are elements on the left which need more processing.
               Check the right as well before deciding what to do.
            */
            if (pc_right < part_right) {
               /* We have two partitions to be sorted. Stack the biggest one
                  and process the smallest one on the next iteration. This
                  minimizes the stack height by ensuring that any additional
                  stack entries must come from the smallest partition which
                  (because it is smallest) will have the fewest
                  opportunities to generate additional stack entries.
               */
               if ((part_right - pc_right) > (pc_left - part_left)) {
                  /* stack the right partition, process the left */
                  partition_stack[next_stack_entry].left = pc_right + 1;
                  partition_stack[next_stack_entry].right = part_right;
#ifdef QSORT_ORDER_GUESS
                  partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                  part_right = pc_left - 1;
               } else {
                  /* stack the left partition, process the right */
                  partition_stack[next_stack_entry].left = part_left;
                  partition_stack[next_stack_entry].right = pc_left - 1;
#ifdef QSORT_ORDER_GUESS
                  partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                  part_left = pc_right + 1;
               }
               /* NOTE(review): this check runs after the entry has already
                  been written, and is a no-op unless QSORT_DEBUG is on */
               qsort_assert(next_stack_entry < QSORT_MAX_STACK);
               ++next_stack_entry;
            } else {
               /* The elements on the left are the only remaining elements
                  that need sorting, arrange for them to be processed as the
                  next partition.
               */
               part_right = pc_left - 1;
            }
         } else if (pc_right < part_right) {
            /* There is only one chunk on the right to be sorted, make it
               the new partition and loop back around.
            */
            part_left = pc_right + 1;
         } else {
            /* This whole partition wound up in the pivot chunk, so
               we need to get a new partition off the stack.
            */
            if (next_stack_entry == 0) {
               /* the stack is empty - we are done */
               break;
            }
            --next_stack_entry;
            part_left = partition_stack[next_stack_entry].left;
            part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
            qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
         }
      } else {
         /* This partition is too small to fool with qsort complexity, just
            do an ordinary insertion sort to minimize overhead.
         */
         int i;
         /* Assume 1st element is in right place already, and start checking
            at 2nd element to see where it should be inserted.
         */
         for (i = part_left + 1; i <= part_right; ++i) {
            int j;
            /* Scan (backwards - just in case 'i' is already in right place)
               through the elements already sorted to see if the ith element
               belongs ahead of one of them.
            */
            for (j = i - 1; j >= part_left; --j) {
               if (qsort_cmp(i, j) >= 0) {
                  /* i belongs right after j
                  */
                  break;
               }
            }
            /* j now indexes the slot the ith element should occupy */
            ++j;
            if (j != i) {
               /* Looks like we really need to move some things: shift
                  elements j..i-1 up by one and drop the saved element
                  into slot j
               */
               int k;
               temp = array[i];
               for (k = i - 1; k >= j; --k)
                  array[k + 1] = array[k];
               array[j] = temp;
            }
         }

         /* That partition is now sorted, grab the next one, or get out
            of the loop if there aren't any more.
         */

         if (next_stack_entry == 0) {
            /* the stack is empty - we are done */
            break;
         }
         --next_stack_entry;
         part_left = partition_stack[next_stack_entry].left;
         part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
         qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
      }
   }

   /* Believe it or not, the array is sorted at this point! */
}
1269
1270 /* Stabilize what is, presumably, an otherwise unstable sort method.
1271  * We do that by allocating (or having on hand) an array of pointers
1272  * that is the same size as the original array of elements to be sorted.
1273  * We initialize this parallel array with the addresses of the original
1274  * array elements.  This indirection can make you crazy.
1275  * Some pictures can help.  After initializing, we have
1276  *
1277  *  indir                  list1
1278  * +----+                 +----+
1279  * |    | --------------> |    | ------> first element to be sorted
1280  * +----+                 +----+
1281  * |    | --------------> |    | ------> second element to be sorted
1282  * +----+                 +----+
1283  * |    | --------------> |    | ------> third element to be sorted
1284  * +----+                 +----+
1285  *  ...
1286  * +----+                 +----+
1287  * |    | --------------> |    | ------> n-1st element to be sorted
1288  * +----+                 +----+
1289  * |    | --------------> |    | ------> n-th element to be sorted
1290  * +----+                 +----+
1291  *
1292  * During the sort phase, we leave the elements of list1 where they are,
1293  * and sort the pointers in the indirect array in the same order determined
1294  * by the original comparison routine on the elements pointed to.
1295  * Because we don't move the elements of list1 around through
1296  * this phase, we can break ties on elements that compare equal
 * using their address in the list1 array, ensuring stability.
1298  * This leaves us with something looking like
1299  *
1300  *  indir                  list1
1301  * +----+                 +----+
1302  * |    | --+       +---> |    | ------> first element to be sorted
1303  * +----+   |       |     +----+
1304  * |    | --|-------|---> |    | ------> second element to be sorted
1305  * +----+   |       |     +----+
1306  * |    | --|-------+ +-> |    | ------> third element to be sorted
1307  * +----+   |         |   +----+
1308  *  ...
1309  * +----+    | |   | |    +----+
1310  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1311  * +----+    |     |      +----+
1312  * |    | ---+     +----> |    | ------> n-th element to be sorted
1313  * +----+                 +----+
1314  *
1315  * where the i-th element of the indirect array points to the element
1316  * that should be i-th in the sorted array.  After the sort phase,
1317  * we have to put the elements of list1 into the places
1318  * dictated by the indirect array.
1319  */
1320
1321
1322 static I32
1323 cmpindir(pTHX_ gptr a, gptr b)
1324 {
1325     I32 sense;
1326     gptr * const ap = (gptr *)a;
1327     gptr * const bp = (gptr *)b;
1328
1329     if ((sense = PL_sort_RealCmp(aTHX_ *ap, *bp)) == 0)
1330          sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1331     return sense;
1332 }
1333
1334 static I32
1335 cmpindir_desc(pTHX_ gptr a, gptr b)
1336 {
1337     I32 sense;
1338     gptr * const ap = (gptr *)a;
1339     gptr * const bp = (gptr *)b;
1340
1341     /* Reverse the default */
1342     if ((sense = PL_sort_RealCmp(aTHX_ *ap, *bp)))
1343         return -sense;
1344     /* But don't reverse the stability test.  */
1345     return (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1346
1347 }
1348
1349 STATIC void
1350 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1351 {
1352
1353     dSORTHINTS;
1354
1355     if (SORTHINTS & HINT_SORT_STABLE) {
1356          register gptr **pp, *q;
1357          register size_t n, j, i;
1358          gptr *small[SMALLSORT], **indir, tmp;
1359          SVCOMPARE_t savecmp;
1360          if (nmemb <= 1) return;     /* sorted trivially */
1361
1362          /* Small arrays can use the stack, big ones must be allocated */
1363          if (nmemb <= SMALLSORT) indir = small;
1364          else { Newx(indir, nmemb, gptr *); }
1365
1366          /* Copy pointers to original array elements into indirect array */
1367          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1368
1369          savecmp = PL_sort_RealCmp;     /* Save current comparison routine, if any */
1370          PL_sort_RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1371
1372          /* sort, with indirection */
1373          S_qsortsvu(aTHX_ (gptr *)indir, nmemb,
1374                     flags ? cmpindir_desc : cmpindir);
1375
1376          pp = indir;
1377          q = list1;
1378          for (n = nmemb; n--; ) {
1379               /* Assert A: all elements of q with index > n are already
1380                * in place.  This is vacuosly true at the start, and we
1381                * put element n where it belongs below (if it wasn't
1382                * already where it belonged). Assert B: we only move
1383                * elements that aren't where they belong,
1384                * so, by A, we never tamper with elements above n.
1385                */
1386               j = pp[n] - q;            /* This sets j so that q[j] is
1387                                          * at pp[n].  *pp[j] belongs in
1388                                          * q[j], by construction.
1389                                          */
1390               if (n != j) {             /* all's well if n == j */
1391                    tmp = q[j];          /* save what's in q[j] */
1392                    do {
1393                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1394                         i = pp[j] - q;  /* the index in q of the element
1395                                          * just moved */
1396                         pp[j] = q + j;  /* this is ok now */
1397                    } while ((j = i) != n);
1398                    /* There are only finitely many (nmemb) addresses
1399                     * in the pp array.
1400                     * So we must eventually revisit an index we saw before.
1401                     * Suppose the first revisited index is k != n.
1402                     * An index is visited because something else belongs there.
1403                     * If we visit k twice, then two different elements must
1404                     * belong in the same place, which cannot be.
1405                     * So j must get back to n, the loop terminates,
1406                     * and we put the saved element where it belongs.
1407                     */
1408                    q[n] = tmp;          /* put what belongs into
1409                                          * the n-th element */
1410               }
1411          }
1412
1413         /* free iff allocated */
1414          if (indir != small) { Safefree(indir); }
1415          /* restore prevailing comparison routine */
1416          PL_sort_RealCmp = savecmp;
1417     } else if (flags) {
1418          SVCOMPARE_t savecmp = PL_sort_RealCmp; /* Save current comparison routine, if any */
1419          PL_sort_RealCmp = cmp; /* Put comparison routine where cmp_desc can find it */
1420          cmp = cmp_desc;
1421          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1422          /* restore prevailing comparison routine */
1423          PL_sort_RealCmp = savecmp;
1424     } else {
1425          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1426     }
1427 }
1428
1429 /*
1430 =head1 Array Manipulation Functions
1431
1432 =for apidoc sortsv
1433
1434 Sort an array. Here is an example:
1435
1436     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1437
1438 See lib/sort.pm for details about controlling the sorting algorithm.
1439
1440 =cut
1441 */
1442
1443 void
1444 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1445 {
1446     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1447       = S_mergesortsv;
1448     dSORTHINTS;
1449     const I32 hints = SORTHINTS;
1450     if (hints & HINT_SORT_QUICKSORT) {
1451         sortsvp = S_qsortsv;
1452     }
1453     else {
1454         /* The default as of 5.8.0 is mergesort */
1455         sortsvp = S_mergesortsv;
1456     }
1457
1458     sortsvp(aTHX_ array, nmemb, cmp, 0);
1459 }
1460
1461
1462 static void
1463 S_sortsv_desc(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1464 {
1465     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp, U32 flags)
1466       = S_mergesortsv;
1467     dSORTHINTS;
1468     const I32 hints = SORTHINTS;
1469     if (hints & HINT_SORT_QUICKSORT) {
1470         sortsvp = S_qsortsv;
1471     }
1472     else {
1473         /* The default as of 5.8.0 is mergesort */
1474         sortsvp = S_mergesortsv;
1475     }
1476
1477     sortsvp(aTHX_ array, nmemb, cmp, 1);
1478 }
1479
/* True when sv has IOK set and IVisUV clear, i.e. it holds a signed IV. */
#define SvSIOK(sv) ((SvFLAGS(sv) & (SVf_IOK|SVf_IVisUV)) == SVf_IOK)
/* True when sv has NOK set or holds a signed IV. */
#define SvNSIOK(sv) ((SvFLAGS(sv) & SVf_NOK) || SvSIOK(sv))
/* Numeric value of sv: the NV slot when NOK is set, otherwise the IV slot
 * when it is a signed IV, otherwise whatever sv_2nv() produces. */
#define SvNSIV(sv) (SvNOK(sv) ? SvNVX(sv) : (SvSIOK(sv) ? SvIVX(sv) : sv_2nv(sv)))
1483
1484 PP(pp_sort)
1485 {
1486     dVAR; dSP; dMARK; dORIGMARK;
1487     register SV **p1 = ORIGMARK+1, **p2;
1488     register I32 max, i;
1489     AV* av = Nullav;
1490     HV *stash;
1491     GV *gv;
1492     CV *cv = 0;
1493     I32 gimme = GIMME;
1494     OP* nextop = PL_op->op_next;
1495     I32 overloading = 0;
1496     bool hasargs = FALSE;
1497     I32 is_xsub = 0;
1498     I32 sorting_av = 0;
1499     const U8 priv = PL_op->op_private;
1500     const U8 flags = PL_op->op_flags;
1501     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1502       = Perl_sortsv;
1503     I32 all_SIVs = 1;
1504
1505     if (gimme != G_ARRAY) {
1506         SP = MARK;
1507         EXTEND(SP,1);
1508         RETPUSHUNDEF;
1509     }
1510
1511     ENTER;
1512     SAVEVPTR(PL_sortcop);
1513     if (flags & OPf_STACKED) {
1514         if (flags & OPf_SPECIAL) {
1515             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1516             kid = kUNOP->op_first;                      /* pass rv2gv */
1517             kid = kUNOP->op_first;                      /* pass leave */
1518             PL_sortcop = kid->op_next;
1519             stash = CopSTASH(PL_curcop);
1520         }
1521         else {
1522             cv = sv_2cv(*++MARK, &stash, &gv, 0);
1523             if (cv && SvPOK(cv)) {
1524                 const char *proto = SvPV_nolen_const((SV*)cv);
1525                 if (proto && strEQ(proto, "$$")) {
1526                     hasargs = TRUE;
1527                 }
1528             }
1529             if (!(cv && CvROOT(cv))) {
1530                 if (cv && CvXSUB(cv)) {
1531                     is_xsub = 1;
1532                 }
1533                 else if (gv) {
1534                     SV *tmpstr = sv_newmortal();
1535                     gv_efullname3(tmpstr, gv, Nullch);
1536                     DIE(aTHX_ "Undefined sort subroutine \"%"SVf"\" called",
1537                         tmpstr);
1538                 }
1539                 else {
1540                     DIE(aTHX_ "Undefined subroutine in sort");
1541                 }
1542             }
1543
1544             if (is_xsub)
1545                 PL_sortcop = (OP*)cv;
1546             else
1547                 PL_sortcop = CvSTART(cv);
1548         }
1549     }
1550     else {
1551         PL_sortcop = Nullop;
1552         stash = CopSTASH(PL_curcop);
1553     }
1554
1555     /* optimiser converts "@a = sort @a" to "sort \@a";
1556      * in case of tied @a, pessimise: push (@a) onto stack, then assign
1557      * result back to @a at the end of this function */
1558     if (priv & OPpSORT_INPLACE) {
1559         assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
1560         (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
1561         av = (AV*)(*SP);
1562         max = AvFILL(av) + 1;
1563         if (SvMAGICAL(av)) {
1564             MEXTEND(SP, max);
1565             p2 = SP;
1566             for (i=0; i < max; i++) {
1567                 SV **svp = av_fetch(av, i, FALSE);
1568                 *SP++ = (svp) ? *svp : Nullsv;
1569             }
1570         }
1571         else {
1572             if (SvREADONLY(av))
1573                 Perl_croak(aTHX_ PL_no_modify);
1574             else
1575                 SvREADONLY_on(av);
1576             p1 = p2 = AvARRAY(av);
1577             sorting_av = 1;
1578         }
1579     }
1580     else {
1581         p2 = MARK+1;
1582         max = SP - MARK;
1583    }
1584
1585     if (priv & OPpSORT_DESCEND) {
1586         sortsvp = S_sortsv_desc;
1587     }
1588
1589     /* shuffle stack down, removing optional initial cv (p1!=p2), plus
1590      * any nulls; also stringify or converting to integer or number as
1591      * required any args */
1592     for (i=max; i > 0 ; i--) {
1593         if ((*p1 = *p2++)) {                    /* Weed out nulls. */
1594             SvTEMP_off(*p1);
1595             if (!PL_sortcop) {
1596                 if (priv & OPpSORT_NUMERIC) {
1597                     if (priv & OPpSORT_INTEGER) {
1598                         if (!SvIOK(*p1)) {
1599                             if (SvAMAGIC(*p1))
1600                                 overloading = 1;
1601                             else
1602                                 (void)sv_2iv(*p1);
1603                         }
1604                     }
1605                     else {
1606                         if (!SvNSIOK(*p1)) {
1607                             if (SvAMAGIC(*p1))
1608                                 overloading = 1;
1609                             else
1610                                 (void)sv_2nv(*p1);
1611                         }
1612                         if (all_SIVs && !SvSIOK(*p1))
1613                             all_SIVs = 0;
1614                     }
1615                 }
1616                 else {
1617                     if (!SvPOK(*p1)) {
1618                         if (SvAMAGIC(*p1))
1619                             overloading = 1;
1620                         else
1621                             (void)sv_2pv_flags(*p1, 0,
1622                                                SV_GMAGIC|SV_CONST_RETURN);
1623                     }
1624                 }
1625             }
1626             p1++;
1627         }
1628         else
1629             max--;
1630     }
1631     if (sorting_av)
1632         AvFILLp(av) = max-1;
1633
1634     if (max > 1) {
1635         SV **start;
1636         if (PL_sortcop) {
1637             PERL_CONTEXT *cx;
1638             SV** newsp;
1639             const bool oldcatch = CATCH_GET;
1640
1641             SAVETMPS;
1642             SAVEOP();
1643
1644             CATCH_SET(TRUE);
1645             PUSHSTACKi(PERLSI_SORT);
1646             if (!hasargs && !is_xsub) {
1647                 SAVESPTR(PL_firstgv);
1648                 SAVESPTR(PL_secondgv);
1649                 SAVESPTR(PL_sortstash);
1650                 PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1651                 PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1652                 PL_sortstash = stash;
1653                 SAVESPTR(GvSV(PL_firstgv));
1654                 SAVESPTR(GvSV(PL_secondgv));
1655             }
1656
1657             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1658             if (!(flags & OPf_SPECIAL)) {
1659                 cx->cx_type = CXt_SUB;
1660                 cx->blk_gimme = G_SCALAR;
1661                 PUSHSUB(cx);
1662                 if (!is_xsub) {
1663                     AV* padlist = CvPADLIST(cv);
1664
1665                     if (++CvDEPTH(cv) >= 2) {
1666                         PERL_STACK_OVERFLOW_CHECK();
1667                         pad_push(padlist, CvDEPTH(cv));
1668                     }
1669                     SAVECOMPPAD();
1670                     PAD_SET_CUR_NOSAVE(padlist, CvDEPTH(cv));
1671
1672                     if (hasargs) {
1673                         /* This is mostly copied from pp_entersub */
1674                         AV *av = (AV*)PAD_SVl(0);
1675
1676                         cx->blk_sub.savearray = GvAV(PL_defgv);
1677                         GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1678                         CX_CURPAD_SAVE(cx->blk_sub);
1679                         cx->blk_sub.argarray = av;
1680                     }
1681
1682                 }
1683             }
1684             cx->cx_type |= CXp_MULTICALL;
1685
1686             start = p1 - max;
1687             sortsvp(aTHX_ start, max,
1688                     is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
1689
1690             if (!(flags & OPf_SPECIAL)) {
1691                 LEAVESUB(cv);
1692                 if (!is_xsub)
1693                     CvDEPTH(cv)--;
1694             }
1695             POPBLOCK(cx,PL_curpm);
1696             PL_stack_sp = newsp;
1697             POPSTACK;
1698             CATCH_SET(oldcatch);
1699         }
1700         else {
1701             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
1702             start = sorting_av ? AvARRAY(av) : ORIGMARK+1;
1703             sortsvp(aTHX_ start, max,
1704                     (priv & OPpSORT_NUMERIC)
1705                         ? ( ( ( priv & OPpSORT_INTEGER) || all_SIVs)
1706                             ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
1707                             : ( overloading ? amagic_ncmp : sv_ncmp ) )
1708                         : ( IN_LOCALE_RUNTIME
1709                             ? ( overloading
1710                                 ? amagic_cmp_locale
1711                                 : sv_cmp_locale_static)
1712                             : ( overloading ? amagic_cmp : sv_cmp_static)));
1713         }
1714         if (priv & OPpSORT_REVERSE) {
1715             SV **q = start+max-1;
1716             while (start < q) {
1717                 SV *tmp = *start;
1718                 *start++ = *q;
1719                 *q-- = tmp;
1720             }
1721         }
1722     }
1723     if (sorting_av)
1724         SvREADONLY_off(av);
1725     else if (av && !sorting_av) {
1726         /* simulate pp_aassign of tied AV */
1727         SV** const base = ORIGMARK+1;
1728         for (i=0; i < max; i++) {
1729             base[i] = newSVsv(base[i]);
1730         }
1731         av_clear(av);
1732         av_extend(av, max);
1733         for (i=0; i < max; i++) {
1734             SV * const sv = base[i];
1735             SV ** const didstore = av_store(av, i, sv);
1736             if (SvSMAGICAL(sv))
1737                 mg_set(sv);
1738             if (!didstore)
1739                 sv_2mortal(sv);
1740         }
1741     }
1742     LEAVE;
1743     PL_stack_sp = ORIGMARK + (sorting_av ? 0 : max);
1744     return nextop;
1745 }
1746
1747 static I32
1748 sortcv(pTHX_ SV *a, SV *b)
1749 {
1750     dVAR;
1751     const I32 oldsaveix = PL_savestack_ix;
1752     const I32 oldscopeix = PL_scopestack_ix;
1753     I32 result;
1754     GvSV(PL_firstgv) = a;
1755     GvSV(PL_secondgv) = b;
1756     PL_stack_sp = PL_stack_base;
1757     PL_op = PL_sortcop;
1758     CALLRUNOPS(aTHX);
1759     if (PL_stack_sp != PL_stack_base + 1)
1760         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1761     if (!SvNIOKp(*PL_stack_sp))
1762         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1763     result = SvIV(*PL_stack_sp);
1764     while (PL_scopestack_ix > oldscopeix) {
1765         LEAVE;
1766     }
1767     leave_scope(oldsaveix);
1768     return result;
1769 }
1770
1771 static I32
1772 sortcv_stacked(pTHX_ SV *a, SV *b)
1773 {
1774     dVAR;
1775     const I32 oldsaveix = PL_savestack_ix;
1776     const I32 oldscopeix = PL_scopestack_ix;
1777     I32 result;
1778     AV * const av = GvAV(PL_defgv);
1779
1780     if (AvMAX(av) < 1) {
1781         SV** ary = AvALLOC(av);
1782         if (AvARRAY(av) != ary) {
1783             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1784             SvPV_set(av, (char*)ary);
1785         }
1786         if (AvMAX(av) < 1) {
1787             AvMAX(av) = 1;
1788             Renew(ary,2,SV*);
1789             SvPV_set(av, (char*)ary);
1790         }
1791     }
1792     AvFILLp(av) = 1;
1793
1794     AvARRAY(av)[0] = a;
1795     AvARRAY(av)[1] = b;
1796     PL_stack_sp = PL_stack_base;
1797     PL_op = PL_sortcop;
1798     CALLRUNOPS(aTHX);
1799     if (PL_stack_sp != PL_stack_base + 1)
1800         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1801     if (!SvNIOKp(*PL_stack_sp))
1802         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1803     result = SvIV(*PL_stack_sp);
1804     while (PL_scopestack_ix > oldscopeix) {
1805         LEAVE;
1806     }
1807     leave_scope(oldsaveix);
1808     return result;
1809 }
1810
1811 static I32
1812 sortcv_xsub(pTHX_ SV *a, SV *b)
1813 {
1814     dVAR; dSP;
1815     const I32 oldsaveix = PL_savestack_ix;
1816     const I32 oldscopeix = PL_scopestack_ix;
1817     CV * const cv=(CV*)PL_sortcop;
1818     I32 result;
1819
1820     SP = PL_stack_base;
1821     PUSHMARK(SP);
1822     EXTEND(SP, 2);
1823     *++SP = a;
1824     *++SP = b;
1825     PUTBACK;
1826     (void)(*CvXSUB(cv))(aTHX_ cv);
1827     if (PL_stack_sp != PL_stack_base + 1)
1828         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1829     if (!SvNIOKp(*PL_stack_sp))
1830         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1831     result = SvIV(*PL_stack_sp);
1832     while (PL_scopestack_ix > oldscopeix) {
1833         LEAVE;
1834     }
1835     leave_scope(oldsaveix);
1836     return result;
1837 }
1838
1839
1840 static I32
1841 sv_ncmp(pTHX_ SV *a, SV *b)
1842 {
1843     const NV nv1 = SvNSIV(a);
1844     const NV nv2 = SvNSIV(b);
1845     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1846 }
1847
1848 static I32
1849 sv_i_ncmp(pTHX_ SV *a, SV *b)
1850 {
1851     const IV iv1 = SvIV(a);
1852     const IV iv2 = SvIV(b);
1853     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1854 }
1855
/* Expression macro: when overloading is active (PL_amagic_generation is
 * non-zero) and either operand has the AMAGIC flag, evaluates to the
 * result of amagic_call() for the method <meth>_amg; otherwise evaluates
 * to Nullsv.  The expansion is fully parenthesised and carries no trailing
 * semicolon, so it can be used wherever an expression is allowed. */
#define tryCALL_AMAGICbin(left,right,meth) \
    ((PL_amagic_generation && (SvAMAGIC(left)||SvAMAGIC(right))) \
        ? amagic_call(left, right, CAT2(meth,_amg), 0) \
        : Nullsv)
1860
1861 static I32
1862 amagic_ncmp(pTHX_ register SV *a, register SV *b)
1863 {
1864     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1865     if (tmpsv) {
1866         if (SvIOK(tmpsv)) {
1867             const I32 i = SvIVX(tmpsv);
1868             if (i > 0)
1869                return 1;
1870             return i? -1 : 0;
1871         }
1872         else {
1873             const NV d = SvNV(tmpsv);
1874             if (d > 0)
1875                return 1;
1876             return d ? -1 : 0;
1877         }
1878      }
1879      return sv_ncmp(aTHX_ a, b);
1880 }
1881
1882 static I32
1883 amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1884 {
1885     SV * const tmpsv = tryCALL_AMAGICbin(a,b,ncmp);
1886     if (tmpsv) {
1887         if (SvIOK(tmpsv)) {
1888             const I32 i = SvIVX(tmpsv);
1889             if (i > 0)
1890                return 1;
1891             return i? -1 : 0;
1892         }
1893         else {
1894             const NV d = SvNV(tmpsv);
1895             if (d > 0)
1896                return 1;
1897             return d ? -1 : 0;
1898         }
1899     }
1900     return sv_i_ncmp(aTHX_ a, b);
1901 }
1902
1903 static I32
1904 amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1905 {
1906     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1907     if (tmpsv) {
1908         if (SvIOK(tmpsv)) {
1909             const I32 i = SvIVX(tmpsv);
1910             if (i > 0)
1911                return 1;
1912             return i? -1 : 0;
1913         }
1914         else {
1915             const NV d = SvNV(tmpsv);
1916             if (d > 0)
1917                return 1;
1918             return d? -1 : 0;
1919         }
1920     }
1921     return sv_cmp(str1, str2);
1922 }
1923
1924 static I32
1925 amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1926 {
1927     SV * const tmpsv = tryCALL_AMAGICbin(str1,str2,scmp);
1928     if (tmpsv) {
1929         if (SvIOK(tmpsv)) {
1930             const I32 i = SvIVX(tmpsv);
1931             if (i > 0)
1932                return 1;
1933             return i? -1 : 0;
1934         }
1935         else {
1936             const NV d = SvNV(tmpsv);
1937             if (d > 0)
1938                return 1;
1939             return d? -1 : 0;
1940         }
1941     }
1942     return sv_cmp_locale(str1, str2);
1943 }
1944
1945 /*
1946  * Local variables:
1947  * c-indentation-style: bsd
1948  * c-basic-offset: 4
1949  * indent-tabs-mode: t
1950  * End:
1951  *
1952  * ex: set ts=8 sts=4 sw=4 noet:
1953  */