pp_sort.c

   1 /*    pp_sort.c
   2  *
   3  *    Copyright (c) 1991-2001, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  *   ...they shuffled back towards the rear of the line. 'No, not at the
  12  *   rear!'  the slave-driver shouted. 'Three files up. And stay there...
  13  */
  14
  15 #include "EXTERN.h"
  16 #define PERL_IN_PP_SORT_C
  17 #include "perl.h"
  18
  19 static I32 sortcv(pTHX_ SV *a, SV *b);
  20 static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
  21 static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
  22 static I32 sv_ncmp(pTHX_ SV *a, SV *b);
  23 static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
  24 static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
  25 static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
  26 static I32 amagic_cmp(pTHX_ SV *a, SV *b);
  27 static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);
     /* The above are forward declarations only: file-local comparison
      * callbacks that take two SVs and return an I32.  Their definitions
      * are not in this part of the file.
      * NOTE(review): presumably these are the SVCOMPARE_t functions handed
      * to the sort routines below -- confirm against the definitions.
      */
  28
  29 #define sv_cmp_static Perl_sv_cmp
  30 #define sv_cmp_locale_static Perl_sv_cmp_locale
  31
     /* The two names above are local aliases for Perl_sv_cmp and
      * Perl_sv_cmp_locale.
      *
      * SORTHINTS(hintsvp) evaluates to the integer value stored under the
      * key "SORT" in the hash of PL_hintgv, or to 0 when PL_hintgv is not
      * set or the key is absent.  The result of hv_fetch() is assigned to
      * hintsvp as a side effect, so the argument must be an assignable
      * lvalue, and it is evaluated more than once.
      */
  32 #define SORTHINTS(hintsvp) \
  33      ((PL_hintgv &&     \
  34       (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
  35           (I32)SvIV(*hintsvp) : 0)
  36
  37 #ifndef SMALLSORT
     /* Largest element count for which the merge sorts below use an
      * on-stack auxiliary array (gptr small[SMALLSORT]) rather than
      * allocating one.  May be predefined at compile time to override.
      */
  38 #define SMALLSORT (200)
  39 #endif
  40
  41 /*
  42  * The mergesort implementation is by Peter M. Mcilroy <pmcilroy@lucent.com>.
  43  *
  44  * The original code was written in conjunction with BSD Computer Software
  45  * Research Group at University of California, Berkeley.
  46  *
  47  * See also: "Optimistic Merge Sort" (SODA '92)
  48  *
  49  * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
  50  *
  51  * The code can be distributed under the same terms as Perl itself.
  52  *
  53  */
  54
  55
  56 typedef char * aptr;            /* pointer for arithmetic on sizes */
  57 typedef SV * gptr;              /* pointers in our lists */
  58
  59 /* Binary merge internal sort, with a few special mods
  60 ** for the special perl environment it now finds itself in.
  61 **
  62 ** Things that were once options have been hotwired
  63 ** to values suitable for this use.  In particular, we'll always
  64 ** initialize looking for natural runs, we'll always produce stable
  65 ** output, and we'll always do Peter McIlroy's binary merge.
  66 */
  67
  68 /* Pointer types for arithmetic and storage and convenience casts */
  69
  70 #define APTR(P) ((aptr)(P))
  71 #define GPTP(P) ((gptr *)(P))
  72 #define GPPP(P) ((gptr **)(P))
  73
  74
  75 /* byte offset from pointer P to (larger) pointer Q */
  76 #define BYTEOFF(P, Q) (APTR(Q) - APTR(P))
  77
  78 #define PSIZE sizeof(gptr)
  79
  80 /* If PSIZE is power of 2, make PSHIFT that power, if that helps */
  81
     /* Whichever branch is selected, the three macros mean:
     **   PNELEM(P, Q)  number of elements between P and (larger) Q
     **   PNBYTE(N)     number of bytes occupied by N elements
     **   PINDEX(P, N)  address of the element N positions after P
     */
  82 #ifdef  PSHIFT
  83 #define PNELEM(P, Q)    (BYTEOFF(P,Q) >> (PSHIFT))
  84 #define PNBYTE(N)       ((N) << (PSHIFT))
  85 #define PINDEX(P, N)    (GPTP(APTR(P) + PNBYTE(N)))
  86 #else
  87 /* Leave optimization to compiler */
  88 #define PNELEM(P, Q)    (GPTP(Q) - GPTP(P))
  89 #define PNBYTE(N)       ((N) * (PSIZE))
  90 #define PINDEX(P, N)    (GPTP(P) + (N))
  91 #endif
  92
  93 /* Pointer into other corresponding to pointer into this */
  94 #define POTHER(P, THIS, OTHER) GPTP(APTR(OTHER) + BYTEOFF(THIS,P))
  95
     /* Copy elements from src to dst, advancing both, until src reaches lim.
     ** This is a do/while: one element is ALWAYS copied, even if src >= lim
     ** on entry, so callers must guarantee at least one element remains.
     ** src and dst are modified in place and src is evaluated repeatedly,
     ** so all arguments must be plain pointer variables.
     */
  96 #define FROMTOUPTO(src, dst, lim) do *dst++ = *src++; while(src<lim)
  97
  98
  99 /* Runs are identified by a pointer in the auxiliary list.
 100 ** The pointer is at the start of the list,
 101 ** and it points to the start of the next list.
 102 ** NEXT is used as an lvalue, too.
 103 */
 104
 105 #define NEXT(P)         (*GPPP(P))
 106
 107
 108 /* PTHRESH is the minimum number of pairs with the same sense to justify
 109 ** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
 110 ** not just elements, so PTHRESH == 8 means a run of 16.
 111 */
 112
 113 #define PTHRESH (8)
 114
 115 /* RTHRESH is the number of elements in a run that must compare low
 116 ** to the low element from the opposing run before we justify
 117 ** doing a binary rampup instead of single stepping.
 118 ** In random input, N in a row low should only happen with
 119 ** probability 2^(1-N), so we can risk that we are dealing
 120 ** with orderly input without paying much when we aren't.
 121 */
 122
 123 #define RTHRESH (6)
 124
 125
 126 /*
 127 ** Overview of algorithm and variables.
 128 ** The array of elements at list1 will be organized into runs of length 2,
 129 ** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
 130 ** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
 131 **
 132 ** Unless otherwise specified, pair pointers address the first of two elements.
 133 **
 134 ** b and b+1 are a pair that compare with sense ``sense''.
 135 ** b is the ``bottom'' of adjacent pairs that might form a longer run.
 136 **
 137 ** p2 parallels b in the list2 array, where runs are defined by
 138 ** a pointer chain.
 139 **
 140 ** t represents the ``top'' of the adjacent pairs that might extend
 141 ** the run beginning at b.  Usually, t addresses a pair
 142 ** that compares with opposite sense from (b,b+1).
 143 ** However, it may also address a singleton element at the end of list1,
 144 ** or it may be equal to ``last'', the first element beyond list1.
 145 **
 146 ** r addresses the Nth pair following b.  If this would be beyond t,
 147 ** we back it off to t.  Only when r is less than t do we consider the
 148 ** run long enough to consider checking.
 149 **
 150 ** q addresses a pair such that the pairs at b through q already form a run.
 151 ** Often, q will equal b, indicating we only are sure of the pair itself.
 152 ** However, a search on the previous cycle may have revealed a longer run,
 153 ** so q may be greater than b.
 154 **
 155 ** p is used to work back from a candidate r, trying to reach q,
 156 ** which would mean b through r would be a run.  If we discover such a run,
 157 ** we start q at r and try to push it further towards t.
 158 ** If b through r is NOT a run, we detect the wrong order at (p-1,p).
 159 ** In any event, after the check (if any), we have two main cases.
 160 **
 161 ** 1) Short run.  b <= q < p <= r <= t.
 162 **      b through q is a run (perhaps trivial)
 163 **      q through p are uninteresting pairs
 164 **      p through r is a run
 165 **
 166 ** 2) Long run.  b < r <= q < t.
 167 **      b through q is a run (of length >= 2 * PTHRESH)
 168 **
 169 ** Note that degenerate cases are not only possible, but likely.
 170 ** For example, if the pair following b compares with opposite sense,
 171 ** then b == q < p == r == t.
 172 */
 173
 174
 175 static IV
 176 dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
 177 {
         /* Partition list1[0 .. nmemb) into runs, in place, and describe
          * them in list2.
          *
          * list1  elements to examine; pairs and longer runs that compare
          *        "descending" (cmp > 0) are reversed in place so that
          *        every recorded run ends up in non-descending order.
          * list2  descriptor space written in parallel with list1: the slot
          *        at the start of each run receives (via NEXT) a pointer to
          *        the list2 slot where the following run starts.  The
          *        callers pass an array of nmemb slots.
          * nmemb  element count.  The first comparison reads list1[0] and
          *        list1[1] unconditionally, so nmemb must be at least 2;
          *        both callers return early when nmemb <= 1.
          * cmp    comparison callback; only the sign of its result is used.
          *
          * Returns the number of runs recorded.
          */
 178     I32 sense;
 179     register gptr *b, *p, *q, *t, *p2;
 180     register gptr c, *last, *r;
 181     gptr *savep;
 182     IV runs = 0;
 183
 184     b = list1;
 185     last = PINDEX(b, nmemb);
         /* sense is 1 when a pair is strictly descending, 0 otherwise */
 186     sense = (cmp(aTHX_ *b, *(b+1)) > 0);
 187     for (p2 = list2; b < last; ) {
 188         /* We just started, or just reversed sense.
 189         ** Set t at end of pairs with the prevailing sense.
 190         */
 191         for (p = b+2, t = p; ++p < last; t = ++p) {
 192             if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
 193         }
 194         q = b;
 195         /* Having laid out the playing field, look for long runs */
 196         do {
 197             p = r = b + (2 * PTHRESH);
 198             if (r >= t) p = r = t;      /* too short to care about */
 199             else {
                     /* Walk back from r two elements at a time, checking
                      * the junction between neighbouring pairs, until the
                      * order breaks or we reach the known-good prefix at q.
                      */
 200                 while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
 201                        ((p -= 2) > q));
 202                 if (p <= q) {
 203                     /* b through r is a (long) run.
 204                     ** Extend it as far as possible.
 205                     */
 206                     p = q = r;
 207                     while (((p += 2) < t) &&
 208                            ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
 209                     r = p = q + 2;      /* no simple pairs, no after-run */
 210                 }
 211             }
 212             if (q > b) {                /* run of greater than 2 at b */
 213                 savep = p;
 214                 p = q += 2;
 215                 /* pick up singleton, if possible */
 216                 if ((p == t) &&
 217                     ((t + 1) == last) &&
 218                     ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
 219                     savep = r = p = q = last;
                     /* record the run: link its descriptor slot to the slot
                      * p - b elements further on, and step p2 there
                      */
 220                 p2 = NEXT(p2) = p2 + (p - b); ++runs;
                     /* a descending run is reversed in place by swapping
                      * from both ends toward the middle
                      */
 221                 if (sense) while (b < --p) {
 222                     c = *b;
 223                     *b++ = *p;
 224                     *p = c;
 225                 }
 226                 p = savep;
 227             }
 228             while (q < p) {             /* simple pairs */
 229                 p2 = NEXT(p2) = p2 + 2; ++runs;
 230                 if (sense) {
                         /* swap the two elements of a descending pair */
 231                     c = *q++;
 232                     *(q-1) = *q;
 233                     *q++ = c;
 234                 } else q += 2;
 235             }
                 /* a lone element left at the very end becomes its own run */
 236             if (((b = p) == t) && ((t+1) == last)) {
 237                 NEXT(p2) = p2 + 1; ++runs;
 238                 b++;
 239             }
 240             q = r;
 241         } while (b < t);
 242         sense = !sense;
 243     }
 244     return runs;
 245 }
 246
 247
 248 /* Overview of bmerge variables:
 249 **
 250 ** list1 and list2 address the main and auxiliary arrays.
 251 ** They swap identities after each merge pass.
 252 ** Base points to the original list1, so we can tell if
 253 ** the pointers ended up where they belonged (or must be copied).
 254 **
 255 ** When we are merging two lists, f1 and f2 are the next elements
 256 ** on the respective lists.  l1 and l2 mark the end of the lists.
 257 ** tp2 is the current location in the merged list.
 258 **
 259 ** p1 records where f1 started.
 260 ** After the merge, a new descriptor is built there.
 261 **
 262 ** p2 is a ``parallel'' pointer in (what starts as) descriptor space.
 263 ** It is used to identify and delimit the runs.
 264 **
 265 ** In the heat of determining where q, the greater of the f1/f2 elements,
 266 ** belongs in the other list, b, t and p, represent bottom, top and probe
 267 ** locations, respectively, in the other list.
 268 ** They make convenient temporary pointers in other places.
 269 */
 270
 271 STATIC void
 272 S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
 273 {
         /* Sort list1[0 .. nmemb) in place with a bottom-up binary merge.
          *
          * list1  array of element pointers; holds the sorted result on
          *        return.
          * nmemb  number of elements; 0 or 1 returns immediately.
          * cmp    comparison callback; only the sign of its result is used.
          *
          * An auxiliary array of nmemb slots is needed.  It lives on the
          * stack (small[]) when nmemb <= SMALLSORT and is otherwise
          * allocated with New() and released with Safefree() before
          * returning.  dynprep() first splits the input into runs; each
          * pass of the outer loop then merges adjacent runs from list1
          * into list2 and swaps the two roles, until one run remains.
          *
          * NOTE(review): if cmp can leave this function non-locally, the
          * allocated auxiliary array would not be freed -- confirm whether
          * callers guard against that.
          */
         /* i is the ramp-up stride, which doubles (i += i) and is bounded
          * only by the run length; nmemb is a size_t, so an int could
          * overflow on very long runs.  Use IV, as S_cfmergesortsv does.
          */
 274     IV i, run;
 275     int sense;
 276     register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
 277     gptr *aux, *list2, *p2, *last;
 278     gptr *base = list1;
 279     gptr *p1;
 280     gptr small[SMALLSORT];
 281
 282     if (nmemb <= 1) return;     /* sorted trivially */
 283     if (nmemb <= SMALLSORT) list2 = small;      /* use stack for aux array */
 284     else { New(799,list2,nmemb,gptr); }         /* allocate auxiliary array */
 285     aux = list2;
 286     dynprep(aTHX_ list1, list2, nmemb, cmp);
 287     last = PINDEX(list2, nmemb);
         /* when the first descriptor already points at the end, one run
          * covers everything and the data is sorted
          */
 288     while (NEXT(list2) != last) {
 289         /* More than one run remains.  Do some merging to reduce runs. */
 290         l2 = p1 = list1;
 291         for (tp2 = p2 = list2; p2 != last;) {
 292             /* The new first run begins where the old second list ended.
 293             ** Use the p2 ``parallel'' pointer to identify the end of the run.
 294             */
 295             f1 = l2;
 296             t = NEXT(p2);
 297             f2 = l1 = POTHER(t, list2, list1);
                 /* with an odd number of runs the last one has no partner:
                  * the second run is then empty (f2 == l2)
                  */
 298             if (t != last) t = NEXT(t);
 299             l2 = POTHER(t, list2, list1);
 300             p2 = t;
 301             while (f1 < l1 && f2 < l2) {
 302                 /* If head 1 is larger than head 2, find ALL the elements
 303                 ** in list 2 strictly less than head1, write them all,
 304                 ** then head 1.  Then compare the new heads, and repeat,
 305                 ** until one or both lists are exhausted.
 306                 **
 307                 ** In all comparisons (after establishing
 308                 ** which head to merge) the item to merge
 309                 ** (at pointer q) is the first operand of
 310                 ** the comparison.  When we want to know
 311                 ** if ``q is strictly less than the other'',
 312                 ** we can't just do
 313                 **    cmp(q, other) < 0
 314                 ** because stability demands that we treat equality
 315                 ** as high when q comes from l2, and as low when
 316                 ** q was from l1.  So we ask the question by doing
 317                 **    cmp(q, other) <= sense
 318                 ** and make sense == 0 when equality should look low,
 319                 ** and -1 when equality should look high.
 320                 */
 321
 322
 323                 if (cmp(aTHX_ *f1, *f2) <= 0) {
 324                     q = f2; b = f1; t = l1;
 325                     sense = -1;
 326                 } else {
 327                     q = f1; b = f2; t = l2;
 328                     sense = 0;
 329                 }
 330
 331
 332                 /* ramp up
 333                 **
 334                 ** Leave t at something strictly
 335                 ** greater than q (or at the end of the list),
 336                 ** and b at something strictly less than q.
 337                 */
 338                 for (i = 1, run = 0 ;;) {
 339                     if ((p = PINDEX(b, i)) >= t) {
 340                         /* off the end */
 341                         if (((p = PINDEX(t, -1)) > b) &&
 342                             (cmp(aTHX_ *q, *p) <= sense))
 343                              t = p;
 344                         else b = p;
 345                         break;
 346                     } else if (cmp(aTHX_ *q, *p) <= sense) {
 347                         t = p;
 348                         break;
 349                     } else b = p;
                         /* after RTHRESH single steps, double the stride */
 350                     if (++run >= RTHRESH) i += i;
 351                 }
 352
 353
 354                 /* q is known to follow b and must be inserted before t.
 355                 ** Increment b, so the range of possibilities is [b,t).
 356                 ** Round binary split down, to favor early appearance.
 357                 ** Adjust b and t until q belongs just before t.
 358                 */
 359
 360                 b++;
 361                 while (b < t) {
 362                     p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 363                     if (cmp(aTHX_ *q, *p) <= sense) {
 364                         t = p;
 365                     } else b = p + 1;
 366                 }
 367
 368
 369                 /* Copy all the strictly low elements */
 370
 371                 if (q == f1) {
 372                     FROMTOUPTO(f2, tp2, t);
 373                     *tp2++ = *f1++;
 374                 } else {
 375                     FROMTOUPTO(f1, tp2, t);
 376                     *tp2++ = *f2++;
 377                 }
 378             }
 379
 380
 381             /* Run out remaining list */
 382             if (f1 == l1) {
 383                    if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 384             } else              FROMTOUPTO(f1, tp2, l1);
                 /* build the descriptor for the merged run in what was list1 */
 385             p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 386         }
 387         t = list1;
 388         list1 = list2;
 389         list2 = t;
 390         last = PINDEX(list2, nmemb);
 391     }
         /* the sorted data is in list1; if that is the auxiliary array,
          * copy it back to the caller's array
          */
 392     if (base == list2) {
 393         last = PINDEX(list1, nmemb);
 394         FROMTOUPTO(list1, list2, last);
 395     }
 396     if (aux != small) Safefree(aux);    /* free iff allocated */
 397     return;
 398 }
 399
 400
 401 /* What perl needs (least) is another sort implementation in the core.
 402  * So what's the story?  The short (by jpl's standards) story is that
 403  * the merge sort above, in use since 5.7, is as fast as, or faster than,
 404  * qsort on many platforms, but slower than qsort, conspicuously so,
 405  * on others.  The most likely explanation is platform-specific
 406  * differences in cache sizes and relative speeds.
 407  *
 408  * The quicksort divide-and-conquer algorithm guarantees that, as the
 409  * problem is subdivided into smaller and smaller parts, the parts
 410  * fit into smaller (and faster) caches.  So it doesn't matter how
 411  * many levels of cache exist, quicksort will "find" them, and,
 412  * as long as smaller is faster, take advantage of them.
 413  *
 414  * By contrast, consider how the mergesort algorithm above works.
 415  * Suppose we have five runs (each typically of length 2 after dynprep).
 416  *
 417  * pass               base                        aux
 418  *  0              1 2 3 4 5
 419  *  1                                           12 34 5
 420  *  2                1234 5
 421  *  3                                            12345
 422  *  4                 12345
 423  *
 424  * Adjacent pairs are merged in "grand sweeps" through the input.
 425  * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 426  * runs 3 and 4 are merged and the runs from run 5 have been copied.
 427  * The only cache that matters is one large enough to hold *all* the input.
 428  * On some platforms, this may be many times slower than smaller caches.
 429  *
 430  * The following pseudo-code uses the same basic merge algorithm,
 431  * but in a divide-and-conquer way.
 432  *
 433  * # merge $runs runs at offset $offset of list $list1 into $list2.
 434  * # all unmerged runs ($runs == 1) originate in list $base.
 435  * sub mgsort2 {
 436  *     my ($offset, $runs, $base, $list1, $list2) = @_;
 437  *
 438  *     if ($runs == 1) {
 439  *         if ($list1 is $base) copy run to $list2
 440  *         return offset of end of list (or copy)
 441  *     } else {
 442  *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 443  *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 444  *         merge the adjacent runs at $offset of $list1 into $list2
 445  *         return the offset of the end of the merged runs
 446  *     }
 447  * }
 448  * mgsort2(0, $runs, $base, $aux, $base);
 449  *
 450  * For our 5 runs, the tree of calls looks like
 451  *
 452  *           5
 453  *      3        2
 454  *   2     1   1   1
 455  * 1   1
 456  *
 457  * 1   2   3   4   5
 458  *
 459  * and the corresponding activity looks like
 460  *
 461  * copy runs 1 and 2 from base to aux
 462  * merge runs 1 and 2 from aux to base
 463  * (run 3 is where it belongs, no copy needed)
 464  * merge runs 12 and 3 from base to aux
 465  * (runs 4 and 5 are where they belong, no copy needed)
 466  * merge runs 4 and 5 from base to aux
 467  * merge runs 123 and 45 from aux to base
 468  *
 469  * Note that we merge runs 1 and 2 immediately after copying them,
 470  * while they are still likely to be in fast cache.  Similarly,
 471  * run 3 is merged with run 12 while it still may be lingering in cache.
 472  * This implementation should therefore enjoy much of the cache-friendly
 473  * behavior that quicksort does.  In addition, it does less copying
 474  * than the original mergesort implementation (only runs 1 and 2 are copied)
 475  * and the "balancing" of merges is better (merged runs comprise more nearly
 476  * equal numbers of original runs).
 477  *
 478  * The actual cache-friendly implementation will use a pseudo-stack
 479  * to avoid recursion, and will unroll processing of runs of length 2,
 480  * but it is otherwise similar to the recursive implementation.
 481  * If it's as good as the original mergesort implementation on all
 482  * platforms, it should replace that implementation.  For benchmarking,
 483  * though, it is convenient to have both implementations available.
 484  */
 485
 486 typedef struct {
         /* One frame of the explicit stack that S_cfmergesortsv uses in
          * place of recursion.  A frame whose runs field is 0 marks a level
          * whose two runs are both built and ready to be merged.
          */
 487     IV  offset;         /* offset of 1st of 2 runs at this level */
 488     IV  runs;           /* how many runs must be combined into 1 */
 489 } off_runs;             /* pseudo-stack element */
 490
 491 STATIC void
 492 S_cfmergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
 493 {
         /* Sort base[0 .. nmemb) in place with the divide-and-conquer
          * ordering of merges described in the comment preceding the
          * off_runs typedef.
          *
          * base   array of element pointers; holds the sorted result on
          *        return.
          * nmemb  number of elements; 0 or 1 returns immediately.
          * cmp    comparison callback; only the sign of its result is used.
          *
          * An auxiliary array of nmemb slots is needed.  It lives on the
          * stack (small[]) when nmemb <= SMALLSORT and is otherwise
          * allocated with New() and released with Safefree() at "done".
          *
          * which[] is { base, aux, base }: a merge at a given level reads
          * its two runs from which[level & 1] and writes the result to
          * which[(level & 1) + 1]; the third entry lets the second lookup
          * be a plain increment.
          *
          * stack[] is the explicit replacement for recursion; a new frame
          * is pushed each time the number of runs still to combine is
          * split in two.
          * NOTE(review): the fixed depth of 60 presumably covers any
          * feasible run count -- confirm.
          */
 494     IV i, run, runs, offset;
 495     I32 sense, level;
 496     int iwhich;
 497     register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
 498     gptr *aux, *list1, *list2;
 499     gptr *p1;
 500     gptr small[SMALLSORT];
 501     gptr *which[3];
 502     off_runs stack[60], *stackp;
 503
 504     if (nmemb <= 1) return;                     /* sorted trivially */
 505     if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
 506     else { New(799,aux,nmemb,gptr); }           /* allocate auxiliary array */
 507     level = 0;
 508     stackp = stack;
 509     stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
 510     stackp->offset = offset = 0;
 511     which[0] = which[2] = base;
 512     which[1] = aux;
 513     for (;;) {
 514         /* On levels where both runs have been constructed (stackp->runs == 0),
 515          * merge them, and note the offset of their end, in case the offset
 516          * is needed at the next level up.  Hop up a level, and,
 517          * as long as stackp->runs is 0, keep merging.
 518          */
 519         if ((runs = stackp->runs) == 0) {
 520             iwhich = level & 1;
 521             list1 = which[iwhich];              /* area where runs are now */
 522             list2 = which[++iwhich];            /* area for merged runs */
 523             do {
 524                 offset = stackp->offset;
 525                 f1 = p1 = list1 + offset;               /* start of first run */
 526                 p = tp2 = list2 + offset;       /* where merged run will go */
 527                 t = NEXT(p);                    /* where first run ends */
 528                 f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
 529                 t = NEXT(t);                    /* where second run ends */
 530                 l2 = POTHER(t, list2, list1);   /* ... on the other side */
 531                 offset = PNELEM(list2, t);
 532                 while (f1 < l1 && f2 < l2) {
 533                     /* If head 1 is larger than head 2, find ALL the elements
 534                     ** in list 2 strictly less than head1, write them all,
 535                     ** then head 1.  Then compare the new heads, and repeat,
 536                     ** until one or both lists are exhausted.
 537                     **
 538                     ** In all comparisons (after establishing
 539                     ** which head to merge) the item to merge
 540                     ** (at pointer q) is the first operand of
 541                     ** the comparison.  When we want to know
 542                     ** if ``q is strictly less than the other'',
 543                     ** we can't just do
 544                     **    cmp(q, other) < 0
 545                     ** because stability demands that we treat equality
 546                     ** as high when q comes from l2, and as low when
 547                     ** q was from l1.  So we ask the question by doing
 548                     **    cmp(q, other) <= sense
 549                     ** and make sense == 0 when equality should look low,
 550                     ** and -1 when equality should look high.
 551                     */
 552
 553
 554                     if (cmp(aTHX_ *f1, *f2) <= 0) {
 555                         q = f2; b = f1; t = l1;
 556                         sense = -1;
 557                     } else {
 558                         q = f1; b = f2; t = l2;
 559                         sense = 0;
 560                     }
 561
 562
 563                     /* ramp up
 564                     **
 565                     ** Leave t at something strictly
 566                     ** greater than q (or at the end of the list),
 567                     ** and b at something strictly less than q.
 568                     */
 569                     for (i = 1, run = 0 ;;) {
 570                         if ((p = PINDEX(b, i)) >= t) {
 571                             /* off the end */
 572                             if (((p = PINDEX(t, -1)) > b) &&
 573                                 (cmp(aTHX_ *q, *p) <= sense))
 574                                  t = p;
 575                             else b = p;
 576                             break;
 577                         } else if (cmp(aTHX_ *q, *p) <= sense) {
 578                             t = p;
 579                             break;
 580                         } else b = p;
                             /* after RTHRESH single steps, double the stride */
 581                         if (++run >= RTHRESH) i += i;
 582                     }
 583
 584
 585                     /* q is known to follow b and must be inserted before t.
 586                     ** Increment b, so the range of possibilities is [b,t).
 587                     ** Round binary split down, to favor early appearance.
 588                     ** Adjust b and t until q belongs just before t.
 589                     */
 590
 591                     b++;
 592                     while (b < t) {
 593                         p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
 594                         if (cmp(aTHX_ *q, *p) <= sense) {
 595                             t = p;
 596                         } else b = p + 1;
 597                     }
 598
 599
 600                     /* Copy all the strictly low elements */
 601
 602                     if (q == f1) {
 603                         FROMTOUPTO(f2, tp2, t);
 604                         *tp2++ = *f1++;
 605                     } else {
 606                         FROMTOUPTO(f1, tp2, t);
 607                         *tp2++ = *f2++;
 608                     }
 609                 }
 610
 611
 612                 /* Run out remaining list */
 613                 if (f1 == l1) {
 614                        if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
 615                 } else              FROMTOUPTO(f1, tp2, l1);
                     /* descriptor for the merged run, stored in the source area */
 616                 p1 = NEXT(p1) = POTHER(tp2, list2, list1);
 617
                     /* the merge that brings level back to 0 wrote the final
                      * result into which[2], i.e. base
                      */
 618                 if (--level == 0) goto done;
 619                 --stackp;
 620                 t = list1; list1 = list2; list2 = t;    /* swap lists */
 621             } while ((runs = stackp->runs) == 0);
 622         }
 623
 624
 625         stackp->runs = 0;               /* current run will finish level */
 626         /* While there are more than 2 runs remaining,
 627          * turn them into exactly 2 runs (at the "other" level),
 628          * each made up of approximately half the runs.
 629          * Stack the second half for later processing,
 630          * and set about producing the first half now.
 631          */
 632         while (runs > 2) {
 633             ++level;
 634             ++stackp;
 635             stackp->offset = offset;
 636             runs -= stackp->runs = runs / 2;
 637         }
 638         /* We must construct a single run from 1 or 2 runs.
 639          * All the original runs are in which[0] == base.
 640          * The run we construct must end up in which[level&1].
 641          */
 642         iwhich = level & 1;
 643         if (runs == 1) {
 644             /* Constructing a single run from a single run.
 645              * If it's where it belongs already, there's nothing to do.
 646              * Otherwise, copy it to where it belongs.
 647              * A run of 1 is either a singleton at level 0,
 648              * or the second half of a split 3.  In neither event
 649              * is it necessary to set offset.  It will be set by the merge
 650              * that immediately follows.
 651              */
 652             if (iwhich) {       /* Belongs in aux, currently in base */
 653                 f1 = b = PINDEX(base, offset);  /* where list starts */
 654                 f2 = PINDEX(aux, offset);       /* where list goes */
 655                 t = NEXT(f2);                   /* where list will end */
 656                 offset = PNELEM(aux, t);        /* offset thereof */
 657                 t = PINDEX(base, offset);       /* where it currently ends */
 658                 FROMTOUPTO(f1, f2, t);          /* copy */
 659                 NEXT(b) = t;                    /* set up parallel pointer */
 660             } else if (level == 0) goto done;   /* single run at level 0 */
 661         } else {
 662             /* Constructing a single run from two runs.
 663              * The merge code at the top will do that.
 664              * We need only make sure the two runs are in the "other" array,
 665              * so they'll end up in the correct array after the merge.
 666              */
 667             ++level;
 668             ++stackp;
 669             stackp->offset = offset;
 670             stackp->runs = 0;   /* take care of both runs, trigger merge */
 671             if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
 672                 f1 = b = PINDEX(base, offset);  /* where first run starts */
 673                 f2 = PINDEX(aux, offset);       /* where it will be copied */
 674                 t = NEXT(f2);                   /* where first run will end */
 675                 offset = PNELEM(aux, t);        /* offset thereof */
 676                 p = PINDEX(base, offset);       /* end of first run */
 677                 t = NEXT(t);                    /* where second run will end */
 678                 t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
 679                 FROMTOUPTO(f1, f2, t);          /* copy both runs */
 680                 NEXT(b) = p;                    /* parallel pointer for 1st */
 681                 NEXT(p) = t;                    /* ... and for second */
 682             }
 683         }
 684     }
 685 done:
 686     if (aux != small) Safefree(aux);    /* free iff allocated */
 687     return;
 688 }
 689
 690 /*
 691  * The quicksort implementation was derived from source code contributed
 692  * by Tom Horsley.
 693  *
 694  * NOTE: this code was derived from Tom Horsley's qsort replacement
 695  * and should not be confused with the original code.
 696  */
 697
 698 /* Copyright (C) Tom Horsley, 1997. All rights reserved.
 699
 700    Permission granted to distribute under the same terms as perl which are
 701    (briefly):
 702
 703     This program is free software; you can redistribute it and/or modify
 704     it under the terms of either:
 705
 706         a) the GNU General Public License as published by the Free
 707         Software Foundation; either version 1, or (at your option) any
 708         later version, or
 709
 710         b) the "Artistic License" which comes with this Kit.
 711
 712    Details on the perl license can be found in the perl source code which
 713    may be located via the www.perl.com web page.
 714
 715    This is the most wonderfulest possible qsort I can come up with (and
 716    still be mostly portable) My (limited) tests indicate it consistently
 717    does about 20% fewer calls to compare than does the qsort in the Visual
 718    C++ library, other vendors may vary.
 719
 720    Some of the ideas in here can be found in "Algorithms" by Sedgewick,
 721    others I invented myself (or more likely re-invented since they seemed
 722    pretty obvious once I watched the algorithm operate for a while).
 723
 724    Most of this code was written while watching the Marlins sweep the Giants
 725    in the 1997 National League Playoffs - no Braves fans allowed to use this
 726    code (just kidding :-).
 727
 728    I realize that if I wanted to be true to the perl tradition, the only
 729    comment in this file would be something like:
 730
 731    ...they shuffled back towards the rear of the line. 'No, not at the
 732    rear!'  the slave-driver shouted. 'Three files up. And stay there...
 733
 734    However, I really needed to violate that tradition just so I could keep
 735    track of what happens myself, not to mention some poor fool trying to
 736    understand this years from now :-).
 737 */
 738
 739 /* ********************************************************** Configuration */
 740 /* QSORT_ORDER_GUESS selects how the insertion sort cutoff is raised after a
        partitioning pass that did fewer than 3 swaps/rotates (a hint that the
        data may already be in order):
          1 - raise it to the size of the partition just scanned
          2 - double it
          3 - square it, falling back to the size of the partition just
              scanned if the product came out smaller than the old cutoff
        After a pass with 3 or more swaps/rotates the cutoff goes back to
        QSORT_BREAK_EVEN.
     */
 741 #ifndef QSORT_ORDER_GUESS
 742 #define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
 743 #endif
 744
 745 /* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
 746    future processing - a good max upper bound is log base 2 of memory size
 747    (32 on 32 bit machines, 64 on 64 bit machines, etc). In reality it can
 748    safely be smaller than that since the program is taking up some space and
 749    most operating systems only let you grab some subset of contiguous
 750    memory (not to mention that you are normally sorting data larger than
 751    1 byte element size :-).
 752 */
 753 #ifndef QSORT_MAX_STACK
 754 #define QSORT_MAX_STACK 32
 755 #endif
 756
 757 /* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
 758    Anything bigger and we use qsort. If you make this too small, the qsort
 759    will probably break (or become less efficient), because it doesn't expect
 760    the middle element of a partition to be the same as the right or left -
 761    you have been warned.
 762 */
 763 #ifndef QSORT_BREAK_EVEN
 764 #define QSORT_BREAK_EVEN 6
 765 #endif
 766
 767 /* QSORT_PLAY_SAFE is the size of the largest array we're willing
 768    to go quadratic on.  We inoculate larger arrays against
 769    quadratic behavior by shuffling them before sorting (the size test is
 770    made once, on the whole array, before any partitioning).  This is not
 771    an absolute guarantee of non-quadratic behavior, but it would take
 772    staggeringly bad luck to pick extreme elements as the pivot from randomized data.
 773 */
 774 #ifndef QSORT_PLAY_SAFE
 775 #define QSORT_PLAY_SAFE 255
 776 #endif
 777
 778 /* ************************************************************* Data Types */
 779
 780 /* hold left and right index values of a partition waiting to be sorted (the
 781    partition includes both left and right - right is NOT one past the end or
 782    anything like that). One entry is pushed for each partition set aside
 783    while its sibling is processed first. */
 784 struct partition_stack_entry {
 785    int left;               /* index of the first element of the partition */
 786    int right;              /* index of the last element (inclusive) */
 787 #ifdef QSORT_ORDER_GUESS
 788    int qsort_break_even;   /* insertion sort cutoff in force when stacked */
 789 #endif
 790 };
 791
 792 /* ******************************************************* Shorthand Macros */
 793
 794 /* Note that these macros will be used from inside the qsort function where
 795    we happen to know that the variable 'array' points to the array being
 796    sorted, 'compare' is the pointer to the compare routine, 'temp' can hold
 797    one array element while it is being moved and (when QSORT_ORDER_GUESS is
 798    defined) 'swapped' counts the swaps and rotates done on a partition.
 799
 800    Also note that there are very many highly architecture specific ways
 801    these might be sped up, but this is simply the most generally portable
 802    code I could think of.
 803 */
 804
 805 /* Return < 0, == 0 or > 0 as the value of array[elt1] is <, == or >
 806    array[elt2]. elt1 and elt2 are indices into 'array'. */
 807 #define qsort_cmp(elt1, elt2) \
 808    ((*compare)(aTHX_ array[elt1], array[elt2]))
 809 /* QSORT_NOTICE_SWAP is expanded first thing in qsort_swap and qsort_rotate */
 810 #ifdef QSORT_ORDER_GUESS
 811 #define QSORT_NOTICE_SWAP swapped++;
 812 #else
 813 #define QSORT_NOTICE_SWAP
 814 #endif
 815
 816 /* swaps contents of array elements elt1, elt2 (both are indices, and each
 817    is evaluated more than once, so pass side-effect free expressions). */
 818 #define qsort_swap(elt1, elt2) \
 819    STMT_START { \
 820       QSORT_NOTICE_SWAP \
 821       temp = array[elt1]; \
 822       array[elt1] = array[elt2]; \
 823       array[elt2] = temp; \
 824    } STMT_END
 825
 826 /* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
 827    elt3 and elt3 gets elt1. Counts as a single swap for QSORT_NOTICE_SWAP.
 828 */
 829 #define qsort_rotate(elt1, elt2, elt3) \
 830    STMT_START { \
 831       QSORT_NOTICE_SWAP \
 832       temp = array[elt1]; \
 833       array[elt1] = array[elt2]; \
 834       array[elt2] = array[elt3]; \
 835       array[elt3] = temp; \
 836    } STMT_END
 837
 838 /* ************************************************************ Debug stuff */
 839 /* Everything here is compiled in only when QSORT_DEBUG is defined. */
 840 #ifdef QSORT_DEBUG
 841 /* Called when a qsort_assert() fails; does nothing by itself. */
 842 static void
 843 break_here(void)
 844 {
 845    return; /* good place to set a breakpoint */
 846 }
 847 /* A failed assertion only calls break_here(); it never aborts the sort. */
 848 #define qsort_assert(t) (void)( (t) || (break_here(), 0) )
 849 /* Check the partitioning invariants around the pivot chunk:
        - pc_left <= pc_right, u_right < pc_left and pc_right < u_left
        - every element strictly between u_right and pc_left compares
          less than the pivot
        - every element of the pivot chunk compares equal to array[pc_right]
        - every element strictly between pc_right and u_left compares
          greater than the pivot
      The parameters are named 'array' and 'compare' because qsort_cmp
      expands to code that uses exactly those names, and the interpreter
      context is taken through pTHX_ because qsort_cmp passes aTHX_.
     */
 850 static void
 851 doqsort_all_asserts(
 852    pTHX_
 853    SV ** array,
 854    SVCOMPARE_t compare,
 855    int pc_left, int pc_right,
 856    int u_left, int u_right)
 857 {
 858    int i;
 859
 860    qsort_assert(pc_left <= pc_right);
 861    qsort_assert(u_right < pc_left);
 862    qsort_assert(pc_right < u_left);
 863    for (i = u_right + 1; i < pc_left; ++i) {
 864       qsort_assert(qsort_cmp(i, pc_left) < 0);
 865    }
 866    for (i = pc_left; i < pc_right; ++i) {
 867       qsort_assert(qsort_cmp(i, pc_right) == 0);
 868    }
 869    for (i = pc_right + 1; i < u_left; ++i) {
 870       qsort_assert(qsort_cmp(pc_right, i) < 0);
 871    }
 872 }
 873 /* Picks up 'array' and 'compare' from the function it is used in. */
 874 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
 875    doqsort_all_asserts(aTHX_ array, compare, \
 876                  PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)
 877
 878 #else
 879 /* Non-debug build: both checks expand to nothing. */
 880 #define qsort_assert(t) ((void)0)
 881
 882 #define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)
 883
 884 #endif
 885
 886 /* ****************************************************************** qsort */
 887 /* S_qsortsvu sorts the num_elts SV pointers held in 'array' in place,
        ordering them with 'compare'.  Nothing is returned.

        Partitions that are big enough are split around a "pivot chunk" (the
        median of the three middle elements plus every element found equal
        to it); smaller ones are finished off with an insertion sort.
        Partitions still waiting to be sorted are kept on a fixed-size local
        stack (QSORT_MAX_STACK entries) rather than handled by recursion.
        Arrays larger than QSORT_PLAY_SAFE are shuffled first.
     */
 888 STATIC void /* the standard unstable (u) quicksort (qsort) */
 889 S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
 890 {
 891    register SV * temp;         /* scratch slot for moving one element */
 892
 893    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
 894    int next_stack_entry = 0;   /* index of the first free stack slot */
 895
 896    int part_left;              /* inclusive bounds of the partition */
 897    int part_right;             /* currently being worked on */
 898 #ifdef QSORT_ORDER_GUESS
 899    int qsort_break_even;       /* insertion sort cutoff now in force */
 900    int swapped;                /* bumped by QSORT_NOTICE_SWAP */
 901 #endif
 902
 903    /* Make sure we actually have work to do.
 904    */
 905    if (num_elts <= 1) {
 906       return;
 907    }
 908
 909    /* Inoculate large arrays against quadratic behavior by shuffling them */
 910    if (num_elts > QSORT_PLAY_SAFE) {
 911       register size_t n, j;
 912       register SV **q;
 913       for (n = num_elts, q = array; n > 1; ) {
 914          j = n-- * Drand01();  /* presumably Drand01() is in [0,1), so j <= new n - confirm */
 915          temp = q[j];
 916          q[j] = q[n];
 917          q[n] = temp;
 918       }
 919    }
 920
 921    /* Setup the initial partition definition and fall into the sorting loop
 922    */
 923    part_left = 0;
 924    part_right = (int)(num_elts - 1);   /* NOTE(review): narrows size_t to int */
 925 #ifdef QSORT_ORDER_GUESS
 926    qsort_break_even = QSORT_BREAK_EVEN;
 927 #else
 928 #define qsort_break_even QSORT_BREAK_EVEN
 929 #endif
 930    for ( ; ; ) {
 931       if ((part_right - part_left) >= qsort_break_even) {
 932          /* OK, this is gonna get hairy, so let's try to document all the
 933             concepts and abbreviations and variables and what they keep
 934             track of:
 935
 936             pc: pivot chunk - the set of array elements we accumulate in the
 937                 middle of the partition, all equal in value to the original
 938                 pivot element selected. The pc is defined by:
 939
 940                 pc_left - the leftmost array index of the pc
 941                 pc_right - the rightmost array index of the pc
 942
 943                 we start with pc_left == pc_right and only one element
 944                 in the pivot chunk (but it can grow during the scan).
 945
 946             u:  uncompared elements - the set of elements in the partition
 947                 we have not yet compared to the pivot value. There are two
 948                 uncompared sets during the scan - one to the left of the pc
 949                 and one to the right.
 950
 951                 u_right - the rightmost index of the left side's uncompared set
 952                 u_left - the leftmost index of the right side's uncompared set
 953
 954                 The leftmost index of the left side's uncompared set
 955                 doesn't need its own variable because it is always defined
 956                 by the leftmost edge of the whole partition (part_left). The
 957                 same goes for the rightmost edge of the right partition
 958                 (part_right).
 959
 960                 We know there are no uncompared elements on the left once we
 961                 get u_right < part_left and no uncompared elements on the
 962                 right once u_left > part_right. When both these conditions
 963                 are met, we have completed the scan of the partition.
 964
 965                 Any elements which are between the pivot chunk and the
 966                 uncompared elements should be less than the pivot value on
 967                 the left side and greater than the pivot value on the right
 968                 side (in fact, the goal of the whole algorithm is to arrange
 969                 for that to be true and make the groups of less-than and
 970                 greater-than elements into new partitions to sort again).
 971
 972             As you marvel at the complexity of the code and wonder why it
 973             has to be so confusing, consider some of the things this level
 974             of confusion brings:
 975
 976             Once I do a compare, I squeeze every ounce of juice out of it. I
 977             never do compare calls I don't have to do, and I certainly never
 978             do redundant calls.
 979
 980             I also never swap any elements unless I can prove there is a
 981             good reason. Many sort algorithms will swap a known value with
 982             an uncompared value just to get things in the right place (or
 983             avoid complexity :-), but that uncompared value, once it gets
 984             compared, may then have to be swapped again. A lot of the
 985             complexity of this code is due to the fact that it never swaps
 986             anything except compared values, and it only swaps them when the
 987             compare shows they are out of position.
 988          */
 989          int pc_left, pc_right;
 990          int u_right, u_left;
 991
 992          int s;   /* result of the most recent qsort_cmp() */
 993
 994          pc_left = ((part_left + part_right) / 2);
 995          pc_right = pc_left;
 996          u_right = pc_left - 1;
 997          u_left = pc_right + 1;
 998
 999          /* Qsort works best when the pivot value is also the median value
1000             in the partition (unfortunately you can't find the median value
1001             without first sorting :-), so to give the algorithm a helping
1002             hand, we pick 3 elements and sort them and use the median value
1003             of that tiny set as the pivot value.
1004
1005             Some versions of qsort like to use the left middle and right as
1006             the 3 elements to sort so they can ensure the ends of the
1007             partition will contain values which will stop the scan in the
1008             compare loop, but when you have to call an arbitrarily complex
1009             routine to do a compare, it's really better to just keep track of
1010             array index values to know when you hit the edge of the
1011             partition and avoid the extra compare. An even better reason to
1012             avoid using a compare call is the fact that you can drop off the
1013             edge of the array if someone foolishly provides you with an
1014             unstable compare function that doesn't always provide consistent
1015             results.
1016
1017             So, since it is simpler for us to compare the three adjacent
1018             elements in the middle of the partition, those are the ones we
1019             pick here (conveniently pointed at by u_right, pc_left, and
1020             u_left). The values of the left, center, and right elements
1021             are referred to as l c and r in the following comments.
1022          */
1023
1024 #ifdef QSORT_ORDER_GUESS
1025          swapped = 0;   /* start counting swaps afresh for this partition */
1026 #endif
1027          s = qsort_cmp(u_right, pc_left);
1028          if (s < 0) {
1029             /* l < c */
1030             s = qsort_cmp(pc_left, u_left);
1031             /* if l < c, c < r - already in order - nothing to do */
1032             if (s == 0) {
1033                /* l < c, c == r - already in order, pc grows */
1034                ++pc_right;
1035                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1036             } else if (s > 0) {
1037                /* l < c, c > r - need to know more */
1038                s = qsort_cmp(u_right, u_left);
1039                if (s < 0) {
1040                   /* l < c, c > r, l < r - swap c & r to get ordered */
1041                   qsort_swap(pc_left, u_left);
1042                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1043                } else if (s == 0) {
1044                   /* l < c, c > r, l == r - swap c&r, grow pc */
1045                   qsort_swap(pc_left, u_left);
1046                   --pc_left;
1047                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1048                } else {
1049                   /* l < c, c > r, l > r - make lcr into rlc to get ordered */
1050                   qsort_rotate(pc_left, u_right, u_left);
1051                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1052                }
1053             }
1054          } else if (s == 0) {
1055             /* l == c */
1056             s = qsort_cmp(pc_left, u_left);
1057             if (s < 0) {
1058                /* l == c, c < r - already in order, grow pc */
1059                --pc_left;
1060                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1061             } else if (s == 0) {
1062                /* l == c, c == r - already in order, grow pc both ways */
1063                --pc_left;
1064                ++pc_right;
1065                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1066             } else {
1067                /* l == c, c > r - swap l & r, grow pc */
1068                qsort_swap(u_right, u_left);
1069                ++pc_right;
1070                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1071             }
1072          } else {
1073             /* l > c */
1074             s = qsort_cmp(pc_left, u_left);
1075             if (s < 0) {
1076                /* l > c, c < r - need to know more */
1077                s = qsort_cmp(u_right, u_left);
1078                if (s < 0) {
1079                   /* l > c, c < r, l < r - swap l & c to get ordered */
1080                   qsort_swap(u_right, pc_left);
1081                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1082                } else if (s == 0) {
1083                   /* l > c, c < r, l == r - swap l & c, grow pc */
1084                   qsort_swap(u_right, pc_left);
1085                   ++pc_right;
1086                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1087                } else {
1088                   /* l > c, c < r, l > r - rotate lcr into crl to order */
1089                   qsort_rotate(u_right, pc_left, u_left);
1090                   qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1091                }
1092             } else if (s == 0) {
1093                /* l > c, c == r - swap ends, grow pc */
1094                qsort_swap(u_right, u_left);
1095                --pc_left;
1096                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1097             } else {
1098                /* l > c, c > r - swap ends to get in order */
1099                qsort_swap(u_right, u_left);
1100                qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
1101             }
1102          }
1103          /* We now know the 3 middle elements have been compared and
1104             arranged in the desired order, so we can shrink the uncompared
1105             sets on both sides
1106          */
1107          --u_right;
1108          ++u_left;
1109          qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1110
1111          /* The above massive nested if was the simple part :-). We now have
1112             the middle 3 elements ordered and we need to scan through the
1113             uncompared sets on either side, swapping elements that are on
1114             the wrong side or simply shuffling equal elements around to get
1115             all equal elements into the pivot chunk.
1116          */
1117
1118          for ( ; ; ) {
1119             int still_work_on_left;    /* left scan stopped on a too-big value */
1120             int still_work_on_right;   /* right scan stopped on a too-small value */
1121
1122             /* Scan the uncompared values on the left. If I find a value
1123                equal to the pivot value, move it over so it is adjacent to
1124                the pivot chunk and expand the pivot chunk. If I find a value
1125                less than the pivot value, then just leave it - it's already
1126                on the correct side of the partition. If I find a greater
1127                value, then stop the scan.
1128             */
1129             while ((still_work_on_left = (u_right >= part_left))) {
1130                s = qsort_cmp(u_right, pc_left);
1131                if (s < 0) {
1132                   --u_right;
1133                } else if (s == 0) {
1134                   --pc_left;
1135                   if (pc_left != u_right) {
1136                      qsort_swap(u_right, pc_left);
1137                   }
1138                   --u_right;
1139                } else {
1140                   break;
1141                }
1142                qsort_assert(u_right < pc_left);
1143                qsort_assert(pc_left <= pc_right);
1144                qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
1145                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1146             }
1147
1148             /* Do a mirror image scan of uncompared values on the right
1149             */
1150             while ((still_work_on_right = (u_left <= part_right))) {
1151                s = qsort_cmp(pc_right, u_left);
1152                if (s < 0) {
1153                   ++u_left;
1154                } else if (s == 0) {
1155                   ++pc_right;
1156                   if (pc_right != u_left) {
1157                      qsort_swap(pc_right, u_left);
1158                   }
1159                   ++u_left;
1160                } else {
1161                   break;
1162                }
1163                qsort_assert(u_left > pc_right);
1164                qsort_assert(pc_left <= pc_right);
1165                qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
1166                qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
1167             }
1168
1169             if (still_work_on_left) {
1170                /* I know I have a value on the left side which needs to be
1171                   on the right side, but I need to know more to decide
1172                   exactly the best thing to do with it.
1173                */
1174                if (still_work_on_right) {
1175                   /* I know I have values on both sides which are out of
1176                      position. This is a big win because I kill two birds
1177                      with one swap (so to speak). I can advance the
1178                      uncompared pointers on both sides after swapping both
1179                      of them into the right place.
1180                   */
1181                   qsort_swap(u_right, u_left);
1182                   --u_right;
1183                   ++u_left;
1184                   qsort_all_asserts(pc_left, pc_right, u_left, u_right);
1185                } else {
1186                   /* I have an out of position value on the left, but the
1187                      right is fully scanned, so I "slide" the pivot chunk
1188                      and any less-than values left one to make room for the
1189                      greater value over on the right. If the out of position
1190                      value is immediately adjacent to the pivot chunk (there
1191                      are no less-than values), I can do that with a swap,
1192                      otherwise, I have to rotate one of the less than values
1193                      into the former position of the out of position value
1194                      and the right end of the pivot chunk into the left end
1195                      (got all that?).
1196                   */
1197                   --pc_left;
1198                   if (pc_left == u_right) {
1199                      qsort_swap(u_right, pc_right);
1200                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1201                   } else {
1202                      qsort_rotate(u_right, pc_left, pc_right);
1203                      qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
1204                   }
1205                   --pc_right;
1206                   --u_right;
1207                }
1208             } else if (still_work_on_right) {
1209                /* Mirror image of complex case above: I have an out of
1210                   position value on the right, but the left is fully
1211                   scanned, so I need to shuffle things around to make room
1212                   for the right value on the left.
1213                */
1214                ++pc_right;
1215                if (pc_right == u_left) {
1216                   qsort_swap(u_left, pc_left);
1217                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1218                } else {
1219                   qsort_rotate(pc_right, pc_left, u_left);
1220                   qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
1221                }
1222                ++pc_left;
1223                ++u_left;
1224             } else {
1225                /* No more scanning required on either side of partition,
1226                   break out of loop and figure out next set of partitions
1227                */
1228                break;
1229             }
1230          }
1231
1232          /* The elements in the pivot chunk are now in the right place. They
1233             will never move or be compared again. All I have to do is decide
1234             what to do with the stuff to the left and right of the pivot
1235             chunk.
1236
1237             Notes on the QSORT_ORDER_GUESS ifdef code:
1238
1239             1. If I just built these partitions without swapping any (or
1240                very many) elements, there is a chance that the elements are
1241                already ordered properly (being properly ordered will
1242                certainly result in no swapping, but the converse can't be
1243                proved :-).
1244
1245             2. A (properly written) insertion sort will run faster on
1246                already ordered data than qsort will.
1247
1248             3. Perhaps there is some way to make a good guess about
1249                switching to an insertion sort earlier than partition size 6
1250                (for instance - we could save the partition size on the stack
1251                and increase the size each time we find we didn't swap, thus
1252                switching to insertion sort earlier for partitions with a
1253                history of not swapping).
1254
1255             4. Naturally, if I just switch right away, it will make
1256                artificial benchmarks with pure ascending (or descending)
1257                data look really good, but is that a good reason in general?
1258                Hard to say...
1259          */
1260
1261 #ifdef QSORT_ORDER_GUESS
1262          if (swapped < 3) {   /* few swaps: raise the insertion sort cutoff */
1263 #if QSORT_ORDER_GUESS == 1
1264             qsort_break_even = (part_right - part_left) + 1;
1265 #endif
1266 #if QSORT_ORDER_GUESS == 2
1267             qsort_break_even *= 2;
1268 #endif
1269 #if QSORT_ORDER_GUESS == 3
1270             int prev_break = qsort_break_even;
1271             qsort_break_even *= qsort_break_even;
1272             if (qsort_break_even < prev_break) {
1273                qsort_break_even = (part_right - part_left) + 1;
1274             }
1275 #endif
1276          } else {
1277             qsort_break_even = QSORT_BREAK_EVEN;
1278          }
1279 #endif
1280
1281          if (part_left < pc_left) {
1282             /* There are elements on the left which need more processing.
1283                Check the right as well before deciding what to do.
1284             */
1285             if (pc_right < part_right) {
1286                /* We have two partitions to be sorted. Stack the biggest one
1287                   and process the smallest one on the next iteration. This
1288                   minimizes the stack height by ensuring that any additional
1289                   stack entries must come from the smallest partition which
1290                   (because it is smallest) will have the fewest
1291                   opportunities to generate additional stack entries.
1292                */
1293                if ((part_right - pc_right) > (pc_left - part_left)) {
1294                   /* stack the right partition, process the left */
1295                   partition_stack[next_stack_entry].left = pc_right + 1;
1296                   partition_stack[next_stack_entry].right = part_right;
1297 #ifdef QSORT_ORDER_GUESS
1298                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1299 #endif
1300                   part_right = pc_left - 1;
1301                } else {
1302                   /* stack the left partition, process the right */
1303                   partition_stack[next_stack_entry].left = part_left;
1304                   partition_stack[next_stack_entry].right = pc_left - 1;
1305 #ifdef QSORT_ORDER_GUESS
1306                   partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
1307 #endif
1308                   part_left = pc_right + 1;
1309                }
1310                qsort_assert(next_stack_entry < QSORT_MAX_STACK);   /* NOTE(review): runs after the slot was written */
1311                ++next_stack_entry;
1312             } else {
1313                /* The elements on the left are the only remaining elements
1314                   that need sorting, arrange for them to be processed as the
1315                   next partition.
1316                */
1317                part_right = pc_left - 1;
1318             }
1319          } else if (pc_right < part_right) {
1320             /* There is only one chunk on the right to be sorted, make it
1321                the new partition and loop back around.
1322             */
1323             part_left = pc_right + 1;
1324          } else {
1325             /* This whole partition wound up in the pivot chunk, so
1326                we need to get a new partition off the stack.
1327             */
1328             if (next_stack_entry == 0) {
1329                /* the stack is empty - we are done */
1330                break;
1331             }
1332             --next_stack_entry;
1333             part_left = partition_stack[next_stack_entry].left;
1334             part_right = partition_stack[next_stack_entry].right;
1335 #ifdef QSORT_ORDER_GUESS
1336             qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1337 #endif
1338          }
1339       } else {
1340          /* This partition is too small to fool with qsort complexity, just
1341             do an ordinary insertion sort to minimize overhead.
1342          */
1343          int i;
1344          /* Assume 1st element is in right place already, and start checking
1345             at 2nd element to see where it should be inserted.
1346          */
1347          for (i = part_left + 1; i <= part_right; ++i) {
1348             int j;
1349             /* Scan (backwards - just in case 'i' is already in right place)
1350                through the elements already sorted to see if the ith element
1351                belongs ahead of one of them.
1352             */
1353             for (j = i - 1; j >= part_left; --j) {
1354                if (qsort_cmp(i, j) >= 0) {
1355                   /* i belongs right after j
1356                   */
1357                   break;
1358                }
1359             }
1360             ++j;   /* j is now the slot element i has to end up in */
1361             if (j != i) {
1362                /* Looks like we really need to move some things
1363                */
1364                int k;
1365                temp = array[i];
1366                for (k = i - 1; k >= j; --k)
1367                   array[k + 1] = array[k];
1368                array[j] = temp;
1369             }
1370          }
1371
1372          /* That partition is now sorted, grab the next one, or get out
1373             of the loop if there aren't any more.
1374          */
1375
1376          if (next_stack_entry == 0) {
1377             /* the stack is empty - we are done */
1378             break;
1379          }
1380          --next_stack_entry;
1381          part_left = partition_stack[next_stack_entry].left;
1382          part_right = partition_stack[next_stack_entry].right;
1383 #ifdef QSORT_ORDER_GUESS
1384          qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
1385 #endif
1386       }
1387    }
1388
1389    /* Believe it or not, the array is sorted at this point! */
1390 }
1391
1392 /* Stabilize what is, presumably, an otherwise unstable sort method.
1393  * We do that by allocating (or having on hand) an array of pointers
1394  * that is the same size as the original array of elements to be sorted.
1395  * We initialize this parallel array with the addresses of the original
1396  * array elements.  This indirection can make you crazy.
1397  * Some pictures can help.  After initializing, we have
1398  *
1399  *  indir                  list1
1400  * +----+                 +----+
1401  * |    | --------------> |    | ------> first element to be sorted
1402  * +----+                 +----+
1403  * |    | --------------> |    | ------> second element to be sorted
1404  * +----+                 +----+
1405  * |    | --------------> |    | ------> third element to be sorted
1406  * +----+                 +----+
1407  *  ...
1408  * +----+                 +----+
1409  * |    | --------------> |    | ------> n-1st element to be sorted
1410  * +----+                 +----+
1411  * |    | --------------> |    | ------> n-th element to be sorted
1412  * +----+                 +----+
1413  *
1414  * During the sort phase, we leave the elements of list1 where they are,
1415  * and sort the pointers in the indirect array in the same order determined
1416  * by the original comparison routine on the elements pointed to.
1417  * Because we don't move the elements of list1 around through
1418  * this phase, we can break ties on elements that compare equal
1419  * using their address in the list1 array, ensuring stability.
1420  * This leaves us with something looking like
1421  *
1422  *  indir                  list1
1423  * +----+                 +----+
1424  * |    | --+       +---> |    | ------> first element to be sorted
1425  * +----+   |       |     +----+
1426  * |    | --|-------|---> |    | ------> second element to be sorted
1427  * +----+   |       |     +----+
1428  * |    | --|-------+ +-> |    | ------> third element to be sorted
1429  * +----+   |         |   +----+
1430  *  ...
1431  * +----+    | |   | |    +----+
1432  * |    | ---|-+   | +--> |    | ------> n-1st element to be sorted
1433  * +----+    |     |      +----+
1434  * |    | ---+     +----> |    | ------> n-th element to be sorted
1435  * +----+                 +----+
1436  *
1437  * where the i-th element of the indirect array points to the element
1438  * that should be i-th in the sorted array.  After the sort phase,
1439  * we have to put the elements of list1 into the places
1440  * dictated by the indirect array.
1441  */
1442
1443 static SVCOMPARE_t RealCmp;
1444
1445 static I32
1446 cmpindir(pTHX_ gptr a, gptr b)
1447 {
1448     I32 sense;
1449     gptr *ap = (gptr *)a;
1450     gptr *bp = (gptr *)b;
1451
1452     if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
1453          sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
1454     return sense;
1455 }
1456
1457 STATIC void
1458 S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
1459 {
1460     SV **hintsvp;
1461
1462     if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
1463          register gptr **pp, *q;
1464          register size_t n, j, i;
1465          gptr *small[SMALLSORT], **indir, tmp;
1466          SVCOMPARE_t savecmp;
1467          if (nmemb <= 1) return;     /* sorted trivially */
1468
1469          /* Small arrays can use the stack, big ones must be allocated */
1470          if (nmemb <= SMALLSORT) indir = small;
1471          else { New(1799, indir, nmemb, gptr *); }
1472
1473          /* Copy pointers to original array elements into indirect array */
1474          for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;
1475
1476          savecmp = RealCmp;     /* Save current comparison routine, if any */
1477          RealCmp = cmp; /* Put comparison routine where cmpindir can find it */
1478
1479          /* sort, with indirection */
1480          S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);
1481
1482          pp = indir;
1483          q = list1;
1484          for (n = nmemb; n--; ) {
1485               /* Assert A: all elements of q with index > n are already
1486                * in place.  This is vacuosly true at the start, and we
1487                * put element n where it belongs below (if it wasn't
1488                * already where it belonged). Assert B: we only move
1489                * elements that aren't where they belong,
1490                * so, by A, we never tamper with elements above n.
1491                */
1492               j = pp[n] - q;            /* This sets j so that q[j] is
1493                                          * at pp[n].  *pp[j] belongs in
1494                                          * q[j], by construction.
1495                                          */
1496               if (n != j) {             /* all's well if n == j */
1497                    tmp = q[j];          /* save what's in q[j] */
1498                    do {
1499                         q[j] = *pp[j];  /* put *pp[j] where it belongs */
1500                         i = pp[j] - q;  /* the index in q of the element
1501                                          * just moved */
1502                         pp[j] = q + j;  /* this is ok now */
1503                    } while ((j = i) != n);
1504                    /* There are only finitely many (nmemb) addresses
1505                     * in the pp array.
1506                     * So we must eventually revisit an index we saw before.
1507                     * Suppose the first revisited index is k != n.
1508                     * An index is visited because something else belongs there.
1509                     * If we visit k twice, then two different elements must
1510                     * belong in the same place, which cannot be.
1511                     * So j must get back to n, the loop terminates,
1512                     * and we put the saved element where it belongs.
1513                     */
1514                    q[n] = tmp;          /* put what belongs into
1515                                          * the n-th element */
1516               }
1517          }
1518
1519         /* free iff allocated */
1520          if (indir != small) { Safefree(indir); }
1521          /* restore prevailing comparison routine */
1522          RealCmp = savecmp;
1523     } else {
1524          S_qsortsvu(aTHX_ list1, nmemb, cmp);
1525     }
1526 }
1527
1528 /*
1529 =for apidoc sortsv
1530
1531 Sort an array. Here is an example:
1532
1533     sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);
1534
1535 =cut
1536 */
1537
1538 void
1539 Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
1540 {
1541     void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
1542         S_mergesortsv;
1543     SV **hintsvp;
1544     I32 hints;
1545
1546     if ((hints = SORTHINTS(hintsvp))) {
1547          if (hints & HINT_SORT_QUICKSORT)
1548               sortsvp = S_qsortsv;
1549          else {
1550               if (hints & HINT_SORT_MERGESORT)
1551                    sortsvp = S_cfmergesortsv;
1552               else
1553                    sortsvp = S_mergesortsv;
1554          }
1555     }
1556
1557     sortsvp(aTHX_ array, nmemb, cmp);
1558 }
1559
/*
 * pp_sort -- op body for the sort operator.
 *
 * Outside list context the arguments are dropped and undef is returned.
 * Otherwise the function
 *   1. works out where the comparison code comes from, based on the op
 *      flags: an inline block (OPf_STACKED and OPf_SPECIAL), a sub looked
 *      up with sv_2cv() (OPf_STACKED only), or none (built-in comparison);
 *   2. compacts the non-null arguments down to just above the original
 *      mark;
 *   3. sorts them in place with sortsv() when more than one remains;
 *   4. leaves them on the stack as the result.
 *
 * Returns the op_next that was read from PL_op on entry.
 *
 * NOTE(review): some locals are evidently referenced from inside macros
 * rather than by visible code (kUNOP is used without naming kid; newsp is
 * read but never assigned in the visible text).  cv, hasargs and gimme
 * may be captured the same way -- confirm against the macro definitions
 * before renaming anything here.
 */
1560 PP(pp_sort)
1561 {
1562     dSP; dMARK; dORIGMARK;
1563     register SV **up;
1564     SV **myorigmark = ORIGMARK;
1565     register I32 max;
1566     HV *stash;
1567     GV *gv;
1568     CV *cv = 0;
1569     I32 gimme = GIMME;
1570     OP* nextop = PL_op->op_next;
1571     I32 overloading = 0;
1572     bool hasargs = FALSE;
1573     I32 is_xsub = 0;
1574
         /* Anything but list context: discard the arguments, return undef. */
1575     if (gimme != G_ARRAY) {
1576         SP = MARK;
1577         RETPUSHUNDEF;
1578     }
1579
1580     ENTER;
1581     SAVEVPTR(PL_sortcop);
         /* Step 1: decide what PL_sortcop (the comparison code) is. */
1582     if (PL_op->op_flags & OPf_STACKED) {
1583         if (PL_op->op_flags & OPf_SPECIAL) {
                 /* Inline block: step down through the op tree; the
                  * comparison code starts at kid->op_next. */
1584             OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
1585             kid = kUNOP->op_first;                      /* pass rv2gv */
1586             kid = kUNOP->op_first;                      /* pass leave */
1587             PL_sortcop = kid->op_next;
1588             stash = CopSTASH(PL_curcop);
1589         }
1590         else {
                 /* Comparator named by the first argument after the mark. */
1591             cv = sv_2cv(*++MARK, &stash, &gv, 0);
                 /* A "$$" prototype selects argument passing (hasargs). */
1592             if (cv && SvPOK(cv)) {
1593                 STRLEN n_a;
1594                 char *proto = SvPV((SV*)cv, n_a);
1595                 if (proto && strEQ(proto, "$$")) {
1596                     hasargs = TRUE;
1597                 }
1598             }
                 /* No op tree: acceptable only for an XSUB, otherwise die,
                  * naming the glob when there is one. */
1599             if (!(cv && CvROOT(cv))) {
1600                 if (cv && CvXSUB(cv)) {
1601                     is_xsub = 1;
1602                 }
1603                 else if (gv) {
1604                     SV *tmpstr = sv_newmortal();
1605                     gv_efullname3(tmpstr, gv, Nullch);
1606                     DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
1607                         SvPVX(tmpstr));
1608                 }
1609                 else {
1610                     DIE(aTHX_ "Undefined subroutine in sort");
1611                 }
1612             }
1613
                 /* For an XSUB, PL_sortcop carries the CV itself (cast to
                  * OP*); sortcv_xsub() casts it back. */
1614             if (is_xsub)
1615                 PL_sortcop = (OP*)cv;
1616             else {
1617                 PL_sortcop = CvSTART(cv);
                     /* Save the root op's pp function and swap in the
                      * OP_NULL one for the duration of the sort. */
1618                 SAVEVPTR(CvROOT(cv)->op_ppaddr);
1619                 CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];
1620
                     /* Make element [1] of the sub's padlist the current pad. */
1621                 SAVEVPTR(PL_curpad);
1622                 PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
1623             }
1624         }
1625     }
1626     else {
             /* No comparator supplied: a built-in comparison is used below. */
1627         PL_sortcop = Nullop;
1628         stash = CopSTASH(PL_curcop);
1629     }
1630
         /* Step 2: copy the arguments down to myorigmark+1 onwards, skipping
          * null entries.  With no user comparator, an element that is not
          * already a string is either recorded as overloaded (SvAMAGIC) or
          * stringified up front with sv_2pv(). */
1631     up = myorigmark + 1;
1632     while (MARK < SP) { /* This may or may not shift down one here. */
1633         /*SUPPRESS 560*/
1634         if ((*up = *++MARK)) {                  /* Weed out nulls. */
1635             SvTEMP_off(*up);
1636             if (!PL_sortcop && !SvPOK(*up)) {
1637                 STRLEN n_a;
1638                 if (SvAMAGIC(*up))
1639                     overloading = 1;
1640                 else
1641                     (void)sv_2pv(*up, &n_a);
1642             }
1643             up++;
1644         }
1645     }
         /* max is the number of elements kept. */
1646     max = --up - myorigmark;
1647     if (PL_sortcop) {
1648         if (max > 1) {
1649             PERL_CONTEXT *cx;
1650             SV** newsp;
1651             bool oldcatch = CATCH_GET;
1652
1653             SAVETMPS;
1654             SAVEOP();
1655
                 /* Run the comparator on a stack pushed with
                  * PUSHSTACKi(PERLSI_SORT). */
1656             CATCH_SET(TRUE);
1657             PUSHSTACKi(PERLSI_SORT);
                 /* Block or argument-less sub: operands travel through the
                  * "a" and "b" globs.  Re-fetch the globs when the cached
                  * stash differs or either is unset, then save the scalars
                  * they currently hold. */
1658             if (!hasargs && !is_xsub) {
1659                 if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
1660                     SAVESPTR(PL_firstgv);
1661                     SAVESPTR(PL_secondgv);
1662                     PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
1663                     PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
1664                     PL_sortstash = stash;
1665                 }
1666 #ifdef USE_5005THREADS
1667                 sv_lock((SV *)PL_firstgv);
1668                 sv_lock((SV *)PL_secondgv);
1669 #endif
1670                 SAVESPTR(GvSV(PL_firstgv));
1671                 SAVESPTR(GvSV(PL_secondgv));
1672             }
1673
                 /* Push a context block; for anything but an inline block it
                  * is turned into a CXt_SUB frame with scalar gimme. */
1674             PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
1675             if (!(PL_op->op_flags & OPf_SPECIAL)) {
1676                 cx->cx_type = CXt_SUB;
1677                 cx->blk_gimme = G_SCALAR;
1678                 PUSHSUB(cx);
1679                 if (!CvDEPTH(cv))
1680                     (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
1681             }
1682             PL_sortcxix = cxstack_ix;
1683
                 /* "$$" sub: pad slot 0 becomes the argument array; without
                  * USE_5005THREADS it is also installed as GvAV(PL_defgv),
                  * with the previous array remembered in the context. */
1684             if (hasargs && !is_xsub) {
1685                 /* This is mostly copied from pp_entersub */
1686                 AV *av = (AV*)PL_curpad[0];
1687
1688 #ifndef USE_5005THREADS
1689                 cx->blk_sub.savearray = GvAV(PL_defgv);
1690                 GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
1691 #endif /* USE_5005THREADS */
1692                 cx->blk_sub.oldcurpad = PL_curpad;
1693                 cx->blk_sub.argarray = av;
1694             }
                 /* Step 3: callback is sortcv_xsub for an XSUB,
                  * sortcv_stacked for a "$$" sub, sortcv otherwise. */
1695            sortsv((myorigmark+1), max,
1696                   is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);
1697
                 /* Undo the context block and stack switch.  newsp is not
                  * assigned in the visible text; presumably POPBLOCK sets
                  * it -- confirm. */
1698             POPBLOCK(cx,PL_curpm);
1699             PL_stack_sp = newsp;
1700             POPSTACK;
1701             CATCH_SET(oldcatch);
1702         }
1703     }
1704     else {
1705         if (max > 1) {
1706             MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
                 /* Step 3, built-in comparison: numeric (integer or NV) when
                  * OPpSORT_NUMERIC is set, string otherwise (locale-aware
                  * under IN_LOCALE_RUNTIME); the amagic_* variant is chosen
                  * when an overloaded element was seen above. */
1707             sortsv(ORIGMARK+1, max,
1708                   (PL_op->op_private & OPpSORT_NUMERIC)
1709                         ? ( (PL_op->op_private & OPpSORT_INTEGER)
1710                             ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
1711                             : ( overloading ? amagic_ncmp : sv_ncmp))
1712                         : ( IN_LOCALE_RUNTIME
1713                             ? ( overloading
1714                                 ? amagic_cmp_locale
1715                                 : sv_cmp_locale_static)
1716                             : ( overloading ? amagic_cmp : sv_cmp_static)));
                 /* OPpSORT_REVERSE (honoured only on this built-in path):
                  * reverse the sorted range by swapping from both ends. */
1717             if (PL_op->op_private & OPpSORT_REVERSE) {
1718                 SV **p = ORIGMARK+1;
1719                 SV **q = ORIGMARK+max;
1720                 while (p < q) {
1721                     SV *tmp = *p;
1722                     *p++ = *q;
1723                     *q-- = tmp;
1724                 }
1725             }
1726         }
1727     }
1728     LEAVE;
         /* Step 4: the result occupies ORIGMARK+1 .. ORIGMARK+max. */
1729     PL_stack_sp = ORIGMARK + max;
1730     return nextop;
1731 }
1732
1733 static I32
1734 sortcv(pTHX_ SV *a, SV *b)
1735 {
1736     I32 oldsaveix = PL_savestack_ix;
1737     I32 oldscopeix = PL_scopestack_ix;
1738     I32 result;
1739     GvSV(PL_firstgv) = a;
1740     GvSV(PL_secondgv) = b;
1741     PL_stack_sp = PL_stack_base;
1742     PL_op = PL_sortcop;
1743     CALLRUNOPS(aTHX);
1744     if (PL_stack_sp != PL_stack_base + 1)
1745         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1746     if (!SvNIOKp(*PL_stack_sp))
1747         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1748     result = SvIV(*PL_stack_sp);
1749     while (PL_scopestack_ix > oldscopeix) {
1750         LEAVE;
1751     }
1752     leave_scope(oldsaveix);
1753     return result;
1754 }
1755
1756 static I32
1757 sortcv_stacked(pTHX_ SV *a, SV *b)
1758 {
1759     I32 oldsaveix = PL_savestack_ix;
1760     I32 oldscopeix = PL_scopestack_ix;
1761     I32 result;
1762     AV *av;
1763
1764 #ifdef USE_5005THREADS
1765     av = (AV*)PL_curpad[0];
1766 #else
1767     av = GvAV(PL_defgv);
1768 #endif
1769
1770     if (AvMAX(av) < 1) {
1771         SV** ary = AvALLOC(av);
1772         if (AvARRAY(av) != ary) {
1773             AvMAX(av) += AvARRAY(av) - AvALLOC(av);
1774             SvPVX(av) = (char*)ary;
1775         }
1776         if (AvMAX(av) < 1) {
1777             AvMAX(av) = 1;
1778             Renew(ary,2,SV*);
1779             SvPVX(av) = (char*)ary;
1780         }
1781     }
1782     AvFILLp(av) = 1;
1783
1784     AvARRAY(av)[0] = a;
1785     AvARRAY(av)[1] = b;
1786     PL_stack_sp = PL_stack_base;
1787     PL_op = PL_sortcop;
1788     CALLRUNOPS(aTHX);
1789     if (PL_stack_sp != PL_stack_base + 1)
1790         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1791     if (!SvNIOKp(*PL_stack_sp))
1792         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1793     result = SvIV(*PL_stack_sp);
1794     while (PL_scopestack_ix > oldscopeix) {
1795         LEAVE;
1796     }
1797     leave_scope(oldsaveix);
1798     return result;
1799 }
1800
1801 static I32
1802 sortcv_xsub(pTHX_ SV *a, SV *b)
1803 {
1804     dSP;
1805     I32 oldsaveix = PL_savestack_ix;
1806     I32 oldscopeix = PL_scopestack_ix;
1807     I32 result;
1808     CV *cv=(CV*)PL_sortcop;
1809
1810     SP = PL_stack_base;
1811     PUSHMARK(SP);
1812     EXTEND(SP, 2);
1813     *++SP = a;
1814     *++SP = b;
1815     PUTBACK;
1816     (void)(*CvXSUB(cv))(aTHX_ cv);
1817     if (PL_stack_sp != PL_stack_base + 1)
1818         Perl_croak(aTHX_ "Sort subroutine didn't return single value");
1819     if (!SvNIOKp(*PL_stack_sp))
1820         Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
1821     result = SvIV(*PL_stack_sp);
1822     while (PL_scopestack_ix > oldscopeix) {
1823         LEAVE;
1824     }
1825     leave_scope(oldsaveix);
1826     return result;
1827 }
1828
1829
1830 static I32
1831 sv_ncmp(pTHX_ SV *a, SV *b)
1832 {
1833     NV nv1 = SvNV(a);
1834     NV nv2 = SvNV(b);
1835     return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
1836 }
1837
1838 static I32
1839 sv_i_ncmp(pTHX_ SV *a, SV *b)
1840 {
1841     IV iv1 = SvIV(a);
1842     IV iv2 = SvIV(b);
1843     return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
1844 }
/* Store in *svp the result of calling the overloaded binary method
 * meth (suffixed with _amg) on left and right, or Nullsv when
 * PL_amagic_generation is zero or neither operand has SvAMAGIC set.
 */
#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
          *(svp) = (PL_amagic_generation \
                    && (SvAMAGIC(left) || SvAMAGIC(right))) \
              ? amagic_call(left, right, CAT2(meth,_amg), 0) \
              : Nullsv; \
        } STMT_END
1855
1856 static I32
1857 amagic_ncmp(pTHX_ register SV *a, register SV *b)
1858 {
1859     SV *tmpsv;
1860     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1861     if (tmpsv) {
1862         NV d;
1863
1864         if (SvIOK(tmpsv)) {
1865             I32 i = SvIVX(tmpsv);
1866             if (i > 0)
1867                return 1;
1868             return i? -1 : 0;
1869         }
1870         d = SvNV(tmpsv);
1871         if (d > 0)
1872            return 1;
1873         return d? -1 : 0;
1874      }
1875      return sv_ncmp(aTHX_ a, b);
1876 }
1877
1878 static I32
1879 amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
1880 {
1881     SV *tmpsv;
1882     tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
1883     if (tmpsv) {
1884         NV d;
1885
1886         if (SvIOK(tmpsv)) {
1887             I32 i = SvIVX(tmpsv);
1888             if (i > 0)
1889                return 1;
1890             return i? -1 : 0;
1891         }
1892         d = SvNV(tmpsv);
1893         if (d > 0)
1894            return 1;
1895         return d? -1 : 0;
1896     }
1897     return sv_i_ncmp(aTHX_ a, b);
1898 }
1899
1900 static I32
1901 amagic_cmp(pTHX_ register SV *str1, register SV *str2)
1902 {
1903     SV *tmpsv;
1904     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1905     if (tmpsv) {
1906         NV d;
1907
1908         if (SvIOK(tmpsv)) {
1909             I32 i = SvIVX(tmpsv);
1910             if (i > 0)
1911                return 1;
1912             return i? -1 : 0;
1913         }
1914         d = SvNV(tmpsv);
1915         if (d > 0)
1916            return 1;
1917         return d? -1 : 0;
1918     }
1919     return sv_cmp(str1, str2);
1920 }
1921
1922 static I32
1923 amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
1924 {
1925     SV *tmpsv;
1926     tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
1927     if (tmpsv) {
1928         NV d;
1929
1930         if (SvIOK(tmpsv)) {
1931             I32 i = SvIVX(tmpsv);
1932             if (i > 0)
1933                return 1;
1934             return i? -1 : 0;
1935         }
1936         d = SvNV(tmpsv);
1937         if (d > 0)
1938            return 1;
1939         return d? -1 : 0;
1940     }
1941     return sv_cmp_locale(str1, str2);
1942 }