/* pp_sort.c
 *
 * Copyright (c) 1991-2001, Larry Wall
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Artistic License, as specified in the README file.
 *
 */

/*
 * ...they shuffled back towards the rear of the line. 'No, not at the
 * rear!' the slave-driver shouted. 'Three files up. And stay there...
 */

#include "EXTERN.h"
#define PERL_IN_PP_SORT_C
#include "perl.h"

static I32 sortcv(pTHX_ SV *a, SV *b);
static I32 sortcv_stacked(pTHX_ SV *a, SV *b);
static I32 sortcv_xsub(pTHX_ SV *a, SV *b);
static I32 sv_ncmp(pTHX_ SV *a, SV *b);
static I32 sv_i_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_i_ncmp(pTHX_ SV *a, SV *b);
static I32 amagic_cmp(pTHX_ SV *a, SV *b);
static I32 amagic_cmp_locale(pTHX_ SV *a, SV *b);

#define sv_cmp_static Perl_sv_cmp
#define sv_cmp_locale_static Perl_sv_cmp_locale

#define SORTHINTS(hintsvp) \
    ((PL_hintgv && \
      (hintsvp = hv_fetch(GvHV(PL_hintgv), "SORT", 4, FALSE))) ? \
      (I32)SvIV(*hintsvp) : 0)

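/* A sketch of how the hints get here (an assumption about the sort
** pragma of this era, not verified in this file): code such as
**     use sort 'stable';
** is expected to set $^H{SORT}, i.e. the "SORT" entry of GvHV(PL_hintgv),
** so SORTHINTS then reports bits like HINT_SORT_STABLE or
** HINT_SORT_QUICKSORT to S_qsortsv and Perl_sortsv below.
*/
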
#ifndef SMALLSORT
#define SMALLSORT (200)
#endif

/*
 * The mergesort implementation is by Peter M. McIlroy <pmcilroy@lucent.com>.
 *
 * The original code was written in conjunction with BSD Computer Software
 * Research Group at University of California, Berkeley.
 *
 * See also: "Optimistic Merge Sort" (SODA '92)
 *
 * The integration to Perl is by John P. Linderman <jpl@research.att.com>.
 *
 * The code can be distributed under the same terms as Perl itself.
 *
 */


typedef char * aptr;    /* pointer for arithmetic on sizes */
typedef SV * gptr;      /* pointers in our lists */

/* Binary merge internal sort, with a few special mods
** for the special perl environment it now finds itself in.
**
** Things that were once options have been hotwired
** to values suitable for this use.  In particular, we'll always
** initialize looking for natural runs, we'll always produce stable
** output, and we'll always do Peter McIlroy's binary merge.
*/

/* Pointer types for arithmetic and storage and convenience casts */

#define APTR(P)  ((aptr)(P))
#define GPTP(P)  ((gptr *)(P))
#define GPPP(P)  ((gptr **)(P))


/* byte offset from pointer P to (larger) pointer Q */
#define BYTEOFF(P, Q)  (APTR(Q) - APTR(P))

#define PSIZE sizeof(gptr)

/* If PSIZE is power of 2, make PSHIFT that power, if that helps */

#ifdef PSHIFT
#define PNELEM(P, Q)  (BYTEOFF(P,Q) >> (PSHIFT))
#define PNBYTE(N)     ((N) << (PSHIFT))
#define PINDEX(P, N)  (GPTP(APTR(P) + PNBYTE(N)))
#else
/* Leave optimization to compiler */
#define PNELEM(P, Q)  (GPTP(Q) - GPTP(P))
#define PNBYTE(N)     ((N) * (PSIZE))
#define PINDEX(P, N)  (GPTP(P) + (N))
#endif

/* Pointer into other corresponding to pointer into this */
#define POTHER(P, THIS, OTHER)  GPTP(APTR(OTHER) + BYTEOFF(THIS,P))

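/* Copy *src through *(lim-1) to dst, advancing both pointers.
** Note the do-while: at least one element is always copied, so
** callers must guarantee src < lim on entry.
*/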
#define FROMTOUPTO(src, dst, lim)  do *dst++ = *src++; while(src<lim)


/* Runs are identified by a pointer in the auxiliary list.
** The pointer is at the start of the list,
** and it points to the start of the next list.
** NEXT is used as an lvalue, too.
*/

#define NEXT(P)  (*GPPP(P))


/* PTHRESH is the minimum number of pairs with the same sense to justify
** checking for a run and extending it.  Note that PTHRESH counts PAIRS,
** not just elements, so PTHRESH == 8 means a run of 16.
*/

#define PTHRESH (8)

/* RTHRESH is the number of elements in a run that must compare low
** to the low element from the opposing run before we justify
** doing a binary rampup instead of single stepping.
** In random input, N in a row low should only happen with
** probability 2^(1-N), so we can risk that we are dealing
** with orderly input without paying much when we aren't.
*/

#define RTHRESH (6)
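/* (Worked out: with RTHRESH == 6, 2^(1-N) is 2^(-5) == 1/32, so on
** random input a binary rampup is falsely triggered only about 3%
** of the time.)
*/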


/*
** Overview of algorithm and variables.
** The array of elements at list1 will be organized into runs of length 2,
** or runs of length >= 2 * PTHRESH.  We only try to form long runs when
** PTHRESH adjacent pairs compare in the same way, suggesting overall order.
**
** Unless otherwise specified, pair pointers address the first of two elements.
**
** b and b+1 are a pair that compare with sense ``sense''.
** b is the ``bottom'' of adjacent pairs that might form a longer run.
**
** p2 parallels b in the list2 array, where runs are defined by
** a pointer chain.
**
** t represents the ``top'' of the adjacent pairs that might extend
** the run beginning at b.  Usually, t addresses a pair
** that compares with opposite sense from (b,b+1).
** However, it may also address a singleton element at the end of list1,
** or it may be equal to ``last'', the first element beyond list1.
**
** r addresses the Nth pair following b.  If this would be beyond t,
** we back it off to t.  Only when r is less than t do we consider the
** run long enough to consider checking.
**
** q addresses a pair such that the pairs at b through q already form a run.
** Often, q will equal b, indicating we are only sure of the pair itself.
** However, a search on the previous cycle may have revealed a longer run,
** so q may be greater than b.
**
** p is used to work back from a candidate r, trying to reach q,
** which would mean b through r would be a run.  If we discover such a run,
** we start q at r and try to push it further towards t.
** If b through r is NOT a run, we detect the wrong order at (p-1,p).
** In any event, after the check (if any), we have two main cases.
**
** 1) Short run.  b <= q < p <= r <= t.
**      b through q is a run (perhaps trivial)
**      q through p are uninteresting pairs
**      p through r is a run
**
** 2) Long run.  b < r <= q < t.
**      b through q is a run (of length >= 2 * PTHRESH)
**
** Note that degenerate cases are not only possible, but likely.
** For example, if the pair following b compares with opposite sense,
** then b == q < p == r == t.
*/


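/* A small worked example of the pair logic (illustrative only, not from
** the original text): given the input (5, 3, 4, 4), the first pair (5,3)
** compares with sense == 1 (descending), so dynprep reverses it in place
** to (3,5).  The pair (4,4) compares "not greater", giving sense == 0,
** and is left untouched -- only strictly descending pairs are reversed,
** which is what keeps the sort stable.
*/
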
static IV
dynprep(pTHX_ gptr *list1, gptr *list2, size_t nmemb, SVCOMPARE_t cmp)
{
    I32 sense;
    register gptr *b, *p, *q, *t, *p2;
    register gptr c, *last, *r;
    gptr *savep;
    IV runs = 0;

    b = list1;
    last = PINDEX(b, nmemb);
    sense = (cmp(aTHX_ *b, *(b+1)) > 0);
    for (p2 = list2; b < last; ) {
        /* We just started, or just reversed sense.
        ** Set t at end of pairs with the prevailing sense.
        */
        for (p = b+2, t = p; ++p < last; t = ++p) {
            if ((cmp(aTHX_ *t, *p) > 0) != sense) break;
        }
        q = b;
        /* Having laid out the playing field, look for long runs */
        do {
            p = r = b + (2 * PTHRESH);
            if (r >= t) p = r = t;      /* too short to care about */
            else {
                while (((cmp(aTHX_ *(p-1), *p) > 0) == sense) &&
                       ((p -= 2) > q));
                if (p <= q) {
                    /* b through r is a (long) run.
                    ** Extend it as far as possible.
                    */
                    p = q = r;
                    while (((p += 2) < t) &&
                           ((cmp(aTHX_ *(p-1), *p) > 0) == sense)) q = p;
                    r = p = q + 2;      /* no simple pairs, no after-run */
                }
            }
            if (q > b) {                /* run of greater than 2 at b */
                savep = p;
                p = q += 2;
                /* pick up singleton, if possible */
                if ((p == t) &&
                    ((t + 1) == last) &&
                    ((cmp(aTHX_ *(p-1), *p) > 0) == sense))
                    savep = r = p = q = last;
                p2 = NEXT(p2) = p2 + (p - b);   ++runs;
                if (sense) while (b < --p) {
                    c = *b;
                    *b++ = *p;
                    *p = c;
                }
                p = savep;
            }
            while (q < p) {             /* simple pairs */
                p2 = NEXT(p2) = p2 + 2; ++runs;
                if (sense) {
                    c = *q++;
                    *(q-1) = *q;
                    *q++ = c;
                } else q += 2;
            }
            if (((b = p) == t) && ((t+1) == last)) {
                NEXT(p2) = p2 + 1;      ++runs;
                b++;
            }
            q = r;
        } while (b < t);
        sense = !sense;
    }
    return runs;
}


/* Overview of bmerge variables:
**
** list1 and list2 address the main and auxiliary arrays.
** They swap identities after each merge pass.
** Base points to the original list1, so we can tell if
** the pointers ended up where they belonged (or must be copied).
**
** When we are merging two lists, f1 and f2 are the next elements
** on the respective lists.  l1 and l2 mark the end of the lists.
** tp2 is the current location in the merged list.
**
** p1 records where f1 started.
** After the merge, a new descriptor is built there.
**
** p2 is a ``parallel'' pointer in (what starts as) descriptor space.
** It is used to identify and delimit the runs.
**
** In the heat of determining where q, the greater of the f1/f2 elements,
** belongs in the other list, b, t, and p represent bottom, top, and probe
** locations, respectively, in the other list.
** They make convenient temporary pointers in other places.
*/

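/* For example (an illustration, not from the original text): after
** dynprep turns six elements into three runs of two, the descriptor
** chain in list2 reads NEXT(list2) == list2+2, NEXT(list2+2) == list2+4,
** and NEXT(list2+4) == list2+6 == last, which is exactly the condition
** the merge loop below tests to decide whether more passes are needed.
*/
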
STATIC void
S_mergesortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
{
    int i, run;
    int sense;
    register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
    gptr *aux, *list2, *p2, *last;
    gptr *base = list1;
    gptr *p1;
    gptr small[SMALLSORT];

    if (nmemb <= 1) return;                     /* sorted trivially */
    if (nmemb <= SMALLSORT) list2 = small;      /* use stack for aux array */
    else { New(799,list2,nmemb,gptr); }         /* allocate auxiliary array */
    aux = list2;
    dynprep(aTHX_ list1, list2, nmemb, cmp);
    last = PINDEX(list2, nmemb);
    while (NEXT(list2) != last) {
        /* More than one run remains.  Do some merging to reduce runs. */
        l2 = p1 = list1;
        for (tp2 = p2 = list2; p2 != last;) {
            /* The new first run begins where the old second list ended.
            ** Use the p2 ``parallel'' pointer to identify the end of the run.
            */
            f1 = l2;
            t = NEXT(p2);
            f2 = l1 = POTHER(t, list2, list1);
            if (t != last) t = NEXT(t);
            l2 = POTHER(t, list2, list1);
            p2 = t;
            while (f1 < l1 && f2 < l2) {
                /* If head 1 is larger than head 2, find ALL the elements
                ** in list 2 strictly less than head1, write them all,
                ** then head 1.  Then compare the new heads, and repeat,
                ** until one or both lists are exhausted.
                **
                ** In all comparisons (after establishing
                ** which head to merge) the item to merge
                ** (at pointer q) is the first operand of
                ** the comparison.  When we want to know
                ** if ``q is strictly less than the other'',
                ** we can't just do
                **    cmp(q, other) < 0
                ** because stability demands that we treat equality
                ** as high when q comes from l2, and as low when
                ** q was from l1.  So we ask the question by doing
                **    cmp(q, other) <= sense
                ** and make sense == 0 when equality should look low,
                ** and -1 when equality should look high.
                */
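                /* Concretely: when q was drawn from l2, sense == -1, and
                ** an equal pair gives cmp(q, other) == 0, which is not
                ** <= -1, so q is NOT treated as "less" and the l1 element
                ** is emitted first -- the order the elements had originally.
                */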


                if (cmp(aTHX_ *f1, *f2) <= 0) {
                    q = f2; b = f1; t = l1;
                    sense = -1;
                } else {
                    q = f1; b = f2; t = l2;
                    sense = 0;
                }


                /* ramp up
                **
                ** Leave t at something strictly
                ** greater than q (or at the end of the list),
                ** and b at something strictly less than q.
                */
                for (i = 1, run = 0 ;;) {
                    if ((p = PINDEX(b, i)) >= t) {
                        /* off the end */
                        if (((p = PINDEX(t, -1)) > b) &&
                            (cmp(aTHX_ *q, *p) <= sense))
                             t = p;
                        else b = p;
                        break;
                    } else if (cmp(aTHX_ *q, *p) <= sense) {
                        t = p;
                        break;
                    } else b = p;
                    if (++run >= RTHRESH) i += i;
                }


                /* q is known to follow b and must be inserted before t.
                ** Increment b, so the range of possibilities is [b,t).
                ** Round binary split down, to favor early appearance.
                ** Adjust b and t until q belongs just before t.
                */

                b++;
                while (b < t) {
                    p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
                    if (cmp(aTHX_ *q, *p) <= sense) {
                        t = p;
                    } else b = p + 1;
                }


                /* Copy all the strictly low elements */

                if (q == f1) {
                    FROMTOUPTO(f2, tp2, t);
                    *tp2++ = *f1++;
                } else {
                    FROMTOUPTO(f1, tp2, t);
                    *tp2++ = *f2++;
                }
            }


            /* Run out remaining list */
            if (f1 == l1) {
                if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
            } else FROMTOUPTO(f1, tp2, l1);
            p1 = NEXT(p1) = POTHER(tp2, list2, list1);
        }
        t = list1;
        list1 = list2;
        list2 = t;
        last = PINDEX(list2, nmemb);
    }
    if (base == list2) {
        last = PINDEX(list1, nmemb);
        FROMTOUPTO(list1, list2, last);
    }
    if (aux != small) Safefree(aux);    /* free iff allocated */
    return;
}


/* What perl needs (least) is another sort implementation in the core.
 * So what's the story?  The short (by jpl's standards) story is that
 * the merge sort above, in use since 5.7, is as fast as, or faster than,
 * qsort on many platforms, but slower than qsort, conspicuously so,
 * on others.  The most likely explanation is platform-specific
 * differences in cache sizes and relative speeds.
 *
 * The quicksort divide-and-conquer algorithm guarantees that, as the
 * problem is subdivided into smaller and smaller parts, the parts
 * fit into smaller (and faster) caches.  So it doesn't matter how
 * many levels of cache exist, quicksort will "find" them, and,
 * as long as smaller is faster, take advantage of them.
 *
 * By contrast, consider how the mergesort implementation above works.
 * Suppose we have five runs (each typically of length 2 after dynprep).
 *
 *    pass          base                 aux
 *      0        1 2 3 4 5
 *      1                            12 34 5
 *      2        1234 5
 *      3                            12345
 *      4        12345
 *
 * Adjacent pairs are merged in "grand sweeps" through the input.
 * This means, on pass 1, the records in runs 1 and 2 aren't revisited until
 * runs 3 and 4 are merged and the records from run 5 have been copied.
 * The only cache that matters is one large enough to hold *all* the input.
 * On some platforms, this may be many times slower than smaller caches.
 *
 * The following pseudo-code uses the same basic merge algorithm,
 * but in a divide-and-conquer way.
 *
 * # merge $runs runs at offset $offset of list $list1 into $list2.
 * # all unmerged runs ($runs == 1) originate in list $base.
 * sub mgsort2 {
 *     my ($offset, $runs, $base, $list1, $list2) = @_;
 *
 *     if ($runs == 1) {
 *         if ($list1 is $base) copy run to $list2
 *         return offset of end of list (or copy)
 *     } else {
 *         $off2 = mgsort2($offset, $runs-($runs/2), $base, $list2, $list1)
 *         mgsort2($off2, $runs/2, $base, $list2, $list1)
 *         merge the adjacent runs at $offset of $list1 into $list2
 *         return the offset of the end of the merged runs
 *     }
 * }
 * mgsort2(0, $runs, $base, $aux, $base);
 *
 * For our 5 runs, the tree of calls looks like
 *
 *                5
 *           3         2
 *         2   1     1   1
 *        1 1
 *
 *        1 2  3     4   5
 *
 * and the corresponding activity looks like
 *
 *     copy runs 1 and 2 from base to aux
 *     merge runs 1 and 2 from aux to base
 *     (run 3 is where it belongs, no copy needed)
 *     merge runs 12 and 3 from base to aux
 *     (runs 4 and 5 are where they belong, no copy needed)
 *     merge runs 4 and 5 from base to aux
 *     merge runs 123 and 45 from aux to base
 *
 * Note that we merge runs 1 and 2 immediately after copying them,
 * while they are still likely to be in fast cache.  Similarly,
 * run 3 is merged with run 12 while it still may be lingering in cache.
 * This implementation should therefore enjoy much of the cache-friendly
 * behavior that quicksort does.  In addition, it does less copying
 * than the original mergesort implementation (only runs 1 and 2 are copied)
 * and the "balancing" of merges is better (merged runs comprise more nearly
 * equal numbers of original runs).
 *
 * The actual cache-friendly implementation will use a pseudo-stack
 * to avoid recursion, and will unroll processing of runs of length 2,
 * but it is otherwise similar to the recursive implementation.
 * If it's as good as the original mergesort implementation on all
 * platforms, it should replace that implementation.  For benchmarking,
 * though, it is convenient to have both implementations available.
 */

typedef struct {
    IV offset;          /* offset of 1st of 2 runs at this level */
    IV runs;            /* how many runs must be combined into 1 */
} off_runs;             /* pseudo-stack element */
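
/* A note on sizing (mine, not from the original text): each stacked
** entry below takes roughly half of the runs still to be combined, so
** a 60-entry pseudo-stack accommodates on the order of 2^60 initial
** runs -- far more elements than any current address space can hold.
*/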

STATIC void
S_cfmergesortsv(pTHX_ gptr *base, size_t nmemb, SVCOMPARE_t cmp)
{
    IV i, run, runs, offset;
    I32 sense, level;
    int iwhich;
    register gptr *f1, *f2, *t, *b, *p, *tp2, *l1, *l2, *q;
    gptr *aux, *list1, *list2;
    gptr *p1;
    gptr small[SMALLSORT];
    gptr *which[3];
    off_runs stack[60], *stackp;

    if (nmemb <= 1) return;                     /* sorted trivially */
    if (nmemb <= SMALLSORT) aux = small;        /* use stack for aux array */
    else { New(799,aux,nmemb,gptr); }           /* allocate auxiliary array */
    level = 0;
    stackp = stack;
    stackp->runs = dynprep(aTHX_ base, aux, nmemb, cmp);
    stackp->offset = offset = 0;
    which[0] = which[2] = base;
    which[1] = aux;
    for (;;) {
        /* On levels where both runs have been constructed (stackp->runs == 0),
         * merge them, and note the offset of their end, in case the offset
         * is needed at the next level up.  Hop up a level, and,
         * as long as stackp->runs is 0, keep merging.
         */
        if ((runs = stackp->runs) == 0) {
            iwhich = level & 1;
            list1 = which[iwhich];              /* area where runs are now */
            list2 = which[++iwhich];            /* area for merged runs */
            do {
                offset = stackp->offset;
                f1 = p1 = list1 + offset;       /* start of first run */
                p = tp2 = list2 + offset;       /* where merged run will go */
                t = NEXT(p);                    /* where first run ends */
                f2 = l1 = POTHER(t, list2, list1); /* ... on the other side */
                t = NEXT(t);                    /* where the second run ends */
                l2 = POTHER(t, list2, list1);   /* ... on the other side */
                offset = PNELEM(list2, t);
                while (f1 < l1 && f2 < l2) {
                    /* If head 1 is larger than head 2, find ALL the elements
                    ** in list 2 strictly less than head1, write them all,
                    ** then head 1.  Then compare the new heads, and repeat,
                    ** until one or both lists are exhausted.
                    **
                    ** In all comparisons (after establishing
                    ** which head to merge) the item to merge
                    ** (at pointer q) is the first operand of
                    ** the comparison.  When we want to know
                    ** if ``q is strictly less than the other'',
                    ** we can't just do
                    **    cmp(q, other) < 0
                    ** because stability demands that we treat equality
                    ** as high when q comes from l2, and as low when
                    ** q was from l1.  So we ask the question by doing
                    **    cmp(q, other) <= sense
                    ** and make sense == 0 when equality should look low,
                    ** and -1 when equality should look high.
                    */
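                    /* As above: sense == -1 when q came from l2, so an
                    ** equal comparison (0) fails the <= -1 test and the
                    ** l1 element is written first, preserving the
                    ** original order.
                    */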


                    if (cmp(aTHX_ *f1, *f2) <= 0) {
                        q = f2; b = f1; t = l1;
                        sense = -1;
                    } else {
                        q = f1; b = f2; t = l2;
                        sense = 0;
                    }


                    /* ramp up
                    **
                    ** Leave t at something strictly
                    ** greater than q (or at the end of the list),
                    ** and b at something strictly less than q.
                    */
                    for (i = 1, run = 0 ;;) {
                        if ((p = PINDEX(b, i)) >= t) {
                            /* off the end */
                            if (((p = PINDEX(t, -1)) > b) &&
                                (cmp(aTHX_ *q, *p) <= sense))
                                 t = p;
                            else b = p;
                            break;
                        } else if (cmp(aTHX_ *q, *p) <= sense) {
                            t = p;
                            break;
                        } else b = p;
                        if (++run >= RTHRESH) i += i;
                    }


                    /* q is known to follow b and must be inserted before t.
                    ** Increment b, so the range of possibilities is [b,t).
                    ** Round binary split down, to favor early appearance.
                    ** Adjust b and t until q belongs just before t.
                    */

                    b++;
                    while (b < t) {
                        p = PINDEX(b, (PNELEM(b, t) - 1) / 2);
                        if (cmp(aTHX_ *q, *p) <= sense) {
                            t = p;
                        } else b = p + 1;
                    }


                    /* Copy all the strictly low elements */

                    if (q == f1) {
                        FROMTOUPTO(f2, tp2, t);
                        *tp2++ = *f1++;
                    } else {
                        FROMTOUPTO(f1, tp2, t);
                        *tp2++ = *f2++;
                    }
                }


                /* Run out remaining list */
                if (f1 == l1) {
                    if (f2 < l2) FROMTOUPTO(f2, tp2, l2);
                } else FROMTOUPTO(f1, tp2, l1);
                p1 = NEXT(p1) = POTHER(tp2, list2, list1);

                if (--level == 0) goto done;
                --stackp;
                t = list1; list1 = list2; list2 = t;    /* swap lists */
            } while ((runs = stackp->runs) == 0);
        }


        stackp->runs = 0;               /* current run will finish level */
        /* While there are more than 2 runs remaining,
         * turn them into exactly 2 runs (at the "other" level),
         * each made up of approximately half the runs.
         * Stack the second half for later processing,
         * and set about producing the first half now.
         */
        while (runs > 2) {
            ++level;
            ++stackp;
            stackp->offset = offset;
            runs -= stackp->runs = runs / 2;
        }
        /* We must construct a single run from 1 or 2 runs.
         * All the original runs are in which[0] == base.
         * The run we construct must end up in which[level&1].
         */
        iwhich = level & 1;
        if (runs == 1) {
            /* Constructing a single run from a single run.
             * If it's where it belongs already, there's nothing to do.
             * Otherwise, copy it to where it belongs.
             * A run of 1 is either a singleton at level 0,
             * or the second half of a split 3.  In neither event
             * is it necessary to set offset.  It will be set by the merge
             * that immediately follows.
             */
            if (iwhich) {       /* Belongs in aux, currently in base */
                f1 = b = PINDEX(base, offset);  /* where list starts */
                f2 = PINDEX(aux, offset);       /* where list goes */
                t = NEXT(f2);                   /* where list will end */
                offset = PNELEM(aux, t);        /* offset thereof */
                t = PINDEX(base, offset);       /* where it currently ends */
                FROMTOUPTO(f1, f2, t);          /* copy */
                NEXT(b) = t;                    /* set up parallel pointer */
            } else if (level == 0) goto done;   /* single run at level 0 */
        } else {
            /* Constructing a single run from two runs.
             * The merge code at the top will do that.
             * We need only make sure the two runs are in the "other" array,
             * so they'll end up in the correct array after the merge.
             */
            ++level;
            ++stackp;
            stackp->offset = offset;
            stackp->runs = 0;   /* take care of both runs, trigger merge */
            if (!iwhich) {      /* Merged runs belong in aux, copy 1st */
                f1 = b = PINDEX(base, offset);  /* where first run starts */
                f2 = PINDEX(aux, offset);       /* where it will be copied */
                t = NEXT(f2);                   /* where first run will end */
                offset = PNELEM(aux, t);        /* offset thereof */
                p = PINDEX(base, offset);       /* end of first run */
                t = NEXT(t);                    /* where second run will end */
                t = PINDEX(base, PNELEM(aux, t)); /* where it now ends */
                FROMTOUPTO(f1, f2, t);          /* copy both runs */
                NEXT(b) = p;                    /* parallel pointer for 1st */
                NEXT(p) = t;                    /* ... and for second */
            }
        }
    }
done:
    if (aux != small) Safefree(aux);    /* free iff allocated */
    return;
}

/*
 * The quicksort implementation was derived from source code contributed
 * by Tom Horsley.
 *
 * NOTE: this code was derived from Tom Horsley's qsort replacement
 * and should not be confused with the original code.
 */

/* Copyright (C) Tom Horsley, 1997. All rights reserved.

   Permission granted to distribute under the same terms as perl which are
   (briefly):

        This program is free software; you can redistribute it and/or modify
        it under the terms of either:

        a) the GNU General Public License as published by the Free
        Software Foundation; either version 1, or (at your option) any
        later version, or

        b) the "Artistic License" which comes with this Kit.

   Details on the perl license can be found in the perl source code which
   may be located via the www.perl.com web page.

   This is the most wonderfulest possible qsort I can come up with (and
   still be mostly portable).  My (limited) tests indicate it consistently
   does about 20% fewer calls to compare than does the qsort in the Visual
   C++ library; other vendors may vary.

   Some of the ideas in here can be found in "Algorithms" by Sedgewick,
   others I invented myself (or more likely re-invented since they seemed
   pretty obvious once I watched the algorithm operate for a while).

   Most of this code was written while watching the Marlins sweep the Giants
   in the 1997 National League Playoffs - no Braves fans allowed to use this
   code (just kidding :-).

   I realize that if I wanted to be true to the perl tradition, the only
   comment in this file would be something like:

   ...they shuffled back towards the rear of the line. 'No, not at the
   rear!'  the slave-driver shouted. 'Three files up. And stay there...

   However, I really needed to violate that tradition just so I could keep
   track of what happens myself, not to mention some poor fool trying to
   understand this years from now :-).
*/

/* ********************************************************** Configuration */

#ifndef QSORT_ORDER_GUESS
#define QSORT_ORDER_GUESS 2     /* Select doubling version of the netBSD trick */
#endif

/* QSORT_MAX_STACK is the largest number of partitions that can be stacked up for
   future processing - a good max upper bound is log base 2 of memory size
   (32 on 32 bit machines, 64 on 64 bit machines, etc).  In reality it can
   safely be smaller than that since the program is taking up some space and
   most operating systems only let you grab some subset of contiguous
   memory (not to mention that you are normally sorting data larger than
   1 byte element size :-).
*/
#ifndef QSORT_MAX_STACK
#define QSORT_MAX_STACK 32
#endif

/* QSORT_BREAK_EVEN is the size of the largest partition we should insertion sort.
   Anything bigger and we use qsort.  If you make this too small, the qsort
   will probably break (or become less efficient), because it doesn't expect
   the middle element of a partition to be the same as the right or left -
   you have been warned.
*/
#ifndef QSORT_BREAK_EVEN
#define QSORT_BREAK_EVEN 6
#endif

/* QSORT_PLAY_SAFE is the size of the largest partition we're willing
   to go quadratic on.  We inoculate larger partitions against
   quadratic behavior by shuffling them before sorting.  This is not
   an absolute guarantee of non-quadratic behavior, but it would take
   staggeringly bad luck to pick extreme elements as the pivot
   from randomized data.
*/
#ifndef QSORT_PLAY_SAFE
#define QSORT_PLAY_SAFE 255
#endif

/* ************************************************************* Data Types */

/* hold left and right index values of a partition waiting to be sorted (the
   partition includes both left and right - right is NOT one past the end or
   anything like that).
*/
struct partition_stack_entry {
    int left;
    int right;
#ifdef QSORT_ORDER_GUESS
    int qsort_break_even;
#endif
};

/* ******************************************************* Shorthand Macros */

/* Note that these macros will be used from inside the qsort function where
   we happen to know that the variable 'elt_size' contains the size of an
   array element and the variable 'temp' points to enough space to hold a
   temp element and the variable 'array' points to the array being sorted
   and 'compare' is the pointer to the compare routine.

   Also note that there are very many highly architecture specific ways
   these might be sped up, but this is simply the most generally portable
   code I could think of.
*/

/* Return < 0, == 0, or > 0 as the value of elt1 is < elt2, == elt2, or > elt2.
*/
#define qsort_cmp(elt1, elt2) \
    ((*compare)(aTHX_ array[elt1], array[elt2]))

#ifdef QSORT_ORDER_GUESS
#define QSORT_NOTICE_SWAP swapped++;
#else
#define QSORT_NOTICE_SWAP
#endif

/* swaps contents of array elements elt1, elt2.
*/
#define qsort_swap(elt1, elt2) \
    STMT_START { \
        QSORT_NOTICE_SWAP \
        temp = array[elt1]; \
        array[elt1] = array[elt2]; \
        array[elt2] = temp; \
    } STMT_END

/* rotate contents of elt1, elt2, elt3 such that elt1 gets elt2, elt2 gets
   elt3 and elt3 gets elt1.
*/
#define qsort_rotate(elt1, elt2, elt3) \
    STMT_START { \
        QSORT_NOTICE_SWAP \
        temp = array[elt1]; \
        array[elt1] = array[elt2]; \
        array[elt2] = array[elt3]; \
        array[elt3] = temp; \
    } STMT_END

/* ************************************************************ Debug stuff */

#ifdef QSORT_DEBUG

static void
break_here()
{
    return;     /* good place to set a breakpoint */
}

#define qsort_assert(t) (void)( (t) || (break_here(), 0) )

static void
doqsort_all_asserts(
    void *array,
    size_t num_elts,
    size_t elt_size,
    int (*compare)(const void * elt1, const void * elt2),
    int pc_left, int pc_right, int u_left, int u_right)
{
    int i;

    qsort_assert(pc_left <= pc_right);
    qsort_assert(u_right < pc_left);
    qsort_assert(pc_right < u_left);
    for (i = u_right + 1; i < pc_left; ++i) {
        qsort_assert(qsort_cmp(i, pc_left) < 0);
    }
    for (i = pc_left; i < pc_right; ++i) {
        qsort_assert(qsort_cmp(i, pc_right) == 0);
    }
    for (i = pc_right + 1; i < u_left; ++i) {
        qsort_assert(qsort_cmp(pc_right, i) < 0);
    }
}

#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) \
    doqsort_all_asserts(array, num_elts, elt_size, compare, \
                        PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT)

#else

#define qsort_assert(t) ((void)0)

#define qsort_all_asserts(PC_LEFT, PC_RIGHT, U_LEFT, U_RIGHT) ((void)0)

#endif

/* ****************************************************************** qsort */

STATIC void     /* the standard unstable (u) quicksort (qsort) */
S_qsortsvu(pTHX_ SV ** array, size_t num_elts, SVCOMPARE_t compare)
{
    register SV *temp;

    struct partition_stack_entry partition_stack[QSORT_MAX_STACK];
    int next_stack_entry = 0;

    int part_left;
    int part_right;
#ifdef QSORT_ORDER_GUESS
    int qsort_break_even;
    int swapped;
#endif

    /* Make sure we actually have work to do.
    */
    if (num_elts <= 1) {
        return;
    }

    /* Inoculate large partitions against quadratic behavior */
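    /* (The shuffle below is a Fisher-Yates shuffle driven by Drand01():
    ** each pass exchanges the last not-yet-fixed element with a
    ** uniformly chosen slot among the remaining ones.)
    */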
    if (num_elts > QSORT_PLAY_SAFE) {
        register size_t n, j;
        register SV **q;
        for (n = num_elts, q = array; n > 1; ) {
            j = n-- * Drand01();
            temp = q[j];
            q[j] = q[n];
            q[n] = temp;
        }
    }

    /* Setup the initial partition definition and fall into the sorting loop
    */
    part_left = 0;
    part_right = (int)(num_elts - 1);
#ifdef QSORT_ORDER_GUESS
    qsort_break_even = QSORT_BREAK_EVEN;
#else
#define qsort_break_even QSORT_BREAK_EVEN
#endif
    for ( ; ; ) {
        if ((part_right - part_left) >= qsort_break_even) {
            /* OK, this is gonna get hairy, so let's try to document all the
               concepts and abbreviations and variables and what they keep
               track of:

               pc: pivot chunk - the set of array elements we accumulate in the
               middle of the partition, all equal in value to the original
               pivot element selected.  The pc is defined by:

                   pc_left - the leftmost array index of the pc
                   pc_right - the rightmost array index of the pc

               we start with pc_left == pc_right and only one element
               in the pivot chunk (but it can grow during the scan).

               u: uncompared elements - the set of elements in the partition
               we have not yet compared to the pivot value.  There are two
               uncompared sets during the scan - one to the left of the pc
               and one to the right.

                   u_right - the rightmost index of the left side's uncompared set
                   u_left - the leftmost index of the right side's uncompared set

               The leftmost index of the left side's uncompared set
               doesn't need its own variable because it is always defined
               by the leftmost edge of the whole partition (part_left).  The
               same goes for the rightmost edge of the right partition
               (part_right).

               We know there are no uncompared elements on the left once we
               get u_right < part_left and no uncompared elements on the
               right once u_left > part_right.  When both these conditions
               are met, we have completed the scan of the partition.

               Any elements which are between the pivot chunk and the
               uncompared elements should be less than the pivot value on
               the left side and greater than the pivot value on the right
               side (in fact, the goal of the whole algorithm is to arrange
               for that to be true and make the groups of less-than and
               greater-than elements into new partitions to sort again).

               As you marvel at the complexity of the code and wonder why it
               has to be so confusing, consider some of the things this level
               of confusion brings:

               Once I do a compare, I squeeze every ounce of juice out of it.  I
               never do compare calls I don't have to do, and I certainly never
               do redundant calls.

               I also never swap any elements unless I can prove there is a
               good reason.  Many sort algorithms will swap a known value with
               an uncompared value just to get things in the right place (or
               avoid complexity :-), but that uncompared value, once it gets
               compared, may then have to be swapped again.  A lot of the
               complexity of this code is due to the fact that it never swaps
               anything except compared values, and it only swaps them when the
               compare shows they are out of position.
            */
            int pc_left, pc_right;
            int u_right, u_left;

            int s;

            pc_left = ((part_left + part_right) / 2);
            pc_right = pc_left;
            u_right = pc_left - 1;
            u_left = pc_right + 1;

            /* Qsort works best when the pivot value is also the median value
               in the partition (unfortunately you can't find the median value
               without first sorting :-), so to give the algorithm a helping
               hand, we pick 3 elements and sort them and use the median value
               of that tiny set as the pivot value.

               Some versions of qsort like to use the left middle and right as
               the 3 elements to sort so they can ensure the ends of the
               partition will contain values which will stop the scan in the
               compare loop, but when you have to call an arbitrarily complex
               routine to do a compare, it's really better to just keep track of
               array index values to know when you hit the edge of the
               partition and avoid the extra compare.  An even better reason to
               avoid using a compare call is the fact that you can drop off the
               edge of the array if someone foolishly provides you with an
               unstable compare function that doesn't always provide consistent
               results.

               So, since it is simpler for us to compare the three adjacent
               elements in the middle of the partition, those are the ones we
               pick here (conveniently pointed at by u_right, pc_left, and
               u_left).  The values of the left, center, and right elements
               are referred to as l c and r in the following comments.
            */

#ifdef QSORT_ORDER_GUESS
            swapped = 0;
#endif
            s = qsort_cmp(u_right, pc_left);
            if (s < 0) {
                /* l < c */
                s = qsort_cmp(pc_left, u_left);
                /* if l < c, c < r - already in order - nothing to do */
                if (s == 0) {
                    /* l < c, c == r - already in order, pc grows */
                    ++pc_right;
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                } else if (s > 0) {
                    /* l < c, c > r - need to know more */
                    s = qsort_cmp(u_right, u_left);
                    if (s < 0) {
                        /* l < c, c > r, l < r - swap c & r to get ordered */
                        qsort_swap(pc_left, u_left);
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    } else if (s == 0) {
                        /* l < c, c > r, l == r - swap c&r, grow pc */
                        qsort_swap(pc_left, u_left);
                        --pc_left;
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    } else {
                        /* l < c, c > r, l > r - make lcr into rlc to get ordered */
                        qsort_rotate(pc_left, u_right, u_left);
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    }
                }
            } else if (s == 0) {
                /* l == c */
                s = qsort_cmp(pc_left, u_left);
                if (s < 0) {
                    /* l == c, c < r - already in order, grow pc */
                    --pc_left;
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                } else if (s == 0) {
                    /* l == c, c == r - already in order, grow pc both ways */
                    --pc_left;
                    ++pc_right;
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                } else {
                    /* l == c, c > r - swap l & r, grow pc */
                    qsort_swap(u_right, u_left);
                    ++pc_right;
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                }
            } else {
                /* l > c */
                s = qsort_cmp(pc_left, u_left);
                if (s < 0) {
                    /* l > c, c < r - need to know more */
                    s = qsort_cmp(u_right, u_left);
                    if (s < 0) {
                        /* l > c, c < r, l < r - swap l & c to get ordered */
                        qsort_swap(u_right, pc_left);
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    } else if (s == 0) {
                        /* l > c, c < r, l == r - swap l & c, grow pc */
                        qsort_swap(u_right, pc_left);
                        ++pc_right;
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    } else {
                        /* l > c, c < r, l > r - rotate lcr into crl to order */
                        qsort_rotate(u_right, pc_left, u_left);
                        qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                    }
                } else if (s == 0) {
                    /* l > c, c == r - swap ends, grow pc */
                    qsort_swap(u_right, u_left);
                    --pc_left;
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                } else {
                    /* l > c, c > r - swap ends to get in order */
                    qsort_swap(u_right, u_left);
                    qsort_all_asserts(pc_left, pc_right, u_left + 1, u_right - 1);
                }
            }
            /* We now know the 3 middle elements have been compared and
               arranged in the desired order, so we can shrink the uncompared
               sets on both sides
            */
            --u_right;
            ++u_left;
            qsort_all_asserts(pc_left, pc_right, u_left, u_right);

            /* The above massive nested if was the simple part :-).  We now have
               the middle 3 elements ordered and we need to scan through the
               uncompared sets on either side, swapping elements that are on
               the wrong side or simply shuffling equal elements around to get
               all equal elements into the pivot chunk.
            */

            for ( ; ; ) {
                int still_work_on_left;
                int still_work_on_right;

                /* Scan the uncompared values on the left.  If I find a value
                   equal to the pivot value, move it over so it is adjacent to
                   the pivot chunk and expand the pivot chunk.  If I find a value
                   less than the pivot value, then just leave it - it's already
                   on the correct side of the partition.  If I find a greater
                   value, then stop the scan.
                */
                while ((still_work_on_left = (u_right >= part_left))) {
                    s = qsort_cmp(u_right, pc_left);
                    if (s < 0) {
                        --u_right;
                    } else if (s == 0) {
                        --pc_left;
                        if (pc_left != u_right) {
                            qsort_swap(u_right, pc_left);
                        }
                        --u_right;
                    } else {
                        break;
                    }
                    qsort_assert(u_right < pc_left);
                    qsort_assert(pc_left <= pc_right);
                    qsort_assert(qsort_cmp(u_right + 1, pc_left) <= 0);
                    qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
                }

                /* Do a mirror image scan of uncompared values on the right
                */
                while ((still_work_on_right = (u_left <= part_right))) {
                    s = qsort_cmp(pc_right, u_left);
                    if (s < 0) {
                        ++u_left;
                    } else if (s == 0) {
                        ++pc_right;
                        if (pc_right != u_left) {
                            qsort_swap(pc_right, u_left);
                        }
                        ++u_left;
                    } else {
                        break;
                    }
                    qsort_assert(u_left > pc_right);
                    qsort_assert(pc_left <= pc_right);
                    qsort_assert(qsort_cmp(pc_right, u_left - 1) <= 0);
                    qsort_assert(qsort_cmp(pc_left, pc_right) == 0);
                }

                if (still_work_on_left) {
                    /* I know I have a value on the left side which needs to be
                       on the right side, but I need to know more to decide
                       exactly the best thing to do with it.
                    */
                    if (still_work_on_right) {
                        /* I know I have values on both sides which are out of
                           position.  This is a big win because I kill two birds
                           with one swap (so to speak).  I can advance the
                           uncompared pointers on both sides after swapping both
                           of them into the right place.
                        */
                        qsort_swap(u_right, u_left);
                        --u_right;
                        ++u_left;
                        qsort_all_asserts(pc_left, pc_right, u_left, u_right);
                    } else {
                        /* I have an out of position value on the left, but the
                           right is fully scanned, so I "slide" the pivot chunk
                           and any less-than values left one to make room for the
                           greater value over on the right.  If the out of position
                           value is immediately adjacent to the pivot chunk (there
                           are no less-than values), I can do that with a swap,
                           otherwise, I have to rotate one of the less-than values
                           into the former position of the out of position value
                           and the right end of the pivot chunk into the left end
                           (got all that?).
                        */
                        --pc_left;
                        if (pc_left == u_right) {
                            qsort_swap(u_right, pc_right);
                            qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                        } else {
                            qsort_rotate(u_right, pc_left, pc_right);
                            qsort_all_asserts(pc_left, pc_right-1, u_left, u_right-1);
                        }
                        --pc_right;
                        --u_right;
                    }
                } else if (still_work_on_right) {
                    /* Mirror image of complex case above: I have an out of
                       position value on the right, but the left is fully
                       scanned, so I need to shuffle things around to make room
                       for the right value on the left.
                    */
                    ++pc_right;
                    if (pc_right == u_left) {
                        qsort_swap(u_left, pc_left);
                        qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
                    } else {
                        qsort_rotate(pc_right, pc_left, u_left);
                        qsort_all_asserts(pc_left+1, pc_right, u_left+1, u_right);
                    }
                    ++pc_left;
                    ++u_left;
                } else {
                    /* No more scanning required on either side of partition,
                       break out of loop and figure out next set of partitions
                    */
                    break;
                }
            }

            /* The elements in the pivot chunk are now in the right place.  They
               will never move or be compared again.  All I have to do is decide
               what to do with the stuff to the left and right of the pivot
               chunk.

               Notes on the QSORT_ORDER_GUESS ifdef code:

               1. If I just built these partitions without swapping any (or
                  very many) elements, there is a chance that the elements are
                  already ordered properly (being properly ordered will
                  certainly result in no swapping, but the converse can't be
                  proved :-).

               2. A (properly written) insertion sort will run faster on
                  already ordered data than qsort will.

               3. Perhaps there is some way to make a good guess about
                  switching to an insertion sort earlier than partition size 6
                  (for instance - we could save the partition size on the stack
                  and increase the size each time we find we didn't swap, thus
                  switching to insertion sort earlier for partitions with a
                  history of not swapping).

               4. Naturally, if I just switch right away, it will make
                  artificial benchmarks with pure ascending (or descending)
                  data look really good, but is that a good reason in general?
                  Hard to say...
            */

#ifdef QSORT_ORDER_GUESS
            if (swapped < 3) {
#if QSORT_ORDER_GUESS == 1
                qsort_break_even = (part_right - part_left) + 1;
#endif
#if QSORT_ORDER_GUESS == 2
                qsort_break_even *= 2;
#endif
#if QSORT_ORDER_GUESS == 3
                int prev_break = qsort_break_even;
                qsort_break_even *= qsort_break_even;
                if (qsort_break_even < prev_break) {
                    qsort_break_even = (part_right - part_left) + 1;
                }
#endif
            } else {
                qsort_break_even = QSORT_BREAK_EVEN;
            }
#endif

            if (part_left < pc_left) {
                /* There are elements on the left which need more processing.
                   Check the right as well before deciding what to do.
                */
                if (pc_right < part_right) {
                    /* We have two partitions to be sorted.  Stack the biggest one
                       and process the smallest one on the next iteration.  This
                       minimizes the stack height by ensuring that any additional
                       stack entries must come from the smallest partition which
                       (because it is smallest) will have the fewest
                       opportunities to generate additional stack entries.
                    */
                    if ((part_right - pc_right) > (pc_left - part_left)) {
                        /* stack the right partition, process the left */
                        partition_stack[next_stack_entry].left = pc_right + 1;
                        partition_stack[next_stack_entry].right = part_right;
#ifdef QSORT_ORDER_GUESS
                        partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                        part_right = pc_left - 1;
                    } else {
                        /* stack the left partition, process the right */
                        partition_stack[next_stack_entry].left = part_left;
                        partition_stack[next_stack_entry].right = pc_left - 1;
#ifdef QSORT_ORDER_GUESS
                        partition_stack[next_stack_entry].qsort_break_even = qsort_break_even;
#endif
                        part_left = pc_right + 1;
                    }
                    qsort_assert(next_stack_entry < QSORT_MAX_STACK);
                    ++next_stack_entry;
                } else {
                    /* The elements on the left are the only remaining elements
                       that need sorting, arrange for them to be processed as the
                       next partition.
                    */
                    part_right = pc_left - 1;
                }
            } else if (pc_right < part_right) {
                /* There is only one chunk on the right to be sorted, make it
                   the new partition and loop back around.
                */
                part_left = pc_right + 1;
            } else {
                /* This whole partition wound up in the pivot chunk, so
                   we need to get a new partition off the stack.
                */
                if (next_stack_entry == 0) {
                    /* the stack is empty - we are done */
                    break;
                }
                --next_stack_entry;
                part_left = partition_stack[next_stack_entry].left;
                part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
                qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
            }
        } else {
            /* This partition is too small to fool with qsort complexity, just
               do an ordinary insertion sort to minimize overhead.
            */
            int i;
            /* Assume 1st element is in right place already, and start checking
               at 2nd element to see where it should be inserted.
            */
            for (i = part_left + 1; i <= part_right; ++i) {
                int j;
                /* Scan (backwards - just in case 'i' is already in right place)
                   through the elements already sorted to see if the ith element
                   belongs ahead of one of them.
                */
                for (j = i - 1; j >= part_left; --j) {
                    if (qsort_cmp(i, j) >= 0) {
                        /* i belongs right after j
                        */
                        break;
                    }
                }
                ++j;
                if (j != i) {
                    /* Looks like we really need to move some things
                    */
                    int k;
                    temp = array[i];
                    for (k = i - 1; k >= j; --k)
                        array[k + 1] = array[k];
                    array[j] = temp;
                }
            }

            /* That partition is now sorted, grab the next one, or get out
               of the loop if there aren't any more.
            */

            if (next_stack_entry == 0) {
                /* the stack is empty - we are done */
                break;
            }
            --next_stack_entry;
            part_left = partition_stack[next_stack_entry].left;
            part_right = partition_stack[next_stack_entry].right;
#ifdef QSORT_ORDER_GUESS
            qsort_break_even = partition_stack[next_stack_entry].qsort_break_even;
#endif
        }
    }

    /* Believe it or not, the array is sorted at this point! */
}

/* Stabilize what is, presumably, an otherwise unstable sort method.
 * We do that by allocating (or having on hand) an array of pointers
 * that is the same size as the original array of elements to be sorted.
 * We initialize this parallel array with the addresses of the original
 * array elements.  This indirection can make you crazy.
 * Some pictures can help.  After initializing, we have
 *
 *  indir                  list1
 * +----+                 +----+
 * |    | --------------> |    | ------> first element to be sorted
 * +----+                 +----+
 * |    | --------------> |    | ------> second element to be sorted
 * +----+                 +----+
 * |    | --------------> |    | ------> third element to be sorted
 * +----+                 +----+
 *  ...
 * +----+                 +----+
 * |    | --------------> |    | ------> n-1st element to be sorted
 * +----+                 +----+
 * |    | --------------> |    | ------> n-th element to be sorted
 * +----+                 +----+
 *
 * During the sort phase, we leave the elements of list1 where they are,
 * and sort the pointers in the indirect array in the same order determined
 * by the original comparison routine on the elements pointed to.
 * Because we don't move the elements of list1 around through
 * this phase, we can break ties on elements that compare equal
 * using their address in the list1 array, ensuring stability.
 * This leaves us with something looking like
 *
 *  indir                  list1
 * +----+                 +----+
 * |    | --+       +---> |    | ------> first element to be sorted
 * +----+   |       |     +----+
 * |    | --|-------|---> |    | ------> second element to be sorted
 * +----+   |       |     +----+
 * |    | --|-------+ +-> |    | ------> third element to be sorted
 * +----+   |         |   +----+
 *  ...
 * +----+   |         |   +----+
 * |    | --|---------+   |    | ------> n-1st element to be sorted
 * +----+   |         +-> +----+
 * |    | --+         |   |    | ------> n-th element to be sorted
 * +----+             +-> +----+
 *
 * where the i-th element of the indirect array points to the element
 * that should be i-th in the sorted array.  After the sort phase,
 * we have to put the elements of list1 into the places
 * dictated by the indirect array.
 */
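
/* A tiny worked example of the permutation loop below (mine, not from
** the original text): suppose q = [C, A, B] with A < B < C, so after the
** indirect sort pp = [&q[1], &q[2], &q[0]].  At n == 2, j = pp[2]-q = 0;
** we save C, write q[0] = *pp[0] = A, follow i = pp[0]-q = 1, write
** q[1] = *pp[1] = B, reach j == n, and drop the saved C into q[2],
** giving [A, B, C], with every pp[k] repaired to q+k along the way.
*/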

static SVCOMPARE_t RealCmp;

static I32
cmpindir(pTHX_ gptr a, gptr b)
{
    I32 sense;
    gptr *ap = (gptr *)a;
    gptr *bp = (gptr *)b;

    if ((sense = RealCmp(aTHX_ *ap, *bp)) == 0)
        sense = (ap > bp) ? 1 : ((ap < bp) ? -1 : 0);
    return sense;
}

STATIC void
S_qsortsv(pTHX_ gptr *list1, size_t nmemb, SVCOMPARE_t cmp)
{
    SV **hintsvp;

    if (SORTHINTS(hintsvp) & HINT_SORT_STABLE) {
        register gptr **pp, *q;
        register size_t n, j, i;
        gptr *small[SMALLSORT], **indir, tmp;
        SVCOMPARE_t savecmp;
        if (nmemb <= 1) return;         /* sorted trivially */

        /* Small arrays can use the stack, big ones must be allocated */
        if (nmemb <= SMALLSORT) indir = small;
        else { New(1799, indir, nmemb, gptr *); }

        /* Copy pointers to original array elements into indirect array */
        for (n = nmemb, pp = indir, q = list1; n--; ) *pp++ = q++;

        savecmp = RealCmp;      /* Save current comparison routine, if any */
        RealCmp = cmp;          /* Put comparison routine where cmpindir can find it */

        /* sort, with indirection */
        S_qsortsvu(aTHX_ (gptr *)indir, nmemb, cmpindir);

        pp = indir;
        q = list1;
        for (n = nmemb; n--; ) {
            /* Assert A: all elements of q with index > n are already
             * in place.  This is vacuously true at the start, and we
             * put element n where it belongs below (if it wasn't
             * already where it belonged).  Assert B: we only move
             * elements that aren't where they belong,
             * so, by A, we never tamper with elements above n.
             */
            j = pp[n] - q;      /* This sets j so that q[j] is
                                 * at pp[n].  *pp[j] belongs in
                                 * q[j], by construction.
                                 */
            if (n != j) {       /* all's well if n == j */
                tmp = q[j];     /* save what's in q[j] */
                do {
                    q[j] = *pp[j];      /* put *pp[j] where it belongs */
                    i = pp[j] - q;      /* the index in q of the element
                                         * just moved */
                    pp[j] = q + j;      /* this is ok now */
                } while ((j = i) != n);
                /* There are only finitely many (nmemb) addresses
                 * in the pp array.
                 * So we must eventually revisit an index we saw before.
                 * Suppose the first revisited index is k != n.
                 * An index is visited because something else belongs there.
                 * If we visit k twice, then two different elements must
                 * belong in the same place, which cannot be.
                 * So j must get back to n, the loop terminates,
                 * and we put the saved element where it belongs.
                 */
                q[n] = tmp;     /* put what belongs into
                                 * the n-th element */
            }
        }

        /* free iff allocated */
        if (indir != small) { Safefree(indir); }
        /* restore prevailing comparison routine */
        RealCmp = savecmp;
    } else {
        S_qsortsvu(aTHX_ list1, nmemb, cmp);
    }
}

/*
=for apidoc sortsv

Sort an array.  Here is an example:

    sortsv(AvARRAY(av), av_len(av)+1, Perl_sv_cmp_locale);

=cut
*/

void
Perl_sortsv(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp)
{
    void (*sortsvp)(pTHX_ SV **array, size_t nmemb, SVCOMPARE_t cmp) =
        S_mergesortsv;
    SV **hintsvp;
    I32 hints;

    if ((hints = SORTHINTS(hintsvp))) {
        if (hints & HINT_SORT_QUICKSORT)
            sortsvp = S_qsortsv;
        else {
            if (hints & HINT_SORT_MERGESORT)
                sortsvp = S_cfmergesortsv;
            else
                sortsvp = S_mergesortsv;
        }
    }

    sortsvp(aTHX_ array, nmemb, cmp);
}

PP(pp_sort)
{
    dSP; dMARK; dORIGMARK;
    register SV **up;
    SV **myorigmark = ORIGMARK;
    register I32 max;
    HV *stash;
    GV *gv;
    CV *cv = 0;
    I32 gimme = GIMME;
    OP* nextop = PL_op->op_next;
    I32 overloading = 0;
    bool hasargs = FALSE;
    I32 is_xsub = 0;

    if (gimme != G_ARRAY) {
        SP = MARK;
        RETPUSHUNDEF;
    }

    ENTER;
    SAVEVPTR(PL_sortcop);
    if (PL_op->op_flags & OPf_STACKED) {
        if (PL_op->op_flags & OPf_SPECIAL) {
            OP *kid = cLISTOP->op_first->op_sibling;    /* pass pushmark */
            kid = kUNOP->op_first;                      /* pass rv2gv */
            kid = kUNOP->op_first;                      /* pass leave */
            PL_sortcop = kid->op_next;
            stash = CopSTASH(PL_curcop);
        }
        else {
            cv = sv_2cv(*++MARK, &stash, &gv, 0);
            if (cv && SvPOK(cv)) {
                STRLEN n_a;
                char *proto = SvPV((SV*)cv, n_a);
                if (proto && strEQ(proto, "$$")) {
                    hasargs = TRUE;
                }
            }
            if (!(cv && CvROOT(cv))) {
                if (cv && CvXSUB(cv)) {
                    is_xsub = 1;
                }
                else if (gv) {
                    SV *tmpstr = sv_newmortal();
                    gv_efullname3(tmpstr, gv, Nullch);
                    DIE(aTHX_ "Undefined sort subroutine \"%s\" called",
                        SvPVX(tmpstr));
                }
                else {
                    DIE(aTHX_ "Undefined subroutine in sort");
                }
            }

            if (is_xsub)
                PL_sortcop = (OP*)cv;
            else {
                PL_sortcop = CvSTART(cv);
                SAVEVPTR(CvROOT(cv)->op_ppaddr);
                CvROOT(cv)->op_ppaddr = PL_ppaddr[OP_NULL];

                SAVEVPTR(PL_curpad);
                PL_curpad = AvARRAY((AV*)AvARRAY(CvPADLIST(cv))[1]);
            }
        }
    }
    else {
        PL_sortcop = Nullop;
        stash = CopSTASH(PL_curcop);
    }

    up = myorigmark + 1;
    while (MARK < SP) { /* This may or may not shift down one here. */
        /*SUPPRESS 560*/
        if ((*up = *++MARK)) {                  /* Weed out nulls. */
            SvTEMP_off(*up);
            if (!PL_sortcop && !SvPOK(*up)) {
                STRLEN n_a;
                if (SvAMAGIC(*up))
                    overloading = 1;
                else
                    (void)sv_2pv(*up, &n_a);
            }
            up++;
        }
    }
    max = --up - myorigmark;
    if (PL_sortcop) {
        if (max > 1) {
            PERL_CONTEXT *cx;
            SV** newsp;
            bool oldcatch = CATCH_GET;

            SAVETMPS;
            SAVEOP();

            CATCH_SET(TRUE);
            PUSHSTACKi(PERLSI_SORT);
            if (!hasargs && !is_xsub) {
                if (PL_sortstash != stash || !PL_firstgv || !PL_secondgv) {
                    SAVESPTR(PL_firstgv);
                    SAVESPTR(PL_secondgv);
                    PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
                    PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
                    PL_sortstash = stash;
                }
#ifdef USE_5005THREADS
                sv_lock((SV *)PL_firstgv);
                sv_lock((SV *)PL_secondgv);
#endif
                SAVESPTR(GvSV(PL_firstgv));
                SAVESPTR(GvSV(PL_secondgv));
            }

            PUSHBLOCK(cx, CXt_NULL, PL_stack_base);
            if (!(PL_op->op_flags & OPf_SPECIAL)) {
                cx->cx_type = CXt_SUB;
                cx->blk_gimme = G_SCALAR;
                PUSHSUB(cx);
                if (!CvDEPTH(cv))
                    (void)SvREFCNT_inc(cv); /* in preparation for POPSUB */
            }
            PL_sortcxix = cxstack_ix;

            if (hasargs && !is_xsub) {
                /* This is mostly copied from pp_entersub */
                AV *av = (AV*)PL_curpad[0];

#ifndef USE_5005THREADS
                cx->blk_sub.savearray = GvAV(PL_defgv);
                GvAV(PL_defgv) = (AV*)SvREFCNT_inc(av);
#endif /* USE_5005THREADS */
                cx->blk_sub.oldcurpad = PL_curpad;
                cx->blk_sub.argarray = av;
            }
            sortsv((myorigmark+1), max,
                   is_xsub ? sortcv_xsub : hasargs ? sortcv_stacked : sortcv);

            POPBLOCK(cx,PL_curpm);
            PL_stack_sp = newsp;
            POPSTACK;
            CATCH_SET(oldcatch);
        }
    }
    else {
        if (max > 1) {
            MEXTEND(SP, 20);    /* Can't afford stack realloc on signal. */
            sortsv(ORIGMARK+1, max,
                   (PL_op->op_private & OPpSORT_NUMERIC)
                        ? ( (PL_op->op_private & OPpSORT_INTEGER)
                            ? ( overloading ? amagic_i_ncmp : sv_i_ncmp)
                            : ( overloading ? amagic_ncmp : sv_ncmp))
                        : ( IN_LOCALE_RUNTIME
                            ? ( overloading
                                ? amagic_cmp_locale
                                : sv_cmp_locale_static)
                            : ( overloading ? amagic_cmp : sv_cmp_static)));
            if (PL_op->op_private & OPpSORT_REVERSE) {
                SV **p = ORIGMARK+1;
                SV **q = ORIGMARK+max;
                while (p < q) {
                    SV *tmp = *p;
                    *p++ = *q;
                    *q-- = tmp;
                }
            }
        }
    }
    LEAVE;
    PL_stack_sp = ORIGMARK + max;
    return nextop;
}

static I32
sortcv(pTHX_ SV *a, SV *b)
{
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    GvSV(PL_firstgv) = a;
    GvSV(PL_secondgv) = b;
    PL_stack_sp = PL_stack_base;
    PL_op = PL_sortcop;
    CALLRUNOPS(aTHX);
    if (PL_stack_sp != PL_stack_base + 1)
        Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
        Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
        LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}

static I32
sortcv_stacked(pTHX_ SV *a, SV *b)
{
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    AV *av;

#ifdef USE_5005THREADS
    av = (AV*)PL_curpad[0];
#else
    av = GvAV(PL_defgv);
#endif

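    /* Make sure @_ has room for the two elements we are about to store:
    ** first realign the array to its allocated base, then, if there is
    ** still not room for two SV*s, grow the allocation.
    */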
    if (AvMAX(av) < 1) {
        SV** ary = AvALLOC(av);
        if (AvARRAY(av) != ary) {
            AvMAX(av) += AvARRAY(av) - AvALLOC(av);
            SvPVX(av) = (char*)ary;
        }
        if (AvMAX(av) < 1) {
            AvMAX(av) = 1;
            Renew(ary,2,SV*);
            SvPVX(av) = (char*)ary;
        }
    }
    AvFILLp(av) = 1;

    AvARRAY(av)[0] = a;
    AvARRAY(av)[1] = b;
    PL_stack_sp = PL_stack_base;
    PL_op = PL_sortcop;
    CALLRUNOPS(aTHX);
    if (PL_stack_sp != PL_stack_base + 1)
        Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
        Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
        LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}

static I32
sortcv_xsub(pTHX_ SV *a, SV *b)
{
    dSP;
    I32 oldsaveix = PL_savestack_ix;
    I32 oldscopeix = PL_scopestack_ix;
    I32 result;
    CV *cv=(CV*)PL_sortcop;

    SP = PL_stack_base;
    PUSHMARK(SP);
    EXTEND(SP, 2);
    *++SP = a;
    *++SP = b;
    PUTBACK;
    (void)(*CvXSUB(cv))(aTHX_ cv);
    if (PL_stack_sp != PL_stack_base + 1)
        Perl_croak(aTHX_ "Sort subroutine didn't return single value");
    if (!SvNIOKp(*PL_stack_sp))
        Perl_croak(aTHX_ "Sort subroutine didn't return a numeric value");
    result = SvIV(*PL_stack_sp);
    while (PL_scopestack_ix > oldscopeix) {
        LEAVE;
    }
    leave_scope(oldsaveix);
    return result;
}


static I32
sv_ncmp(pTHX_ SV *a, SV *b)
{
    NV nv1 = SvNV(a);
    NV nv2 = SvNV(b);
    return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0;
}

static I32
sv_i_ncmp(pTHX_ SV *a, SV *b)
{
    IV iv1 = SvIV(a);
    IV iv2 = SvIV(b);
    return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0;
}
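
/* If either operand has overload magic, call the overloaded comparison
** (ncmp or scmp) and leave the result SV in *svp; otherwise *svp is set
** to Nullsv and the caller falls back to the plain comparison.
*/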
#define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \
          *svp = Nullsv;                \
          if (PL_amagic_generation) { \
            if (SvAMAGIC(left)||SvAMAGIC(right))\
                *svp = amagic_call(left, \
                                   right, \
                                   CAT2(meth,_amg), \
                                   0); \
          } \
        } STMT_END

static I32
amagic_ncmp(pTHX_ register SV *a, register SV *b)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
    if (tmpsv) {
        NV d;

        if (SvIOK(tmpsv)) {
            I32 i = SvIVX(tmpsv);
            if (i > 0)
                return 1;
            return i? -1 : 0;
        }
        d = SvNV(tmpsv);
        if (d > 0)
            return 1;
        return d? -1 : 0;
    }
    return sv_ncmp(aTHX_ a, b);
}

static I32
amagic_i_ncmp(pTHX_ register SV *a, register SV *b)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(a,b,ncmp,&tmpsv);
    if (tmpsv) {
        NV d;

        if (SvIOK(tmpsv)) {
            I32 i = SvIVX(tmpsv);
            if (i > 0)
                return 1;
            return i? -1 : 0;
        }
        d = SvNV(tmpsv);
        if (d > 0)
            return 1;
        return d? -1 : 0;
    }
    return sv_i_ncmp(aTHX_ a, b);
}

static I32
amagic_cmp(pTHX_ register SV *str1, register SV *str2)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
    if (tmpsv) {
        NV d;

        if (SvIOK(tmpsv)) {
            I32 i = SvIVX(tmpsv);
            if (i > 0)
                return 1;
            return i? -1 : 0;
        }
        d = SvNV(tmpsv);
        if (d > 0)
            return 1;
        return d? -1 : 0;
    }
    return sv_cmp(str1, str2);
}

static I32
amagic_cmp_locale(pTHX_ register SV *str1, register SV *str2)
{
    SV *tmpsv;
    tryCALL_AMAGICbin(str1,str2,scmp,&tmpsv);
    if (tmpsv) {
        NV d;

        if (SvIOK(tmpsv)) {
            I32 i = SvIVX(tmpsv);
            if (i > 0)
                return 1;
            return i? -1 : 0;
        }
        d = SvNV(tmpsv);
        if (d > 0)
            return 1;
        return d? -1 : 0;
    }
    return sv_cmp_locale(str1, str2);
}