Clarify portability and main program.
[python/dscho.git] / Objects / listobject.c
blobdf2108c9ba91e2797d434d240275cb428f262f7d
1 /***********************************************************
2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3 The Netherlands.
5 All Rights Reserved
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
15 permission.
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
32 /* List object implementation */
34 #include "Python.h"
36 #ifdef STDC_HEADERS
37 #include <stddef.h>
38 #else
39 #include <sys/types.h> /* For size_t */
40 #endif
42 #define ROUNDUP(n, PyTryBlock) \
43 ((((n)+(PyTryBlock)-1)/(PyTryBlock))*(PyTryBlock))
45 static int
46 roundupsize(n)
47 int n;
49 if (n < 500)
50 return ROUNDUP(n, 10);
51 else
52 return ROUNDUP(n, 100);
55 #define NRESIZE(var, type, nitems) PyMem_RESIZE(var, type, roundupsize(nitems))
57 PyObject *
58 PyList_New(size)
59 int size;
61 int i;
62 PyListObject *op;
63 size_t nbytes;
64 if (size < 0) {
65 PyErr_BadInternalCall();
66 return NULL;
68 nbytes = size * sizeof(PyObject *);
69 /* Check for overflow */
70 if (nbytes / sizeof(PyObject *) != (size_t)size) {
71 return PyErr_NoMemory();
73 op = (PyListObject *) malloc(sizeof(PyListObject));
74 if (op == NULL) {
75 return PyErr_NoMemory();
77 if (size <= 0) {
78 op->ob_item = NULL;
80 else {
81 op->ob_item = (PyObject **) malloc(nbytes);
82 if (op->ob_item == NULL) {
83 free((ANY *)op);
84 return PyErr_NoMemory();
87 op->ob_type = &PyList_Type;
88 op->ob_size = size;
89 for (i = 0; i < size; i++)
90 op->ob_item[i] = NULL;
91 _Py_NewReference(op);
92 return (PyObject *) op;
95 int
96 PyList_Size(op)
97 PyObject *op;
99 if (!PyList_Check(op)) {
100 PyErr_BadInternalCall();
101 return -1;
103 else
104 return ((PyListObject *)op) -> ob_size;
107 static PyObject *indexerr;
109 PyObject *
110 PyList_GetItem(op, i)
111 PyObject *op;
112 int i;
114 if (!PyList_Check(op)) {
115 PyErr_BadInternalCall();
116 return NULL;
118 if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
119 if (indexerr == NULL)
120 indexerr = PyString_FromString(
121 "list index out of range");
122 PyErr_SetObject(PyExc_IndexError, indexerr);
123 return NULL;
125 return ((PyListObject *)op) -> ob_item[i];
129 PyList_SetItem(op, i, newitem)
130 register PyObject *op;
131 register int i;
132 register PyObject *newitem;
134 register PyObject *olditem;
135 register PyObject **p;
136 if (!PyList_Check(op)) {
137 Py_XDECREF(newitem);
138 PyErr_BadInternalCall();
139 return -1;
141 if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
142 Py_XDECREF(newitem);
143 PyErr_SetString(PyExc_IndexError,
144 "list assignment index out of range");
145 return -1;
147 p = ((PyListObject *)op) -> ob_item + i;
148 olditem = *p;
149 *p = newitem;
150 Py_XDECREF(olditem);
151 return 0;
154 static int
155 ins1(self, where, v)
156 PyListObject *self;
157 int where;
158 PyObject *v;
160 int i;
161 PyObject **items;
162 if (v == NULL) {
163 PyErr_BadInternalCall();
164 return -1;
166 items = self->ob_item;
167 NRESIZE(items, PyObject *, self->ob_size+1);
168 if (items == NULL) {
169 PyErr_NoMemory();
170 return -1;
172 if (where < 0)
173 where = 0;
174 if (where > self->ob_size)
175 where = self->ob_size;
176 for (i = self->ob_size; --i >= where; )
177 items[i+1] = items[i];
178 Py_INCREF(v);
179 items[where] = v;
180 self->ob_item = items;
181 self->ob_size++;
182 return 0;
186 PyList_Insert(op, where, newitem)
187 PyObject *op;
188 int where;
189 PyObject *newitem;
191 if (!PyList_Check(op)) {
192 PyErr_BadInternalCall();
193 return -1;
195 return ins1((PyListObject *)op, where, newitem);
199 PyList_Append(op, newitem)
200 PyObject *op;
201 PyObject *newitem;
203 if (!PyList_Check(op)) {
204 PyErr_BadInternalCall();
205 return -1;
207 return ins1((PyListObject *)op,
208 (int) ((PyListObject *)op)->ob_size, newitem);
211 /* Methods */
213 static void
214 list_dealloc(op)
215 PyListObject *op;
217 int i;
218 if (op->ob_item != NULL) {
219 for (i = 0; i < op->ob_size; i++) {
220 Py_XDECREF(op->ob_item[i]);
222 free((ANY *)op->ob_item);
224 free((ANY *)op);
227 static int
228 list_print(op, fp, flags)
229 PyListObject *op;
230 FILE *fp;
231 int flags;
233 int i;
235 i = Py_ReprEnter((PyObject*)op);
236 if (i != 0) {
237 if (i < 0)
238 return i;
239 fprintf(fp, "[...]");
240 return 0;
242 fprintf(fp, "[");
243 for (i = 0; i < op->ob_size; i++) {
244 if (i > 0)
245 fprintf(fp, ", ");
246 if (PyObject_Print(op->ob_item[i], fp, 0) != 0) {
247 Py_ReprLeave((PyObject *)op);
248 return -1;
251 fprintf(fp, "]");
252 Py_ReprLeave((PyObject *)op);
253 return 0;
256 static PyObject *
257 list_repr(v)
258 PyListObject *v;
260 PyObject *s, *comma;
261 int i;
263 i = Py_ReprEnter((PyObject*)v);
264 if (i != 0) {
265 if (i > 0)
266 return PyString_FromString("[...]");
267 return NULL;
269 s = PyString_FromString("[");
270 comma = PyString_FromString(", ");
271 for (i = 0; i < v->ob_size && s != NULL; i++) {
272 if (i > 0)
273 PyString_Concat(&s, comma);
274 PyString_ConcatAndDel(&s, PyObject_Repr(v->ob_item[i]));
276 Py_XDECREF(comma);
277 PyString_ConcatAndDel(&s, PyString_FromString("]"));
278 Py_ReprLeave((PyObject *)v);
279 return s;
282 static int
283 list_compare(v, w)
284 PyListObject *v, *w;
286 int i;
287 for (i = 0; i < v->ob_size && i < w->ob_size; i++) {
288 int cmp = PyObject_Compare(v->ob_item[i], w->ob_item[i]);
289 if (cmp != 0)
290 return cmp;
292 return v->ob_size - w->ob_size;
295 static int
296 list_length(a)
297 PyListObject *a;
299 return a->ob_size;
302 static PyObject *
303 list_item(a, i)
304 PyListObject *a;
305 int i;
307 if (i < 0 || i >= a->ob_size) {
308 if (indexerr == NULL)
309 indexerr = PyString_FromString(
310 "list index out of range");
311 PyErr_SetObject(PyExc_IndexError, indexerr);
312 return NULL;
314 Py_INCREF(a->ob_item[i]);
315 return a->ob_item[i];
318 static PyObject *
319 list_slice(a, ilow, ihigh)
320 PyListObject *a;
321 int ilow, ihigh;
323 PyListObject *np;
324 int i;
325 if (ilow < 0)
326 ilow = 0;
327 else if (ilow > a->ob_size)
328 ilow = a->ob_size;
329 if (ihigh < ilow)
330 ihigh = ilow;
331 else if (ihigh > a->ob_size)
332 ihigh = a->ob_size;
333 np = (PyListObject *) PyList_New(ihigh - ilow);
334 if (np == NULL)
335 return NULL;
336 for (i = ilow; i < ihigh; i++) {
337 PyObject *v = a->ob_item[i];
338 Py_INCREF(v);
339 np->ob_item[i - ilow] = v;
341 return (PyObject *)np;
344 PyObject *
345 PyList_GetSlice(a, ilow, ihigh)
346 PyObject *a;
347 int ilow, ihigh;
349 if (!PyList_Check(a)) {
350 PyErr_BadInternalCall();
351 return NULL;
353 return list_slice((PyListObject *)a, ilow, ihigh);
356 static PyObject *
357 list_concat(a, bb)
358 PyListObject *a;
359 PyObject *bb;
361 int size;
362 int i;
363 PyListObject *np;
364 if (!PyList_Check(bb)) {
365 PyErr_BadArgument();
366 return NULL;
368 #define b ((PyListObject *)bb)
369 size = a->ob_size + b->ob_size;
370 np = (PyListObject *) PyList_New(size);
371 if (np == NULL) {
372 return NULL;
374 for (i = 0; i < a->ob_size; i++) {
375 PyObject *v = a->ob_item[i];
376 Py_INCREF(v);
377 np->ob_item[i] = v;
379 for (i = 0; i < b->ob_size; i++) {
380 PyObject *v = b->ob_item[i];
381 Py_INCREF(v);
382 np->ob_item[i + a->ob_size] = v;
384 return (PyObject *)np;
385 #undef b
388 static PyObject *
389 list_repeat(a, n)
390 PyListObject *a;
391 int n;
393 int i, j;
394 int size;
395 PyListObject *np;
396 PyObject **p;
397 if (n < 0)
398 n = 0;
399 size = a->ob_size * n;
400 np = (PyListObject *) PyList_New(size);
401 if (np == NULL)
402 return NULL;
403 p = np->ob_item;
404 for (i = 0; i < n; i++) {
405 for (j = 0; j < a->ob_size; j++) {
406 *p = a->ob_item[j];
407 Py_INCREF(*p);
408 p++;
411 return (PyObject *) np;
414 static int
415 list_ass_slice(a, ilow, ihigh, v)
416 PyListObject *a;
417 int ilow, ihigh;
418 PyObject *v;
420 /* Because [X]DECREF can recursively invoke list operations on
421 this list, we must postpone all [X]DECREF activity until
422 after the list is back in its canonical shape. Therefore
423 we must allocate an additional array, 'recycle', into which
424 we temporarily copy the items that are deleted from the
425 list. :-( */
426 PyObject **recycle, **p;
427 PyObject **item;
428 int n; /* Size of replacement list */
429 int d; /* Change in size */
430 int k; /* Loop index */
431 #define b ((PyListObject *)v)
432 if (v == NULL)
433 n = 0;
434 else if (PyList_Check(v)) {
435 n = b->ob_size;
436 if (a == b) {
437 /* Special case "a[i:j] = a" -- copy b first */
438 int ret;
439 v = list_slice(b, 0, n);
440 ret = list_ass_slice(a, ilow, ihigh, v);
441 Py_DECREF(v);
442 return ret;
445 else {
446 PyErr_BadArgument();
447 return -1;
449 if (ilow < 0)
450 ilow = 0;
451 else if (ilow > a->ob_size)
452 ilow = a->ob_size;
453 if (ihigh < ilow)
454 ihigh = ilow;
455 else if (ihigh > a->ob_size)
456 ihigh = a->ob_size;
457 item = a->ob_item;
458 d = n - (ihigh-ilow);
459 if (ihigh > ilow)
460 p = recycle = PyMem_NEW(PyObject *, (ihigh-ilow));
461 else
462 p = recycle = NULL;
463 if (d <= 0) { /* Delete -d items; recycle ihigh-ilow items */
464 for (k = ilow; k < ihigh; k++)
465 *p++ = item[k];
466 if (d < 0) {
467 for (/*k = ihigh*/; k < a->ob_size; k++)
468 item[k+d] = item[k];
469 a->ob_size += d;
470 NRESIZE(item, PyObject *, a->ob_size); /* Can't fail */
471 a->ob_item = item;
474 else { /* Insert d items; recycle ihigh-ilow items */
475 NRESIZE(item, PyObject *, a->ob_size + d);
476 if (item == NULL) {
477 PyMem_XDEL(recycle);
478 PyErr_NoMemory();
479 return -1;
481 for (k = a->ob_size; --k >= ihigh; )
482 item[k+d] = item[k];
483 for (/*k = ihigh-1*/; k >= ilow; --k)
484 *p++ = item[k];
485 a->ob_item = item;
486 a->ob_size += d;
488 for (k = 0; k < n; k++, ilow++) {
489 PyObject *w = b->ob_item[k];
490 Py_XINCREF(w);
491 item[ilow] = w;
493 if (recycle) {
494 while (--p >= recycle)
495 Py_XDECREF(*p);
496 PyMem_DEL(recycle);
498 return 0;
499 #undef b
503 PyList_SetSlice(a, ilow, ihigh, v)
504 PyObject *a;
505 int ilow, ihigh;
506 PyObject *v;
508 if (!PyList_Check(a)) {
509 PyErr_BadInternalCall();
510 return -1;
512 return list_ass_slice((PyListObject *)a, ilow, ihigh, v);
515 static int
516 list_ass_item(a, i, v)
517 PyListObject *a;
518 int i;
519 PyObject *v;
521 PyObject *old_value;
522 if (i < 0 || i >= a->ob_size) {
523 PyErr_SetString(PyExc_IndexError,
524 "list assignment index out of range");
525 return -1;
527 if (v == NULL)
528 return list_ass_slice(a, i, i+1, v);
529 Py_INCREF(v);
530 old_value = a->ob_item[i];
531 a->ob_item[i] = v;
532 Py_DECREF(old_value);
533 return 0;
536 static PyObject *
537 ins(self, where, v)
538 PyListObject *self;
539 int where;
540 PyObject *v;
542 if (ins1(self, where, v) != 0)
543 return NULL;
544 Py_INCREF(Py_None);
545 return Py_None;
548 static PyObject *
549 listinsert(self, args)
550 PyListObject *self;
551 PyObject *args;
553 int i;
554 PyObject *v;
555 if (!PyArg_Parse(args, "(iO)", &i, &v))
556 return NULL;
557 return ins(self, i, v);
560 static PyObject *
561 listappend(self, args)
562 PyListObject *self;
563 PyObject *args;
565 PyObject *v;
566 if (!PyArg_Parse(args, "O", &v))
567 return NULL;
568 return ins(self, (int) self->ob_size, v);
571 static PyObject *
572 listextend(self, args)
573 PyListObject *self;
574 PyObject *args;
576 PyObject *b = NULL, *res = NULL;
577 PyObject **items;
578 int selflen = PyList_GET_SIZE(self);
579 int blen;
580 register int i;
582 if (!PyArg_ParseTuple(args, "O", &b))
583 return NULL;
585 if (!PyList_Check(b)) {
586 PyErr_SetString(PyExc_TypeError,
587 "list.extend() argument must be a list");
588 return NULL;
590 if (PyList_GET_SIZE(b) == 0) {
591 /* short circuit when b is empty */
592 Py_INCREF(Py_None);
593 return Py_None;
595 if (self == (PyListObject*)b) {
596 /* as in list_ass_slice() we must special case the
597 * situation: a.extend(a)
599 * XXX: I think this way ought to be faster than using
600 * list_slice() the way list_ass_slice() does.
602 b = PyList_New(selflen);
603 if (!b)
604 return NULL;
605 for (i = 0; i < selflen; i++) {
606 PyObject *o = PyList_GET_ITEM(self, i);
607 Py_INCREF(o);
608 PyList_SET_ITEM(b, i, o);
611 else
612 /* we want b to have the same refcount semantics for the
613 * Py_XDECREF() in the finally clause regardless of which
614 * branch in the above conditional we took.
616 Py_INCREF(b);
618 blen = PyList_GET_SIZE(b);
619 /* resize a using idiom */
620 items = self->ob_item;
621 NRESIZE(items, PyObject*, selflen + blen);
622 if (items == NULL ) {
623 PyErr_NoMemory();
624 goto finally;
626 self->ob_item = items;
628 /* populate the end self with b's items */
629 for (i = 0; i < blen; i++) {
630 PyObject *o = PyList_GET_ITEM(b, i);
631 Py_INCREF(o);
632 PyList_SET_ITEM(self, self->ob_size++, o);
634 res = Py_None;
635 Py_INCREF(res);
636 finally:
637 Py_XDECREF(b);
638 return res;
642 static PyObject *
643 listpop(self, args)
644 PyListObject *self;
645 PyObject *args;
647 int i = -1;
648 PyObject *v;
649 if (!PyArg_ParseTuple(args, "|i", &i))
650 return NULL;
651 if (self->ob_size == 0) {
652 /* Special-case most common failure cause */
653 PyErr_SetString(PyExc_IndexError, "pop from empty list");
654 return NULL;
656 if (i < 0)
657 i += self->ob_size;
658 if (i < 0 || i >= self->ob_size) {
659 PyErr_SetString(PyExc_IndexError, "pop index out of range");
660 return NULL;
662 v = self->ob_item[i];
663 Py_INCREF(v);
664 if (list_ass_slice(self, i, i+1, (PyObject *)NULL) != 0) {
665 Py_DECREF(v);
666 return NULL;
668 return v;
671 /* New quicksort implementation for arrays of object pointers.
672 Thanks to discussions with Tim Peters. */
674 /* CMPERROR is returned by our comparison function when an error
675 occurred. This is the largest negative integer (0x80000000 on a
676 32-bit system). */
677 #define CMPERROR ( (int) ((unsigned int)1 << (8*sizeof(int) - 1)) )
679 /* Comparison function. Takes care of calling a user-supplied
680 comparison function (any callable Python object). Calls the
681 standard comparison function, PyObject_Compare(), if the user-
682 supplied function is NULL. */
684 static int
685 docompare(x, y, compare)
686 PyObject *x;
687 PyObject *y;
688 PyObject *compare;
690 PyObject *args, *res;
691 int i;
693 if (compare == NULL) {
694 i = PyObject_Compare(x, y);
695 if (i && PyErr_Occurred())
696 i = CMPERROR;
697 return i;
700 args = Py_BuildValue("(OO)", x, y);
701 if (args == NULL)
702 return CMPERROR;
703 res = PyEval_CallObject(compare, args);
704 Py_DECREF(args);
705 if (res == NULL)
706 return CMPERROR;
707 if (!PyInt_Check(res)) {
708 Py_DECREF(res);
709 PyErr_SetString(PyExc_TypeError,
710 "comparison function should return int");
711 return CMPERROR;
713 i = PyInt_AsLong(res);
714 Py_DECREF(res);
715 if (i < 0)
716 return -1;
717 if (i > 0)
718 return 1;
719 return 0;
722 /* MINSIZE is the smallest array that will get a full-blown samplesort
723 treatment; smaller arrays are sorted using binary insertion. It must
724 be at least 7 for the samplesort implementation to work. Binary
725 insertion does fewer compares, but can suffer O(N**2) data movement.
726 The more expensive compares, the larger MINSIZE should be. */
727 #define MINSIZE 100
729 /* MINPARTITIONSIZE is the smallest array slice samplesort will bother to
730 partition; smaller slices are passed to binarysort. It must be at
731 least 2, and no larger than MINSIZE. Setting it higher reduces the #
732 of compares slowly, but increases the amount of data movement quickly.
733 The value here was chosen assuming a compare costs ~25x more than
734 swapping a pair of memory-resident pointers -- but under that assumption,
735 changing the value by a few dozen more or less has aggregate effect
736 under 1%. So the value is crucial, but not touchy <wink>. */
737 #define MINPARTITIONSIZE 40
739 /* MAXMERGE is the largest number of elements we'll always merge into
740 a known-to-be sorted chunk via binary insertion, regardless of the
741 size of that chunk. Given a chunk of N sorted elements, and a group
742 of K unknowns, the largest K for which it's better to do insertion
743 (than a full-blown sort) is a complicated function of N and K mostly
744 involving the expected number of compares and data moves under each
745 approach, and the relative cost of those operations on a specific
746 architecure. The fixed value here is conservative, and should be a
747 clear win regardless of architecture or N. */
748 #define MAXMERGE 15
750 /* STACKSIZE is the size of our work stack. A rough estimate is that
751 this allows us to sort arrays of size N where
752 N / ln(N) = MINPARTITIONSIZE * 2**STACKSIZE, so 60 is more than enough
753 for arrays of size 2**64. Because we push the biggest partition
754 first, the worst case occurs when all subarrays are always partitioned
755 exactly in two. */
756 #define STACKSIZE 60
759 #define SETK(X,Y) if ((k = docompare(X,Y,compare))==CMPERROR) goto fail
761 /* binarysort is the best method for sorting small arrays: it does
762 few compares, but can do data movement quadratic in the number of
763 elements.
764 [lo, hi) is a contiguous slice of a list, and is sorted via
765 binary insertion.
766 On entry, must have lo <= start <= hi, and that [lo, start) is already
767 sorted (pass start == lo if you don't know!).
768 If docompare complains (returns CMPERROR) return -1, else 0.
769 Even in case of error, the output slice will be some permutation of
770 the input (nothing is lost or duplicated).
773 static int
774 binarysort(lo, hi, start, compare)
775 PyObject **lo;
776 PyObject **hi;
777 PyObject **start;
778 PyObject *compare;/* Comparison function object, or NULL for default */
780 /* assert lo <= start <= hi
781 assert [lo, start) is sorted */
782 register int k;
783 register PyObject **l, **p, **r;
784 register PyObject *pivot;
786 if (lo == start)
787 ++start;
788 for (; start < hi; ++start) {
789 /* set l to where *start belongs */
790 l = lo;
791 r = start;
792 pivot = *r;
793 do {
794 p = l + ((r - l) >> 1);
795 SETK(pivot, *p);
796 if (k < 0)
797 r = p;
798 else
799 l = p + 1;
800 } while (l < r);
801 /* Pivot should go at l -- slide over to make room.
802 Caution: using memmove is much slower under MSVC 5;
803 we're not usually moving many slots. */
804 for (p = start; p > l; --p)
805 *p = *(p-1);
806 *l = pivot;
808 return 0;
810 fail:
811 return -1;
814 /* samplesortslice is the sorting workhorse.
815 [lo, hi) is a contiguous slice of a list, to be sorted in place.
816 On entry, must have lo <= hi,
817 If docompare complains (returns CMPERROR) return -1, else 0.
818 Even in case of error, the output slice will be some permutation of
819 the input (nothing is lost or duplicated).
821 samplesort is basically quicksort on steroids: a power of 2 close
822 to n/ln(n) is computed, and that many elements (less 1) are picked at
823 random from the array and sorted. These 2**k - 1 elements are then
824 used as preselected pivots for an equal number of quicksort
825 partitioning steps, partitioning the slice into 2**k chunks each of
826 size about ln(n). These small final chunks are then usually handled
827 by binarysort. Note that when k=1, this is roughly the same as an
828 ordinary quicksort using a random pivot, and when k=2 this is roughly
829 a median-of-3 quicksort. From that view, using k ~= lg(n/ln(n)) makes
830 this a "median of n/ln(n)" quicksort. You can also view it as a kind
831 of bucket sort, where 2**k-1 bucket boundaries are picked dynamically.
833 The large number of samples makes a quadratic-time case almost
834 impossible, and asymptotically drives the average-case number of
835 compares from quicksort's 2 N ln N (or 12/7 N ln N for the median-of-
836 3 variant) down to N lg N.
838 We also play lots of low-level tricks to cut the number of compares.
840 Very obscure: To avoid using extra memory, the PPs are stored in the
841 array and shuffled around as partitioning proceeds. At the start of a
842 partitioning step, we'll have 2**m-1 (for some m) PPs in sorted order,
843 adjacent (either on the left or the right!) to a chunk of X elements
844 that are to be partitioned: P X or X P. In either case we need to
845 shuffle things *in place* so that the 2**(m-1) smaller PPs are on the
846 left, followed by the PP to be used for this step (that's the middle
847 of the PPs), followed by X, followed by the 2**(m-1) larger PPs:
848 P X or X P -> Psmall pivot X Plarge
849 and the order of the PPs must not be altered. It can take a while
850 to realize this isn't trivial! It can take even longer <wink> to
851 understand why the simple code below works, using only 2**(m-1) swaps.
852 The key is that the order of the X elements isn't necessarily
853 preserved: X can end up as some cyclic permutation of its original
854 order. That's OK, because X is unsorted anyway. If the order of X
855 had to be preserved too, the simplest method I know of using O(1)
856 scratch storage requires len(X) + 2**(m-1) swaps, spread over 2 passes.
857 Since len(X) is typically several times larger than 2**(m-1), that
858 would slow things down.
861 struct SamplesortStackNode {
862 /* Represents a slice of the array, from (& including) lo up
863 to (but excluding) hi. "extra" additional & adjacent elements
864 are pre-selected pivots (PPs), spanning [lo-extra, lo) if
865 extra > 0, or [hi, hi-extra) if extra < 0. The PPs are
866 already sorted, but nothing is known about the other elements
867 in [lo, hi). |extra| is always one less than a power of 2.
868 When extra is 0, we're out of PPs, and the slice must be
869 sorted by some other means. */
870 PyObject **lo;
871 PyObject **hi;
872 int extra;
875 /* The number of PPs we want is 2**k - 1, where 2**k is as close to
876 N / ln(N) as possible. So k ~= lg(N / ln(N)). Calling libm routines
877 is undesirable, so cutoff values are canned in the "cutoff" table
878 below: cutoff[i] is the smallest N such that k == CUTOFFBASE + i. */
879 #define CUTOFFBASE 4
880 static int cutoff[] = {
881 43, /* smallest N such that k == 4 */
882 106, /* etc */
883 250,
884 576,
885 1298,
886 2885,
887 6339,
888 13805,
889 29843,
890 64116,
891 137030,
892 291554,
893 617916,
894 1305130,
895 2748295,
896 5771662,
897 12091672,
898 25276798,
899 52734615,
900 109820537,
901 228324027,
902 473977813,
903 982548444, /* smallest N such that k == 26 */
904 2034159050 /* largest N that fits in signed 32-bit; k == 27 */
907 static int
908 samplesortslice(lo, hi, compare)
909 PyObject **lo;
910 PyObject **hi;
911 PyObject *compare;/* Comparison function object, or NULL for default */
913 register PyObject **l, **r;
914 register PyObject *tmp, *pivot;
915 register int k;
916 int n, extra, top, extraOnRight;
917 struct SamplesortStackNode stack[STACKSIZE];
919 /* assert lo <= hi */
920 n = hi - lo;
922 /* ----------------------------------------------------------
923 * Special cases
924 * --------------------------------------------------------*/
925 if (n < 2)
926 return 0;
928 /* Set r to the largest value such that [lo,r) is sorted.
929 This catches the already-sorted case, the all-the-same
930 case, and the appended-a-few-elements-to-a-sorted-list case.
931 If the array is unsorted, we're very likely to get out of
932 the loop fast, so the test is cheap if it doesn't pay off.
934 /* assert lo < hi */
935 for (r = lo+1; r < hi; ++r) {
936 SETK(*r, *(r-1));
937 if (k < 0)
938 break;
940 /* [lo,r) is sorted, [r,hi) unknown. Get out cheap if there are
941 few unknowns, or few elements in total. */
942 if (hi - r <= MAXMERGE || n < MINSIZE)
943 return binarysort(lo, hi, r, compare);
945 /* Check for the array already being reverse-sorted. Typical
946 benchmark-driven silliness <wink>. */
947 /* assert lo < hi */
948 for (r = lo+1; r < hi; ++r) {
949 SETK(*(r-1), *r);
950 if (k < 0)
951 break;
953 if (hi - r <= MAXMERGE) {
954 /* Reverse the reversed prefix, then insert the tail */
955 PyObject **originalr = r;
956 l = lo;
957 do {
958 --r;
959 tmp = *l; *l = *r; *r = tmp;
960 ++l;
961 } while (l < r);
962 return binarysort(lo, hi, originalr, compare);
965 /* ----------------------------------------------------------
966 * Normal case setup: a large array without obvious pattern.
967 * --------------------------------------------------------*/
969 /* extra := a power of 2 ~= n/ln(n), less 1.
970 First find the smallest extra s.t. n < cutoff[extra] */
971 for (extra = 0;
972 extra < sizeof(cutoff) / sizeof(cutoff[0]);
973 ++extra) {
974 if (n < cutoff[extra])
975 break;
976 /* note that if we fall out of the loop, the value of
977 extra still makes *sense*, but may be smaller than
978 we would like (but the array has more than ~= 2**31
979 elements in this case!) */
981 /* Now k == extra - 1 + CUTOFFBASE. The smallest value k can
982 have is CUTOFFBASE-1, so
983 assert MINSIZE >= 2**(CUTOFFBASE-1) - 1 */
984 extra = (1 << (extra - 1 + CUTOFFBASE)) - 1;
985 /* assert extra > 0 and n >= extra */
987 /* Swap that many values to the start of the array. The
988 selection of elements is pseudo-random, but the same on
989 every run (this is intentional! timing algorithm changes is
990 a pain if timing varies across runs). */
992 unsigned int seed = n / extra; /* arbitrary */
993 unsigned int i;
994 for (i = 0; i < (unsigned)extra; ++i) {
995 /* j := random int in [i, n) */
996 unsigned int j;
997 seed = seed * 69069 + 7;
998 j = i + seed % (n - i);
999 tmp = lo[i]; lo[i] = lo[j]; lo[j] = tmp;
1003 /* Recursively sort the preselected pivots. */
1004 if (samplesortslice(lo, lo + extra, compare) < 0)
1005 goto fail;
1007 top = 0; /* index of available stack slot */
1008 lo += extra; /* point to first unknown */
1009 extraOnRight = 0; /* the PPs are at the left end */
1011 /* ----------------------------------------------------------
1012 * Partition [lo, hi), and repeat until out of work.
1013 * --------------------------------------------------------*/
1014 for (;;) {
1015 /* assert lo <= hi, so n >= 0 */
1016 n = hi - lo;
1018 /* We may not want, or may not be able, to partition:
1019 If n is small, it's quicker to insert.
1020 If extra is 0, we're out of pivots, and *must* use
1021 another method.
1023 if (n < MINPARTITIONSIZE || extra == 0) {
1024 if (n >= MINSIZE) {
1025 /* assert extra == 0
1026 This is rare, since the average size
1027 of a final block is only about
1028 ln(original n). */
1029 if (samplesortslice(lo, hi, compare) < 0)
1030 goto fail;
1032 else {
1033 /* Binary insertion should be quicker,
1034 and we can take advantage of the PPs
1035 already being sorted. */
1036 if (extraOnRight && extra) {
1037 /* swap the PPs to the left end */
1038 k = extra;
1039 do {
1040 tmp = *lo;
1041 *lo = *hi;
1042 *hi = tmp;
1043 ++lo; ++hi;
1044 } while (--k);
1046 if (binarysort(lo - extra, hi, lo,
1047 compare) < 0)
1048 goto fail;
1051 /* Find another slice to work on. */
1052 if (--top < 0)
1053 break; /* no more -- done! */
1054 lo = stack[top].lo;
1055 hi = stack[top].hi;
1056 extra = stack[top].extra;
1057 extraOnRight = 0;
1058 if (extra < 0) {
1059 extraOnRight = 1;
1060 extra = -extra;
1062 continue;
1065 /* Pretend the PPs are indexed 0, 1, ..., extra-1.
1066 Then our preselected pivot is at (extra-1)/2, and we
1067 want to move the PPs before that to the left end of
1068 the slice, and the PPs after that to the right end.
1069 The following section changes extra, lo, hi, and the
1070 slice such that:
1071 [lo-extra, lo) contains the smaller PPs.
1072 *lo == our PP.
1073 (lo, hi) contains the unknown elements.
1074 [hi, hi+extra) contains the larger PPs.
1076 k = extra >>= 1; /* num PPs to move */
1077 if (extraOnRight) {
1078 /* Swap the smaller PPs to the left end.
1079 Note that this loop actually moves k+1 items:
1080 the last is our PP */
1081 do {
1082 tmp = *lo; *lo = *hi; *hi = tmp;
1083 ++lo; ++hi;
1084 } while (k--);
1086 else {
1087 /* Swap the larger PPs to the right end. */
1088 while (k--) {
1089 --lo; --hi;
1090 tmp = *lo; *lo = *hi; *hi = tmp;
1093 --lo; /* *lo is now our PP */
1094 pivot = *lo;
1096 /* Now an almost-ordinary quicksort partition step.
1097 Note that most of the time is spent here!
1098 Only odd thing is that we partition into < and >=,
1099 instead of the usual <= and >=. This helps when
1100 there are lots of duplicates of different values,
1101 because it eventually tends to make subfiles
1102 "pure" (all duplicates), and we special-case for
1103 duplicates later. */
1104 l = lo + 1;
1105 r = hi - 1;
1106 /* assert lo < l < r < hi (small n weeded out above) */
1108 do {
1109 /* slide l right, looking for key >= pivot */
1110 do {
1111 SETK(*l, pivot);
1112 if (k < 0)
1113 ++l;
1114 else
1115 break;
1116 } while (l < r);
1118 /* slide r left, looking for key < pivot */
1119 while (l < r) {
1120 register PyObject *rval = *r--;
1121 SETK(rval, pivot);
1122 if (k < 0) {
1123 /* swap and advance */
1124 r[1] = *l;
1125 *l++ = rval;
1126 break;
1130 } while (l < r);
1132 /* assert lo < r <= l < hi
1133 assert r == l or r+1 == l
1134 everything to the left of l is < pivot, and
1135 everything to the right of r is >= pivot */
1137 if (l == r) {
1138 SETK(*r, pivot);
1139 if (k < 0)
1140 ++l;
1141 else
1142 --r;
1144 /* assert lo <= r and r+1 == l and l <= hi
1145 assert r == lo or a[r] < pivot
1146 assert a[lo] is pivot
1147 assert l == hi or a[l] >= pivot
1148 Swap the pivot into "the middle", so we can henceforth
1149 ignore it.
1151 *lo = *r;
1152 *r = pivot;
1154 /* The following is true now, & will be preserved:
1155 All in [lo,r) are < pivot
1156 All in [r,l) == pivot (& so can be ignored)
1157 All in [l,hi) are >= pivot */
1159 /* Check for duplicates of the pivot. One compare is
1160 wasted if there are no duplicates, but can win big
1161 when there are.
1162 Tricky: we're sticking to "<" compares, so deduce
1163 equality indirectly. We know pivot <= *l, so they're
1164 equal iff not pivot < *l.
1166 while (l < hi) {
1167 /* pivot <= *l known */
1168 SETK(pivot, *l);
1169 if (k < 0)
1170 break;
1171 else
1172 /* <= and not < implies == */
1173 ++l;
1176 /* assert lo <= r < l <= hi
1177 Partitions are [lo, r) and [l, hi) */
1179 /* push fattest first; remember we still have extra PPs
1180 to the left of the left chunk and to the right of
1181 the right chunk! */
1182 /* assert top < STACKSIZE */
1183 if (r - lo <= hi - l) {
1184 /* second is bigger */
1185 stack[top].lo = l;
1186 stack[top].hi = hi;
1187 stack[top].extra = -extra;
1188 hi = r;
1189 extraOnRight = 0;
1191 else {
1192 /* first is bigger */
1193 stack[top].lo = lo;
1194 stack[top].hi = r;
1195 stack[top].extra = extra;
1196 lo = l;
1197 extraOnRight = 1;
1199 ++top;
1201 } /* end of partitioning loop */
1203 return 0;
1205 fail:
1206 return -1;
1209 #undef SETK
1211 staticforward PyTypeObject immutable_list_type;
1213 static PyObject *
1214 listsort(self, compare)
1215 PyListObject *self;
1216 PyObject *compare;
1218 int err;
1220 self->ob_type = &immutable_list_type;
1221 err = samplesortslice(self->ob_item,
1222 self->ob_item + self->ob_size,
1223 compare);
1224 self->ob_type = &PyList_Type;
1225 if (err < 0)
1226 return NULL;
1227 Py_INCREF(Py_None);
1228 return Py_None;
1232 PyList_Sort(v)
1233 PyObject *v;
1235 if (v == NULL || !PyList_Check(v)) {
1236 PyErr_BadInternalCall();
1237 return -1;
1239 v = listsort((PyListObject *)v, (PyObject *)NULL);
1240 if (v == NULL)
1241 return -1;
1242 Py_DECREF(v);
1243 return 0;
1246 static PyObject *
1247 listreverse(self, args)
1248 PyListObject *self;
1249 PyObject *args;
1251 register PyObject **p, **q;
1252 register PyObject *tmp;
1254 if (args != NULL) {
1255 PyErr_BadArgument();
1256 return NULL;
1259 if (self->ob_size > 1) {
1260 for (p = self->ob_item, q = self->ob_item + self->ob_size - 1;
1261 p < q; p++, q--) {
1262 tmp = *p;
1263 *p = *q;
1264 *q = tmp;
1268 Py_INCREF(Py_None);
1269 return Py_None;
1273 PyList_Reverse(v)
1274 PyObject *v;
1276 if (v == NULL || !PyList_Check(v)) {
1277 PyErr_BadInternalCall();
1278 return -1;
1280 v = listreverse((PyListObject *)v, (PyObject *)NULL);
1281 if (v == NULL)
1282 return -1;
1283 Py_DECREF(v);
1284 return 0;
1287 PyObject *
1288 PyList_AsTuple(v)
1289 PyObject *v;
1291 PyObject *w;
1292 PyObject **p;
1293 int n;
1294 if (v == NULL || !PyList_Check(v)) {
1295 PyErr_BadInternalCall();
1296 return NULL;
1298 n = ((PyListObject *)v)->ob_size;
1299 w = PyTuple_New(n);
1300 if (w == NULL)
1301 return NULL;
1302 p = ((PyTupleObject *)w)->ob_item;
1303 memcpy((ANY *)p,
1304 (ANY *)((PyListObject *)v)->ob_item,
1305 n*sizeof(PyObject *));
1306 while (--n >= 0) {
1307 Py_INCREF(*p);
1308 p++;
1310 return w;
1313 static PyObject *
1314 listindex(self, args)
1315 PyListObject *self;
1316 PyObject *args;
1318 int i;
1320 if (args == NULL) {
1321 PyErr_BadArgument();
1322 return NULL;
1324 for (i = 0; i < self->ob_size; i++) {
1325 if (PyObject_Compare(self->ob_item[i], args) == 0)
1326 return PyInt_FromLong((long)i);
1327 if (PyErr_Occurred())
1328 return NULL;
1330 PyErr_SetString(PyExc_ValueError, "list.index(x): x not in list");
1331 return NULL;
1334 static PyObject *
1335 listcount(self, args)
1336 PyListObject *self;
1337 PyObject *args;
1339 int count = 0;
1340 int i;
1342 if (args == NULL) {
1343 PyErr_BadArgument();
1344 return NULL;
1346 for (i = 0; i < self->ob_size; i++) {
1347 if (PyObject_Compare(self->ob_item[i], args) == 0)
1348 count++;
1349 if (PyErr_Occurred())
1350 return NULL;
1352 return PyInt_FromLong((long)count);
1355 static PyObject *
1356 listremove(self, args)
1357 PyListObject *self;
1358 PyObject *args;
1360 int i;
1362 if (args == NULL) {
1363 PyErr_BadArgument();
1364 return NULL;
1366 for (i = 0; i < self->ob_size; i++) {
1367 if (PyObject_Compare(self->ob_item[i], args) == 0) {
1368 if (list_ass_slice(self, i, i+1,
1369 (PyObject *)NULL) != 0)
1370 return NULL;
1371 Py_INCREF(Py_None);
1372 return Py_None;
1374 if (PyErr_Occurred())
1375 return NULL;
1377 PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
1378 return NULL;
1381 static char append_doc[] =
1382 "L.append(object) -- append object to end";
1383 static char extend_doc[] =
1384 "L.extend(list) -- extend list by appending list elements";
1385 static char insert_doc[] =
1386 "L.insert(index, object) -- insert object before index";
1387 static char pop_doc[] =
1388 "L.pop([index]) -> item -- remove and return item at index (default last)";
1389 static char remove_doc[] =
1390 "L.remove(value) -- remove first occurrence of value";
1391 static char index_doc[] =
1392 "L.index(value) -> integer -- return index of first occurrence of value";
1393 static char count_doc[] =
1394 "L.count(value) -> integer -- return number of occurrences of value";
1395 static char reverse_doc[] =
1396 "L.reverse() -- reverse *IN PLACE*";
1397 static char sort_doc[] =
1398 "L.sort([cmpfunc]) -- sort *IN PLACE*; if given, cmpfunc(x, y) -> -1, 0, 1";
1400 static PyMethodDef list_methods[] = {
1401 {"append", (PyCFunction)listappend, 0, append_doc},
1402 {"insert", (PyCFunction)listinsert, 0, insert_doc},
1403 {"extend", (PyCFunction)listextend, 1, extend_doc},
1404 {"pop", (PyCFunction)listpop, 1, pop_doc},
1405 {"remove", (PyCFunction)listremove, 0, remove_doc},
1406 {"index", (PyCFunction)listindex, 0, index_doc},
1407 {"count", (PyCFunction)listcount, 0, count_doc},
1408 {"reverse", (PyCFunction)listreverse, 0, reverse_doc},
1409 {"sort", (PyCFunction)listsort, 0, sort_doc},
1410 {NULL, NULL} /* sentinel */
1413 static PyObject *
1414 list_getattr(f, name)
1415 PyListObject *f;
1416 char *name;
1418 return Py_FindMethod(list_methods, (PyObject *)f, name);
1421 static PySequenceMethods list_as_sequence = {
1422 (inquiry)list_length, /*sq_length*/
1423 (binaryfunc)list_concat, /*sq_concat*/
1424 (intargfunc)list_repeat, /*sq_repeat*/
1425 (intargfunc)list_item, /*sq_item*/
1426 (intintargfunc)list_slice, /*sq_slice*/
1427 (intobjargproc)list_ass_item, /*sq_ass_item*/
1428 (intintobjargproc)list_ass_slice, /*sq_ass_slice*/
1431 PyTypeObject PyList_Type = {
1432 PyObject_HEAD_INIT(&PyType_Type)
1434 "list",
1435 sizeof(PyListObject),
1437 (destructor)list_dealloc, /*tp_dealloc*/
1438 (printfunc)list_print, /*tp_print*/
1439 (getattrfunc)list_getattr, /*tp_getattr*/
1440 0, /*tp_setattr*/
1441 (cmpfunc)list_compare, /*tp_compare*/
1442 (reprfunc)list_repr, /*tp_repr*/
1443 0, /*tp_as_number*/
1444 &list_as_sequence, /*tp_as_sequence*/
1445 0, /*tp_as_mapping*/
1449 /* During a sort, we really can't have anyone modifying the list; it could
1450 cause core dumps. Thus, we substitute a dummy type that raises an
1451 explanatory exception when a modifying operation is used. Caveat:
1452 comparisons may behave differently; but I guess it's a bad idea anyway to
1453 compare a list that's being sorted... */
1455 static PyObject *
1456 immutable_list_op(/*No args!*/)
1458 PyErr_SetString(PyExc_TypeError,
1459 "a list cannot be modified while it is being sorted");
1460 return NULL;
1463 static PyMethodDef immutable_list_methods[] = {
1464 {"append", (PyCFunction)immutable_list_op},
1465 {"insert", (PyCFunction)immutable_list_op},
1466 {"remove", (PyCFunction)immutable_list_op},
1467 {"index", (PyCFunction)listindex},
1468 {"count", (PyCFunction)listcount},
1469 {"reverse", (PyCFunction)immutable_list_op},
1470 {"sort", (PyCFunction)immutable_list_op},
1471 {NULL, NULL} /* sentinel */
1474 static PyObject *
1475 immutable_list_getattr(f, name)
1476 PyListObject *f;
1477 char *name;
1479 return Py_FindMethod(immutable_list_methods, (PyObject *)f, name);
1482 static int
1483 immutable_list_ass(/*No args!*/)
1485 immutable_list_op();
1486 return -1;
1489 static PySequenceMethods immutable_list_as_sequence = {
1490 (inquiry)list_length, /*sq_length*/
1491 (binaryfunc)list_concat, /*sq_concat*/
1492 (intargfunc)list_repeat, /*sq_repeat*/
1493 (intargfunc)list_item, /*sq_item*/
1494 (intintargfunc)list_slice, /*sq_slice*/
1495 (intobjargproc)immutable_list_ass, /*sq_ass_item*/
1496 (intintobjargproc)immutable_list_ass, /*sq_ass_slice*/
1499 static PyTypeObject immutable_list_type = {
1500 PyObject_HEAD_INIT(&PyType_Type)
1502 "list (immutable, during sort)",
1503 sizeof(PyListObject),
1505 0, /*tp_dealloc*/ /* Cannot happen */
1506 (printfunc)list_print, /*tp_print*/
1507 (getattrfunc)immutable_list_getattr, /*tp_getattr*/
1508 0, /*tp_setattr*/
1509 0, /*tp_compare*/ /* Won't be called */
1510 (reprfunc)list_repr, /*tp_repr*/
1511 0, /*tp_as_number*/
1512 &immutable_list_as_sequence, /*tp_as_sequence*/
1513 0, /*tp_as_mapping*/