Bump version to 0.9.1.
[python/dscho.git] / Objects / listobject.c
blob2b016eda199fc2629eb8afc30cabb097f7884298
1 /***********************************************************
2 Copyright (c) 2000, BeOpen.com.
3 Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4 Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5 All rights reserved.
7 See the file "Misc/COPYRIGHT" for information on usage and
8 redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
9 ******************************************************************/
11 /* List object implementation */
13 #include "Python.h"
15 #ifdef STDC_HEADERS
16 #include <stddef.h>
17 #else
18 #include <sys/types.h> /* For size_t */
19 #endif
21 #define ROUNDUP(n, PyTryBlock) \
22 ((((n)+(PyTryBlock)-1)/(PyTryBlock))*(PyTryBlock))
24 static int
25 roundupsize(int n)
27 if (n < 500)
28 return ROUNDUP(n, 10);
29 else
30 return ROUNDUP(n, 100);
33 #define NRESIZE(var, type, nitems) PyMem_RESIZE(var, type, roundupsize(nitems))
35 PyObject *
36 PyList_New(int size)
38 int i;
39 PyListObject *op;
40 size_t nbytes;
41 if (size < 0) {
42 PyErr_BadInternalCall();
43 return NULL;
45 nbytes = size * sizeof(PyObject *);
46 /* Check for overflow */
47 if (nbytes / sizeof(PyObject *) != (size_t)size) {
48 return PyErr_NoMemory();
50 /* PyObject_NewVar is inlined */
51 op = (PyListObject *) PyObject_MALLOC(sizeof(PyListObject)
52 + PyGC_HEAD_SIZE);
53 if (op == NULL) {
54 return PyErr_NoMemory();
56 op = (PyListObject *) PyObject_FROM_GC(op);
57 if (size <= 0) {
58 op->ob_item = NULL;
60 else {
61 op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);
62 if (op->ob_item == NULL) {
63 PyObject_FREE(PyObject_AS_GC(op));
64 return PyErr_NoMemory();
67 PyObject_INIT_VAR(op, &PyList_Type, size);
68 for (i = 0; i < size; i++)
69 op->ob_item[i] = NULL;
70 PyObject_GC_Init(op);
71 return (PyObject *) op;
74 int
75 PyList_Size(PyObject *op)
77 if (!PyList_Check(op)) {
78 PyErr_BadInternalCall();
79 return -1;
81 else
82 return ((PyListObject *)op) -> ob_size;
85 static PyObject *indexerr;
87 PyObject *
88 PyList_GetItem(PyObject *op, int i)
90 if (!PyList_Check(op)) {
91 PyErr_BadInternalCall();
92 return NULL;
94 if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
95 if (indexerr == NULL)
96 indexerr = PyString_FromString(
97 "list index out of range");
98 PyErr_SetObject(PyExc_IndexError, indexerr);
99 return NULL;
101 return ((PyListObject *)op) -> ob_item[i];
105 PyList_SetItem(register PyObject *op, register int i,
106 register PyObject *newitem)
108 register PyObject *olditem;
109 register PyObject **p;
110 if (!PyList_Check(op)) {
111 Py_XDECREF(newitem);
112 PyErr_BadInternalCall();
113 return -1;
115 if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
116 Py_XDECREF(newitem);
117 PyErr_SetString(PyExc_IndexError,
118 "list assignment index out of range");
119 return -1;
121 p = ((PyListObject *)op) -> ob_item + i;
122 olditem = *p;
123 *p = newitem;
124 Py_XDECREF(olditem);
125 return 0;
128 static int
129 ins1(PyListObject *self, int where, PyObject *v)
131 int i;
132 PyObject **items;
133 if (v == NULL) {
134 PyErr_BadInternalCall();
135 return -1;
137 if (self->ob_size == INT_MAX) {
138 PyErr_SetString(PyExc_OverflowError,
139 "cannot add more objects to list");
140 return -1;
142 items = self->ob_item;
143 NRESIZE(items, PyObject *, self->ob_size+1);
144 if (items == NULL) {
145 PyErr_NoMemory();
146 return -1;
148 if (where < 0)
149 where = 0;
150 if (where > self->ob_size)
151 where = self->ob_size;
152 for (i = self->ob_size; --i >= where; )
153 items[i+1] = items[i];
154 Py_INCREF(v);
155 items[where] = v;
156 self->ob_item = items;
157 self->ob_size++;
158 return 0;
162 PyList_Insert(PyObject *op, int where, PyObject *newitem)
164 if (!PyList_Check(op)) {
165 PyErr_BadInternalCall();
166 return -1;
168 return ins1((PyListObject *)op, where, newitem);
172 PyList_Append(PyObject *op, PyObject *newitem)
174 if (!PyList_Check(op)) {
175 PyErr_BadInternalCall();
176 return -1;
178 return ins1((PyListObject *)op,
179 (int) ((PyListObject *)op)->ob_size, newitem);
182 /* Methods */
184 static void
185 list_dealloc(PyListObject *op)
187 int i;
188 Py_TRASHCAN_SAFE_BEGIN(op)
189 PyObject_GC_Fini(op);
190 if (op->ob_item != NULL) {
191 /* Do it backwards, for Christian Tismer.
192 There's a simple test case where somehow this reduces
193 thrashing when a *very* large list is created and
194 immediately deleted. */
195 i = op->ob_size;
196 while (--i >= 0) {
197 Py_XDECREF(op->ob_item[i]);
199 PyMem_FREE(op->ob_item);
201 op = (PyListObject *) PyObject_AS_GC(op);
202 PyObject_DEL(op);
203 Py_TRASHCAN_SAFE_END(op)
206 static int
207 list_print(PyListObject *op, FILE *fp, int flags)
209 int i;
211 i = Py_ReprEnter((PyObject*)op);
212 if (i != 0) {
213 if (i < 0)
214 return i;
215 fprintf(fp, "[...]");
216 return 0;
218 fprintf(fp, "[");
219 for (i = 0; i < op->ob_size; i++) {
220 if (i > 0)
221 fprintf(fp, ", ");
222 if (PyObject_Print(op->ob_item[i], fp, 0) != 0) {
223 Py_ReprLeave((PyObject *)op);
224 return -1;
227 fprintf(fp, "]");
228 Py_ReprLeave((PyObject *)op);
229 return 0;
232 static PyObject *
233 list_repr(PyListObject *v)
235 PyObject *s, *comma;
236 int i;
238 i = Py_ReprEnter((PyObject*)v);
239 if (i != 0) {
240 if (i > 0)
241 return PyString_FromString("[...]");
242 return NULL;
244 s = PyString_FromString("[");
245 comma = PyString_FromString(", ");
246 for (i = 0; i < v->ob_size && s != NULL; i++) {
247 if (i > 0)
248 PyString_Concat(&s, comma);
249 PyString_ConcatAndDel(&s, PyObject_Repr(v->ob_item[i]));
251 Py_XDECREF(comma);
252 PyString_ConcatAndDel(&s, PyString_FromString("]"));
253 Py_ReprLeave((PyObject *)v);
254 return s;
257 static int
258 list_compare(PyListObject *v, PyListObject *w)
260 int i;
261 for (i = 0; i < v->ob_size && i < w->ob_size; i++) {
262 int cmp = PyObject_Compare(v->ob_item[i], w->ob_item[i]);
263 if (cmp != 0)
264 return cmp;
266 return v->ob_size - w->ob_size;
269 static int
270 list_length(PyListObject *a)
272 return a->ob_size;
277 static int
278 list_contains(PyListObject *a, PyObject *el)
280 int i, cmp;
282 for (i = 0; i < a->ob_size; ++i) {
283 cmp = PyObject_Compare(el, PyList_GET_ITEM(a, i));
284 if (cmp == 0)
285 return 1;
286 if (PyErr_Occurred())
287 return -1;
289 return 0;
293 static PyObject *
294 list_item(PyListObject *a, int i)
296 if (i < 0 || i >= a->ob_size) {
297 if (indexerr == NULL)
298 indexerr = PyString_FromString(
299 "list index out of range");
300 PyErr_SetObject(PyExc_IndexError, indexerr);
301 return NULL;
303 Py_INCREF(a->ob_item[i]);
304 return a->ob_item[i];
307 static PyObject *
308 list_slice(PyListObject *a, int ilow, int ihigh)
310 PyListObject *np;
311 int i;
312 if (ilow < 0)
313 ilow = 0;
314 else if (ilow > a->ob_size)
315 ilow = a->ob_size;
316 if (ihigh < ilow)
317 ihigh = ilow;
318 else if (ihigh > a->ob_size)
319 ihigh = a->ob_size;
320 np = (PyListObject *) PyList_New(ihigh - ilow);
321 if (np == NULL)
322 return NULL;
323 for (i = ilow; i < ihigh; i++) {
324 PyObject *v = a->ob_item[i];
325 Py_INCREF(v);
326 np->ob_item[i - ilow] = v;
328 return (PyObject *)np;
331 PyObject *
332 PyList_GetSlice(PyObject *a, int ilow, int ihigh)
334 if (!PyList_Check(a)) {
335 PyErr_BadInternalCall();
336 return NULL;
338 return list_slice((PyListObject *)a, ilow, ihigh);
341 static PyObject *
342 list_concat(PyListObject *a, PyObject *bb)
344 int size;
345 int i;
346 PyListObject *np;
347 if (!PyList_Check(bb)) {
348 PyErr_Format(PyExc_TypeError,
349 "can only concatenate list (not \"%.200s\") to list",
350 bb->ob_type->tp_name);
351 return NULL;
353 #define b ((PyListObject *)bb)
354 size = a->ob_size + b->ob_size;
355 np = (PyListObject *) PyList_New(size);
356 if (np == NULL) {
357 return NULL;
359 for (i = 0; i < a->ob_size; i++) {
360 PyObject *v = a->ob_item[i];
361 Py_INCREF(v);
362 np->ob_item[i] = v;
364 for (i = 0; i < b->ob_size; i++) {
365 PyObject *v = b->ob_item[i];
366 Py_INCREF(v);
367 np->ob_item[i + a->ob_size] = v;
369 return (PyObject *)np;
370 #undef b
373 static PyObject *
374 list_repeat(PyListObject *a, int n)
376 int i, j;
377 int size;
378 PyListObject *np;
379 PyObject **p;
380 if (n < 0)
381 n = 0;
382 size = a->ob_size * n;
383 np = (PyListObject *) PyList_New(size);
384 if (np == NULL)
385 return NULL;
386 p = np->ob_item;
387 for (i = 0; i < n; i++) {
388 for (j = 0; j < a->ob_size; j++) {
389 *p = a->ob_item[j];
390 Py_INCREF(*p);
391 p++;
394 return (PyObject *) np;
397 static int
398 list_ass_slice(PyListObject *a, int ilow, int ihigh, PyObject *v)
400 /* Because [X]DECREF can recursively invoke list operations on
401 this list, we must postpone all [X]DECREF activity until
402 after the list is back in its canonical shape. Therefore
403 we must allocate an additional array, 'recycle', into which
404 we temporarily copy the items that are deleted from the
405 list. :-( */
406 PyObject **recycle, **p;
407 PyObject **item;
408 int n; /* Size of replacement list */
409 int d; /* Change in size */
410 int k; /* Loop index */
411 #define b ((PyListObject *)v)
412 if (v == NULL)
413 n = 0;
414 else if (PyList_Check(v)) {
415 n = b->ob_size;
416 if (a == b) {
417 /* Special case "a[i:j] = a" -- copy b first */
418 int ret;
419 v = list_slice(b, 0, n);
420 ret = list_ass_slice(a, ilow, ihigh, v);
421 Py_DECREF(v);
422 return ret;
425 else {
426 PyErr_Format(PyExc_TypeError,
427 "must assign list (not \"%.200s\") to slice",
428 v->ob_type->tp_name);
429 return -1;
431 if (ilow < 0)
432 ilow = 0;
433 else if (ilow > a->ob_size)
434 ilow = a->ob_size;
435 if (ihigh < ilow)
436 ihigh = ilow;
437 else if (ihigh > a->ob_size)
438 ihigh = a->ob_size;
439 item = a->ob_item;
440 d = n - (ihigh-ilow);
441 if (ihigh > ilow)
442 p = recycle = PyMem_NEW(PyObject *, (ihigh-ilow));
443 else
444 p = recycle = NULL;
445 if (d <= 0) { /* Delete -d items; recycle ihigh-ilow items */
446 for (k = ilow; k < ihigh; k++)
447 *p++ = item[k];
448 if (d < 0) {
449 for (/*k = ihigh*/; k < a->ob_size; k++)
450 item[k+d] = item[k];
451 a->ob_size += d;
452 NRESIZE(item, PyObject *, a->ob_size); /* Can't fail */
453 a->ob_item = item;
456 else { /* Insert d items; recycle ihigh-ilow items */
457 NRESIZE(item, PyObject *, a->ob_size + d);
458 if (item == NULL) {
459 if (recycle != NULL)
460 PyMem_DEL(recycle);
461 PyErr_NoMemory();
462 return -1;
464 for (k = a->ob_size; --k >= ihigh; )
465 item[k+d] = item[k];
466 for (/*k = ihigh-1*/; k >= ilow; --k)
467 *p++ = item[k];
468 a->ob_item = item;
469 a->ob_size += d;
471 for (k = 0; k < n; k++, ilow++) {
472 PyObject *w = b->ob_item[k];
473 Py_XINCREF(w);
474 item[ilow] = w;
476 if (recycle) {
477 while (--p >= recycle)
478 Py_XDECREF(*p);
479 PyMem_DEL(recycle);
481 return 0;
482 #undef b
486 PyList_SetSlice(PyObject *a, int ilow, int ihigh, PyObject *v)
488 if (!PyList_Check(a)) {
489 PyErr_BadInternalCall();
490 return -1;
492 return list_ass_slice((PyListObject *)a, ilow, ihigh, v);
495 static int
496 list_ass_item(PyListObject *a, int i, PyObject *v)
498 PyObject *old_value;
499 if (i < 0 || i >= a->ob_size) {
500 PyErr_SetString(PyExc_IndexError,
501 "list assignment index out of range");
502 return -1;
504 if (v == NULL)
505 return list_ass_slice(a, i, i+1, v);
506 Py_INCREF(v);
507 old_value = a->ob_item[i];
508 a->ob_item[i] = v;
509 Py_DECREF(old_value);
510 return 0;
513 static PyObject *
514 ins(PyListObject *self, int where, PyObject *v)
516 if (ins1(self, where, v) != 0)
517 return NULL;
518 Py_INCREF(Py_None);
519 return Py_None;
522 static PyObject *
523 listinsert(PyListObject *self, PyObject *args)
525 int i;
526 PyObject *v;
527 if (!PyArg_ParseTuple(args, "iO:insert", &i, &v))
528 return NULL;
529 return ins(self, i, v);
532 /* Define NO_STRICT_LIST_APPEND to enable multi-argument append() */
534 #ifndef NO_STRICT_LIST_APPEND
535 #define PyArg_ParseTuple_Compat1 PyArg_ParseTuple
536 #else
537 #define PyArg_ParseTuple_Compat1(args, format, ret) \
539 PyTuple_GET_SIZE(args) > 1 ? (*ret = args, 1) : \
540 PyTuple_GET_SIZE(args) == 1 ? (*ret = PyTuple_GET_ITEM(args, 0), 1) : \
541 PyArg_ParseTuple(args, format, ret) \
543 #endif
546 static PyObject *
547 listappend(PyListObject *self, PyObject *args)
549 PyObject *v;
550 if (!PyArg_ParseTuple_Compat1(args, "O:append", &v))
551 return NULL;
552 return ins(self, (int) self->ob_size, v);
555 static PyObject *
556 listextend(PyListObject *self, PyObject *args)
558 PyObject *b = NULL, *res = NULL;
559 PyObject **items;
560 int selflen = PyList_GET_SIZE(self);
561 int blen;
562 register int i;
564 if (!PyArg_ParseTuple(args, "O:extend", &b))
565 return NULL;
567 b = PySequence_Fast(b, "list.extend() argument must be a sequence");
568 if (!b)
569 return NULL;
571 if (PyObject_Size(b) == 0)
572 /* short circuit when b is empty */
573 goto ok;
575 if (self == (PyListObject*)b) {
576 /* as in list_ass_slice() we must special case the
577 * situation: a.extend(a)
579 * XXX: I think this way ought to be faster than using
580 * list_slice() the way list_ass_slice() does.
582 Py_DECREF(b);
583 b = PyList_New(selflen);
584 if (!b)
585 return NULL;
586 for (i = 0; i < selflen; i++) {
587 PyObject *o = PyList_GET_ITEM(self, i);
588 Py_INCREF(o);
589 PyList_SET_ITEM(b, i, o);
593 blen = PyObject_Size(b);
595 /* resize a using idiom */
596 items = self->ob_item;
597 NRESIZE(items, PyObject*, selflen + blen);
598 if (items == NULL) {
599 PyErr_NoMemory();
600 goto failed;
602 self->ob_item = items;
604 /* populate the end of self with b's items */
605 for (i = 0; i < blen; i++) {
606 PyObject *o = PySequence_Fast_GET_ITEM(b, i);
607 Py_INCREF(o);
608 PyList_SET_ITEM(self, self->ob_size++, o);
611 res = Py_None;
612 Py_INCREF(res);
613 failed:
614 Py_DECREF(b);
615 return res;
619 static PyObject *
620 listpop(PyListObject *self, PyObject *args)
622 int i = -1;
623 PyObject *v;
624 if (!PyArg_ParseTuple(args, "|i:pop", &i))
625 return NULL;
626 if (self->ob_size == 0) {
627 /* Special-case most common failure cause */
628 PyErr_SetString(PyExc_IndexError, "pop from empty list");
629 return NULL;
631 if (i < 0)
632 i += self->ob_size;
633 if (i < 0 || i >= self->ob_size) {
634 PyErr_SetString(PyExc_IndexError, "pop index out of range");
635 return NULL;
637 v = self->ob_item[i];
638 Py_INCREF(v);
639 if (list_ass_slice(self, i, i+1, (PyObject *)NULL) != 0) {
640 Py_DECREF(v);
641 return NULL;
643 return v;
646 /* New quicksort implementation for arrays of object pointers.
647 Thanks to discussions with Tim Peters. */
649 /* CMPERROR is returned by our comparison function when an error
650 occurred. This is the largest negative integer (0x80000000 on a
651 32-bit system). */
652 #define CMPERROR ( (int) ((unsigned int)1 << (8*sizeof(int) - 1)) )
654 /* Comparison function. Takes care of calling a user-supplied
655 comparison function (any callable Python object). Calls the
656 standard comparison function, PyObject_Compare(), if the user-
657 supplied function is NULL. */
659 static int
660 docompare(PyObject *x, PyObject *y, PyObject *compare)
662 PyObject *args, *res;
663 int i;
665 if (compare == NULL) {
666 i = PyObject_Compare(x, y);
667 if (i && PyErr_Occurred())
668 i = CMPERROR;
669 return i;
672 args = Py_BuildValue("(OO)", x, y);
673 if (args == NULL)
674 return CMPERROR;
675 res = PyEval_CallObject(compare, args);
676 Py_DECREF(args);
677 if (res == NULL)
678 return CMPERROR;
679 if (!PyInt_Check(res)) {
680 Py_DECREF(res);
681 PyErr_SetString(PyExc_TypeError,
682 "comparison function must return int");
683 return CMPERROR;
685 i = PyInt_AsLong(res);
686 Py_DECREF(res);
687 if (i < 0)
688 return -1;
689 if (i > 0)
690 return 1;
691 return 0;
694 /* MINSIZE is the smallest array that will get a full-blown samplesort
695 treatment; smaller arrays are sorted using binary insertion. It must
696 be at least 7 for the samplesort implementation to work. Binary
697 insertion does fewer compares, but can suffer O(N**2) data movement.
698 The more expensive compares, the larger MINSIZE should be. */
699 #define MINSIZE 100
701 /* MINPARTITIONSIZE is the smallest array slice samplesort will bother to
702 partition; smaller slices are passed to binarysort. It must be at
703 least 2, and no larger than MINSIZE. Setting it higher reduces the #
704 of compares slowly, but increases the amount of data movement quickly.
705 The value here was chosen assuming a compare costs ~25x more than
706 swapping a pair of memory-resident pointers -- but under that assumption,
707 changing the value by a few dozen more or less has aggregate effect
708 under 1%. So the value is crucial, but not touchy <wink>. */
709 #define MINPARTITIONSIZE 40
711 /* MAXMERGE is the largest number of elements we'll always merge into
712 a known-to-be sorted chunk via binary insertion, regardless of the
713 size of that chunk. Given a chunk of N sorted elements, and a group
714 of K unknowns, the largest K for which it's better to do insertion
715 (than a full-blown sort) is a complicated function of N and K mostly
716 involving the expected number of compares and data moves under each
717 approach, and the relative cost of those operations on a specific
718 architecure. The fixed value here is conservative, and should be a
719 clear win regardless of architecture or N. */
720 #define MAXMERGE 15
722 /* STACKSIZE is the size of our work stack. A rough estimate is that
723 this allows us to sort arrays of size N where
724 N / ln(N) = MINPARTITIONSIZE * 2**STACKSIZE, so 60 is more than enough
725 for arrays of size 2**64. Because we push the biggest partition
726 first, the worst case occurs when all subarrays are always partitioned
727 exactly in two. */
728 #define STACKSIZE 60
731 #define SETK(X,Y) if ((k = docompare(X,Y,compare))==CMPERROR) goto fail
733 /* binarysort is the best method for sorting small arrays: it does
734 few compares, but can do data movement quadratic in the number of
735 elements.
736 [lo, hi) is a contiguous slice of a list, and is sorted via
737 binary insertion.
738 On entry, must have lo <= start <= hi, and that [lo, start) is already
739 sorted (pass start == lo if you don't know!).
740 If docompare complains (returns CMPERROR) return -1, else 0.
741 Even in case of error, the output slice will be some permutation of
742 the input (nothing is lost or duplicated).
745 static int
746 binarysort(PyObject **lo, PyObject **hi, PyObject **start, PyObject *compare)
747 /* compare -- comparison function object, or NULL for default */
749 /* assert lo <= start <= hi
750 assert [lo, start) is sorted */
751 register int k;
752 register PyObject **l, **p, **r;
753 register PyObject *pivot;
755 if (lo == start)
756 ++start;
757 for (; start < hi; ++start) {
758 /* set l to where *start belongs */
759 l = lo;
760 r = start;
761 pivot = *r;
762 do {
763 p = l + ((r - l) >> 1);
764 SETK(pivot, *p);
765 if (k < 0)
766 r = p;
767 else
768 l = p + 1;
769 } while (l < r);
770 /* Pivot should go at l -- slide over to make room.
771 Caution: using memmove is much slower under MSVC 5;
772 we're not usually moving many slots. */
773 for (p = start; p > l; --p)
774 *p = *(p-1);
775 *l = pivot;
777 return 0;
779 fail:
780 return -1;
783 /* samplesortslice is the sorting workhorse.
784 [lo, hi) is a contiguous slice of a list, to be sorted in place.
785 On entry, must have lo <= hi,
786 If docompare complains (returns CMPERROR) return -1, else 0.
787 Even in case of error, the output slice will be some permutation of
788 the input (nothing is lost or duplicated).
790 samplesort is basically quicksort on steroids: a power of 2 close
791 to n/ln(n) is computed, and that many elements (less 1) are picked at
792 random from the array and sorted. These 2**k - 1 elements are then
793 used as preselected pivots for an equal number of quicksort
794 partitioning steps, partitioning the slice into 2**k chunks each of
795 size about ln(n). These small final chunks are then usually handled
796 by binarysort. Note that when k=1, this is roughly the same as an
797 ordinary quicksort using a random pivot, and when k=2 this is roughly
798 a median-of-3 quicksort. From that view, using k ~= lg(n/ln(n)) makes
799 this a "median of n/ln(n)" quicksort. You can also view it as a kind
800 of bucket sort, where 2**k-1 bucket boundaries are picked dynamically.
802 The large number of samples makes a quadratic-time case almost
803 impossible, and asymptotically drives the average-case number of
804 compares from quicksort's 2 N ln N (or 12/7 N ln N for the median-of-
805 3 variant) down to N lg N.
807 We also play lots of low-level tricks to cut the number of compares.
809 Very obscure: To avoid using extra memory, the PPs are stored in the
810 array and shuffled around as partitioning proceeds. At the start of a
811 partitioning step, we'll have 2**m-1 (for some m) PPs in sorted order,
812 adjacent (either on the left or the right!) to a chunk of X elements
813 that are to be partitioned: P X or X P. In either case we need to
814 shuffle things *in place* so that the 2**(m-1) smaller PPs are on the
815 left, followed by the PP to be used for this step (that's the middle
816 of the PPs), followed by X, followed by the 2**(m-1) larger PPs:
817 P X or X P -> Psmall pivot X Plarge
818 and the order of the PPs must not be altered. It can take a while
819 to realize this isn't trivial! It can take even longer <wink> to
820 understand why the simple code below works, using only 2**(m-1) swaps.
821 The key is that the order of the X elements isn't necessarily
822 preserved: X can end up as some cyclic permutation of its original
823 order. That's OK, because X is unsorted anyway. If the order of X
824 had to be preserved too, the simplest method I know of using O(1)
825 scratch storage requires len(X) + 2**(m-1) swaps, spread over 2 passes.
826 Since len(X) is typically several times larger than 2**(m-1), that
827 would slow things down.
830 struct SamplesortStackNode {
831 /* Represents a slice of the array, from (& including) lo up
832 to (but excluding) hi. "extra" additional & adjacent elements
833 are pre-selected pivots (PPs), spanning [lo-extra, lo) if
834 extra > 0, or [hi, hi-extra) if extra < 0. The PPs are
835 already sorted, but nothing is known about the other elements
836 in [lo, hi). |extra| is always one less than a power of 2.
837 When extra is 0, we're out of PPs, and the slice must be
838 sorted by some other means. */
839 PyObject **lo;
840 PyObject **hi;
841 int extra;
844 /* The number of PPs we want is 2**k - 1, where 2**k is as close to
845 N / ln(N) as possible. So k ~= lg(N / ln(N)). Calling libm routines
846 is undesirable, so cutoff values are canned in the "cutoff" table
847 below: cutoff[i] is the smallest N such that k == CUTOFFBASE + i. */
848 #define CUTOFFBASE 4
849 static long cutoff[] = {
850 43, /* smallest N such that k == 4 */
851 106, /* etc */
852 250,
853 576,
854 1298,
855 2885,
856 6339,
857 13805,
858 29843,
859 64116,
860 137030,
861 291554,
862 617916,
863 1305130,
864 2748295,
865 5771662,
866 12091672,
867 25276798,
868 52734615,
869 109820537,
870 228324027,
871 473977813,
872 982548444, /* smallest N such that k == 26 */
873 2034159050 /* largest N that fits in signed 32-bit; k == 27 */
876 static int
877 samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
878 /* compare -- comparison function object, or NULL for default */
880 register PyObject **l, **r;
881 register PyObject *tmp, *pivot;
882 register int k;
883 int n, extra, top, extraOnRight;
884 struct SamplesortStackNode stack[STACKSIZE];
886 /* assert lo <= hi */
887 n = hi - lo;
889 /* ----------------------------------------------------------
890 * Special cases
891 * --------------------------------------------------------*/
892 if (n < 2)
893 return 0;
895 /* Set r to the largest value such that [lo,r) is sorted.
896 This catches the already-sorted case, the all-the-same
897 case, and the appended-a-few-elements-to-a-sorted-list case.
898 If the array is unsorted, we're very likely to get out of
899 the loop fast, so the test is cheap if it doesn't pay off.
901 /* assert lo < hi */
902 for (r = lo+1; r < hi; ++r) {
903 SETK(*r, *(r-1));
904 if (k < 0)
905 break;
907 /* [lo,r) is sorted, [r,hi) unknown. Get out cheap if there are
908 few unknowns, or few elements in total. */
909 if (hi - r <= MAXMERGE || n < MINSIZE)
910 return binarysort(lo, hi, r, compare);
912 /* Check for the array already being reverse-sorted. Typical
913 benchmark-driven silliness <wink>. */
914 /* assert lo < hi */
915 for (r = lo+1; r < hi; ++r) {
916 SETK(*(r-1), *r);
917 if (k < 0)
918 break;
920 if (hi - r <= MAXMERGE) {
921 /* Reverse the reversed prefix, then insert the tail */
922 PyObject **originalr = r;
923 l = lo;
924 do {
925 --r;
926 tmp = *l; *l = *r; *r = tmp;
927 ++l;
928 } while (l < r);
929 return binarysort(lo, hi, originalr, compare);
932 /* ----------------------------------------------------------
933 * Normal case setup: a large array without obvious pattern.
934 * --------------------------------------------------------*/
936 /* extra := a power of 2 ~= n/ln(n), less 1.
937 First find the smallest extra s.t. n < cutoff[extra] */
938 for (extra = 0;
939 extra < sizeof(cutoff) / sizeof(cutoff[0]);
940 ++extra) {
941 if (n < cutoff[extra])
942 break;
943 /* note that if we fall out of the loop, the value of
944 extra still makes *sense*, but may be smaller than
945 we would like (but the array has more than ~= 2**31
946 elements in this case!) */
948 /* Now k == extra - 1 + CUTOFFBASE. The smallest value k can
949 have is CUTOFFBASE-1, so
950 assert MINSIZE >= 2**(CUTOFFBASE-1) - 1 */
951 extra = (1 << (extra - 1 + CUTOFFBASE)) - 1;
952 /* assert extra > 0 and n >= extra */
954 /* Swap that many values to the start of the array. The
955 selection of elements is pseudo-random, but the same on
956 every run (this is intentional! timing algorithm changes is
957 a pain if timing varies across runs). */
959 unsigned int seed = n / extra; /* arbitrary */
960 unsigned int i;
961 for (i = 0; i < (unsigned)extra; ++i) {
962 /* j := random int in [i, n) */
963 unsigned int j;
964 seed = seed * 69069 + 7;
965 j = i + seed % (n - i);
966 tmp = lo[i]; lo[i] = lo[j]; lo[j] = tmp;
970 /* Recursively sort the preselected pivots. */
971 if (samplesortslice(lo, lo + extra, compare) < 0)
972 goto fail;
974 top = 0; /* index of available stack slot */
975 lo += extra; /* point to first unknown */
976 extraOnRight = 0; /* the PPs are at the left end */
978 /* ----------------------------------------------------------
979 * Partition [lo, hi), and repeat until out of work.
980 * --------------------------------------------------------*/
981 for (;;) {
982 /* assert lo <= hi, so n >= 0 */
983 n = hi - lo;
985 /* We may not want, or may not be able, to partition:
986 If n is small, it's quicker to insert.
987 If extra is 0, we're out of pivots, and *must* use
988 another method.
990 if (n < MINPARTITIONSIZE || extra == 0) {
991 if (n >= MINSIZE) {
992 /* assert extra == 0
993 This is rare, since the average size
994 of a final block is only about
995 ln(original n). */
996 if (samplesortslice(lo, hi, compare) < 0)
997 goto fail;
999 else {
1000 /* Binary insertion should be quicker,
1001 and we can take advantage of the PPs
1002 already being sorted. */
1003 if (extraOnRight && extra) {
1004 /* swap the PPs to the left end */
1005 k = extra;
1006 do {
1007 tmp = *lo;
1008 *lo = *hi;
1009 *hi = tmp;
1010 ++lo; ++hi;
1011 } while (--k);
1013 if (binarysort(lo - extra, hi, lo,
1014 compare) < 0)
1015 goto fail;
1018 /* Find another slice to work on. */
1019 if (--top < 0)
1020 break; /* no more -- done! */
1021 lo = stack[top].lo;
1022 hi = stack[top].hi;
1023 extra = stack[top].extra;
1024 extraOnRight = 0;
1025 if (extra < 0) {
1026 extraOnRight = 1;
1027 extra = -extra;
1029 continue;
1032 /* Pretend the PPs are indexed 0, 1, ..., extra-1.
1033 Then our preselected pivot is at (extra-1)/2, and we
1034 want to move the PPs before that to the left end of
1035 the slice, and the PPs after that to the right end.
1036 The following section changes extra, lo, hi, and the
1037 slice such that:
1038 [lo-extra, lo) contains the smaller PPs.
1039 *lo == our PP.
1040 (lo, hi) contains the unknown elements.
1041 [hi, hi+extra) contains the larger PPs.
1043 k = extra >>= 1; /* num PPs to move */
1044 if (extraOnRight) {
1045 /* Swap the smaller PPs to the left end.
1046 Note that this loop actually moves k+1 items:
1047 the last is our PP */
1048 do {
1049 tmp = *lo; *lo = *hi; *hi = tmp;
1050 ++lo; ++hi;
1051 } while (k--);
1053 else {
1054 /* Swap the larger PPs to the right end. */
1055 while (k--) {
1056 --lo; --hi;
1057 tmp = *lo; *lo = *hi; *hi = tmp;
1060 --lo; /* *lo is now our PP */
1061 pivot = *lo;
1063 /* Now an almost-ordinary quicksort partition step.
1064 Note that most of the time is spent here!
1065 Only odd thing is that we partition into < and >=,
1066 instead of the usual <= and >=. This helps when
1067 there are lots of duplicates of different values,
1068 because it eventually tends to make subfiles
1069 "pure" (all duplicates), and we special-case for
1070 duplicates later. */
1071 l = lo + 1;
1072 r = hi - 1;
1073 /* assert lo < l < r < hi (small n weeded out above) */
1075 do {
1076 /* slide l right, looking for key >= pivot */
1077 do {
1078 SETK(*l, pivot);
1079 if (k < 0)
1080 ++l;
1081 else
1082 break;
1083 } while (l < r);
1085 /* slide r left, looking for key < pivot */
1086 while (l < r) {
1087 register PyObject *rval = *r--;
1088 SETK(rval, pivot);
1089 if (k < 0) {
1090 /* swap and advance */
1091 r[1] = *l;
1092 *l++ = rval;
1093 break;
1097 } while (l < r);
1099 /* assert lo < r <= l < hi
1100 assert r == l or r+1 == l
1101 everything to the left of l is < pivot, and
1102 everything to the right of r is >= pivot */
1104 if (l == r) {
1105 SETK(*r, pivot);
1106 if (k < 0)
1107 ++l;
1108 else
1109 --r;
1111 /* assert lo <= r and r+1 == l and l <= hi
1112 assert r == lo or a[r] < pivot
1113 assert a[lo] is pivot
1114 assert l == hi or a[l] >= pivot
1115 Swap the pivot into "the middle", so we can henceforth
1116 ignore it.
1118 *lo = *r;
1119 *r = pivot;
1121 /* The following is true now, & will be preserved:
1122 All in [lo,r) are < pivot
1123 All in [r,l) == pivot (& so can be ignored)
1124 All in [l,hi) are >= pivot */
1126 /* Check for duplicates of the pivot. One compare is
1127 wasted if there are no duplicates, but can win big
1128 when there are.
1129 Tricky: we're sticking to "<" compares, so deduce
1130 equality indirectly. We know pivot <= *l, so they're
1131 equal iff not pivot < *l.
1133 while (l < hi) {
1134 /* pivot <= *l known */
1135 SETK(pivot, *l);
1136 if (k < 0)
1137 break;
1138 else
1139 /* <= and not < implies == */
1140 ++l;
1143 /* assert lo <= r < l <= hi
1144 Partitions are [lo, r) and [l, hi) */
1146 /* push fattest first; remember we still have extra PPs
1147 to the left of the left chunk and to the right of
1148 the right chunk! */
1149 /* assert top < STACKSIZE */
1150 if (r - lo <= hi - l) {
1151 /* second is bigger */
1152 stack[top].lo = l;
1153 stack[top].hi = hi;
1154 stack[top].extra = -extra;
1155 hi = r;
1156 extraOnRight = 0;
1158 else {
1159 /* first is bigger */
1160 stack[top].lo = lo;
1161 stack[top].hi = r;
1162 stack[top].extra = extra;
1163 lo = l;
1164 extraOnRight = 1;
1166 ++top;
1168 } /* end of partitioning loop */
1170 return 0;
1172 fail:
1173 return -1;
1176 #undef SETK
1178 staticforward PyTypeObject immutable_list_type;
1180 static PyObject *
1181 listsort(PyListObject *self, PyObject *args)
1183 int err;
1184 PyObject *compare = NULL;
1186 if (args != NULL) {
1187 if (!PyArg_ParseTuple(args, "|O:sort", &compare))
1188 return NULL;
1190 self->ob_type = &immutable_list_type;
1191 err = samplesortslice(self->ob_item,
1192 self->ob_item + self->ob_size,
1193 compare);
1194 self->ob_type = &PyList_Type;
1195 if (err < 0)
1196 return NULL;
1197 Py_INCREF(Py_None);
1198 return Py_None;
1202 PyList_Sort(PyObject *v)
1204 if (v == NULL || !PyList_Check(v)) {
1205 PyErr_BadInternalCall();
1206 return -1;
1208 v = listsort((PyListObject *)v, (PyObject *)NULL);
1209 if (v == NULL)
1210 return -1;
1211 Py_DECREF(v);
1212 return 0;
1215 static PyObject *
1216 listreverse(PyListObject *self, PyObject *args)
1218 register PyObject **p, **q;
1219 register PyObject *tmp;
1221 if (!PyArg_ParseTuple(args, ":reverse"))
1222 return NULL;
1224 if (self->ob_size > 1) {
1225 for (p = self->ob_item, q = self->ob_item + self->ob_size - 1;
1226 p < q; p++, q--) {
1227 tmp = *p;
1228 *p = *q;
1229 *q = tmp;
1233 Py_INCREF(Py_None);
1234 return Py_None;
1238 PyList_Reverse(PyObject *v)
1240 if (v == NULL || !PyList_Check(v)) {
1241 PyErr_BadInternalCall();
1242 return -1;
1244 v = listreverse((PyListObject *)v, (PyObject *)NULL);
1245 if (v == NULL)
1246 return -1;
1247 Py_DECREF(v);
1248 return 0;
1251 PyObject *
1252 PyList_AsTuple(PyObject *v)
1254 PyObject *w;
1255 PyObject **p;
1256 int n;
1257 if (v == NULL || !PyList_Check(v)) {
1258 PyErr_BadInternalCall();
1259 return NULL;
1261 n = ((PyListObject *)v)->ob_size;
1262 w = PyTuple_New(n);
1263 if (w == NULL)
1264 return NULL;
1265 p = ((PyTupleObject *)w)->ob_item;
1266 memcpy((void *)p,
1267 (void *)((PyListObject *)v)->ob_item,
1268 n*sizeof(PyObject *));
1269 while (--n >= 0) {
1270 Py_INCREF(*p);
1271 p++;
1273 return w;
1276 static PyObject *
1277 listindex(PyListObject *self, PyObject *args)
1279 int i;
1280 PyObject *v;
1282 if (!PyArg_ParseTuple_Compat1(args, "O:index", &v))
1283 return NULL;
1284 for (i = 0; i < self->ob_size; i++) {
1285 if (PyObject_Compare(self->ob_item[i], v) == 0)
1286 return PyInt_FromLong((long)i);
1287 if (PyErr_Occurred())
1288 return NULL;
1290 PyErr_SetString(PyExc_ValueError, "list.index(x): x not in list");
1291 return NULL;
1294 static PyObject *
1295 listcount(PyListObject *self, PyObject *args)
1297 int count = 0;
1298 int i;
1299 PyObject *v;
1301 if (!PyArg_ParseTuple_Compat1(args, "O:count", &v))
1302 return NULL;
1303 for (i = 0; i < self->ob_size; i++) {
1304 if (PyObject_Compare(self->ob_item[i], v) == 0)
1305 count++;
1306 if (PyErr_Occurred())
1307 return NULL;
1309 return PyInt_FromLong((long)count);
1312 static PyObject *
1313 listremove(PyListObject *self, PyObject *args)
1315 int i;
1316 PyObject *v;
1318 if (!PyArg_ParseTuple_Compat1(args, "O:remove", &v))
1319 return NULL;
1320 for (i = 0; i < self->ob_size; i++) {
1321 if (PyObject_Compare(self->ob_item[i], v) == 0) {
1322 if (list_ass_slice(self, i, i+1,
1323 (PyObject *)NULL) != 0)
1324 return NULL;
1325 Py_INCREF(Py_None);
1326 return Py_None;
1328 if (PyErr_Occurred())
1329 return NULL;
1331 PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
1332 return NULL;
1335 static int
1336 list_traverse(PyListObject *o, visitproc visit, void *arg)
1338 int i, err;
1339 PyObject *x;
1341 for (i = o->ob_size; --i >= 0; ) {
1342 x = o->ob_item[i];
1343 if (x != NULL) {
1344 err = visit(x, arg);
1345 if (err)
1346 return err;
1349 return 0;
1352 static int
1353 list_clear(PyListObject *lp)
1355 (void) PyList_SetSlice((PyObject *)lp, 0, lp->ob_size, 0);
1356 return 0;
1359 static char append_doc[] =
1360 "L.append(object) -- append object to end";
1361 static char extend_doc[] =
1362 "L.extend(list) -- extend list by appending list elements";
1363 static char insert_doc[] =
1364 "L.insert(index, object) -- insert object before index";
1365 static char pop_doc[] =
1366 "L.pop([index]) -> item -- remove and return item at index (default last)";
1367 static char remove_doc[] =
1368 "L.remove(value) -- remove first occurrence of value";
1369 static char index_doc[] =
1370 "L.index(value) -> integer -- return index of first occurrence of value";
1371 static char count_doc[] =
1372 "L.count(value) -> integer -- return number of occurrences of value";
1373 static char reverse_doc[] =
1374 "L.reverse() -- reverse *IN PLACE*";
1375 static char sort_doc[] =
1376 "L.sort([cmpfunc]) -- sort *IN PLACE*; if given, cmpfunc(x, y) -> -1, 0, 1";
1378 static PyMethodDef list_methods[] = {
1379 {"append", (PyCFunction)listappend, 1, append_doc},
1380 {"insert", (PyCFunction)listinsert, 1, insert_doc},
1381 {"extend", (PyCFunction)listextend, 1, extend_doc},
1382 {"pop", (PyCFunction)listpop, 1, pop_doc},
1383 {"remove", (PyCFunction)listremove, 1, remove_doc},
1384 {"index", (PyCFunction)listindex, 1, index_doc},
1385 {"count", (PyCFunction)listcount, 1, count_doc},
1386 {"reverse", (PyCFunction)listreverse, 1, reverse_doc},
1387 {"sort", (PyCFunction)listsort, 1, sort_doc},
1388 {NULL, NULL} /* sentinel */
1391 static PyObject *
1392 list_getattr(PyListObject *f, char *name)
1394 return Py_FindMethod(list_methods, (PyObject *)f, name);
1397 static PySequenceMethods list_as_sequence = {
1398 (inquiry)list_length, /*sq_length*/
1399 (binaryfunc)list_concat, /*sq_concat*/
1400 (intargfunc)list_repeat, /*sq_repeat*/
1401 (intargfunc)list_item, /*sq_item*/
1402 (intintargfunc)list_slice, /*sq_slice*/
1403 (intobjargproc)list_ass_item, /*sq_ass_item*/
1404 (intintobjargproc)list_ass_slice, /*sq_ass_slice*/
1405 (objobjproc)list_contains, /*sq_contains*/
1408 PyTypeObject PyList_Type = {
1409 PyObject_HEAD_INIT(&PyType_Type)
1411 "list",
1412 sizeof(PyListObject) + PyGC_HEAD_SIZE,
1414 (destructor)list_dealloc, /*tp_dealloc*/
1415 (printfunc)list_print, /*tp_print*/
1416 (getattrfunc)list_getattr, /*tp_getattr*/
1417 0, /*tp_setattr*/
1418 (cmpfunc)list_compare, /*tp_compare*/
1419 (reprfunc)list_repr, /*tp_repr*/
1420 0, /*tp_as_number*/
1421 &list_as_sequence, /*tp_as_sequence*/
1422 0, /*tp_as_mapping*/
1423 0, /*tp_hash*/
1424 0, /*tp_call*/
1425 0, /*tp_str*/
1426 0, /*tp_getattro*/
1427 0, /*tp_setattro*/
1428 0, /*tp_as_buffer*/
1429 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1430 0, /* tp_doc */
1431 (traverseproc)list_traverse, /* tp_traverse */
1432 (inquiry)list_clear, /* tp_clear */
1436 /* During a sort, we really can't have anyone modifying the list; it could
1437 cause core dumps. Thus, we substitute a dummy type that raises an
1438 explanatory exception when a modifying operation is used. Caveat:
1439 comparisons may behave differently; but I guess it's a bad idea anyway to
1440 compare a list that's being sorted... */
1442 static PyObject *
1443 immutable_list_op(void)
1445 PyErr_SetString(PyExc_TypeError,
1446 "a list cannot be modified while it is being sorted");
1447 return NULL;
1450 static PyMethodDef immutable_list_methods[] = {
1451 {"append", (PyCFunction)immutable_list_op},
1452 {"insert", (PyCFunction)immutable_list_op},
1453 {"remove", (PyCFunction)immutable_list_op},
1454 {"index", (PyCFunction)listindex},
1455 {"count", (PyCFunction)listcount},
1456 {"reverse", (PyCFunction)immutable_list_op},
1457 {"sort", (PyCFunction)immutable_list_op},
1458 {NULL, NULL} /* sentinel */
1461 static PyObject *
1462 immutable_list_getattr(PyListObject *f, char *name)
1464 return Py_FindMethod(immutable_list_methods, (PyObject *)f, name);
1467 static int
1468 immutable_list_ass(void)
1470 immutable_list_op();
1471 return -1;
1474 static PySequenceMethods immutable_list_as_sequence = {
1475 (inquiry)list_length, /*sq_length*/
1476 (binaryfunc)list_concat, /*sq_concat*/
1477 (intargfunc)list_repeat, /*sq_repeat*/
1478 (intargfunc)list_item, /*sq_item*/
1479 (intintargfunc)list_slice, /*sq_slice*/
1480 (intobjargproc)immutable_list_ass, /*sq_ass_item*/
1481 (intintobjargproc)immutable_list_ass, /*sq_ass_slice*/
1482 (objobjproc)list_contains, /*sq_contains*/
1485 static PyTypeObject immutable_list_type = {
1486 PyObject_HEAD_INIT(&PyType_Type)
1488 "list (immutable, during sort)",
1489 sizeof(PyListObject) + PyGC_HEAD_SIZE,
1491 0, /*tp_dealloc*/ /* Cannot happen */
1492 (printfunc)list_print, /*tp_print*/
1493 (getattrfunc)immutable_list_getattr, /*tp_getattr*/
1494 0, /*tp_setattr*/
1495 0, /*tp_compare*/ /* Won't be called */
1496 (reprfunc)list_repr, /*tp_repr*/
1497 0, /*tp_as_number*/
1498 &immutable_list_as_sequence, /*tp_as_sequence*/
1499 0, /*tp_as_mapping*/
1500 0, /*tp_hash*/
1501 0, /*tp_call*/
1502 0, /*tp_str*/
1503 0, /*tp_getattro*/
1504 0, /*tp_setattro*/
1505 0, /*tp_as_buffer*/
1506 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
1507 0, /* tp_doc */
1508 (traverseproc)list_traverse, /* tp_traverse */