This commit was manufactured by cvs2svn to create tag 'r212c1'.
[python/dscho.git] / Objects / dictobject.c
blob8ad44c181cd16d5da7e75d7ae44fdb1d66ee72d7
2 /* Dictionary object implementation using a hash table */
4 #include "Python.h"
7 /*
8 * MINSIZE is the minimum size of a dictionary.
9 */
11 #define MINSIZE 4
13 /* define this out if you don't want conversion statistics on exit */
14 #undef SHOW_CONVERSION_COUNTS
17 Table of irreducible polynomials to efficiently cycle through
18 GF(2^n)-{0}, 2<=n<=30. A table size is always a power of 2.
20 static long polys[] = {
21 4 + 3,
22 8 + 3,
23 16 + 3,
24 32 + 5,
25 64 + 3,
26 128 + 3,
27 256 + 29,
28 512 + 17,
29 1024 + 9,
30 2048 + 5,
31 4096 + 83,
32 8192 + 27,
33 16384 + 43,
34 32768 + 3,
35 65536 + 45,
36 131072 + 9,
37 262144 + 39,
38 524288 + 39,
39 1048576 + 9,
40 2097152 + 5,
41 4194304 + 3,
42 8388608 + 33,
43 16777216 + 27,
44 33554432 + 9,
45 67108864 + 71,
46 134217728 + 39,
47 268435456 + 9,
48 536870912 + 5,
49 1073741824 + 83,
53 /* Object used as dummy key to fill deleted entries */
54 static PyObject *dummy; /* Initialized by first call to newdictobject() */
57 There are three kinds of slots in the table:
59 1. Unused. me_key == me_value == NULL
60 Does not hold an active (key, value) pair now and never did. Unused can
61 transition to Active upon key insertion. This is the only case in which
62 me_key is NULL, and is each slot's initial state.
64 2. Active. me_key != NULL and me_key != dummy and me_value != NULL
65 Holds an active (key, value) pair. Active can transition to Dummy upon
66 key deletion. This is the only case in which me_value != NULL.
68 3. Dummy. me_key == dummy and me_value == NULL
69 Previously held an active (key, value) pair, but that was deleted and an
70 active pair has not yet overwritten the slot. Dummy can transition to
71 Active upon key insertion. Dummy slots cannot be made Unused again
72 (cannot have me_key set to NULL), else the probe sequence in case of
73 collision would have no way to know they were once active.
75 Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
76 hold a search finger. The me_hash field of Unused or Dummy slots has no
77 meaning otherwise.
79 typedef struct {
80 long me_hash; /* cached hash code of me_key */
81 PyObject *me_key;
82 PyObject *me_value;
83 #ifdef USE_CACHE_ALIGNED
84 long aligner;
85 #endif
86 } dictentry;
89 To ensure the lookup algorithm terminates, there must be at least one Unused
90 slot (NULL key) in the table.
91 The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
92 ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
93 values == the number of Active items).
94 To avoid slowing down lookups on a near-full table, we resize the table when
95 it's two-thirds full.
97 typedef struct dictobject dictobject;
98 struct dictobject {
99 PyObject_HEAD
100 int ma_fill; /* # Active + # Dummy */
101 int ma_used; /* # Active */
102 int ma_size; /* total # slots in ma_table */
103 int ma_poly; /* appopriate entry from polys vector */
104 dictentry *ma_table;
105 dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
108 /* forward declarations */
109 static dictentry *
110 lookdict_string(dictobject *mp, PyObject *key, long hash);
112 #ifdef SHOW_CONVERSION_COUNTS
113 static long created = 0L;
114 static long converted = 0L;
116 static void
117 show_counts(void)
119 fprintf(stderr, "created %ld string dicts\n", created);
120 fprintf(stderr, "converted %ld to normal dicts\n", converted);
121 fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
123 #endif
125 PyObject *
126 PyDict_New(void)
128 register dictobject *mp;
129 if (dummy == NULL) { /* Auto-initialize dummy */
130 dummy = PyString_FromString("<dummy key>");
131 if (dummy == NULL)
132 return NULL;
133 #ifdef SHOW_CONVERSION_COUNTS
134 Py_AtExit(show_counts);
135 #endif
137 mp = PyObject_NEW(dictobject, &PyDict_Type);
138 if (mp == NULL)
139 return NULL;
140 mp->ma_size = 0;
141 mp->ma_poly = 0;
142 mp->ma_table = NULL;
143 mp->ma_fill = 0;
144 mp->ma_used = 0;
145 mp->ma_lookup = lookdict_string;
146 #ifdef SHOW_CONVERSION_COUNTS
147 ++created;
148 #endif
149 PyObject_GC_Init(mp);
150 return (PyObject *)mp;
154 The basic lookup function used by all operations.
155 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
156 Open addressing is preferred over chaining since the link overhead for
157 chaining would be substantial (100% with typical malloc overhead).
158 However, instead of going through the table at constant steps, we cycle
159 through the values of GF(2^n). This avoids modulo computations, being
160 much cheaper on RISC machines, without leading to clustering.
162 The initial probe index is computed as hash mod the table size.
163 Subsequent probe indices use the values of x^i in GF(2^n)-{0} as an offset,
164 where x is a root. The initial offset is derived from hash, too.
166 All arithmetic on hash should ignore overflow.
168 (This version is due to Reimer Behrends, some ideas are also due to
169 Jyrki Alakuijala and Vladimir Marangozov.)
171 This function must never return NULL; failures are indicated by returning
172 a dictentry* for which the me_value field is NULL. Exceptions are never
173 reported by this function, and outstanding exceptions are maintained.
175 static dictentry *
176 lookdict(dictobject *mp, PyObject *key, register long hash)
178 register int i;
179 register unsigned incr;
180 register dictentry *freeslot;
181 register unsigned int mask = mp->ma_size-1;
182 dictentry *ep0 = mp->ma_table;
183 register dictentry *ep;
184 register int restore_error = 0;
185 register int checked_error = 0;
186 register int cmp;
187 PyObject *err_type, *err_value, *err_tb;
188 /* We must come up with (i, incr) such that 0 <= i < ma_size
189 and 0 < incr < ma_size and both are a function of hash.
190 i is the initial table index and incr the initial probe offset. */
191 i = (~hash) & mask;
192 /* We use ~hash instead of hash, as degenerate hash functions, such
193 as for ints <sigh>, can have lots of leading zeros. It's not
194 really a performance risk, but better safe than sorry.
195 12-Dec-00 tim: so ~hash produces lots of leading ones instead --
196 what's the gain? */
197 ep = &ep0[i];
198 if (ep->me_key == NULL || ep->me_key == key)
199 return ep;
200 if (ep->me_key == dummy)
201 freeslot = ep;
202 else {
203 if (ep->me_hash == hash) {
204 /* error can't have been checked yet */
205 checked_error = 1;
206 if (PyErr_Occurred()) {
207 restore_error = 1;
208 PyErr_Fetch(&err_type, &err_value, &err_tb);
210 cmp = PyObject_RichCompareBool(ep->me_key, key, Py_EQ);
211 if (cmp > 0) {
212 if (restore_error)
213 PyErr_Restore(err_type, err_value,
214 err_tb);
215 return ep;
217 else if (cmp < 0)
218 PyErr_Clear();
220 freeslot = NULL;
222 /* Derive incr from hash, just to make it more arbitrary. Note that
223 incr must not be 0, or we will get into an infinite loop.*/
224 incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
225 if (!incr)
226 incr = mask;
227 for (;;) {
228 ep = &ep0[(i+incr)&mask];
229 if (ep->me_key == NULL) {
230 if (restore_error)
231 PyErr_Restore(err_type, err_value, err_tb);
232 if (freeslot != NULL)
233 return freeslot;
234 else
235 return ep;
237 if (ep->me_key == dummy) {
238 if (freeslot == NULL)
239 freeslot = ep;
241 else if (ep->me_key == key) {
242 if (restore_error)
243 PyErr_Restore(err_type, err_value, err_tb);
244 return ep;
246 else if (ep->me_hash == hash) {
247 if (!checked_error) {
248 checked_error = 1;
249 if (PyErr_Occurred()) {
250 restore_error = 1;
251 PyErr_Fetch(&err_type, &err_value,
252 &err_tb);
255 cmp = PyObject_RichCompareBool(ep->me_key, key, Py_EQ);
256 if (cmp > 0) {
257 if (restore_error)
258 PyErr_Restore(err_type, err_value,
259 err_tb);
260 return ep;
262 else if (cmp < 0)
263 PyErr_Clear();
265 /* Cycle through GF(2^n)-{0} */
266 incr = incr << 1;
267 if (incr > mask)
268 incr ^= mp->ma_poly; /* This will implicitly clear
269 the highest bit */
274 * Hacked up version of lookdict which can assume keys are always strings;
275 * this assumption allows testing for errors during PyObject_Compare() to
276 * be dropped; string-string comparisons never raise exceptions. This also
277 * means we don't need to go through PyObject_Compare(); we can always use
278 * the tp_compare slot of the string type object directly.
280 * This really only becomes meaningful if proper error handling in lookdict()
281 * is too expensive.
283 static dictentry *
284 lookdict_string(dictobject *mp, PyObject *key, register long hash)
286 register int i;
287 register unsigned incr;
288 register dictentry *freeslot;
289 register unsigned int mask = mp->ma_size-1;
290 dictentry *ep0 = mp->ma_table;
291 register dictentry *ep;
292 cmpfunc compare = PyString_Type.tp_compare;
294 /* make sure this function doesn't have to handle non-string keys */
295 if (!PyString_Check(key)) {
296 #ifdef SHOW_CONVERSION_COUNTS
297 ++converted;
298 #endif
299 mp->ma_lookup = lookdict;
300 return lookdict(mp, key, hash);
302 /* We must come up with (i, incr) such that 0 <= i < ma_size
303 and 0 < incr < ma_size and both are a function of hash */
304 i = (~hash) & mask;
305 /* We use ~hash instead of hash, as degenerate hash functions, such
306 as for ints <sigh>, can have lots of leading zeros. It's not
307 really a performance risk, but better safe than sorry. */
308 ep = &ep0[i];
309 if (ep->me_key == NULL || ep->me_key == key)
310 return ep;
311 if (ep->me_key == dummy)
312 freeslot = ep;
313 else {
314 if (ep->me_hash == hash
315 && compare(ep->me_key, key) == 0) {
316 return ep;
318 freeslot = NULL;
320 /* Derive incr from hash, just to make it more arbitrary. Note that
321 incr must not be 0, or we will get into an infinite loop.*/
322 incr = (hash ^ ((unsigned long)hash >> 3)) & mask;
323 if (!incr)
324 incr = mask;
325 for (;;) {
326 ep = &ep0[(i+incr)&mask];
327 if (ep->me_key == NULL) {
328 if (freeslot != NULL)
329 return freeslot;
330 else
331 return ep;
333 if (ep->me_key == dummy) {
334 if (freeslot == NULL)
335 freeslot = ep;
337 else if (ep->me_key == key
338 || (ep->me_hash == hash
339 && compare(ep->me_key, key) == 0)) {
340 return ep;
342 /* Cycle through GF(2^n)-{0} */
343 incr = incr << 1;
344 if (incr > mask)
345 incr ^= mp->ma_poly; /* This will implicitly clear
346 the highest bit */
351 Internal routine to insert a new item into the table.
352 Used both by the internal resize routine and by the public insert routine.
353 Eats a reference to key and one to value.
355 static void
356 insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
358 PyObject *old_value;
359 register dictentry *ep;
360 ep = (mp->ma_lookup)(mp, key, hash);
361 if (ep->me_value != NULL) {
362 old_value = ep->me_value;
363 ep->me_value = value;
364 Py_DECREF(old_value); /* which **CAN** re-enter */
365 Py_DECREF(key);
367 else {
368 if (ep->me_key == NULL)
369 mp->ma_fill++;
370 else
371 Py_DECREF(ep->me_key);
372 ep->me_key = key;
373 ep->me_hash = hash;
374 ep->me_value = value;
375 mp->ma_used++;
380 Restructure the table by allocating a new table and reinserting all
381 items again. When entries have been deleted, the new table may
382 actually be smaller than the old one.
384 static int
385 dictresize(dictobject *mp, int minused)
387 register int oldsize = mp->ma_size;
388 register int newsize, newpoly;
389 register dictentry *oldtable = mp->ma_table;
390 register dictentry *newtable;
391 register dictentry *ep;
392 register int i;
393 for (i = 0, newsize = MINSIZE; ; i++, newsize <<= 1) {
394 if (i > sizeof(polys)/sizeof(polys[0])) {
395 /* Ran out of polynomials */
396 PyErr_NoMemory();
397 return -1;
399 if (newsize > minused) {
400 newpoly = polys[i];
401 break;
404 newtable = PyMem_NEW(dictentry, newsize);
405 if (newtable == NULL) {
406 PyErr_NoMemory();
407 return -1;
409 memset(newtable, '\0', sizeof(dictentry) * newsize);
410 mp->ma_size = newsize;
411 mp->ma_poly = newpoly;
412 mp->ma_table = newtable;
413 mp->ma_fill = 0;
414 mp->ma_used = 0;
416 /* Make two passes, so we can avoid decrefs
417 (and possible side effects) till the table is copied */
418 for (i = 0, ep = oldtable; i < oldsize; i++, ep++) {
419 if (ep->me_value != NULL)
420 insertdict(mp,ep->me_key,ep->me_hash,ep->me_value);
422 for (i = 0, ep = oldtable; i < oldsize; i++, ep++) {
423 if (ep->me_value == NULL) {
424 Py_XDECREF(ep->me_key);
428 if (oldtable != NULL)
429 PyMem_DEL(oldtable);
430 return 0;
433 PyObject *
434 PyDict_GetItem(PyObject *op, PyObject *key)
436 long hash;
437 dictobject *mp = (dictobject *)op;
438 if (!PyDict_Check(op)) {
439 return NULL;
441 if (mp->ma_table == NULL)
442 return NULL;
443 #ifdef CACHE_HASH
444 if (!PyString_Check(key) ||
445 (hash = ((PyStringObject *) key)->ob_shash) == -1)
446 #endif
448 hash = PyObject_Hash(key);
449 if (hash == -1) {
450 PyErr_Clear();
451 return NULL;
454 return (mp->ma_lookup)(mp, key, hash)->me_value;
457 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
458 * dictionary if it is merely replacing the value for an existing key.
459 * This is means that it's safe to loop over a dictionary with
460 * PyDict_Next() and occasionally replace a value -- but you can't
461 * insert new keys or remove them.
464 PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
466 register dictobject *mp;
467 register long hash;
468 register int n_used;
470 if (!PyDict_Check(op)) {
471 PyErr_BadInternalCall();
472 return -1;
474 mp = (dictobject *)op;
475 #ifdef CACHE_HASH
476 if (PyString_Check(key)) {
477 #ifdef INTERN_STRINGS
478 if (((PyStringObject *)key)->ob_sinterned != NULL) {
479 key = ((PyStringObject *)key)->ob_sinterned;
480 hash = ((PyStringObject *)key)->ob_shash;
482 else
483 #endif
485 hash = ((PyStringObject *)key)->ob_shash;
486 if (hash == -1)
487 hash = PyObject_Hash(key);
490 else
491 #endif
493 hash = PyObject_Hash(key);
494 if (hash == -1)
495 return -1;
497 if (mp->ma_fill >= mp->ma_size) {
498 /* No room for a new key.
499 * This only happens when the dict is empty.
500 * Let dictresize() create a minimal dict.
502 assert(mp->ma_used == 0);
503 if (dictresize(mp, 0) != 0)
504 return -1;
505 assert(mp->ma_fill < mp->ma_size);
507 n_used = mp->ma_used;
508 Py_INCREF(value);
509 Py_INCREF(key);
510 insertdict(mp, key, hash, value);
511 /* If we added a key, we can safely resize. Otherwise skip this!
512 * If fill >= 2/3 size, adjust size. Normally, this doubles the
513 * size, but it's also possible for the dict to shrink (if ma_fill is
514 * much larger than ma_used, meaning a lot of dict keys have been
515 * deleted).
517 if (mp->ma_used > n_used && mp->ma_fill*3 >= mp->ma_size*2) {
518 if (dictresize(mp, mp->ma_used*2) != 0)
519 return -1;
521 return 0;
525 PyDict_DelItem(PyObject *op, PyObject *key)
527 register dictobject *mp;
528 register long hash;
529 register dictentry *ep;
530 PyObject *old_value, *old_key;
532 if (!PyDict_Check(op)) {
533 PyErr_BadInternalCall();
534 return -1;
536 #ifdef CACHE_HASH
537 if (!PyString_Check(key) ||
538 (hash = ((PyStringObject *) key)->ob_shash) == -1)
539 #endif
541 hash = PyObject_Hash(key);
542 if (hash == -1)
543 return -1;
545 mp = (dictobject *)op;
546 if (((dictobject *)op)->ma_table == NULL)
547 goto empty;
548 ep = (mp->ma_lookup)(mp, key, hash);
549 if (ep->me_value == NULL) {
550 empty:
551 PyErr_SetObject(PyExc_KeyError, key);
552 return -1;
554 old_key = ep->me_key;
555 Py_INCREF(dummy);
556 ep->me_key = dummy;
557 old_value = ep->me_value;
558 ep->me_value = NULL;
559 mp->ma_used--;
560 Py_DECREF(old_value);
561 Py_DECREF(old_key);
562 return 0;
565 void
566 PyDict_Clear(PyObject *op)
568 int i, n;
569 register dictentry *table;
570 dictobject *mp;
571 if (!PyDict_Check(op))
572 return;
573 mp = (dictobject *)op;
574 table = mp->ma_table;
575 if (table == NULL)
576 return;
577 n = mp->ma_size;
578 mp->ma_size = mp->ma_used = mp->ma_fill = 0;
579 mp->ma_table = NULL;
580 for (i = 0; i < n; i++) {
581 Py_XDECREF(table[i].me_key);
582 Py_XDECREF(table[i].me_value);
584 PyMem_DEL(table);
587 /* CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
588 * mutates the dict. One exception: it is safe if the loop merely changes
589 * the values associated with the keys (but doesn't insert new keys or
590 * delete keys), via PyDict_SetItem().
593 PyDict_Next(PyObject *op, int *ppos, PyObject **pkey, PyObject **pvalue)
595 int i;
596 register dictobject *mp;
597 if (!PyDict_Check(op))
598 return 0;
599 mp = (dictobject *)op;
600 i = *ppos;
601 if (i < 0)
602 return 0;
603 while (i < mp->ma_size && mp->ma_table[i].me_value == NULL)
604 i++;
605 *ppos = i+1;
606 if (i >= mp->ma_size)
607 return 0;
608 if (pkey)
609 *pkey = mp->ma_table[i].me_key;
610 if (pvalue)
611 *pvalue = mp->ma_table[i].me_value;
612 return 1;
615 /* Methods */
617 static void
618 dict_dealloc(register dictobject *mp)
620 register int i;
621 register dictentry *ep;
622 Py_TRASHCAN_SAFE_BEGIN(mp)
623 PyObject_GC_Fini(mp);
624 for (i = 0, ep = mp->ma_table; i < mp->ma_size; i++, ep++) {
625 if (ep->me_key != NULL) {
626 Py_DECREF(ep->me_key);
628 if (ep->me_value != NULL) {
629 Py_DECREF(ep->me_value);
632 if (mp->ma_table != NULL)
633 PyMem_DEL(mp->ma_table);
634 mp = (dictobject *) PyObject_AS_GC(mp);
635 PyObject_DEL(mp);
636 Py_TRASHCAN_SAFE_END(mp)
639 static int
640 dict_print(register dictobject *mp, register FILE *fp, register int flags)
642 register int i;
643 register int any;
644 register dictentry *ep;
646 i = Py_ReprEnter((PyObject*)mp);
647 if (i != 0) {
648 if (i < 0)
649 return i;
650 fprintf(fp, "{...}");
651 return 0;
654 fprintf(fp, "{");
655 any = 0;
656 for (i = 0, ep = mp->ma_table; i < mp->ma_size; i++, ep++) {
657 if (ep->me_value != NULL) {
658 if (any++ > 0)
659 fprintf(fp, ", ");
660 if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
661 Py_ReprLeave((PyObject*)mp);
662 return -1;
664 fprintf(fp, ": ");
665 if (PyObject_Print(ep->me_value, fp, 0) != 0) {
666 Py_ReprLeave((PyObject*)mp);
667 return -1;
671 fprintf(fp, "}");
672 Py_ReprLeave((PyObject*)mp);
673 return 0;
676 static PyObject *
677 dict_repr(dictobject *mp)
679 auto PyObject *v;
680 PyObject *sepa, *colon;
681 register int i;
682 register int any;
683 register dictentry *ep;
685 i = Py_ReprEnter((PyObject*)mp);
686 if (i != 0) {
687 if (i > 0)
688 return PyString_FromString("{...}");
689 return NULL;
692 v = PyString_FromString("{");
693 sepa = PyString_FromString(", ");
694 colon = PyString_FromString(": ");
695 any = 0;
696 for (i = 0, ep = mp->ma_table; i < mp->ma_size && v; i++, ep++) {
697 if (ep->me_value != NULL) {
698 if (any++)
699 PyString_Concat(&v, sepa);
700 PyString_ConcatAndDel(&v, PyObject_Repr(ep->me_key));
701 PyString_Concat(&v, colon);
702 PyString_ConcatAndDel(&v, PyObject_Repr(ep->me_value));
705 PyString_ConcatAndDel(&v, PyString_FromString("}"));
706 Py_ReprLeave((PyObject*)mp);
707 Py_XDECREF(sepa);
708 Py_XDECREF(colon);
709 return v;
712 static int
713 dict_length(dictobject *mp)
715 return mp->ma_used;
718 static PyObject *
719 dict_subscript(dictobject *mp, register PyObject *key)
721 PyObject *v;
722 long hash;
723 if (mp->ma_table == NULL) {
724 PyErr_SetObject(PyExc_KeyError, key);
725 return NULL;
727 #ifdef CACHE_HASH
728 if (!PyString_Check(key) ||
729 (hash = ((PyStringObject *) key)->ob_shash) == -1)
730 #endif
732 hash = PyObject_Hash(key);
733 if (hash == -1)
734 return NULL;
736 v = (mp->ma_lookup)(mp, key, hash) -> me_value;
737 if (v == NULL)
738 PyErr_SetObject(PyExc_KeyError, key);
739 else
740 Py_INCREF(v);
741 return v;
744 static int
745 dict_ass_sub(dictobject *mp, PyObject *v, PyObject *w)
747 if (w == NULL)
748 return PyDict_DelItem((PyObject *)mp, v);
749 else
750 return PyDict_SetItem((PyObject *)mp, v, w);
753 static PyMappingMethods dict_as_mapping = {
754 (inquiry)dict_length, /*mp_length*/
755 (binaryfunc)dict_subscript, /*mp_subscript*/
756 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
759 static PyObject *
760 dict_keys(register dictobject *mp, PyObject *args)
762 register PyObject *v;
763 register int i, j, n;
765 if (!PyArg_NoArgs(args))
766 return NULL;
767 again:
768 n = mp->ma_used;
769 v = PyList_New(n);
770 if (v == NULL)
771 return NULL;
772 if (n != mp->ma_used) {
773 /* Durnit. The allocations caused the dict to resize.
774 * Just start over, this shouldn't normally happen.
776 Py_DECREF(v);
777 goto again;
779 for (i = 0, j = 0; i < mp->ma_size; i++) {
780 if (mp->ma_table[i].me_value != NULL) {
781 PyObject *key = mp->ma_table[i].me_key;
782 Py_INCREF(key);
783 PyList_SET_ITEM(v, j, key);
784 j++;
787 return v;
790 static PyObject *
791 dict_values(register dictobject *mp, PyObject *args)
793 register PyObject *v;
794 register int i, j, n;
796 if (!PyArg_NoArgs(args))
797 return NULL;
798 again:
799 n = mp->ma_used;
800 v = PyList_New(n);
801 if (v == NULL)
802 return NULL;
803 if (n != mp->ma_used) {
804 /* Durnit. The allocations caused the dict to resize.
805 * Just start over, this shouldn't normally happen.
807 Py_DECREF(v);
808 goto again;
810 for (i = 0, j = 0; i < mp->ma_size; i++) {
811 if (mp->ma_table[i].me_value != NULL) {
812 PyObject *value = mp->ma_table[i].me_value;
813 Py_INCREF(value);
814 PyList_SET_ITEM(v, j, value);
815 j++;
818 return v;
821 static PyObject *
822 dict_items(register dictobject *mp, PyObject *args)
824 register PyObject *v;
825 register int i, j, n;
826 PyObject *item, *key, *value;
828 if (!PyArg_NoArgs(args))
829 return NULL;
830 /* Preallocate the list of tuples, to avoid allocations during
831 * the loop over the items, which could trigger GC, which
832 * could resize the dict. :-(
834 again:
835 n = mp->ma_used;
836 v = PyList_New(n);
837 if (v == NULL)
838 return NULL;
839 for (i = 0; i < n; i++) {
840 item = PyTuple_New(2);
841 if (item == NULL) {
842 Py_DECREF(v);
843 return NULL;
845 PyList_SET_ITEM(v, i, item);
847 if (n != mp->ma_used) {
848 /* Durnit. The allocations caused the dict to resize.
849 * Just start over, this shouldn't normally happen.
851 Py_DECREF(v);
852 goto again;
854 /* Nothing we do below makes any function calls. */
855 for (i = 0, j = 0; i < mp->ma_size; i++) {
856 if (mp->ma_table[i].me_value != NULL) {
857 key = mp->ma_table[i].me_key;
858 value = mp->ma_table[i].me_value;
859 item = PyList_GET_ITEM(v, j);
860 Py_INCREF(key);
861 PyTuple_SET_ITEM(item, 0, key);
862 Py_INCREF(value);
863 PyTuple_SET_ITEM(item, 1, value);
864 j++;
867 assert(j == n);
868 return v;
871 static PyObject *
872 dict_update(register dictobject *mp, PyObject *args)
874 register int i;
875 dictobject *other;
876 dictentry *entry;
877 if (!PyArg_Parse(args, "O!", &PyDict_Type, &other))
878 return NULL;
879 if (other == mp || other->ma_used == 0)
880 goto done; /* a.update(a) or a.update({}); nothing to do */
881 /* Do one big resize at the start, rather than incrementally
882 resizing as we insert new items. Expect that there will be
883 no (or few) overlapping keys. */
884 if ((mp->ma_fill + other->ma_used)*3 >= mp->ma_size*2) {
885 if (dictresize(mp, (mp->ma_used + other->ma_used)*3/2) != 0)
886 return NULL;
888 for (i = 0; i < other->ma_size; i++) {
889 entry = &other->ma_table[i];
890 if (entry->me_value != NULL) {
891 Py_INCREF(entry->me_key);
892 Py_INCREF(entry->me_value);
893 insertdict(mp, entry->me_key, entry->me_hash,
894 entry->me_value);
897 done:
898 Py_INCREF(Py_None);
899 return Py_None;
902 static PyObject *
903 dict_copy(register dictobject *mp, PyObject *args)
905 if (!PyArg_Parse(args, ""))
906 return NULL;
907 return PyDict_Copy((PyObject*)mp);
910 PyObject *
911 PyDict_Copy(PyObject *o)
913 register dictobject *mp;
914 register int i;
915 dictobject *copy;
916 dictentry *entry;
918 if (o == NULL || !PyDict_Check(o)) {
919 PyErr_BadInternalCall();
920 return NULL;
922 mp = (dictobject *)o;
923 copy = (dictobject *)PyDict_New();
924 if (copy == NULL)
925 return NULL;
926 if (mp->ma_used > 0) {
927 if (dictresize(copy, mp->ma_used*3/2) != 0)
928 return NULL;
929 for (i = 0; i < mp->ma_size; i++) {
930 entry = &mp->ma_table[i];
931 if (entry->me_value != NULL) {
932 Py_INCREF(entry->me_key);
933 Py_INCREF(entry->me_value);
934 insertdict(copy, entry->me_key, entry->me_hash,
935 entry->me_value);
939 return (PyObject *)copy;
943 PyDict_Size(PyObject *mp)
945 if (mp == NULL || !PyDict_Check(mp)) {
946 PyErr_BadInternalCall();
947 return 0;
949 return ((dictobject *)mp)->ma_used;
952 PyObject *
953 PyDict_Keys(PyObject *mp)
955 if (mp == NULL || !PyDict_Check(mp)) {
956 PyErr_BadInternalCall();
957 return NULL;
959 return dict_keys((dictobject *)mp, (PyObject *)NULL);
962 PyObject *
963 PyDict_Values(PyObject *mp)
965 if (mp == NULL || !PyDict_Check(mp)) {
966 PyErr_BadInternalCall();
967 return NULL;
969 return dict_values((dictobject *)mp, (PyObject *)NULL);
972 PyObject *
973 PyDict_Items(PyObject *mp)
975 if (mp == NULL || !PyDict_Check(mp)) {
976 PyErr_BadInternalCall();
977 return NULL;
979 return dict_items((dictobject *)mp, (PyObject *)NULL);
982 /* Subroutine which returns the smallest key in a for which b's value
983 is different or absent. The value is returned too, through the
984 pval argument. Both are NULL if no key in a is found for which b's status
985 differs. The refcounts on (and only on) non-NULL *pval and function return
986 values must be decremented by the caller (characterize() increments them
987 to ensure that mutating comparison and PyDict_GetItem calls can't delete
988 them before the caller is done looking at them). */
990 static PyObject *
991 characterize(dictobject *a, dictobject *b, PyObject **pval)
993 PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
994 PyObject *aval = NULL; /* a[akey] */
995 int i, cmp;
997 for (i = 0; i < a->ma_size; i++) {
998 PyObject *thiskey, *thisaval, *thisbval;
999 if (a->ma_table[i].me_value == NULL)
1000 continue;
1001 thiskey = a->ma_table[i].me_key;
1002 Py_INCREF(thiskey); /* keep alive across compares */
1003 if (akey != NULL) {
1004 cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
1005 if (cmp < 0) {
1006 Py_DECREF(thiskey);
1007 goto Fail;
1009 if (cmp > 0 ||
1010 i >= a->ma_size ||
1011 a->ma_table[i].me_value == NULL)
1013 /* Not the *smallest* a key; or maybe it is
1014 * but the compare shrunk the dict so we can't
1015 * find its associated value anymore; or
1016 * maybe it is but the compare deleted the
1017 * a[thiskey] entry.
1019 Py_DECREF(thiskey);
1020 continue;
1024 /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1025 thisaval = a->ma_table[i].me_value;
1026 assert(thisaval);
1027 Py_INCREF(thisaval); /* keep alive */
1028 thisbval = PyDict_GetItem((PyObject *)b, thiskey);
1029 if (thisbval == NULL)
1030 cmp = 0;
1031 else {
1032 /* both dicts have thiskey: same values? */
1033 cmp = PyObject_RichCompareBool(
1034 thisaval, thisbval, Py_EQ);
1035 if (cmp < 0) {
1036 Py_DECREF(thiskey);
1037 Py_DECREF(thisaval);
1038 goto Fail;
1041 if (cmp == 0) {
1042 /* New winner. */
1043 Py_XDECREF(akey);
1044 Py_XDECREF(aval);
1045 akey = thiskey;
1046 aval = thisaval;
1048 else {
1049 Py_DECREF(thiskey);
1050 Py_DECREF(thisaval);
1053 *pval = aval;
1054 return akey;
1056 Fail:
1057 Py_XDECREF(akey);
1058 Py_XDECREF(aval);
1059 *pval = NULL;
1060 return NULL;
1063 static int
1064 dict_compare(dictobject *a, dictobject *b)
1066 PyObject *adiff, *bdiff, *aval, *bval;
1067 int res;
1069 /* Compare lengths first */
1070 if (a->ma_used < b->ma_used)
1071 return -1; /* a is shorter */
1072 else if (a->ma_used > b->ma_used)
1073 return 1; /* b is shorter */
1075 /* Same length -- check all keys */
1076 bdiff = bval = NULL;
1077 adiff = characterize(a, b, &aval);
1078 if (adiff == NULL) {
1079 assert(!aval);
1080 /* Either an error, or a is a subst with the same length so
1081 * must be equal.
1083 res = PyErr_Occurred() ? -1 : 0;
1084 goto Finished;
1086 bdiff = characterize(b, a, &bval);
1087 if (bdiff == NULL && PyErr_Occurred()) {
1088 assert(!bval);
1089 res = -1;
1090 goto Finished;
1092 res = 0;
1093 if (bdiff) {
1094 /* bdiff == NULL "should be" impossible now, but perhaps
1095 * the last comparison done by the characterize() on a had
1096 * the side effect of making the dicts equal!
1098 res = PyObject_Compare(adiff, bdiff);
1100 if (res == 0 && bval != NULL)
1101 res = PyObject_Compare(aval, bval);
1103 Finished:
1104 Py_XDECREF(adiff);
1105 Py_XDECREF(bdiff);
1106 Py_XDECREF(aval);
1107 Py_XDECREF(bval);
1108 return res;
1111 static PyObject *
1112 dict_has_key(register dictobject *mp, PyObject *args)
1114 PyObject *key;
1115 long hash;
1116 register long ok;
1117 if (!PyArg_ParseTuple(args, "O:has_key", &key))
1118 return NULL;
1119 #ifdef CACHE_HASH
1120 if (!PyString_Check(key) ||
1121 (hash = ((PyStringObject *) key)->ob_shash) == -1)
1122 #endif
1124 hash = PyObject_Hash(key);
1125 if (hash == -1)
1126 return NULL;
1128 ok = (mp->ma_size != 0
1129 && (mp->ma_lookup)(mp, key, hash)->me_value != NULL);
1130 return PyInt_FromLong(ok);
1133 static PyObject *
1134 dict_get(register dictobject *mp, PyObject *args)
1136 PyObject *key;
1137 PyObject *failobj = Py_None;
1138 PyObject *val = NULL;
1139 long hash;
1141 if (!PyArg_ParseTuple(args, "O|O:get", &key, &failobj))
1142 return NULL;
1143 if (mp->ma_table == NULL)
1144 goto finally;
1146 #ifdef CACHE_HASH
1147 if (!PyString_Check(key) ||
1148 (hash = ((PyStringObject *) key)->ob_shash) == -1)
1149 #endif
1151 hash = PyObject_Hash(key);
1152 if (hash == -1)
1153 return NULL;
1155 val = (mp->ma_lookup)(mp, key, hash)->me_value;
1157 finally:
1158 if (val == NULL)
1159 val = failobj;
1160 Py_INCREF(val);
1161 return val;
1165 static PyObject *
1166 dict_setdefault(register dictobject *mp, PyObject *args)
1168 PyObject *key;
1169 PyObject *failobj = Py_None;
1170 PyObject *val = NULL;
1171 long hash;
1173 if (!PyArg_ParseTuple(args, "O|O:setdefault", &key, &failobj))
1174 return NULL;
1175 if (mp->ma_table == NULL)
1176 goto finally;
1178 #ifdef CACHE_HASH
1179 if (!PyString_Check(key) ||
1180 (hash = ((PyStringObject *) key)->ob_shash) == -1)
1181 #endif
1183 hash = PyObject_Hash(key);
1184 if (hash == -1)
1185 return NULL;
1187 val = (mp->ma_lookup)(mp, key, hash)->me_value;
1189 finally:
1190 if (val == NULL) {
1191 val = failobj;
1192 if (PyDict_SetItem((PyObject*)mp, key, failobj))
1193 val = NULL;
1195 Py_XINCREF(val);
1196 return val;
1200 static PyObject *
1201 dict_clear(register dictobject *mp, PyObject *args)
1203 if (!PyArg_NoArgs(args))
1204 return NULL;
1205 PyDict_Clear((PyObject *)mp);
1206 Py_INCREF(Py_None);
1207 return Py_None;
1210 static PyObject *
1211 dict_popitem(dictobject *mp, PyObject *args)
1213 int i = 0;
1214 dictentry *ep;
1215 PyObject *res;
1217 if (!PyArg_NoArgs(args))
1218 return NULL;
1219 /* Allocate the result tuple first. Believe it or not,
1220 * this allocation could trigger a garbage collection which
1221 * could resize the dict, which would invalidate the pointer
1222 * (ep) into the dict calculated below.
1223 * So we have to do this first.
1225 res = PyTuple_New(2);
1226 if (res == NULL)
1227 return NULL;
1228 if (mp->ma_used == 0) {
1229 Py_DECREF(res);
1230 PyErr_SetString(PyExc_KeyError,
1231 "popitem(): dictionary is empty");
1232 return NULL;
1234 /* Set ep to "the first" dict entry with a value. We abuse the hash
1235 * field of slot 0 to hold a search finger:
1236 * If slot 0 has a value, use slot 0.
1237 * Else slot 0 is being used to hold a search finger,
1238 * and we use its hash value as the first index to look.
1240 ep = &mp->ma_table[0];
1241 if (ep->me_value == NULL) {
1242 i = (int)ep->me_hash;
1243 /* The hash field may be uninitialized trash, or it
1244 * may be a real hash value, or it may be a legit
1245 * search finger, or it may be a once-legit search
1246 * finger that's out of bounds now because it
1247 * wrapped around or the table shrunk -- simply
1248 * make sure it's in bounds now.
1250 if (i >= mp->ma_size || i < 1)
1251 i = 1; /* skip slot 0 */
1252 while ((ep = &mp->ma_table[i])->me_value == NULL) {
1253 i++;
1254 if (i >= mp->ma_size)
1255 i = 1;
1258 PyTuple_SET_ITEM(res, 0, ep->me_key);
1259 PyTuple_SET_ITEM(res, 1, ep->me_value);
1260 Py_INCREF(dummy);
1261 ep->me_key = dummy;
1262 ep->me_value = NULL;
1263 mp->ma_used--;
1264 assert(mp->ma_table[0].me_value == NULL);
1265 mp->ma_table[0].me_hash = i + 1; /* next place to start */
1266 return res;
1269 static int
1270 dict_traverse(PyObject *op, visitproc visit, void *arg)
1272 int i = 0, err;
1273 PyObject *pk;
1274 PyObject *pv;
1276 while (PyDict_Next(op, &i, &pk, &pv)) {
1277 err = visit(pk, arg);
1278 if (err)
1279 return err;
1280 err = visit(pv, arg);
1281 if (err)
1282 return err;
1284 return 0;
1287 static int
1288 dict_tp_clear(PyObject *op)
1290 PyDict_Clear(op);
1291 return 0;
1295 static char has_key__doc__[] =
1296 "D.has_key(k) -> 1 if D has a key k, else 0";
1298 static char get__doc__[] =
1299 "D.get(k[,d]) -> D[k] if D.has_key(k), else d. d defaults to None.";
1301 static char setdefault_doc__[] =
1302 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if not D.has_key(k)";
1304 static char popitem__doc__[] =
1305 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
1306 2-tuple; but raise KeyError if D is empty";
1308 static char keys__doc__[] =
1309 "D.keys() -> list of D's keys";
1311 static char items__doc__[] =
1312 "D.items() -> list of D's (key, value) pairs, as 2-tuples";
1314 static char values__doc__[] =
1315 "D.values() -> list of D's values";
1317 static char update__doc__[] =
1318 "D.update(E) -> None. Update D from E: for k in E.keys(): D[k] = E[k]";
1320 static char clear__doc__[] =
1321 "D.clear() -> None. Remove all items from D.";
1323 static char copy__doc__[] =
1324 "D.copy() -> a shallow copy of D";
1326 static PyMethodDef mapp_methods[] = {
1327 {"has_key", (PyCFunction)dict_has_key, METH_VARARGS,
1328 has_key__doc__},
1329 {"get", (PyCFunction)dict_get, METH_VARARGS,
1330 get__doc__},
1331 {"setdefault", (PyCFunction)dict_setdefault, METH_VARARGS,
1332 setdefault_doc__},
1333 {"popitem", (PyCFunction)dict_popitem, METH_OLDARGS,
1334 popitem__doc__},
1335 {"keys", (PyCFunction)dict_keys, METH_OLDARGS,
1336 keys__doc__},
1337 {"items", (PyCFunction)dict_items, METH_OLDARGS,
1338 items__doc__},
1339 {"values", (PyCFunction)dict_values, METH_OLDARGS,
1340 values__doc__},
1341 {"update", (PyCFunction)dict_update, METH_OLDARGS,
1342 update__doc__},
1343 {"clear", (PyCFunction)dict_clear, METH_OLDARGS,
1344 clear__doc__},
1345 {"copy", (PyCFunction)dict_copy, METH_OLDARGS,
1346 copy__doc__},
1347 {NULL, NULL} /* sentinel */
1350 static PyObject *
1351 dict_getattr(dictobject *mp, char *name)
1353 return Py_FindMethod(mapp_methods, (PyObject *)mp, name);
1356 PyTypeObject PyDict_Type = {
1357 PyObject_HEAD_INIT(&PyType_Type)
1359 "dictionary",
1360 sizeof(dictobject) + PyGC_HEAD_SIZE,
1362 (destructor)dict_dealloc, /* tp_dealloc */
1363 (printfunc)dict_print, /* tp_print */
1364 (getattrfunc)dict_getattr, /* tp_getattr */
1365 0, /* tp_setattr */
1366 (cmpfunc)dict_compare, /* tp_compare */
1367 (reprfunc)dict_repr, /* tp_repr */
1368 0, /* tp_as_number */
1369 0, /* tp_as_sequence */
1370 &dict_as_mapping, /* tp_as_mapping */
1371 0, /* tp_hash */
1372 0, /* tp_call */
1373 0, /* tp_str */
1374 0, /* tp_getattro */
1375 0, /* tp_setattro */
1376 0, /* tp_as_buffer */
1377 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /* tp_flags */
1378 0, /* tp_doc */
1379 (traverseproc)dict_traverse, /* tp_traverse */
1380 (inquiry)dict_tp_clear, /* tp_clear */
1381 0, /* tp_richcompare */
1384 /* For backward compatibility with old dictionary interface */
1386 PyObject *
1387 PyDict_GetItemString(PyObject *v, char *key)
1389 PyObject *kv, *rv;
1390 kv = PyString_FromString(key);
1391 if (kv == NULL)
1392 return NULL;
1393 rv = PyDict_GetItem(v, kv);
1394 Py_DECREF(kv);
1395 return rv;
1399 PyDict_SetItemString(PyObject *v, char *key, PyObject *item)
1401 PyObject *kv;
1402 int err;
1403 kv = PyString_FromString(key);
1404 if (kv == NULL)
1405 return -1;
1406 PyString_InternInPlace(&kv); /* XXX Should we really? */
1407 err = PyDict_SetItem(v, kv, item);
1408 Py_DECREF(kv);
1409 return err;
1413 PyDict_DelItemString(PyObject *v, char *key)
1415 PyObject *kv;
1416 int err;
1417 kv = PyString_FromString(key);
1418 if (kv == NULL)
1419 return -1;
1420 err = PyDict_DelItem(v, kv);
1421 Py_DECREF(kv);
1422 return err;