2 /* Dictionary object implementation using a hash table */
8 * MINSIZE is the minimum size of a dictionary.
13 /* define this out if you don't want conversion statistics on exit */
14 #undef SHOW_CONVERSION_COUNTS
17 Table of irreducible polynomials to efficiently cycle through
18 GF(2^n)-{0}, 2<=n<=30. A table size is always a power of 2.
20 static long polys
[] = {
53 /* Object used as dummy key to fill deleted entries */
54 static PyObject
*dummy
; /* Initialized by first call to newdictobject() */
57 There are three kinds of slots in the table:
59 1. Unused. me_key == me_value == NULL
60 Does not hold an active (key, value) pair now and never did. Unused can
61 transition to Active upon key insertion. This is the only case in which
62 me_key is NULL, and is each slot's initial state.
64 2. Active. me_key != NULL and me_key != dummy and me_value != NULL
65 Holds an active (key, value) pair. Active can transition to Dummy upon
66 key deletion. This is the only case in which me_value != NULL.
68 3. Dummy. me_key == dummy and me_value == NULL
69 Previously held an active (key, value) pair, but that was deleted and an
70 active pair has not yet overwritten the slot. Dummy can transition to
71 Active upon key insertion. Dummy slots cannot be made Unused again
72 (cannot have me_key set to NULL), else the probe sequence in case of
73 collision would have no way to know they were once active.
75 Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
76 hold a search finger. The me_hash field of Unused or Dummy slots has no
80 long me_hash
; /* cached hash code of me_key */
83 #ifdef USE_CACHE_ALIGNED
89 To ensure the lookup algorithm terminates, there must be at least one Unused
90 slot (NULL key) in the table.
91 The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
92 ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
93 values == the number of Active items).
94 To avoid slowing down lookups on a near-full table, we resize the table when
97 typedef struct dictobject dictobject
;
100 int ma_fill
; /* # Active + # Dummy */
101 int ma_used
; /* # Active */
102 int ma_size
; /* total # slots in ma_table */
103 int ma_poly
; /* appopriate entry from polys vector */
105 dictentry
*(*ma_lookup
)(dictobject
*mp
, PyObject
*key
, long hash
);
108 /* forward declarations */
110 lookdict_string(dictobject
*mp
, PyObject
*key
, long hash
);
112 #ifdef SHOW_CONVERSION_COUNTS
113 static long created
= 0L;
114 static long converted
= 0L;
119 fprintf(stderr
, "created %ld string dicts\n", created
);
120 fprintf(stderr
, "converted %ld to normal dicts\n", converted
);
121 fprintf(stderr
, "%.2f%% conversion rate\n", (100.0*converted
)/created
);
128 register dictobject
*mp
;
129 if (dummy
== NULL
) { /* Auto-initialize dummy */
130 dummy
= PyString_FromString("<dummy key>");
133 #ifdef SHOW_CONVERSION_COUNTS
134 Py_AtExit(show_counts
);
137 mp
= PyObject_NEW(dictobject
, &PyDict_Type
);
145 mp
->ma_lookup
= lookdict_string
;
146 #ifdef SHOW_CONVERSION_COUNTS
149 PyObject_GC_Init(mp
);
150 return (PyObject
*)mp
;
154 The basic lookup function used by all operations.
155 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
156 Open addressing is preferred over chaining since the link overhead for
157 chaining would be substantial (100% with typical malloc overhead).
158 However, instead of going through the table at constant steps, we cycle
159 through the values of GF(2^n). This avoids modulo computations, being
160 much cheaper on RISC machines, without leading to clustering.
162 The initial probe index is computed as hash mod the table size.
163 Subsequent probe indices use the values of x^i in GF(2^n)-{0} as an offset,
164 where x is a root. The initial offset is derived from hash, too.
166 All arithmetic on hash should ignore overflow.
168 (This version is due to Reimer Behrends, some ideas are also due to
169 Jyrki Alakuijala and Vladimir Marangozov.)
171 This function must never return NULL; failures are indicated by returning
172 a dictentry* for which the me_value field is NULL. Exceptions are never
173 reported by this function, and outstanding exceptions are maintained.
176 lookdict(dictobject
*mp
, PyObject
*key
, register long hash
)
179 register unsigned incr
;
180 register dictentry
*freeslot
;
181 register unsigned int mask
= mp
->ma_size
-1;
182 dictentry
*ep0
= mp
->ma_table
;
183 register dictentry
*ep
;
184 register int restore_error
= 0;
185 register int checked_error
= 0;
187 PyObject
*err_type
, *err_value
, *err_tb
;
188 /* We must come up with (i, incr) such that 0 <= i < ma_size
189 and 0 < incr < ma_size and both are a function of hash.
190 i is the initial table index and incr the initial probe offset. */
192 /* We use ~hash instead of hash, as degenerate hash functions, such
193 as for ints <sigh>, can have lots of leading zeros. It's not
194 really a performance risk, but better safe than sorry.
195 12-Dec-00 tim: so ~hash produces lots of leading ones instead --
198 if (ep
->me_key
== NULL
|| ep
->me_key
== key
)
200 if (ep
->me_key
== dummy
)
203 if (ep
->me_hash
== hash
) {
204 /* error can't have been checked yet */
206 if (PyErr_Occurred()) {
208 PyErr_Fetch(&err_type
, &err_value
, &err_tb
);
210 cmp
= PyObject_RichCompareBool(ep
->me_key
, key
, Py_EQ
);
213 PyErr_Restore(err_type
, err_value
,
222 /* Derive incr from hash, just to make it more arbitrary. Note that
223 incr must not be 0, or we will get into an infinite loop.*/
224 incr
= (hash
^ ((unsigned long)hash
>> 3)) & mask
;
228 ep
= &ep0
[(i
+incr
)&mask
];
229 if (ep
->me_key
== NULL
) {
231 PyErr_Restore(err_type
, err_value
, err_tb
);
232 if (freeslot
!= NULL
)
237 if (ep
->me_key
== dummy
) {
238 if (freeslot
== NULL
)
241 else if (ep
->me_key
== key
) {
243 PyErr_Restore(err_type
, err_value
, err_tb
);
246 else if (ep
->me_hash
== hash
) {
247 if (!checked_error
) {
249 if (PyErr_Occurred()) {
251 PyErr_Fetch(&err_type
, &err_value
,
255 cmp
= PyObject_RichCompareBool(ep
->me_key
, key
, Py_EQ
);
258 PyErr_Restore(err_type
, err_value
,
265 /* Cycle through GF(2^n)-{0} */
268 incr
^= mp
->ma_poly
; /* This will implicitly clear
274 * Hacked up version of lookdict which can assume keys are always strings;
275 * this assumption allows testing for errors during PyObject_Compare() to
276 * be dropped; string-string comparisons never raise exceptions. This also
277 * means we don't need to go through PyObject_Compare(); we can always use
278 * the tp_compare slot of the string type object directly.
280 * This really only becomes meaningful if proper error handling in lookdict()
284 lookdict_string(dictobject
*mp
, PyObject
*key
, register long hash
)
287 register unsigned incr
;
288 register dictentry
*freeslot
;
289 register unsigned int mask
= mp
->ma_size
-1;
290 dictentry
*ep0
= mp
->ma_table
;
291 register dictentry
*ep
;
292 cmpfunc compare
= PyString_Type
.tp_compare
;
294 /* make sure this function doesn't have to handle non-string keys */
295 if (!PyString_Check(key
)) {
296 #ifdef SHOW_CONVERSION_COUNTS
299 mp
->ma_lookup
= lookdict
;
300 return lookdict(mp
, key
, hash
);
302 /* We must come up with (i, incr) such that 0 <= i < ma_size
303 and 0 < incr < ma_size and both are a function of hash */
305 /* We use ~hash instead of hash, as degenerate hash functions, such
306 as for ints <sigh>, can have lots of leading zeros. It's not
307 really a performance risk, but better safe than sorry. */
309 if (ep
->me_key
== NULL
|| ep
->me_key
== key
)
311 if (ep
->me_key
== dummy
)
314 if (ep
->me_hash
== hash
315 && compare(ep
->me_key
, key
) == 0) {
320 /* Derive incr from hash, just to make it more arbitrary. Note that
321 incr must not be 0, or we will get into an infinite loop.*/
322 incr
= (hash
^ ((unsigned long)hash
>> 3)) & mask
;
326 ep
= &ep0
[(i
+incr
)&mask
];
327 if (ep
->me_key
== NULL
) {
328 if (freeslot
!= NULL
)
333 if (ep
->me_key
== dummy
) {
334 if (freeslot
== NULL
)
337 else if (ep
->me_key
== key
338 || (ep
->me_hash
== hash
339 && compare(ep
->me_key
, key
) == 0)) {
342 /* Cycle through GF(2^n)-{0} */
345 incr
^= mp
->ma_poly
; /* This will implicitly clear
351 Internal routine to insert a new item into the table.
352 Used both by the internal resize routine and by the public insert routine.
353 Eats a reference to key and one to value.
356 insertdict(register dictobject
*mp
, PyObject
*key
, long hash
, PyObject
*value
)
359 register dictentry
*ep
;
360 ep
= (mp
->ma_lookup
)(mp
, key
, hash
);
361 if (ep
->me_value
!= NULL
) {
362 old_value
= ep
->me_value
;
363 ep
->me_value
= value
;
364 Py_DECREF(old_value
); /* which **CAN** re-enter */
368 if (ep
->me_key
== NULL
)
371 Py_DECREF(ep
->me_key
);
374 ep
->me_value
= value
;
380 Restructure the table by allocating a new table and reinserting all
381 items again. When entries have been deleted, the new table may
382 actually be smaller than the old one.
385 dictresize(dictobject
*mp
, int minused
)
387 register int oldsize
= mp
->ma_size
;
388 register int newsize
, newpoly
;
389 register dictentry
*oldtable
= mp
->ma_table
;
390 register dictentry
*newtable
;
391 register dictentry
*ep
;
393 for (i
= 0, newsize
= MINSIZE
; ; i
++, newsize
<<= 1) {
394 if (i
> sizeof(polys
)/sizeof(polys
[0])) {
395 /* Ran out of polynomials */
399 if (newsize
> minused
) {
404 newtable
= PyMem_NEW(dictentry
, newsize
);
405 if (newtable
== NULL
) {
409 memset(newtable
, '\0', sizeof(dictentry
) * newsize
);
410 mp
->ma_size
= newsize
;
411 mp
->ma_poly
= newpoly
;
412 mp
->ma_table
= newtable
;
416 /* Make two passes, so we can avoid decrefs
417 (and possible side effects) till the table is copied */
418 for (i
= 0, ep
= oldtable
; i
< oldsize
; i
++, ep
++) {
419 if (ep
->me_value
!= NULL
)
420 insertdict(mp
,ep
->me_key
,ep
->me_hash
,ep
->me_value
);
422 for (i
= 0, ep
= oldtable
; i
< oldsize
; i
++, ep
++) {
423 if (ep
->me_value
== NULL
) {
424 Py_XDECREF(ep
->me_key
);
428 if (oldtable
!= NULL
)
434 PyDict_GetItem(PyObject
*op
, PyObject
*key
)
437 dictobject
*mp
= (dictobject
*)op
;
438 if (!PyDict_Check(op
)) {
441 if (mp
->ma_table
== NULL
)
444 if (!PyString_Check(key
) ||
445 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
448 hash
= PyObject_Hash(key
);
454 return (mp
->ma_lookup
)(mp
, key
, hash
)->me_value
;
457 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
458 * dictionary if it is merely replacing the value for an existing key.
459 * This is means that it's safe to loop over a dictionary with
460 * PyDict_Next() and occasionally replace a value -- but you can't
461 * insert new keys or remove them.
464 PyDict_SetItem(register PyObject
*op
, PyObject
*key
, PyObject
*value
)
466 register dictobject
*mp
;
470 if (!PyDict_Check(op
)) {
471 PyErr_BadInternalCall();
474 mp
= (dictobject
*)op
;
476 if (PyString_Check(key
)) {
477 #ifdef INTERN_STRINGS
478 if (((PyStringObject
*)key
)->ob_sinterned
!= NULL
) {
479 key
= ((PyStringObject
*)key
)->ob_sinterned
;
480 hash
= ((PyStringObject
*)key
)->ob_shash
;
485 hash
= ((PyStringObject
*)key
)->ob_shash
;
487 hash
= PyObject_Hash(key
);
493 hash
= PyObject_Hash(key
);
497 if (mp
->ma_fill
>= mp
->ma_size
) {
498 /* No room for a new key.
499 * This only happens when the dict is empty.
500 * Let dictresize() create a minimal dict.
502 assert(mp
->ma_used
== 0);
503 if (dictresize(mp
, 0) != 0)
505 assert(mp
->ma_fill
< mp
->ma_size
);
507 n_used
= mp
->ma_used
;
510 insertdict(mp
, key
, hash
, value
);
511 /* If we added a key, we can safely resize. Otherwise skip this!
512 * If fill >= 2/3 size, adjust size. Normally, this doubles the
513 * size, but it's also possible for the dict to shrink (if ma_fill is
514 * much larger than ma_used, meaning a lot of dict keys have been
517 if (mp
->ma_used
> n_used
&& mp
->ma_fill
*3 >= mp
->ma_size
*2) {
518 if (dictresize(mp
, mp
->ma_used
*2) != 0)
525 PyDict_DelItem(PyObject
*op
, PyObject
*key
)
527 register dictobject
*mp
;
529 register dictentry
*ep
;
530 PyObject
*old_value
, *old_key
;
532 if (!PyDict_Check(op
)) {
533 PyErr_BadInternalCall();
537 if (!PyString_Check(key
) ||
538 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
541 hash
= PyObject_Hash(key
);
545 mp
= (dictobject
*)op
;
546 if (((dictobject
*)op
)->ma_table
== NULL
)
548 ep
= (mp
->ma_lookup
)(mp
, key
, hash
);
549 if (ep
->me_value
== NULL
) {
551 PyErr_SetObject(PyExc_KeyError
, key
);
554 old_key
= ep
->me_key
;
557 old_value
= ep
->me_value
;
560 Py_DECREF(old_value
);
566 PyDict_Clear(PyObject
*op
)
569 register dictentry
*table
;
571 if (!PyDict_Check(op
))
573 mp
= (dictobject
*)op
;
574 table
= mp
->ma_table
;
578 mp
->ma_size
= mp
->ma_used
= mp
->ma_fill
= 0;
580 for (i
= 0; i
< n
; i
++) {
581 Py_XDECREF(table
[i
].me_key
);
582 Py_XDECREF(table
[i
].me_value
);
587 /* CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
588 * mutates the dict. One exception: it is safe if the loop merely changes
589 * the values associated with the keys (but doesn't insert new keys or
590 * delete keys), via PyDict_SetItem().
593 PyDict_Next(PyObject
*op
, int *ppos
, PyObject
**pkey
, PyObject
**pvalue
)
596 register dictobject
*mp
;
597 if (!PyDict_Check(op
))
599 mp
= (dictobject
*)op
;
603 while (i
< mp
->ma_size
&& mp
->ma_table
[i
].me_value
== NULL
)
606 if (i
>= mp
->ma_size
)
609 *pkey
= mp
->ma_table
[i
].me_key
;
611 *pvalue
= mp
->ma_table
[i
].me_value
;
618 dict_dealloc(register dictobject
*mp
)
621 register dictentry
*ep
;
622 Py_TRASHCAN_SAFE_BEGIN(mp
)
623 PyObject_GC_Fini(mp
);
624 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
; i
++, ep
++) {
625 if (ep
->me_key
!= NULL
) {
626 Py_DECREF(ep
->me_key
);
628 if (ep
->me_value
!= NULL
) {
629 Py_DECREF(ep
->me_value
);
632 if (mp
->ma_table
!= NULL
)
633 PyMem_DEL(mp
->ma_table
);
634 mp
= (dictobject
*) PyObject_AS_GC(mp
);
636 Py_TRASHCAN_SAFE_END(mp
)
640 dict_print(register dictobject
*mp
, register FILE *fp
, register int flags
)
644 register dictentry
*ep
;
646 i
= Py_ReprEnter((PyObject
*)mp
);
650 fprintf(fp
, "{...}");
656 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
; i
++, ep
++) {
657 if (ep
->me_value
!= NULL
) {
660 if (PyObject_Print((PyObject
*)ep
->me_key
, fp
, 0)!=0) {
661 Py_ReprLeave((PyObject
*)mp
);
665 if (PyObject_Print(ep
->me_value
, fp
, 0) != 0) {
666 Py_ReprLeave((PyObject
*)mp
);
672 Py_ReprLeave((PyObject
*)mp
);
677 dict_repr(dictobject
*mp
)
680 PyObject
*sepa
, *colon
;
683 register dictentry
*ep
;
685 i
= Py_ReprEnter((PyObject
*)mp
);
688 return PyString_FromString("{...}");
692 v
= PyString_FromString("{");
693 sepa
= PyString_FromString(", ");
694 colon
= PyString_FromString(": ");
696 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
&& v
; i
++, ep
++) {
697 if (ep
->me_value
!= NULL
) {
699 PyString_Concat(&v
, sepa
);
700 PyString_ConcatAndDel(&v
, PyObject_Repr(ep
->me_key
));
701 PyString_Concat(&v
, colon
);
702 PyString_ConcatAndDel(&v
, PyObject_Repr(ep
->me_value
));
705 PyString_ConcatAndDel(&v
, PyString_FromString("}"));
706 Py_ReprLeave((PyObject
*)mp
);
713 dict_length(dictobject
*mp
)
719 dict_subscript(dictobject
*mp
, register PyObject
*key
)
723 if (mp
->ma_table
== NULL
) {
724 PyErr_SetObject(PyExc_KeyError
, key
);
728 if (!PyString_Check(key
) ||
729 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
732 hash
= PyObject_Hash(key
);
736 v
= (mp
->ma_lookup
)(mp
, key
, hash
) -> me_value
;
738 PyErr_SetObject(PyExc_KeyError
, key
);
745 dict_ass_sub(dictobject
*mp
, PyObject
*v
, PyObject
*w
)
748 return PyDict_DelItem((PyObject
*)mp
, v
);
750 return PyDict_SetItem((PyObject
*)mp
, v
, w
);
753 static PyMappingMethods dict_as_mapping
= {
754 (inquiry
)dict_length
, /*mp_length*/
755 (binaryfunc
)dict_subscript
, /*mp_subscript*/
756 (objobjargproc
)dict_ass_sub
, /*mp_ass_subscript*/
760 dict_keys(register dictobject
*mp
, PyObject
*args
)
762 register PyObject
*v
;
763 register int i
, j
, n
;
765 if (!PyArg_NoArgs(args
))
772 if (n
!= mp
->ma_used
) {
773 /* Durnit. The allocations caused the dict to resize.
774 * Just start over, this shouldn't normally happen.
779 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
780 if (mp
->ma_table
[i
].me_value
!= NULL
) {
781 PyObject
*key
= mp
->ma_table
[i
].me_key
;
783 PyList_SET_ITEM(v
, j
, key
);
791 dict_values(register dictobject
*mp
, PyObject
*args
)
793 register PyObject
*v
;
794 register int i
, j
, n
;
796 if (!PyArg_NoArgs(args
))
803 if (n
!= mp
->ma_used
) {
804 /* Durnit. The allocations caused the dict to resize.
805 * Just start over, this shouldn't normally happen.
810 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
811 if (mp
->ma_table
[i
].me_value
!= NULL
) {
812 PyObject
*value
= mp
->ma_table
[i
].me_value
;
814 PyList_SET_ITEM(v
, j
, value
);
822 dict_items(register dictobject
*mp
, PyObject
*args
)
824 register PyObject
*v
;
825 register int i
, j
, n
;
826 PyObject
*item
, *key
, *value
;
828 if (!PyArg_NoArgs(args
))
830 /* Preallocate the list of tuples, to avoid allocations during
831 * the loop over the items, which could trigger GC, which
832 * could resize the dict. :-(
839 for (i
= 0; i
< n
; i
++) {
840 item
= PyTuple_New(2);
845 PyList_SET_ITEM(v
, i
, item
);
847 if (n
!= mp
->ma_used
) {
848 /* Durnit. The allocations caused the dict to resize.
849 * Just start over, this shouldn't normally happen.
854 /* Nothing we do below makes any function calls. */
855 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
856 if (mp
->ma_table
[i
].me_value
!= NULL
) {
857 key
= mp
->ma_table
[i
].me_key
;
858 value
= mp
->ma_table
[i
].me_value
;
859 item
= PyList_GET_ITEM(v
, j
);
861 PyTuple_SET_ITEM(item
, 0, key
);
863 PyTuple_SET_ITEM(item
, 1, value
);
872 dict_update(register dictobject
*mp
, PyObject
*args
)
877 if (!PyArg_Parse(args
, "O!", &PyDict_Type
, &other
))
879 if (other
== mp
|| other
->ma_used
== 0)
880 goto done
; /* a.update(a) or a.update({}); nothing to do */
881 /* Do one big resize at the start, rather than incrementally
882 resizing as we insert new items. Expect that there will be
883 no (or few) overlapping keys. */
884 if ((mp
->ma_fill
+ other
->ma_used
)*3 >= mp
->ma_size
*2) {
885 if (dictresize(mp
, (mp
->ma_used
+ other
->ma_used
)*3/2) != 0)
888 for (i
= 0; i
< other
->ma_size
; i
++) {
889 entry
= &other
->ma_table
[i
];
890 if (entry
->me_value
!= NULL
) {
891 Py_INCREF(entry
->me_key
);
892 Py_INCREF(entry
->me_value
);
893 insertdict(mp
, entry
->me_key
, entry
->me_hash
,
903 dict_copy(register dictobject
*mp
, PyObject
*args
)
905 if (!PyArg_Parse(args
, ""))
907 return PyDict_Copy((PyObject
*)mp
);
911 PyDict_Copy(PyObject
*o
)
913 register dictobject
*mp
;
918 if (o
== NULL
|| !PyDict_Check(o
)) {
919 PyErr_BadInternalCall();
922 mp
= (dictobject
*)o
;
923 copy
= (dictobject
*)PyDict_New();
926 if (mp
->ma_used
> 0) {
927 if (dictresize(copy
, mp
->ma_used
*3/2) != 0)
929 for (i
= 0; i
< mp
->ma_size
; i
++) {
930 entry
= &mp
->ma_table
[i
];
931 if (entry
->me_value
!= NULL
) {
932 Py_INCREF(entry
->me_key
);
933 Py_INCREF(entry
->me_value
);
934 insertdict(copy
, entry
->me_key
, entry
->me_hash
,
939 return (PyObject
*)copy
;
943 PyDict_Size(PyObject
*mp
)
945 if (mp
== NULL
|| !PyDict_Check(mp
)) {
946 PyErr_BadInternalCall();
949 return ((dictobject
*)mp
)->ma_used
;
953 PyDict_Keys(PyObject
*mp
)
955 if (mp
== NULL
|| !PyDict_Check(mp
)) {
956 PyErr_BadInternalCall();
959 return dict_keys((dictobject
*)mp
, (PyObject
*)NULL
);
963 PyDict_Values(PyObject
*mp
)
965 if (mp
== NULL
|| !PyDict_Check(mp
)) {
966 PyErr_BadInternalCall();
969 return dict_values((dictobject
*)mp
, (PyObject
*)NULL
);
973 PyDict_Items(PyObject
*mp
)
975 if (mp
== NULL
|| !PyDict_Check(mp
)) {
976 PyErr_BadInternalCall();
979 return dict_items((dictobject
*)mp
, (PyObject
*)NULL
);
982 /* Subroutine which returns the smallest key in a for which b's value
983 is different or absent. The value is returned too, through the
984 pval argument. Both are NULL if no key in a is found for which b's status
985 differs. The refcounts on (and only on) non-NULL *pval and function return
986 values must be decremented by the caller (characterize() increments them
987 to ensure that mutating comparison and PyDict_GetItem calls can't delete
988 them before the caller is done looking at them). */
991 characterize(dictobject
*a
, dictobject
*b
, PyObject
**pval
)
993 PyObject
*akey
= NULL
; /* smallest key in a s.t. a[akey] != b[akey] */
994 PyObject
*aval
= NULL
; /* a[akey] */
997 for (i
= 0; i
< a
->ma_size
; i
++) {
998 PyObject
*thiskey
, *thisaval
, *thisbval
;
999 if (a
->ma_table
[i
].me_value
== NULL
)
1001 thiskey
= a
->ma_table
[i
].me_key
;
1002 Py_INCREF(thiskey
); /* keep alive across compares */
1004 cmp
= PyObject_RichCompareBool(akey
, thiskey
, Py_LT
);
1011 a
->ma_table
[i
].me_value
== NULL
)
1013 /* Not the *smallest* a key; or maybe it is
1014 * but the compare shrunk the dict so we can't
1015 * find its associated value anymore; or
1016 * maybe it is but the compare deleted the
1024 /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1025 thisaval
= a
->ma_table
[i
].me_value
;
1027 Py_INCREF(thisaval
); /* keep alive */
1028 thisbval
= PyDict_GetItem((PyObject
*)b
, thiskey
);
1029 if (thisbval
== NULL
)
1032 /* both dicts have thiskey: same values? */
1033 cmp
= PyObject_RichCompareBool(
1034 thisaval
, thisbval
, Py_EQ
);
1037 Py_DECREF(thisaval
);
1050 Py_DECREF(thisaval
);
1064 dict_compare(dictobject
*a
, dictobject
*b
)
1066 PyObject
*adiff
, *bdiff
, *aval
, *bval
;
1069 /* Compare lengths first */
1070 if (a
->ma_used
< b
->ma_used
)
1071 return -1; /* a is shorter */
1072 else if (a
->ma_used
> b
->ma_used
)
1073 return 1; /* b is shorter */
1075 /* Same length -- check all keys */
1076 bdiff
= bval
= NULL
;
1077 adiff
= characterize(a
, b
, &aval
);
1078 if (adiff
== NULL
) {
1080 /* Either an error, or a is a subst with the same length so
1083 res
= PyErr_Occurred() ? -1 : 0;
1086 bdiff
= characterize(b
, a
, &bval
);
1087 if (bdiff
== NULL
&& PyErr_Occurred()) {
1094 /* bdiff == NULL "should be" impossible now, but perhaps
1095 * the last comparison done by the characterize() on a had
1096 * the side effect of making the dicts equal!
1098 res
= PyObject_Compare(adiff
, bdiff
);
1100 if (res
== 0 && bval
!= NULL
)
1101 res
= PyObject_Compare(aval
, bval
);
1112 dict_has_key(register dictobject
*mp
, PyObject
*args
)
1117 if (!PyArg_ParseTuple(args
, "O:has_key", &key
))
1120 if (!PyString_Check(key
) ||
1121 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
1124 hash
= PyObject_Hash(key
);
1128 ok
= (mp
->ma_size
!= 0
1129 && (mp
->ma_lookup
)(mp
, key
, hash
)->me_value
!= NULL
);
1130 return PyInt_FromLong(ok
);
1134 dict_get(register dictobject
*mp
, PyObject
*args
)
1137 PyObject
*failobj
= Py_None
;
1138 PyObject
*val
= NULL
;
1141 if (!PyArg_ParseTuple(args
, "O|O:get", &key
, &failobj
))
1143 if (mp
->ma_table
== NULL
)
1147 if (!PyString_Check(key
) ||
1148 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
1151 hash
= PyObject_Hash(key
);
1155 val
= (mp
->ma_lookup
)(mp
, key
, hash
)->me_value
;
1166 dict_setdefault(register dictobject
*mp
, PyObject
*args
)
1169 PyObject
*failobj
= Py_None
;
1170 PyObject
*val
= NULL
;
1173 if (!PyArg_ParseTuple(args
, "O|O:setdefault", &key
, &failobj
))
1175 if (mp
->ma_table
== NULL
)
1179 if (!PyString_Check(key
) ||
1180 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
1183 hash
= PyObject_Hash(key
);
1187 val
= (mp
->ma_lookup
)(mp
, key
, hash
)->me_value
;
1192 if (PyDict_SetItem((PyObject
*)mp
, key
, failobj
))
1201 dict_clear(register dictobject
*mp
, PyObject
*args
)
1203 if (!PyArg_NoArgs(args
))
1205 PyDict_Clear((PyObject
*)mp
);
1211 dict_popitem(dictobject
*mp
, PyObject
*args
)
1217 if (!PyArg_NoArgs(args
))
1219 /* Allocate the result tuple first. Believe it or not,
1220 * this allocation could trigger a garbage collection which
1221 * could resize the dict, which would invalidate the pointer
1222 * (ep) into the dict calculated below.
1223 * So we have to do this first.
1225 res
= PyTuple_New(2);
1228 if (mp
->ma_used
== 0) {
1230 PyErr_SetString(PyExc_KeyError
,
1231 "popitem(): dictionary is empty");
1234 /* Set ep to "the first" dict entry with a value. We abuse the hash
1235 * field of slot 0 to hold a search finger:
1236 * If slot 0 has a value, use slot 0.
1237 * Else slot 0 is being used to hold a search finger,
1238 * and we use its hash value as the first index to look.
1240 ep
= &mp
->ma_table
[0];
1241 if (ep
->me_value
== NULL
) {
1242 i
= (int)ep
->me_hash
;
1243 /* The hash field may be uninitialized trash, or it
1244 * may be a real hash value, or it may be a legit
1245 * search finger, or it may be a once-legit search
1246 * finger that's out of bounds now because it
1247 * wrapped around or the table shrunk -- simply
1248 * make sure it's in bounds now.
1250 if (i
>= mp
->ma_size
|| i
< 1)
1251 i
= 1; /* skip slot 0 */
1252 while ((ep
= &mp
->ma_table
[i
])->me_value
== NULL
) {
1254 if (i
>= mp
->ma_size
)
1258 PyTuple_SET_ITEM(res
, 0, ep
->me_key
);
1259 PyTuple_SET_ITEM(res
, 1, ep
->me_value
);
1262 ep
->me_value
= NULL
;
1264 assert(mp
->ma_table
[0].me_value
== NULL
);
1265 mp
->ma_table
[0].me_hash
= i
+ 1; /* next place to start */
1270 dict_traverse(PyObject
*op
, visitproc visit
, void *arg
)
1276 while (PyDict_Next(op
, &i
, &pk
, &pv
)) {
1277 err
= visit(pk
, arg
);
1280 err
= visit(pv
, arg
);
1288 dict_tp_clear(PyObject
*op
)
1295 static char has_key__doc__
[] =
1296 "D.has_key(k) -> 1 if D has a key k, else 0";
1298 static char get__doc__
[] =
1299 "D.get(k[,d]) -> D[k] if D.has_key(k), else d. d defaults to None.";
1301 static char setdefault_doc__
[] =
1302 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if not D.has_key(k)";
1304 static char popitem__doc__
[] =
1305 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
1306 2-tuple; but raise KeyError if D is empty";
1308 static char keys__doc__
[] =
1309 "D.keys() -> list of D's keys";
1311 static char items__doc__
[] =
1312 "D.items() -> list of D's (key, value) pairs, as 2-tuples";
1314 static char values__doc__
[] =
1315 "D.values() -> list of D's values";
1317 static char update__doc__
[] =
1318 "D.update(E) -> None. Update D from E: for k in E.keys(): D[k] = E[k]";
1320 static char clear__doc__
[] =
1321 "D.clear() -> None. Remove all items from D.";
1323 static char copy__doc__
[] =
1324 "D.copy() -> a shallow copy of D";
1326 static PyMethodDef mapp_methods
[] = {
1327 {"has_key", (PyCFunction
)dict_has_key
, METH_VARARGS
,
1329 {"get", (PyCFunction
)dict_get
, METH_VARARGS
,
1331 {"setdefault", (PyCFunction
)dict_setdefault
, METH_VARARGS
,
1333 {"popitem", (PyCFunction
)dict_popitem
, METH_OLDARGS
,
1335 {"keys", (PyCFunction
)dict_keys
, METH_OLDARGS
,
1337 {"items", (PyCFunction
)dict_items
, METH_OLDARGS
,
1339 {"values", (PyCFunction
)dict_values
, METH_OLDARGS
,
1341 {"update", (PyCFunction
)dict_update
, METH_OLDARGS
,
1343 {"clear", (PyCFunction
)dict_clear
, METH_OLDARGS
,
1345 {"copy", (PyCFunction
)dict_copy
, METH_OLDARGS
,
1347 {NULL
, NULL
} /* sentinel */
1351 dict_getattr(dictobject
*mp
, char *name
)
1353 return Py_FindMethod(mapp_methods
, (PyObject
*)mp
, name
);
1356 PyTypeObject PyDict_Type
= {
1357 PyObject_HEAD_INIT(&PyType_Type
)
1360 sizeof(dictobject
) + PyGC_HEAD_SIZE
,
1362 (destructor
)dict_dealloc
, /* tp_dealloc */
1363 (printfunc
)dict_print
, /* tp_print */
1364 (getattrfunc
)dict_getattr
, /* tp_getattr */
1366 (cmpfunc
)dict_compare
, /* tp_compare */
1367 (reprfunc
)dict_repr
, /* tp_repr */
1368 0, /* tp_as_number */
1369 0, /* tp_as_sequence */
1370 &dict_as_mapping
, /* tp_as_mapping */
1374 0, /* tp_getattro */
1375 0, /* tp_setattro */
1376 0, /* tp_as_buffer */
1377 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_GC
, /* tp_flags */
1379 (traverseproc
)dict_traverse
, /* tp_traverse */
1380 (inquiry
)dict_tp_clear
, /* tp_clear */
1381 0, /* tp_richcompare */
1384 /* For backward compatibility with old dictionary interface */
1387 PyDict_GetItemString(PyObject
*v
, char *key
)
1390 kv
= PyString_FromString(key
);
1393 rv
= PyDict_GetItem(v
, kv
);
1399 PyDict_SetItemString(PyObject
*v
, char *key
, PyObject
*item
)
1403 kv
= PyString_FromString(key
);
1406 PyString_InternInPlace(&kv
); /* XXX Should we really? */
1407 err
= PyDict_SetItem(v
, kv
, item
);
1413 PyDict_DelItemString(PyObject
*v
, char *key
)
1417 kv
= PyString_FromString(key
);
1420 err
= PyDict_DelItem(v
, kv
);