1 /***********************************************************
2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
32 /* Dictionary object implementation using a hash table */
38 * MINSIZE is the minimum size of a dictionary.
44 Table of irreducible polynomials to efficiently cycle through
45 GF(2^n)-{0}, 2<=n<=30.
47 static long polys
[] = {
80 /* Object used as dummy key to fill deleted entries */
81 static PyObject
*dummy
; /* Initialized by first call to newdictobject() */
84 Invariant for entries: when in use, me_value is not NULL and me_key is
85 not NULL and not dummy; when not in use, me_value is NULL and me_key
86 is either NULL or dummy. A dummy key value cannot be replaced by
87 NULL, since otherwise other keys may be lost.
93 #ifdef USE_CACHE_ALIGNED
99 To ensure the lookup algorithm terminates, the table size must be a
100 prime number and there must be at least one NULL key in the table.
101 The value ma_fill is the number of non-NULL keys; ma_used is the number
102 of non-NULL, non-dummy keys.
103 To avoid slowing down lookups on a near-full table, we resize the table
104 when it is more than half filled.
118 register dictobject
*mp
;
119 if (dummy
== NULL
) { /* Auto-initialize dummy */
120 dummy
= PyString_FromString("<dummy key>");
124 mp
= PyObject_NEW(dictobject
, &PyDict_Type
);
132 return (PyObject
*)mp
;
136 The basic lookup function used by all operations.
137 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
138 Open addressing is preferred over chaining since the link overhead for
139 chaining would be substantial (100% with typical malloc overhead).
140 However, instead of going through the table at constant steps, we cycle
141 through the values of GF(2^n)-{0}. This avoids modulo computations, being
142 much cheaper on RISC machines, without leading to clustering.
144 The initial probe index is computed as hash mod the table size.
145 Subsequent probe indices use the values of x^i in GF(2^n) as an offset,
146 where x is a root. The initial value is derived from hash, too.
148 All arithmetic on hash should ignore overflow.
150 (This version is due to Reimer Behrends, some ideas are also due to
151 Jyrki Alakuijala and Vladimir Marangozov.)
153 static dictentry
*lookdict
Py_PROTO((dictobject
*, PyObject
*, long));
155 lookdict(mp
, key
, hash
)
161 register unsigned incr
;
162 register dictentry
*freeslot
;
163 register unsigned int mask
= mp
->ma_size
-1;
164 dictentry
*ep0
= mp
->ma_table
;
165 register dictentry
*ep
;
166 /* We must come up with (i, incr) such that 0 <= i < ma_size
167 and 0 < incr < ma_size and both are a function of hash */
169 /* We use ~hash instead of hash, as degenerate hash functions, such
170 as for ints <sigh>, can have lots of leading zeros. It's not
171 really a performance risk, but better safe than sorry. */
173 if (ep
->me_key
== NULL
|| ep
->me_key
== key
)
175 if (ep
->me_key
== dummy
)
178 if (ep
->me_hash
== hash
&&
179 PyObject_Compare(ep
->me_key
, key
) == 0)
185 /* XXX What if PyObject_Compare returned an exception? */
186 /* Derive incr from hash, just to make it more arbitrary. Note that
187 incr must not be 0, or we will get into an infinite loop.*/
188 incr
= (hash
^ ((unsigned long)hash
>> 3)) & mask
;
192 ep
= &ep0
[(i
+incr
)&mask
];
193 if (ep
->me_key
== NULL
) {
194 if (freeslot
!= NULL
)
199 if (ep
->me_key
== dummy
) {
200 if (freeslot
== NULL
)
203 else if (ep
->me_key
== key
||
204 (ep
->me_hash
== hash
&&
205 PyObject_Compare(ep
->me_key
, key
) == 0)) {
208 /* XXX What if PyObject_Compare returned an exception? */
209 /* Cycle through GF(2^n)-{0} */
212 incr
^= mp
->ma_poly
; /* This will implicitely clear
218 Internal routine to insert a new item into the table.
219 Used both by the internal resize routine and by the public insert routine.
220 Eats a reference to key and one to value.
222 static void insertdict
223 Py_PROTO((dictobject
*, PyObject
*, long, PyObject
*));
225 insertdict(mp
, key
, hash
, value
)
226 register dictobject
*mp
;
232 register dictentry
*ep
;
233 ep
= lookdict(mp
, key
, hash
);
234 if (ep
->me_value
!= NULL
) {
235 old_value
= ep
->me_value
;
236 ep
->me_value
= value
;
237 Py_DECREF(old_value
); /* which **CAN** re-enter */
241 if (ep
->me_key
== NULL
)
244 Py_DECREF(ep
->me_key
);
247 ep
->me_value
= value
;
253 Restructure the table by allocating a new table and reinserting all
254 items again. When entries have been deleted, the new table may
255 actually be smaller than the old one.
257 static int dictresize
Py_PROTO((dictobject
*, int));
259 dictresize(mp
, minused
)
263 register int oldsize
= mp
->ma_size
;
264 register int newsize
, newpoly
;
265 register dictentry
*oldtable
= mp
->ma_table
;
266 register dictentry
*newtable
;
267 register dictentry
*ep
;
269 for (i
= 0, newsize
= MINSIZE
; ; i
++, newsize
<<= 1) {
270 if (i
> sizeof(polys
)/sizeof(polys
[0])) {
271 /* Ran out of polynomials */
275 if (newsize
> minused
) {
280 newtable
= (dictentry
*) malloc(sizeof(dictentry
) * newsize
);
281 if (newtable
== NULL
) {
285 memset(newtable
, '\0', sizeof(dictentry
) * newsize
);
286 mp
->ma_size
= newsize
;
287 mp
->ma_poly
= newpoly
;
288 mp
->ma_table
= newtable
;
292 /* Make two passes, so we can avoid decrefs
293 (and possible side effects) till the table is copied */
294 for (i
= 0, ep
= oldtable
; i
< oldsize
; i
++, ep
++) {
295 if (ep
->me_value
!= NULL
)
296 insertdict(mp
,ep
->me_key
,ep
->me_hash
,ep
->me_value
);
298 for (i
= 0, ep
= oldtable
; i
< oldsize
; i
++, ep
++) {
299 if (ep
->me_value
== NULL
) {
300 Py_XDECREF(ep
->me_key
);
304 PyMem_XDEL(oldtable
);
309 PyDict_GetItem(op
, key
)
314 if (!PyDict_Check(op
)) {
317 if (((dictobject
*)op
)->ma_table
== NULL
)
320 if (!PyString_Check(key
) ||
321 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
324 hash
= PyObject_Hash(key
);
330 return lookdict((dictobject
*)op
, key
, hash
) -> me_value
;
334 PyDict_SetItem(op
, key
, value
)
335 register PyObject
*op
;
339 register dictobject
*mp
;
341 if (!PyDict_Check(op
)) {
342 PyErr_BadInternalCall();
345 mp
= (dictobject
*)op
;
347 if (PyString_Check(key
)) {
348 #ifdef INTERN_STRINGS
349 if (((PyStringObject
*)key
)->ob_sinterned
!= NULL
) {
350 key
= ((PyStringObject
*)key
)->ob_sinterned
;
351 hash
= ((PyStringObject
*)key
)->ob_shash
;
356 hash
= ((PyStringObject
*)key
)->ob_shash
;
358 hash
= PyObject_Hash(key
);
364 hash
= PyObject_Hash(key
);
368 /* if fill >= 2/3 size, double in size */
369 if (mp
->ma_fill
*3 >= mp
->ma_size
*2) {
370 if (dictresize(mp
, mp
->ma_used
*2) != 0) {
371 if (mp
->ma_fill
+1 > mp
->ma_size
)
377 insertdict(mp
, key
, hash
, value
);
382 PyDict_DelItem(op
, key
)
386 register dictobject
*mp
;
388 register dictentry
*ep
;
389 PyObject
*old_value
, *old_key
;
391 if (!PyDict_Check(op
)) {
392 PyErr_BadInternalCall();
396 if (!PyString_Check(key
) ||
397 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
400 hash
= PyObject_Hash(key
);
404 mp
= (dictobject
*)op
;
405 if (((dictobject
*)op
)->ma_table
== NULL
)
407 ep
= lookdict(mp
, key
, hash
);
408 if (ep
->me_value
== NULL
) {
410 PyErr_SetObject(PyExc_KeyError
, key
);
413 old_key
= ep
->me_key
;
416 old_value
= ep
->me_value
;
419 Py_DECREF(old_value
);
429 register dictentry
*table
;
431 if (!PyDict_Check(op
))
433 mp
= (dictobject
*)op
;
434 table
= mp
->ma_table
;
438 mp
->ma_size
= mp
->ma_used
= mp
->ma_fill
= 0;
440 for (i
= 0; i
< n
; i
++) {
441 Py_XDECREF(table
[i
].me_key
);
442 Py_XDECREF(table
[i
].me_value
);
448 PyDict_Next(op
, ppos
, pkey
, pvalue
)
455 register dictobject
*mp
;
456 if (!PyDict_Check(op
))
458 mp
= (dictobject
*)op
;
462 while (i
< mp
->ma_size
&& mp
->ma_table
[i
].me_value
== NULL
)
465 if (i
>= mp
->ma_size
)
468 *pkey
= mp
->ma_table
[i
].me_key
;
470 *pvalue
= mp
->ma_table
[i
].me_value
;
478 register dictobject
*mp
;
481 register dictentry
*ep
;
482 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
; i
++, ep
++) {
483 if (ep
->me_key
!= NULL
) {
484 Py_DECREF(ep
->me_key
);
486 if (ep
->me_value
!= NULL
) {
487 Py_DECREF(ep
->me_value
);
490 PyMem_XDEL(mp
->ma_table
);
495 dict_print(mp
, fp
, flags
)
496 register dictobject
*mp
;
502 register dictentry
*ep
;
504 i
= Py_ReprEnter((PyObject
*)mp
);
508 fprintf(fp
, "{...}");
514 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
; i
++, ep
++) {
515 if (ep
->me_value
!= NULL
) {
518 if (PyObject_Print((PyObject
*)ep
->me_key
, fp
, 0)!=0) {
519 Py_ReprLeave((PyObject
*)mp
);
523 if (PyObject_Print(ep
->me_value
, fp
, 0) != 0) {
524 Py_ReprLeave((PyObject
*)mp
);
530 Py_ReprLeave((PyObject
*)mp
);
539 PyObject
*sepa
, *colon
;
542 register dictentry
*ep
;
544 i
= Py_ReprEnter((PyObject
*)mp
);
547 return PyString_FromString("{...}");
551 v
= PyString_FromString("{");
552 sepa
= PyString_FromString(", ");
553 colon
= PyString_FromString(": ");
555 for (i
= 0, ep
= mp
->ma_table
; i
< mp
->ma_size
&& v
; i
++, ep
++) {
556 if (ep
->me_value
!= NULL
) {
558 PyString_Concat(&v
, sepa
);
559 PyString_ConcatAndDel(&v
, PyObject_Repr(ep
->me_key
));
560 PyString_Concat(&v
, colon
);
561 PyString_ConcatAndDel(&v
, PyObject_Repr(ep
->me_value
));
564 PyString_ConcatAndDel(&v
, PyString_FromString("}"));
565 Py_ReprLeave((PyObject
*)mp
);
579 dict_subscript(mp
, key
)
581 register PyObject
*key
;
585 if (mp
->ma_table
== NULL
) {
586 PyErr_SetObject(PyExc_KeyError
, key
);
590 if (!PyString_Check(key
) ||
591 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
594 hash
= PyObject_Hash(key
);
598 v
= lookdict(mp
, key
, hash
) -> me_value
;
600 PyErr_SetObject(PyExc_KeyError
, key
);
607 dict_ass_sub(mp
, v
, w
)
612 return PyDict_DelItem((PyObject
*)mp
, v
);
614 return PyDict_SetItem((PyObject
*)mp
, v
, w
);
617 static PyMappingMethods dict_as_mapping
= {
618 (inquiry
)dict_length
, /*mp_length*/
619 (binaryfunc
)dict_subscript
, /*mp_subscript*/
620 (objobjargproc
)dict_ass_sub
, /*mp_ass_subscript*/
625 register dictobject
*mp
;
628 register PyObject
*v
;
630 if (!PyArg_NoArgs(args
))
632 v
= PyList_New(mp
->ma_used
);
635 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
636 if (mp
->ma_table
[i
].me_value
!= NULL
) {
637 PyObject
*key
= mp
->ma_table
[i
].me_key
;
639 PyList_SetItem(v
, j
, key
);
647 dict_values(mp
, args
)
648 register dictobject
*mp
;
651 register PyObject
*v
;
653 if (!PyArg_NoArgs(args
))
655 v
= PyList_New(mp
->ma_used
);
658 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
659 if (mp
->ma_table
[i
].me_value
!= NULL
) {
660 PyObject
*value
= mp
->ma_table
[i
].me_value
;
662 PyList_SetItem(v
, j
, value
);
671 register dictobject
*mp
;
674 register PyObject
*v
;
676 if (!PyArg_NoArgs(args
))
678 v
= PyList_New(mp
->ma_used
);
681 for (i
= 0, j
= 0; i
< mp
->ma_size
; i
++) {
682 if (mp
->ma_table
[i
].me_value
!= NULL
) {
683 PyObject
*key
= mp
->ma_table
[i
].me_key
;
684 PyObject
*value
= mp
->ma_table
[i
].me_value
;
685 PyObject
*item
= PyTuple_New(2);
691 PyTuple_SetItem(item
, 0, key
);
693 PyTuple_SetItem(item
, 1, value
);
694 PyList_SetItem(v
, j
, item
);
702 dict_update(mp
, args
)
703 register dictobject
*mp
;
709 if (!PyArg_Parse(args
, "O!", &PyDict_Type
, &other
))
712 goto done
; /* a.update(a); nothing to do */
713 /* Do one big resize at the start, rather than incrementally
714 resizing as we insert new items. Expect that there will be
715 no (or few) overlapping keys. */
716 if ((mp
->ma_fill
+ other
->ma_used
)*3 >= mp
->ma_size
*2) {
717 if (dictresize(mp
, (mp
->ma_used
+ other
->ma_used
)*3/2) != 0)
720 for (i
= 0; i
< other
->ma_size
; i
++) {
721 entry
= &other
->ma_table
[i
];
722 if (entry
->me_value
!= NULL
) {
723 Py_INCREF(entry
->me_key
);
724 Py_INCREF(entry
->me_value
);
725 insertdict(mp
, entry
->me_key
, entry
->me_hash
,
736 register dictobject
*mp
;
742 if (!PyArg_Parse(args
, ""))
744 copy
= (dictobject
*)PyDict_New();
747 if (mp
->ma_used
> 0) {
748 if (dictresize(copy
, mp
->ma_used
*3/2) != 0)
750 for (i
= 0; i
< mp
->ma_size
; i
++) {
751 entry
= &mp
->ma_table
[i
];
752 if (entry
->me_value
!= NULL
) {
753 Py_INCREF(entry
->me_key
);
754 Py_INCREF(entry
->me_value
);
755 insertdict(copy
, entry
->me_key
, entry
->me_hash
,
760 return (PyObject
*)copy
;
767 if (mp
== NULL
|| !PyDict_Check(mp
)) {
768 PyErr_BadInternalCall();
771 return ((dictobject
*)mp
)->ma_used
;
778 if (mp
== NULL
|| !PyDict_Check(mp
)) {
779 PyErr_BadInternalCall();
782 return dict_keys((dictobject
*)mp
, (PyObject
*)NULL
);
789 if (mp
== NULL
|| !PyDict_Check(mp
)) {
790 PyErr_BadInternalCall();
793 return dict_values((dictobject
*)mp
, (PyObject
*)NULL
);
800 if (mp
== NULL
|| !PyDict_Check(mp
)) {
801 PyErr_BadInternalCall();
804 return dict_items((dictobject
*)mp
, (PyObject
*)NULL
);
811 /* Subroutine which returns the smallest key in a for which b's value
812 is different or absent. The value is returned too, through the
813 pval argument. No reference counts are incremented. */
816 characterize(a
, b
, pval
)
821 PyObject
*diff
= NULL
;
825 for (i
= 0; i
< a
->ma_size
; i
++) {
826 if (a
->ma_table
[i
].me_value
!= NULL
) {
827 PyObject
*key
= a
->ma_table
[i
].me_key
;
828 PyObject
*aval
, *bval
;
829 /* XXX What if PyObject_Compare raises an exception? */
830 if (diff
!= NULL
&& PyObject_Compare(key
, diff
) > 0)
832 aval
= a
->ma_table
[i
].me_value
;
833 bval
= PyDict_GetItem((PyObject
*)b
, key
);
834 /* XXX What if PyObject_Compare raises an exception? */
835 if (bval
== NULL
|| PyObject_Compare(aval
, bval
) != 0)
849 PyObject
*adiff
, *bdiff
, *aval
, *bval
;
852 /* Compare lengths first */
853 if (a
->ma_used
< b
->ma_used
)
854 return -1; /* a is shorter */
855 else if (a
->ma_used
> b
->ma_used
)
856 return 1; /* b is shorter */
857 /* Same length -- check all keys */
858 adiff
= characterize(a
, b
, &aval
);
859 if (PyErr_Occurred())
862 return 0; /* a is a subset with the same length */
863 bdiff
= characterize(b
, a
, &bval
);
864 if (PyErr_Occurred())
866 /* bdiff == NULL would be impossible now */
867 res
= PyObject_Compare(adiff
, bdiff
);
869 res
= PyObject_Compare(aval
, bval
);
879 PyObject
*akeys
, *bkeys
;
883 if (a
->ma_used
== 0) {
893 akeys
= dict_keys(a
, (PyObject
*)NULL
);
894 bkeys
= dict_keys(b
, (PyObject
*)NULL
);
895 if (akeys
== NULL
|| bkeys
== NULL
) {
896 /* Oops, out of memory -- what to do? */
897 /* For now, sort on address! */
907 n
= a
->ma_used
< b
->ma_used
? a
->ma_used
: b
->ma_used
; /* smallest */
909 for (i
= 0; i
< n
; i
++) {
910 PyObject
*akey
, *bkey
, *aval
, *bval
;
912 akey
= PyList_GetItem(akeys
, i
);
913 bkey
= PyList_GetItem(bkeys
, i
);
914 res
= PyObject_Compare(akey
, bkey
);
918 if (!PyString_Check(akey
) ||
919 (ahash
= ((PyStringObject
*) akey
)->ob_shash
) == -1)
922 ahash
= PyObject_Hash(akey
);
924 PyErr_Clear(); /* Don't want errors here */
927 if (!PyString_Check(bkey
) ||
928 (bhash
= ((PyStringObject
*) bkey
)->ob_shash
) == -1)
931 bhash
= PyObject_Hash(bkey
);
933 PyErr_Clear(); /* Don't want errors here */
935 aval
= lookdict(a
, akey
, ahash
) -> me_value
;
936 bval
= lookdict(b
, bkey
, bhash
) -> me_value
;
937 res
= PyObject_Compare(aval
, bval
);
942 if (a
->ma_used
< b
->ma_used
)
944 else if (a
->ma_used
> b
->ma_used
)
955 dict_has_key(mp
, args
)
956 register dictobject
*mp
;
962 if (!PyArg_Parse(args
, "O", &key
))
965 if (!PyString_Check(key
) ||
966 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
969 hash
= PyObject_Hash(key
);
973 ok
= mp
->ma_size
!= 0 && lookdict(mp
, key
, hash
)->me_value
!= NULL
;
974 return PyInt_FromLong(ok
);
979 register dictobject
*mp
;
983 PyObject
*failobj
= Py_None
;
984 PyObject
*val
= NULL
;
987 if (!PyArg_ParseTuple(args
, "O|O", &key
, &failobj
))
989 if (mp
->ma_table
== NULL
)
993 if (!PyString_Check(key
) ||
994 (hash
= ((PyStringObject
*) key
)->ob_shash
) == -1)
997 hash
= PyObject_Hash(key
);
1001 val
= lookdict(mp
, key
, hash
)->me_value
;
1012 dict_clear(mp
, args
)
1013 register dictobject
*mp
;
1016 if (!PyArg_NoArgs(args
))
1018 PyDict_Clear((PyObject
*)mp
);
1023 static PyMethodDef mapp_methods
[] = {
1024 {"has_key", (PyCFunction
)dict_has_key
},
1025 {"keys", (PyCFunction
)dict_keys
},
1026 {"items", (PyCFunction
)dict_items
},
1027 {"values", (PyCFunction
)dict_values
},
1028 {"update", (PyCFunction
)dict_update
},
1029 {"clear", (PyCFunction
)dict_clear
},
1030 {"copy", (PyCFunction
)dict_copy
},
1031 {"get", (PyCFunction
)dict_get
, 1},
1032 {NULL
, NULL
} /* sentinel */
1036 dict_getattr(mp
, name
)
1040 return Py_FindMethod(mapp_methods
, (PyObject
*)mp
, name
);
1043 PyTypeObject PyDict_Type
= {
1044 PyObject_HEAD_INIT(&PyType_Type
)
1049 (destructor
)dict_dealloc
, /*tp_dealloc*/
1050 (printfunc
)dict_print
, /*tp_print*/
1051 (getattrfunc
)dict_getattr
, /*tp_getattr*/
1053 (cmpfunc
)dict_compare
, /*tp_compare*/
1054 (reprfunc
)dict_repr
, /*tp_repr*/
1056 0, /*tp_as_sequence*/
1057 &dict_as_mapping
, /*tp_as_mapping*/
1060 /* For backward compatibility with old dictionary interface */
1063 PyDict_GetItemString(v
, key
)
1068 kv
= PyString_FromString(key
);
1071 rv
= PyDict_GetItem(v
, kv
);
1077 PyDict_SetItemString(v
, key
, item
)
1084 kv
= PyString_FromString(key
);
1087 PyString_InternInPlace(&kv
); /* XXX Should we really? */
1088 err
= PyDict_SetItem(v
, kv
, item
);
1094 PyDict_DelItemString(v
, key
)
1100 kv
= PyString_FromString(key
);
1103 err
= PyDict_DelItem(v
, kv
);