3 Reference Cycle Garbage Collection
4 ==================================
6 Neil Schemenauer <nas@arctrix.com>
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
11 http://www.arctrix.com/nas/python/gc.html
12 http://www.python.org/pipermail/python-dev/2000-March/003869.html
13 http://www.python.org/pipermail/python-dev/2000-March/004010.html
14 http://www.python.org/pipermail/python-dev/2000-March/004022.html
16 For a highlevel view of the collection process, read the collect
26 /* magic gc_refs value */
29 /*** Global GC state ***/
31 /* linked lists of container objects */
32 static PyGC_Head generation0
= {&generation0
, &generation0
, 0};
33 static PyGC_Head generation1
= {&generation1
, &generation1
, 0};
34 static PyGC_Head generation2
= {&generation2
, &generation2
, 0};
35 static int generation
= 0; /* current generation being collected */
37 /* collection frequencies, XXX tune these */
38 static int enabled
= 1; /* automatic collection enabled? */
39 static int threshold0
= 700; /* net new containers before collection */
40 static int threshold1
= 10; /* generation0 collections before collecting 1 */
41 static int threshold2
= 10; /* generation1 collections before collecting 2 */
43 /* net new objects allocated since last collection */
46 /* set for debugging information */
47 #define DEBUG_STATS (1<<0) /* print collection statistics */
48 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
49 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
50 #define DEBUG_INSTANCES (1<<3) /* print instances */
51 #define DEBUG_OBJECTS (1<<4) /* print other objects */
52 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
53 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
54 DEBUG_UNCOLLECTABLE | \
60 /* list of uncollectable objects */
61 static PyObject
*garbage
;
63 /* Python string to use if unhandled exception occurs */
64 static PyObject
*gc_str
;
66 /*** list functions ***/
69 gc_list_init(PyGC_Head
*list
)
76 gc_list_append(PyGC_Head
*node
, PyGC_Head
*list
)
79 node
->gc_prev
= list
->gc_prev
;
80 node
->gc_prev
->gc_next
= node
;
85 gc_list_remove(PyGC_Head
*node
)
87 node
->gc_prev
->gc_next
= node
->gc_next
;
88 node
->gc_next
->gc_prev
= node
->gc_prev
;
96 gc_list_move(PyGC_Head
*from
, PyGC_Head
*to
)
98 if (from
->gc_next
== from
) {
103 to
->gc_next
= from
->gc_next
;
104 to
->gc_next
->gc_prev
= to
;
105 to
->gc_prev
= from
->gc_prev
;
106 to
->gc_prev
->gc_next
= to
;
111 /* append a list onto another list, from becomes an empty list */
113 gc_list_merge(PyGC_Head
*from
, PyGC_Head
*to
)
116 if (from
->gc_next
!= from
) {
118 tail
->gc_next
= from
->gc_next
;
119 tail
->gc_next
->gc_prev
= tail
;
120 to
->gc_prev
= from
->gc_prev
;
121 to
->gc_prev
->gc_next
= to
;
127 gc_list_size(PyGC_Head
*list
)
131 for (gc
= list
->gc_next
; gc
!= list
; gc
= gc
->gc_next
) {
137 /*** end of list stuff ***/
140 /* Set all gc_refs = ob_refcnt */
142 update_refs(PyGC_Head
*containers
)
144 PyGC_Head
*gc
= containers
->gc_next
;
145 for (; gc
!= containers
; gc
=gc
->gc_next
) {
146 gc
->gc_refs
= PyObject_FROM_GC(gc
)->ob_refcnt
;
151 visit_decref(PyObject
*op
, void *data
)
153 if (op
&& PyObject_IS_GC(op
)) {
154 PyObject_AS_GC(op
)->gc_refs
--;
159 /* Subtract internal references from gc_refs */
161 subtract_refs(PyGC_Head
*containers
)
163 traverseproc traverse
;
164 PyGC_Head
*gc
= containers
->gc_next
;
165 for (; gc
!= containers
; gc
=gc
->gc_next
) {
166 traverse
= PyObject_FROM_GC(gc
)->ob_type
->tp_traverse
;
167 (void) traverse(PyObject_FROM_GC(gc
),
168 (visitproc
)visit_decref
,
173 /* Append objects with gc_refs > 0 to roots list */
175 move_roots(PyGC_Head
*containers
, PyGC_Head
*roots
)
178 PyGC_Head
*gc
= containers
->gc_next
;
179 while (gc
!= containers
) {
181 if (gc
->gc_refs
> 0) {
183 gc_list_append(gc
, roots
);
184 gc
->gc_refs
= GC_MOVED
;
191 visit_reachable(PyObject
*op
, PyGC_Head
*roots
)
193 if (PyObject_IS_GC(op
)) {
194 PyGC_Head
*gc
= PyObject_AS_GC(op
);
195 if (gc
&& gc
->gc_refs
!= GC_MOVED
) {
197 gc_list_append(gc
, roots
);
198 gc
->gc_refs
= GC_MOVED
;
204 /* Move objects referenced from reachable to reachable set. */
206 move_root_reachable(PyGC_Head
*reachable
)
208 traverseproc traverse
;
209 PyGC_Head
*gc
= reachable
->gc_next
;
210 for (; gc
!= reachable
; gc
=gc
->gc_next
) {
211 /* careful, reachable list is growing here */
212 PyObject
*op
= PyObject_FROM_GC(gc
);
213 traverse
= op
->ob_type
->tp_traverse
;
215 (visitproc
)visit_reachable
,
220 /* move all objects with finalizers (instances with __del__) */
222 move_finalizers(PyGC_Head
*unreachable
, PyGC_Head
*finalizers
)
225 PyGC_Head
*gc
= unreachable
->gc_next
;
226 static PyObject
*delstr
= NULL
;
227 if (delstr
== NULL
) {
228 delstr
= PyString_InternFromString("__del__");
230 Py_FatalError("PyGC: can't initialize __del__ string");
232 for (; gc
!= unreachable
; gc
=next
) {
233 PyObject
*op
= PyObject_FROM_GC(gc
);
235 if (PyInstance_Check(op
) && PyObject_HasAttr(op
, delstr
)) {
237 gc_list_append(gc
, finalizers
);
243 /* called by tp_traverse */
245 visit_finalizer_reachable(PyObject
*op
, PyGC_Head
*finalizers
)
247 if (PyObject_IS_GC(op
)) {
248 PyGC_Head
*gc
= PyObject_AS_GC(op
);
249 if (gc
&& gc
->gc_refs
!= GC_MOVED
) {
251 gc_list_append(gc
, finalizers
);
252 gc
->gc_refs
= GC_MOVED
;
258 /* Move objects referenced from roots to roots */
260 move_finalizer_reachable(PyGC_Head
*finalizers
)
262 traverseproc traverse
;
263 PyGC_Head
*gc
= finalizers
->gc_next
;
264 for (; gc
!= finalizers
; gc
=gc
->gc_next
) {
265 /* careful, finalizers list is growing here */
266 traverse
= PyObject_FROM_GC(gc
)->ob_type
->tp_traverse
;
267 (void) traverse(PyObject_FROM_GC(gc
),
268 (visitproc
)visit_finalizer_reachable
,
274 debug_instance(char *msg
, PyInstanceObject
*inst
)
277 /* be careful not to create new dictionaries */
278 PyObject
*classname
= inst
->in_class
->cl_name
;
279 if (classname
!= NULL
&& PyString_Check(classname
))
280 cname
= PyString_AsString(classname
);
283 PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
288 debug_cycle(char *msg
, PyObject
*op
)
290 if ((debug
& DEBUG_INSTANCES
) && PyInstance_Check(op
)) {
291 debug_instance(msg
, (PyInstanceObject
*)op
);
293 else if (debug
& DEBUG_OBJECTS
) {
294 PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
295 msg
, op
->ob_type
->tp_name
, op
);
299 /* Handle uncollectable garbage (cycles with finalizers). */
301 handle_finalizers(PyGC_Head
*finalizers
, PyGC_Head
*old
)
304 if (garbage
== NULL
) {
305 garbage
= PyList_New(0);
307 for (gc
= finalizers
->gc_next
; gc
!= finalizers
;
308 gc
= finalizers
->gc_next
) {
309 PyObject
*op
= PyObject_FROM_GC(gc
);
310 if ((debug
& DEBUG_SAVEALL
) || PyInstance_Check(op
)) {
311 /* If SAVEALL is not set then just append
312 * instances to the list of garbage. We assume
313 * that all objects in the finalizers list are
314 * reachable from instances. */
315 PyList_Append(garbage
, op
);
317 /* object is now reachable again */
319 gc_list_append(gc
, old
);
323 /* Break reference cycles by clearing the containers involved. This is
324 * tricky business as the lists can be changing and we don't know which
325 * objects may be freed. It is possible I screwed something up here. */
327 delete_garbage(PyGC_Head
*unreachable
, PyGC_Head
*old
)
331 while (unreachable
->gc_next
!= unreachable
) {
332 PyGC_Head
*gc
= unreachable
->gc_next
;
333 PyObject
*op
= PyObject_FROM_GC(gc
);
334 if (debug
& DEBUG_SAVEALL
) {
335 PyList_Append(garbage
, op
);
338 if ((clear
= op
->ob_type
->tp_clear
) != NULL
) {
340 clear((PyObject
*)op
);
344 if (unreachable
->gc_next
== gc
) {
345 /* object is still alive, move it, it may die later */
347 gc_list_append(gc
, old
);
352 /* This is the main function. Read this to understand how the
353 * collection process works. */
355 collect(PyGC_Head
*young
, PyGC_Head
*old
)
360 PyGC_Head unreachable
;
361 PyGC_Head finalizers
;
364 if (debug
& DEBUG_STATS
) {
366 "gc: collecting generation %d...\n"
367 "gc: objects in each generation: %ld %ld %ld\n",
369 gc_list_size(&generation0
),
370 gc_list_size(&generation1
),
371 gc_list_size(&generation2
));
374 /* Using ob_refcnt and gc_refs, calculate which objects in the
375 * container set are reachable from outside the set (ie. have a
376 * refcount greater than 0 when all the references within the
377 * set are taken into account */
379 subtract_refs(young
);
381 /* Move everything reachable from outside the set into the
382 * reachable set (ie. gc_refs > 0). Next, move everything
383 * reachable from objects in the reachable set. */
384 gc_list_init(&reachable
);
385 move_roots(young
, &reachable
);
386 move_root_reachable(&reachable
);
388 /* move unreachable objects to a temporary list, new objects can be
389 * allocated after this point */
390 gc_list_init(&unreachable
);
391 gc_list_move(young
, &unreachable
);
393 /* move reachable objects to next generation */
394 gc_list_merge(&reachable
, old
);
396 /* Move objects reachable from finalizers, we can't safely delete
397 * them. Python programmers should take care not to create such
398 * things. For Python finalizers means instance objects with
399 * __del__ methods. */
400 gc_list_init(&finalizers
);
401 move_finalizers(&unreachable
, &finalizers
);
402 move_finalizer_reachable(&finalizers
);
404 /* Collect statistics on collectable objects found and print
405 * debugging information. */
406 for (gc
= unreachable
.gc_next
; gc
!= &unreachable
;
409 if (debug
& DEBUG_COLLECTABLE
) {
410 debug_cycle("collectable", PyObject_FROM_GC(gc
));
413 /* call tp_clear on objects in the collectable set. This will cause
414 * the reference cycles to be broken. It may also cause some objects in
415 * finalizers to be freed */
416 delete_garbage(&unreachable
, old
);
418 /* Collect statistics on uncollectable objects found and print
419 * debugging information. */
420 for (gc
= finalizers
.gc_next
; gc
!= &finalizers
;
423 if (debug
& DEBUG_UNCOLLECTABLE
) {
424 debug_cycle("uncollectable", PyObject_FROM_GC(gc
));
427 if (debug
& DEBUG_STATS
) {
428 if (m
== 0 && n
== 0) {
429 PySys_WriteStderr("gc: done.\n");
433 "gc: done, %ld unreachable, %ld uncollectable.\n",
438 /* Append instances in the uncollectable set to a Python
439 * reachable list of garbage. The programmer has to deal with
440 * this if they insist on creating this type of structure. */
441 handle_finalizers(&finalizers
, old
);
443 if (PyErr_Occurred()) {
444 if (gc_str
== NULL
) {
445 gc_str
= PyString_FromString("garbage collection");
447 PyErr_WriteUnraisable(gc_str
);
448 Py_FatalError("unexpected exception during garbage collection");
455 collect_generations(void)
457 static long collections0
= 0;
458 static long collections1
= 0;
462 if (collections1
> threshold2
) {
464 gc_list_merge(&generation0
, &generation2
);
465 gc_list_merge(&generation1
, &generation2
);
466 if (generation2
.gc_next
!= &generation2
) {
467 n
= collect(&generation2
, &generation2
);
471 else if (collections0
> threshold1
) {
474 gc_list_merge(&generation0
, &generation1
);
475 if (generation1
.gc_next
!= &generation1
) {
476 n
= collect(&generation1
, &generation2
);
483 if (generation0
.gc_next
!= &generation0
) {
484 n
= collect(&generation0
, &generation1
);
491 _PyGC_Insert(PyObject
*op
)
493 /* collection lock since collecting may cause allocations */
494 static int collecting
= 0;
497 if (!PyObject_IS_GC(op
)) {
501 if (allocated
> threshold0
&&
507 collect_generations();
511 gc_list_append(PyObject_AS_GC(op
), &generation0
);
515 _PyGC_Remove(PyObject
*op
)
517 PyGC_Head
*g
= PyObject_AS_GC(op
);
519 if (!PyObject_IS_GC(op
)) {
529 static char gc_enable__doc__
[] =
532 "Enable automatic garbage collection.\n"
536 gc_enable(PyObject
*self
, PyObject
*args
)
539 if (!PyArg_ParseTuple(args
, ":enable")) /* check no args */
548 static char gc_disable__doc__
[] =
549 "disable() -> None\n"
551 "Disable automatic garbage collection.\n"
555 gc_disable(PyObject
*self
, PyObject
*args
)
558 if (!PyArg_ParseTuple(args
, ":disable")) /* check no args */
567 static char gc_isenabled__doc__
[] =
568 "isenabled() -> status\n"
570 "Returns true if automatic garbage collection is enabled.\n"
574 gc_isenabled(PyObject
*self
, PyObject
*args
)
577 if (!PyArg_ParseTuple(args
, ":isenabled")) /* check no args */
580 return Py_BuildValue("i", enabled
);
583 static char gc_collect__doc__
[] =
586 "Run a full collection. The number of unreachable objects is returned.\n"
590 gc_collect(PyObject
*self
, PyObject
*args
)
594 if (!PyArg_ParseTuple(args
, ":collect")) /* check no args */
598 gc_list_merge(&generation0
, &generation2
);
599 gc_list_merge(&generation1
, &generation2
);
600 n
= collect(&generation2
, &generation2
);
602 return Py_BuildValue("l", n
);
605 static char gc_set_debug__doc__
[] =
606 "set_debug(flags) -> None\n"
608 "Set the garbage collection debugging flags. Debugging information is\n"
609 "written to sys.stderr.\n"
611 "flags is an integer and can have the following bits turned on:\n"
613 " DEBUG_STATS - Print statistics during collection.\n"
614 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
615 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
616 " DEBUG_INSTANCES - Print instance objects.\n"
617 " DEBUG_OBJECTS - Print objects other than instances.\n"
618 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
619 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n"
623 gc_set_debug(PyObject
*self
, PyObject
*args
)
625 if (!PyArg_ParseTuple(args
, "i:set_debug", &debug
))
632 static char gc_get_debug__doc__
[] =
633 "get_debug() -> flags\n"
635 "Get the garbage collection debugging flags.\n"
639 gc_get_debug(PyObject
*self
, PyObject
*args
)
641 if (!PyArg_ParseTuple(args
, ":get_debug")) /* no args */
644 return Py_BuildValue("i", debug
);
647 static char gc_set_thresh__doc__
[] =
648 "set_threshold(threshold0, [threhold1, threshold2]) -> None\n"
650 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
655 gc_set_thresh(PyObject
*self
, PyObject
*args
)
657 if (!PyArg_ParseTuple(args
, "i|ii:set_threshold", &threshold0
,
658 &threshold1
, &threshold2
))
665 static char gc_get_thresh__doc__
[] =
666 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
668 "Return the current collection thresholds\n"
672 gc_get_thresh(PyObject
*self
, PyObject
*args
)
674 if (!PyArg_ParseTuple(args
, ":get_threshold")) /* no args */
677 return Py_BuildValue("(iii)", threshold0
, threshold1
, threshold2
);
681 static char gc__doc__
[] =
682 "This module provides access to the garbage collector for reference cycles.\n"
684 "enable() -- Enable automatic garbage collection.\n"
685 "disable() -- Disable automatic garbage collection.\n"
686 "isenabled() -- Returns true if automatic collection is enabled.\n"
687 "collect() -- Do a full collection right now.\n"
688 "set_debug() -- Set debugging flags.\n"
689 "get_debug() -- Get debugging flags.\n"
690 "set_threshold() -- Set the collection thresholds.\n"
691 "get_threshold() -- Return the current the collection thresholds.\n"
694 static PyMethodDef GcMethods
[] = {
695 {"enable", gc_enable
, METH_VARARGS
, gc_enable__doc__
},
696 {"disable", gc_disable
, METH_VARARGS
, gc_disable__doc__
},
697 {"isenabled", gc_isenabled
, METH_VARARGS
, gc_isenabled__doc__
},
698 {"set_debug", gc_set_debug
, METH_VARARGS
, gc_set_debug__doc__
},
699 {"get_debug", gc_get_debug
, METH_VARARGS
, gc_get_debug__doc__
},
700 {"set_threshold", gc_set_thresh
, METH_VARARGS
, gc_set_thresh__doc__
},
701 {"get_threshold", gc_get_thresh
, METH_VARARGS
, gc_get_thresh__doc__
},
702 {"collect", gc_collect
, METH_VARARGS
, gc_collect__doc__
},
703 {NULL
, NULL
} /* Sentinel */
712 m
= Py_InitModule4("gc",
717 d
= PyModule_GetDict(m
);
718 if (garbage
== NULL
) {
719 garbage
= PyList_New(0);
721 PyDict_SetItemString(d
, "garbage", garbage
);
722 PyDict_SetItemString(d
, "DEBUG_STATS",
723 PyInt_FromLong(DEBUG_STATS
));
724 PyDict_SetItemString(d
, "DEBUG_COLLECTABLE",
725 PyInt_FromLong(DEBUG_COLLECTABLE
));
726 PyDict_SetItemString(d
, "DEBUG_UNCOLLECTABLE",
727 PyInt_FromLong(DEBUG_UNCOLLECTABLE
));
728 PyDict_SetItemString(d
, "DEBUG_INSTANCES",
729 PyInt_FromLong(DEBUG_INSTANCES
));
730 PyDict_SetItemString(d
, "DEBUG_OBJECTS",
731 PyInt_FromLong(DEBUG_OBJECTS
));
732 PyDict_SetItemString(d
, "DEBUG_SAVEALL",
733 PyInt_FromLong(DEBUG_SAVEALL
));
734 PyDict_SetItemString(d
, "DEBUG_LEAK",
735 PyInt_FromLong(DEBUG_LEAK
));
738 #endif /* WITH_CYCLE_GC */