/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.

        Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/
/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */
/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */
/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
#define WITH_MALLOC_HOOKS               /* for profiling & debugging */
/*==========================================================================*/

/*
 * Public functions exported by this allocator.
 *
 * -- Define and use these names in your code to obtain or release memory --
 */
#define _THIS_MALLOC            PyCore_OBJECT_MALLOC_FUNC
#define _THIS_CALLOC            /* unused */
#define _THIS_REALLOC           PyCore_OBJECT_REALLOC_FUNC
#define _THIS_FREE              PyCore_OBJECT_FREE_FUNC
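
/*
 * Illustrative usage sketch (hypothetical code, not part of this file):
 * memory obtained through _THIS_MALLOC/_THIS_REALLOC must be released
 * with _THIS_FREE and never with the underlying allocator's free(), to
 * avoid mixing allocators. The buffer size below is arbitrary.
 */
#if 0   /* example only */
static void
_example_usage(void)
{
        char *buf = (char *)_THIS_MALLOC(100);

        if (buf != NULL)
                _THIS_FREE(buf);
}
#endif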
/*
 * Underlying allocator's functions called by this allocator.
 * The underlying allocator is usually the one which comes with libc.
 *
 * -- Don't use these functions in your code (to avoid mixing allocators) --
 *
 * Redefine these __only__ if you are using a 3rd party general purpose
 * allocator which exports functions with names _other_ than the standard
 * malloc, calloc, realloc, free.
 */
#define _SYSTEM_MALLOC          PyCore_MALLOC_FUNC
#define _SYSTEM_CALLOC          /* unused */
#define _SYSTEM_REALLOC         PyCore_REALLOC_FUNC
#define _SYSTEM_FREE            PyCore_FREE_FUNC
/*
 * If malloc hooks are needed, names of the hooks' set & fetch
 * functions exported by this allocator.
 */
#ifdef WITH_MALLOC_HOOKS
#define _SET_HOOKS              _PyCore_ObjectMalloc_SetHooks
#define _FETCH_HOOKS            _PyCore_ObjectMalloc_FetchHooks
#endif

/*==========================================================================*/
/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than 256 bytes are routed to the system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which can degenerate over time. To
 * avoid this, we partition each free list into pools and we share the
 * reserved space dynamically between all free lists. This technique is
 * quite efficient for memory-intensive programs which allocate mainly
 * small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *        ...                   ...                     ...
 *      241-248                 248                      30
 *      249-256                 256                      31
 *
 *      0, 257 and up: routed to the underlying allocator.
 */
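
/*
 * Illustrative sketch of the table above (not part of the allocator; the
 * helper name is hypothetical). With size classes spaced 8 bytes apart,
 * the size class index and the rounded block size are simple shift
 * arithmetic, exactly as done in _THIS_MALLOC below.
 */
#if 0   /* example only */
static void
_example_size_class(size_t nbytes)      /* assume 1 <= nbytes <= 256 */
{
        unsigned int idx  = (unsigned int)(nbytes - 1) >> 3;   /* e.g. 25..32 -> 3  */
        unsigned int size = (idx + 1) << 3;                    /* e.g.      3 -> 32 */

        (void)size;     /* 'size' is the block size actually handed out */
}
#endif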
/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */
/*
 * Alignment of addresses returned to the user. 8-byte alignment works
 * on most current architectures (with 32-bit or 64-bit address buses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */

#define ALIGNMENT               8       /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#define ALIGNMENT_MASK          (ALIGNMENT - 1)
/*
 * Max size threshold below which malloc requests are considered to be
 * small enough to use preallocated memory pools. You can tune this value
 * according to your application behaviour and memory needs.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
 *      2) SMALL_REQUEST_THRESHOLD == N * ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD be set to a power of 2.
 */

/*
 * For Python compiled on systems with 32-bit pointers and integers,
 * a value of 64 (= 8 * 8) is a reasonable speed/space tradeoff for
 * the object allocator. To adjust this threshold automatically for
 * systems with 64-bit pointers, we make this setting depend on a
 * Python-specific slot size unit = sizeof(long) + sizeof(void *),
 * which is expected to be 8, 12 or 16 bytes.
 */

#define _PYOBJECT_THRESHOLD     ((SIZEOF_LONG + SIZEOF_VOID_P) * ALIGNMENT)

#define SMALL_REQUEST_THRESHOLD _PYOBJECT_THRESHOLD /* must be N * ALIGNMENT */

#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
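
/*
 * Worked example (informative only): on a typical 32-bit platform
 * (SIZEOF_LONG == 4, SIZEOF_VOID_P == 4) the threshold evaluates to
 * (4 + 4) * 8 = 64 bytes, i.e. 8 size classes; on a typical 64-bit
 * platform (SIZEOF_LONG == 8, SIZEOF_VOID_P == 8) it evaluates to
 * (8 + 8) * 8 = 128 bytes, i.e. 16 size classes. The invariants above
 * could be checked at build time with something like:
 */
#if 0   /* example only */
#if (SMALL_REQUEST_THRESHOLD % ALIGNMENT) != 0
#error "SMALL_REQUEST_THRESHOLD must be a multiple of ALIGNMENT"
#endif
#if SMALL_REQUEST_THRESHOLD > 256
#error "SMALL_REQUEST_THRESHOLD must not exceed 256"
#endif
#endif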
/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be.
 */

#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)
/*
 * Maximum amount of memory managed by the allocator for small requests.
 */

#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif
/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc call). In no way does this mean
 * that the memory arenas will be used entirely. A malloc(<Big>) is usually
 * an address range reservation for <Big> bytes, unless all pages within this
 * space are referenced subsequently. So malloc'ing big blocks and not using
 * them does not mean "wasting memory". It's an addressable range wastage...
 *
 * Therefore, allocating arenas with malloc is not optimal, because there is
 * some address space wastage, but this is the most portable way to request
 * memory from the system across various platforms.
 */

#define ARENA_SIZE              (256 * 1024 - SYSTEM_PAGE_SIZE) /* 256k - 1p */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif
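
/*
 * Illustrative sketch (not part of the allocator; the helper name is
 * hypothetical): to obtain ARENA_SIZE bytes starting on a page boundary
 * from a plain malloc, over-allocate by one page and advance the returned
 * address to the next page boundary -- the same arithmetic _THIS_MALLOC
 * uses below when it carves a new arena.
 */
#if 0   /* example only */
static char *
_example_new_arena(void)
{
        char *bp = (char *)_SYSTEM_MALLOC(ARENA_SIZE + SYSTEM_PAGE_SIZE);

        if (bp == NULL)
                return NULL;
        /* advance to the next page boundary; up to one page is "lost" */
        return bp + (SYSTEM_PAGE_SIZE -
                     ((off_t)bp & SYSTEM_PAGE_SIZE_MASK));
}
#endif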
/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k or possibly 8k.
 */

#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK
#define POOL_MAGIC              0x74D3A651              /* authentication id */

#define ARENA_NB_POOLS          (ARENA_SIZE / POOL_SIZE)
#define ARENA_NB_PAGES          (ARENA_SIZE / SYSTEM_PAGE_SIZE)

/*
 * -- End of tunable settings section --
 */
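
/*
 * Worked example with the defaults above (informative only): ARENA_SIZE is
 * 256K - 4K = 258048 bytes, so each arena yields ARENA_NB_POOLS = 63 pools
 * of POOL_SIZE = 4K, each of which serves blocks of a single size class.
 */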
/*==========================================================================*/

/*
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking. I'm not positive,
 * however, whether it's worth switching to such a locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration             */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock       */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock               */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock               */
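
/*
 * Hypothetical sketch (not used here, since locking is disabled above):
 * on a platform where this allocator really had to be thread-safe, the
 * SIMPLELOCK macros could be mapped onto POSIX mutexes instead of the
 * empty definitions above, roughly like this.
 */
#if 0   /* example only */
#include <pthread.h>
#define SIMPLELOCK_DECL(lock)   static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER
#define SIMPLELOCK_INIT(lock)   /* statically initialized above */
#define SIMPLELOCK_FINI(lock)   pthread_mutex_destroy(&(lock))
#define SIMPLELOCK_LOCK(lock)   pthread_mutex_lock(&(lock))
#define SIMPLELOCK_UNLOCK(lock) pthread_mutex_unlock(&(lock))
#endif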
/*
 * Basic types
 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 */

#undef  uchar
#define uchar   unsigned char   /* assuming == 8 bits  */

#undef  ushort
#define ushort  unsigned short  /* assuming >= 16 bits */

#undef  uint
#define uint    unsigned int    /* assuming >= 16 bits */

#undef  ulong
#define ulong   unsigned long   /* assuming >= 32 bits */

#undef  off_t
#define off_t   uint            /* 16 bits <= off_t <= 64 bits */
/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;

/* Pool for small blocks */
struct pool_header {
        union { block *_padding;
                uint count; } ref;      /* number of allocated blocks    */
        block *freeblock;               /* pool's free list head         */
        struct pool_header *nextpool;   /* next pool of this size class  */
        struct pool_header *prevpool;   /* previous pool       ""        */
        struct pool_header *pooladdr;   /* pool address (always aligned) */
        uint magic;                     /* pool magic number             */
        uint szidx;                     /* block size class index        */
        uint capacity;                  /* pool capacity in # of blocks  */
};

typedef struct pool_header *poolp;

#define ROUNDUP(x)              (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
#define POOL_OVERHEAD           ROUNDUP(sizeof(struct pool_header))

#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
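
/*
 * Illustrative sketch (not part of the allocator; the helper name is
 * hypothetical): because every pool starts on a POOL_SIZE boundary, the
 * pool header of any small block can be recovered by masking the low bits
 * of the block's address and then authenticated via pooladdr/magic. This
 * is the same test _THIS_FREE and _THIS_REALLOC perform below to decide
 * whether a pointer belongs to this allocator.
 */
#if 0   /* example only */
static poolp
_example_pool_of(void *p)
{
        poolp pool = (poolp)((block *)p - ((off_t)p & POOL_SIZE_MASK));

        if (pool->pooladdr == pool && pool->magic == (uint)POOL_MAGIC)
                return pool;            /* small block managed here */
        return NULL;                    /* foreign block (system malloc) */
}
#endif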
/*==========================================================================*/

SIMPLELOCK_DECL(_malloc_lock);
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)
/*
 * Pool table -- doubly linked lists of partially used pools
 */
#define PTA(x)          ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)           PTA(x), PTA(x)
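
/*
 * Note on the PTA() trick above: each pair of consecutive usedpools[]
 * entries serves as the list header for one size class. PTA(x) points
 * 2*sizeof(block *) bytes *before* &usedpools[2*x], so that -- given the
 * pool_header layout above, where ref and freeblock occupy the first two
 * pointer-sized slots -- the nextpool and prevpool fields of that fake
 * header fall exactly on usedpools[2*x] and usedpools[2*x+1]. A size
 * class list is therefore empty when pool == pool->nextpool, which is
 * the test used in _THIS_MALLOC.
 */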
static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
        PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
        , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
        , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
        , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
        , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
        , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
        , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
        , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES > 8 */
};
/*
 * Free (cached) pools
 */
static poolp freepools = NULL;          /* free list for cached pools */

/*
 * Arenas
 */
static uint arenacnt = 0;               /* number of allocated arenas */
static uint watermark = ARENA_NB_POOLS; /* number of pools allocated from
                                           the current arena */
static block *arenalist = NULL;         /* list of allocated arenas */
static block *arenabase = NULL;         /* free space start address in
                                           the current arena */
/*
 * Hooks
 */
#ifdef WITH_MALLOC_HOOKS
static void *(*malloc_hook)(size_t) = NULL;
static void *(*calloc_hook)(size_t, size_t) = NULL;
static void *(*realloc_hook)(void *, size_t) = NULL;
static void (*free_hook)(void *) = NULL;
#endif /* !WITH_MALLOC_HOOKS */
/*==========================================================================*/

/* malloc */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases and
 * improves branch prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */
static void *
_THIS_MALLOC(size_t nbytes)
{
        block *bp;
        poolp pool;
        poolp next;
        uint size;

#ifdef WITH_MALLOC_HOOKS
        if (malloc_hook != NULL)
                return (*malloc_hook)(nbytes);
#endif

        /*
         * This implicitly redirects malloc(0)
         */
        if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
                LOCK();
                /*
                 * Most frequent paths first
                 */
                size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
                pool = usedpools[size + size];
                if (pool != pool->nextpool) {
                        /*
                         * There is a used pool for this size class.
                         * Pick up the head block of its free list.
                         */
                        ++pool->ref.count;
                        bp = pool->freeblock;
                        if ((pool->freeblock = *(block **)bp) != NULL) {
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Reached the end of the free list, try to extend it
                         */
                        if (pool->ref.count < pool->capacity) {
                                /*
                                 * There is room for another block
                                 */
                                size++;
                                size <<= ALIGNMENT_SHIFT; /* block size */
                                pool->freeblock = (block *)pool + \
                                                  POOL_OVERHEAD + \
                                                  pool->ref.count * size;
                                *(block **)(pool->freeblock) = NULL;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Pool is full, unlink from used pools
                         */
                        next = pool->nextpool;
                        pool = pool->prevpool;
                        next->prevpool = pool;
                        pool->nextpool = next;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Try to get a cached free pool
                 */
                pool = freepools;
                if (pool != NULL) {
                        /*
                         * Unlink from cached pools
                         */
                        freepools = pool->nextpool;
                init_pool:
                        /*
                         * Frontlink to used pools
                         */
                        next = usedpools[size + size]; /* == prev */
                        pool->nextpool = next;
                        pool->prevpool = next;
                        next->nextpool = pool;
                        next->prevpool = pool;
                        pool->ref.count = 1;
                        if (pool->szidx == size) {
                                /*
                                 * Luckily, this pool last contained blocks
                                 * of the same size class, so its header
                                 * and free list are already initialized.
                                 */
                                bp = pool->freeblock;
                                pool->freeblock = *(block **)bp;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Initialize the pool header and free list
                         * then return the first block.
                         */
                        pool->szidx = size;
                        size++;
                        size <<= ALIGNMENT_SHIFT; /* block size */
                        bp = (block *)pool + POOL_OVERHEAD;
                        pool->freeblock = bp + size;
                        *(block **)(pool->freeblock) = NULL;
                        pool->capacity = (POOL_SIZE - POOL_OVERHEAD) / size;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Allocate a new pool
                 */
                if (watermark < ARENA_NB_POOLS) {
                        /* commit malloc(POOL_SIZE) from the current arena */
                commit_pool:
                        watermark++;
                        pool = (poolp)arenabase;
                        arenabase += POOL_SIZE;
                        pool->pooladdr = pool;
                        pool->magic = (uint)POOL_MAGIC;
                        pool->szidx = DUMMY_SIZE_IDX;
                        goto init_pool;
                }
                /*
                 * Allocate a new arena
                 */
#ifdef WITH_MEMORY_LIMITS
                if (!(arenacnt < MAX_ARENAS)) {
                        UNLOCK();
                        goto redirect;
                }
#endif
                /*
                 * With malloc, we can't avoid losing one page of address
                 * space per arena due to the required alignment on page
                 * boundaries.
                 */
                bp = (block *)_SYSTEM_MALLOC(ARENA_SIZE + SYSTEM_PAGE_SIZE);
                if (bp == NULL) {
                        UNLOCK();
                        goto redirect;
                }
                /*
                 * Keep a reference in the list of allocated arenas. We might
                 * want to release (some of) them in the future. The first
                 * word is never used, no matter whether the returned address
                 * is page-aligned or not, so we safely store a pointer in it.
                 */
                *(block **)bp = arenalist;
                arenalist = bp;
                arenacnt++;
                watermark = 0;
                arenabase = bp + (SYSTEM_PAGE_SIZE -
                                  ((off_t)bp & SYSTEM_PAGE_SIZE_MASK));
                goto commit_pool;
        }

        /* The small block allocator ends here. */

redirect:

        /*
         * Redirect the original request to the underlying (libc) allocator.
         * We jump here on bigger requests, on error in the code above (as a
         * last chance to serve the request) or when the max memory limit
         * is reached.
         */
        return (void *)_SYSTEM_MALLOC(nbytes);
}
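
/*
 * Summary of the paths above, most frequent first: (1) serve from a used
 * pool of the right size class; (2) otherwise re-use a cached free pool;
 * (3) otherwise commit a fresh pool from the current arena; (4) otherwise
 * malloc a new arena and commit its first pool; (5) on failure, on
 * requests larger than SMALL_REQUEST_THRESHOLD, or when the optional
 * memory limit is hit, fall through to the system allocator.
 */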
/* free */

static void
_THIS_FREE(void *p)
{
        poolp pool;
        poolp next, prev;
        uint size;
        off_t offset;

#ifdef WITH_MALLOC_HOOKS
        if (free_hook != NULL) {
                (*free_hook)(p);
                return;
        }
#endif

        if (p == NULL)  /* free(NULL) has no effect */
                return;

        offset = (off_t)p & POOL_SIZE_MASK;
        pool = (poolp)((block *)p - offset);
        if (pool->pooladdr != pool || pool->magic != (uint)POOL_MAGIC) {
                _SYSTEM_FREE(p);
                return;
        }

        LOCK();
        /*
         * At this point, the pool is not empty
         */
        if ((*(block **)p = pool->freeblock) == NULL) {
                /*
                 * Pool was full
                 */
                pool->freeblock = (block *)p;
                --pool->ref.count;
                /*
                 * Frontlink to used pools
                 * This mimics LRU pool usage for new allocations and
                 * targets optimal filling when several pools contain
                 * blocks of the same size class.
                 */
                size = pool->szidx;
                next = usedpools[size + size];
                prev = next->prevpool;
                pool->nextpool = next;
                pool->prevpool = prev;
                next->prevpool = pool;
                prev->nextpool = pool;
                UNLOCK();
                return;
        }
        /*
         * Pool was not full
         */
        pool->freeblock = (block *)p;
        if (--pool->ref.count != 0) {
                UNLOCK();
                return;
        }
        /*
         * Pool is now empty, unlink from used pools
         */
        next = pool->nextpool;
        prev = pool->prevpool;
        next->prevpool = prev;
        prev->nextpool = next;
        /*
         * Frontlink to free pools
         * This ensures that previously freed pools will be allocated
         * later (being not referenced, they are perhaps paged out).
         */
        pool->nextpool = freepools;
        freepools = pool;
        UNLOCK();
        return;
}
/* realloc */

static void *
_THIS_REALLOC(void *p, size_t nbytes)
{
        block *bp;
        poolp pool;
        uint size;

#ifdef WITH_MALLOC_HOOKS
        if (realloc_hook != NULL)
                return (*realloc_hook)(p, nbytes);
#endif

        if (p == NULL)
                return _THIS_MALLOC(nbytes);

        /* realloc(p, 0) on big blocks is redirected. */
        pool = (poolp)((block *)p - ((off_t)p & POOL_SIZE_MASK));
        if (pool->pooladdr != pool || pool->magic != (uint)POOL_MAGIC) {
                /* We haven't allocated this block */
                if (!(nbytes > SMALL_REQUEST_THRESHOLD) && nbytes) {
                        /* small request */
                        size = nbytes;
                        goto malloc_copy_free;
                }
                bp = (block *)_SYSTEM_REALLOC(p, nbytes);
        }
        else {
                /* We're in charge of this block */
                size = (pool->szidx + 1) << ALIGNMENT_SHIFT; /* block size */
                if (size >= nbytes) {
                        /* Don't bother if a smaller size was requested
                           except for realloc(p, 0) == free(p), ret NULL */
                        if (nbytes == 0) {
                                _THIS_FREE(p);
                                bp = NULL;
                        }
                        else
                                bp = (block *)p;
                }
                else {

                malloc_copy_free:

                        bp = (block *)_THIS_MALLOC(nbytes);
                        if (bp != NULL) {
                                memcpy(bp, p, size);
                                _THIS_FREE(p);
                        }
                }
        }
        return (void *)bp;
}

/* calloc */

/* -- unused --
static void *
_THIS_CALLOC(size_t nbel, size_t elsz)
{
        void *p;
        size_t nbytes;

#ifdef WITH_MALLOC_HOOKS
        if (calloc_hook != NULL)
                return (*calloc_hook)(nbel, elsz);
#endif

        nbytes = nbel * elsz;
        p = _THIS_MALLOC(nbytes);
        if (p != NULL)
                memset(p, 0, nbytes);
        return p;
}
*/
/*==========================================================================*/

/*
 * -- The malloc hooks --
 */

#ifdef WITH_MALLOC_HOOKS

void
_SET_HOOKS( void *(*malloc_func)(size_t),
            void *(*calloc_func)(size_t, size_t),
            void *(*realloc_func)(void *, size_t),
            void (*free_func)(void *) )
{
        LOCK();
        malloc_hook = malloc_func;
        calloc_hook = calloc_func;
        realloc_hook = realloc_func;
        free_hook = free_func;
        UNLOCK();
}

void
_FETCH_HOOKS( void *(**malloc_funcp)(size_t),
              void *(**calloc_funcp)(size_t, size_t),
              void *(**realloc_funcp)(void *, size_t),
              void (**free_funcp)(void *) )
{
        LOCK();
        *malloc_funcp = malloc_hook;
        *calloc_funcp = calloc_hook;
        *realloc_funcp = realloc_hook;
        *free_funcp = free_hook;
        UNLOCK();
}

#endif /* !WITH_MALLOC_HOOKS */
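
/*
 * Illustrative sketch (hypothetical code, not part of this file): the
 * hooks completely replace the entry points above, so a profiling hook
 * that still wants the pool allocator to do the real work must drop the
 * hooks around the nested call (Python's threads are serialized here, so
 * this is safe). Only malloc and free are counted, for brevity.
 */
#if 0   /* example only */
static unsigned long nmallocs, nfrees;

static void *_counting_malloc(size_t);
static void _counting_free(void *);

static void
_install_counting_hooks(void)
{
        _SET_HOOKS(_counting_malloc, NULL, NULL, _counting_free);
}

static void *
_counting_malloc(size_t nbytes)
{
        void *p;

        nmallocs++;
        _SET_HOOKS(NULL, NULL, NULL, NULL);     /* reach the real allocator */
        p = _THIS_MALLOC(nbytes);
        _install_counting_hooks();              /* re-arm the hooks */
        return p;
}

static void
_counting_free(void *p)
{
        nfrees++;
        _SET_HOOKS(NULL, NULL, NULL, NULL);
        _THIS_FREE(p);
        _install_counting_hooks();
}
#endif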