/******************************************************************************
 * A fully recycling epoch-based garbage collector. Works by counting
 * threads in and out of critical regions, to work out when
 * garbage queues can be fully deleted.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.  Neither the name of the Keir Fraser
 *    nor the names of its contributors may be used to endorse or
 *    promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <afsconfig.h>
#include <afs/param.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>
#include <sys/mman.h>

#include "portable_defns.h"
#include "gc.h"
#include "ptst.h"

#include <afs/afsutil.h>

/*#define MINIMAL_GC*/
/*#define YIELD_TO_HELP_PROGRESS*/

/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
#define INVALID_BYTE 0
#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));

/* Number of unique block sizes we can deal with. Equivalently, the
 * number of unique object caches which can be created. */

/*
 * The initial number of allocation chunks for each per-blocksize list.
 * Popular allocation lists will steadily increase the allocation unit
 * in line with demand.
 */
#define ALLOC_CHUNKS_PER_LIST 10

/*
 * How many times should a thread call gc_enter(), seeing the same epoch
 * each time, before it makes a reclaim attempt?
 */
#define ENTRIES_PER_RECLAIM_ATTEMPT 100

/*
 * 0: current epoch -- threads are moving to this;
 * -1: some threads may still throw garbage into this epoch;
 * -2: no threads can see this epoch => we can zero garbage lists;
 * -3: all threads see zeros in these garbage lists => move to alloc lists.
 */
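
/*
 * Illustration: gc_reclaim() below walks this window by indexing the
 * per-epoch lists modulo NR_EPOCHS:
 *
 *     two_ago   = (curr_epoch + 2) % NR_EPOCHS;   -- lists to clean out
 *     three_ago = (curr_epoch + 1) % NR_EPOCHS;   -- lists to recycle
 *
 * For example, if NR_EPOCHS were 3 and curr_epoch == 0, garbage filed under
 * epoch 2 would be cleaned out and garbage filed under epoch 1 would move
 * back to the allocation lists.
 */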

/*
 * A chunk amortises the cost of allocation from shared lists. It also
 * helps when zeroing nodes, as it increases per-cacheline pointer density
 * and means that node locations don't need to be brought into the cache
 * (most architectures have a non-temporal store instruction).
 */
#define BLKS_PER_CHUNK 100
typedef struct chunk_st chunk_t;
struct chunk_st
{
    chunk_t      *next;              /* chunk chaining */
    unsigned int  i;                 /* the next entry in blk[] to use */
    void         *blk[BLKS_PER_CHUNK];
};
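
/*
 * Illustration: a chunk behaves as a small stack of block pointers, with i
 * counting the slots in use; gc_free() pushes and gc_alloc() pops:
 *
 *     ch->blk[ch->i++] = p;        -- file a block as garbage
 *     p = ch->blk[--ch->i];        -- hand out a free block
 *
 * A chunk with i == BLKS_PER_CHUNK is full; one with i == 0 is empty.
 */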

static struct gc_global_st
{
    /* The current epoch. */
    VOLATILE unsigned int current;

    /* Exclusive access to gc_reclaim(). */
    VOLATILE unsigned int inreclaim;

    /* Allocator caches currently defined. */
    VOLATILE unsigned int n_allocators;

    /*
     * RUN-TIME CONSTANTS (to first approximation)
     */

    /* Memory page size, in bytes. */
    unsigned int page_size;

    /* Node sizes (run-time constants). */
    int nr_sizes;
    int blk_sizes[MAX_SIZES];

    /* Tags (trace support). */
    char *tags[MAX_SIZES];

    /* Registered epoch hooks. */
    int nr_hooks;
    hook_fn_t hook_fns[MAX_HOOKS];

    /*
     * DATA WE MAY HIT HARD
     */

    /* Chain of free, empty chunks. */
    chunk_t * VOLATILE free_chunks;

    /* Main allocation lists. */
    chunk_t * VOLATILE alloc[MAX_SIZES];
    VOLATILE unsigned int alloc_size[MAX_SIZES];

    VOLATILE unsigned int total_size;
    VOLATILE unsigned int allocations;
} gc_global;

/* Per-thread state. */
struct gc_st
{
    /* Epoch that this thread sees. */
    unsigned int epoch;

    /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
    unsigned int entries_since_reclaim;

#ifdef YIELD_TO_HELP_PROGRESS
    /* Number of calls to gc_reclaim() since we last yielded. */
    unsigned int reclaim_attempts_since_yield;
#endif

    /* Used by gc_async_barrier(). */
    void *async_page;
    int   async_page_state;

    /* Garbage lists. */
    chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
    chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
    chunk_t *chunk_cache;

    /* Local allocation lists. */
    chunk_t      *alloc[MAX_SIZES];
    unsigned int  alloc_chunks[MAX_SIZES];

    /* Hook pointer lists. */
    chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
};

#define MEM_FAIL(_s)                                                          \
do {                                                                          \
    fprintf(stderr, "OUT OF MEMORY: %d bytes at line %d\n", (_s), __LINE__);  \
    exit(1);                                                                  \
} while ( 0 )

/* Allocate more empty chunks from the heap. */
#define CHUNKS_PER_ALLOC 1000
static chunk_t *alloc_more_chunks(void)
{
    int i;
    chunk_t *h, *p;

    ViceLog(11, ("GC: alloc_more_chunks alloc %lu chunks\n",
                 CHUNKS_PER_ALLOC));

    h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
    if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));

    /* Link the chunks into a circular list. */
    for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
    {
        p->next = p + 1;
        p++;
    }

    p->next = h;

    return(h);
}

/* Put a chain of chunks onto a list. */
static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
{
    chunk_t *h_next, *new_h_next, *ch_next;
    ch_next    = ch->next;
    new_h_next = head->next;
    do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
    while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
}
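
/*
 * Illustration: the CASPO retry loop above is a lock-free push -- on
 * failure, CASPO returns the head->next value it observed, which becomes
 * the next candidate link.  A typical caller splices a freshly allocated
 * chain onto the shared free list:
 *
 *     add_chunks_to_list(alloc_more_chunks(), gc_global.free_chunks);
 */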

/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
static chunk_t *get_empty_chunks(int n)
{
    int i;
    chunk_t *new_rh, *rh, *rt, *head;

 retry:
    head = gc_global.free_chunks;
    new_rh = head->next;
    do {
        rh = new_rh;
        rt = head;
        WEAK_DEP_ORDER_RMB();
        for ( i = 0; i < n; i++ )
        {
            if ( (rt = rt->next) == head )
            {
                /* Allocate some more chunks. */
                add_chunks_to_list(alloc_more_chunks(), head);
                goto retry;
            }
        }
    }
    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );

    rt->next = rh;
    return(rh);
}

/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
static chunk_t *get_filled_chunks(int n, int sz)
{
    chunk_t *h, *p;
    char *node;
    int i;

    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
    ADD_TO(gc_global.allocations, 1);

    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
#ifdef WEAK_MEM_ORDER
    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
#endif

    h = p = get_empty_chunks(n);
    do {
        p->i = BLKS_PER_CHUNK;
        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
        {
            p->blk[i] = node;
            node += sz;
        }
    }
    while ( (p = p->next) != h );

    return(h);
}

/*
 * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
 * this by causing a TLB shootdown to be propagated to all other processors.
 * Each time such an action is required, this function calls:
 *   mprotect(async_page, <page size>, <new flags>)
 * Each thread's state contains a memory page dedicated for this purpose.
 */
#ifdef WEAK_MEM_ORDER
static void gc_async_barrier(gc_t *gc)
{
    mprotect(gc->async_page, gc_global.page_size,
             gc->async_page_state ? PROT_READ : PROT_NONE);
    gc->async_page_state = !gc->async_page_state;
}
#else
#define gc_async_barrier(_g) ((void)0)
#endif

/* Grab a level @i allocation chunk from main chain. */
static chunk_t *get_alloc_chunk(gc_t *gc, int i)
{
    chunk_t *alloc, *p, *new_p, *nh;
    unsigned int sz;

    alloc = gc_global.alloc[i];
    new_p = alloc->next;

    do {
        p = new_p;
        while ( p == alloc )
        {
            sz = gc_global.alloc_size[i];
            nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
            ADD_TO(gc_global.alloc_size[i], sz >> 3);
            gc_async_barrier(gc);
            add_chunks_to_list(nh, alloc);
            p = new_p = alloc->next;
        }
        WEAK_DEP_ORDER_RMB();
    }
    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );

    p->next = p;
    assert(p->i == BLKS_PER_CHUNK);
    return(p);
}

#ifndef MINIMAL_GC
/*
 * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest
 * maximum epoch number seen by a thread that's in the list code. If it's the
 * current epoch, the "nearly-free" lists from the previous epoch are
 * reclaimed, and the epoch is incremented.
 */
static void gc_reclaim(void)
{
    ptst_t       *ptst, *first_ptst, *our_ptst = NULL;
    gc_t         *gc = NULL;
    unsigned long curr_epoch;
    chunk_t      *ch, *t;
    int           two_ago, three_ago, i, j;

    ViceLog(11, ("GC: gc_reclaim enter\n"));

    /* Barrier to entering the reclaim critical section. */
    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;

    ViceLog(11, ("GC: gc_reclaim after inreclaim barrier\n"));

    /*
     * Grab first ptst structure *before* barrier -- prevent bugs
     * on weak-ordered architectures.
     */
    first_ptst = ptst_first();
    MB();
    curr_epoch = gc_global.current;

    /* Have all threads seen the current epoch, or not in mutator code? */
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
    }

    ViceLog(11, ("GC: gc_reclaim all-threads see current epoch\n"));

    /*
     * Three-epoch-old garbage lists move to allocation lists.
     * Two-epoch-old garbage lists are cleaned out.
     */
    two_ago   = (curr_epoch+2) % NR_EPOCHS;
    three_ago = (curr_epoch+1) % NR_EPOCHS;
    if ( gc_global.nr_hooks != 0 )
        our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        gc = ptst->gc;

        for ( i = 0; i < gc_global.nr_sizes; i++ )
        {
#ifdef WEAK_MEM_ORDER
            int sz = gc_global.blk_sizes[i];
            if ( gc->garbage[two_ago][i] != NULL )
            {
                chunk_t *head = gc->garbage[two_ago][i];
                ch = head;
                do {
                    for ( j = 0; j < ch->i; j++ )
                        INITIALISE_NODES(ch->blk[j], sz);
                }
                while ( (ch = ch->next) != head );
            }
#endif

            /* NB. Leave one chunk behind, as it is probably not yet full. */
            t = gc->garbage[three_ago][i];
            if ( (t == NULL) || ((ch = t->next) == t) ) continue;
            gc->garbage_tail[three_ago][i]->next = ch;
            gc->garbage_tail[three_ago][i] = t;
            t->next = t;

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while (ch_next->next && (ch_next->next != ch_head)
                         && (ch_next = ch_next->next));

                ViceLog(11, ("GC: return %d chunks of size %d to "
                             "gc_global.alloc[%d]\n",
                             r_len,
                             gc_global.blk_sizes[i],
                             i));
            }

            add_chunks_to_list(ch, gc_global.alloc[i]);
        }

        for ( i = 0; i < gc_global.nr_hooks; i++ )
        {
            hook_fn_t fn = gc_global.hook_fns[i];
            ch = gc->hook[three_ago][i];
            if ( ch == NULL ) continue;
            gc->hook[three_ago][i] = NULL;

            t = ch;
            do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); }
            while ( (t = t->next) != ch );

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while (ch_next->next && (ch_next->next != ch_head)
                         && (ch_next = ch_next->next));

                ViceLog(11, ("GC: return %d chunks to gc_global.free_chunks\n",
                             r_len));
            }

            add_chunks_to_list(ch, gc_global.free_chunks);
        }
    }

    /* Update current epoch. */
    ViceLog(11, ("GC: gc_reclaim epoch transition (leaving %lu)\n",
                 curr_epoch));

    WMB();
    gc_global.current = (curr_epoch+1) % NR_EPOCHS;

 out:
    gc_global.inreclaim = 0;
}
#endif /* MINIMAL_GC */

/* Allocate a block from the per-thread cache for allocator @alloc_id. */
void *gc_alloc(ptst_t *ptst, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i == 0 )
    {
        if ( gc->alloc_chunks[alloc_id]++ == 100 )
        {
            gc->alloc_chunks[alloc_id] = 0;
            add_chunks_to_list(ch, gc_global.free_chunks);
            gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);
        }
        else
        {
            chunk_t *och = ch;
            ch = get_alloc_chunk(gc, alloc_id);
            ch->next  = och->next;
            och->next = ch;
            gc->alloc[alloc_id] = ch;
        }
    }

    return ch->blk[--ch->i];
}

int
gc_get_blocksize(int alloc_id)
{
    return (gc_global.blk_sizes[alloc_id]);
}

char *
gc_get_tag(int alloc_id)
{
    return (gc_global.tags[alloc_id]);
}

static chunk_t *chunk_from_cache(gc_t *gc)
{
    chunk_t *ch = gc->chunk_cache, *p = ch->next;

    if ( ch == p )
    {
        gc->chunk_cache = get_empty_chunks(100);
    }
    else
    {
        ch->next = p->next;
    }

    p->next = p;
    p->i = 0;

    return(p);
}

/* File @p on the current epoch's garbage list for allocator @alloc_id. */
void gc_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];

    if ( ch == NULL )
    {
        gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
        gc->garbage_tail[gc->epoch][alloc_id] = ch;
    }
    else if ( ch->i == BLKS_PER_CHUNK )
    {
        prev = gc->garbage_tail[gc->epoch][alloc_id];
        new  = chunk_from_cache(gc);
        gc->garbage[gc->epoch][alloc_id] = new;
        new->next  = ch;
        prev->next = new;
        ch = new;
    }

    ch->blk[ch->i++] = p;
}

void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];

    if ( ch == NULL )
    {
        gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);
    }
    else
    {
        ch = ch->next;
        if ( ch->i == BLKS_PER_CHUNK )
        {
            och       = gc->hook[gc->epoch][hook_id];
            ch        = chunk_from_cache(gc);
            ch->next  = och->next;
            och->next = ch;
        }
    }

    ch->blk[ch->i++] = ptr;
}

/* Return @p to the per-thread allocation cache when there is room. */
void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i < BLKS_PER_CHUNK )
    {
        ch->blk[ch->i++] = p;
    }
    else
    {
        gc_free(ptst, p, alloc_id);
    }
}

void gc_enter(ptst_t *ptst)
{
#ifdef MINIMAL_GC
    ptst->count++;
    MB();
#else
    gc_t *gc = ptst->gc;
    int new_epoch, cnt;

 retry:
    cnt = ptst->count++;
    MB();
    if ( cnt == 1 )
    {
        new_epoch = gc_global.current;
        if ( gc->epoch != new_epoch )
        {
            gc->epoch = new_epoch;
            gc->entries_since_reclaim        = 0;
#ifdef YIELD_TO_HELP_PROGRESS
            gc->reclaim_attempts_since_yield = 0;
#endif
        }
        else if ( gc->entries_since_reclaim++ == 100 )
        {
            ptst->count--;
#ifdef YIELD_TO_HELP_PROGRESS
            if ( gc->reclaim_attempts_since_yield++ == 10000 )
            {
                gc->reclaim_attempts_since_yield = 0;
                sched_yield();
            }
#endif
            gc->entries_since_reclaim = 0;
            gc_reclaim();
            goto retry;
        }
    }
#endif
}

void gc_exit(ptst_t *ptst)
{
    MB();
    ptst->count--;
}

gc_t *gc_init(void)
{
    gc_t *gc;
    int   i;

    gc = ALIGNED_ALLOC(sizeof(*gc));
    if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
    memset(gc, 0, sizeof(*gc));

#ifdef WEAK_MEM_ORDER
    /* Initialise shootdown state. */
    gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
    gc->async_page_state = 1;
#endif

    gc->chunk_cache = get_empty_chunks(100);

    /* Get ourselves a set of allocation chunks. */
    for ( i = 0; i < gc_global.nr_sizes; i++ )
    {
        gc->alloc[i] = get_alloc_chunk(gc, i);
    }
    for ( ; i < MAX_SIZES; i++ )
    {
        gc->alloc[i] = chunk_from_cache(gc);
    }

    return(gc);
}

int
gc_add_allocator(int alloc_size, char *tag)
{
    int ni, i;

    FASPO(&gc_global.n_allocators, gc_global.n_allocators + 1);
    if (gc_global.n_allocators > MAX_SIZES) {
        /* critical error */
        printf("MCAS gc max allocators exceeded, aborting\n");
        abort();
    }

    i = gc_global.nr_sizes;
    while ((ni = CASIO(&gc_global.nr_sizes, i, i + 1)) != i)
        i = ni;
    gc_global.blk_sizes[i] = alloc_size;
    gc_global.tags[i] = strdup(tag);
    gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
    gc_global.alloc[i] = get_filled_chunks(ALLOC_CHUNKS_PER_LIST, alloc_size);

    return i;
}

void gc_remove_allocator(int alloc_id)
{
    /* This is a no-op for now. */
}

int gc_add_hook(hook_fn_t fn)
{
    int ni, i = gc_global.nr_hooks;
    while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
    gc_global.hook_fns[i] = fn;
    return i;
}

void gc_remove_hook(int hook_id)
{
    /* This is a no-op for now. */
}

void _destroy_gc_subsystem(void)
{
    printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
           gc_global.total_size, (double)gc_global.total_size / 1000000,
           gc_global.allocations);
}

void _init_gc_subsystem(void)
{
    memset(&gc_global, 0, sizeof(gc_global));

    gc_global.page_size   = (unsigned int)sysconf(_SC_PAGESIZE);
    gc_global.free_chunks = alloc_more_chunks();

    gc_global.nr_hooks = 0;
    gc_global.nr_sizes = 0;
}