/******************************************************************************
 * gc.c
 *
 * A fully recycling epoch-based garbage collector. Works by counting
 * threads in and out of critical regions, to work out when
 * garbage queues can be fully deleted.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
    * notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    * copyright notice, this list of conditions and the following
    * disclaimer in the documentation and/or other materials provided
    * with the distribution.  Neither the name of the Keir Fraser
    * nor the names of its contributors may be used to endorse or
    * promote products derived from this software without specific
    * prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
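/*
 * Illustrative usage sketch (not part of this file): a caller registers a
 * size class once, then allocates and frees inside an epoch-protected
 * critical region.  The critical_enter()/critical_exit() wrappers and
 * struct my_node are assumptions for the example -- they are expected to
 * come from ptst.h and to call gc_enter()/gc_exit() on the caller's ptst_t;
 * exact names may differ in your build.
 *
 *     static int my_cache;
 *
 *     void my_module_init(void)
 *     {
 *         _init_gc_subsystem();
 *         my_cache = gc_add_allocator(sizeof(struct my_node), "my_node");
 *     }
 *
 *     void my_op(void)
 *     {
 *         ptst_t *ptst = critical_enter();              // gc_enter(ptst)
 *         struct my_node *n = gc_alloc(ptst, my_cache);
 *         ...
 *         gc_free(ptst, n, my_cache);   // recycled only after a safe epoch
 *         critical_exit(ptst);                          // gc_exit(ptst)
 *     }
 */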
#include <afsconfig.h>
#include <afs/param.h>

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "portable_defns.h"
#include "gc.h"

#include <afs/afsutil.h>
/*#define MINIMAL_GC*/
/*#define YIELD_TO_HELP_PROGRESS*/
#define PROFILE_GC

/* Recycled nodes are filled with this value if WEAK_MEM_ORDER. */
#define INVALID_BYTE 0
#define INITIALISE_NODES(_p,_c) memset((_p), INVALID_BYTE, (_c));

/* Number of unique block sizes we can deal with. Equivalently, the
 * number of unique object caches which can be created. */
#define MAX_SIZES 30

#define MAX_HOOKS 4
/*
 * The initial number of allocation chunks for each per-blocksize list.
 * Popular allocation lists will steadily increase the allocation unit
 * in line with demand.
 */
#define ALLOC_CHUNKS_PER_LIST 10
/*
 * How many times should a thread call gc_enter(), seeing the same epoch
 * each time, before it makes a reclaim attempt?
 */
#define ENTRIES_PER_RECLAIM_ATTEMPT 100
/*
 *  0: current epoch -- threads are moving to this;
 * -1: some threads may still throw garbage into this epoch;
 * -2: no threads can see this epoch => we can zero garbage lists;
 * -3: all threads see zeros in these garbage lists => move to alloc lists.
 */
#ifdef WEAK_MEM_ORDER
#define NR_EPOCHS 4
#else
#define NR_EPOCHS 3
#endif
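/*
 * Worked example of the rotation performed by gc_reclaim() below: with
 * NR_EPOCHS == 3 and gc_global.current == 0, epoch (0+1)%3 == 1 is treated
 * as "three ago" (its garbage moves back to the allocation lists), epoch
 * (0+2)%3 == 2 as "two ago", and the current epoch then advances to
 * (0+1)%3 == 1 -- i.e. an epoch index is recycled just before threads
 * start using it again.
 */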
/*
 * A chunk amortises the cost of allocation from shared lists. It also
 * helps when zeroing nodes, as it increases per-cacheline pointer density
 * and means that node locations don't need to be brought into the cache
 * (most architectures have a non-temporal store instruction).
 */
#define BLKS_PER_CHUNK 100
typedef struct chunk_st chunk_t;
struct chunk_st
{
    chunk_t *next;             /* chunk chaining                 */
    unsigned int i;            /* the next entry in blk[] to use */
    void *blk[BLKS_PER_CHUNK];
};
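/*
 * Rough footprint, assuming a typical LP64 build: with BLKS_PER_CHUNK == 100
 * each chunk_t carries 100 * 8 bytes of block pointers plus a next pointer
 * and an index, i.e. 800 + 8 + 4 = 812 bytes before structure padding.
 */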
static struct gc_global_st
{
    CACHE_PAD(0);

    /* The current epoch. */
    VOLATILE unsigned int current;
    CACHE_PAD(1);

    /* Exclusive access to gc_reclaim(). */
    VOLATILE unsigned int inreclaim;
    CACHE_PAD(2);

    /* Allocator caches currently defined */
    long n_allocators;

    /*
     * RUN-TIME CONSTANTS (to first approximation)
     */

    /* Memory page size, in bytes. */
    unsigned int page_size;

    /* Node sizes (run-time constants). */
    int nr_sizes;
    int blk_sizes[MAX_SIZES];

    /* tags (trace support) */
    char *tags[MAX_SIZES];

    /* Registered epoch hooks. */
    int nr_hooks;
    hook_fn_t hook_fns[MAX_HOOKS];
    CACHE_PAD(3);

    /*
     * DATA WE MAY HIT HARD
     */

    /* Chain of free, empty chunks. */
    chunk_t * VOLATILE free_chunks;

    /* Main allocation lists. */
    chunk_t * VOLATILE alloc[MAX_SIZES];
    VOLATILE unsigned int alloc_size[MAX_SIZES];
#ifdef PROFILE_GC
    VOLATILE unsigned int total_size;
    VOLATILE unsigned int allocations;
#endif
} gc_global;
/* Per-thread state. */
struct gc_st
{
    /* Epoch that this thread sees. */
    unsigned int epoch;

    /* Number of calls to gc_enter() since last gc_reclaim() attempt. */
    unsigned int entries_since_reclaim;

#ifdef YIELD_TO_HELP_PROGRESS
    /* Number of calls to gc_reclaim() since we last yielded. */
    unsigned int reclaim_attempts_since_yield;
#endif

    /* Used by gc_async_barrier(). */
    void *async_page;
    int   async_page_state;

    /* Garbage lists. */
    chunk_t *garbage[NR_EPOCHS][MAX_SIZES];
    chunk_t *garbage_tail[NR_EPOCHS][MAX_SIZES];
    chunk_t *chunk_cache;

    /* Local allocation lists. */
    chunk_t *alloc[MAX_SIZES];
    unsigned int alloc_chunks[MAX_SIZES];

    /* Hook pointer lists. */
    chunk_t *hook[NR_EPOCHS][MAX_HOOKS];
};
/* XXX generalize */
#ifndef KERNEL

#define MEM_FAIL(_s)                                                      \
do {                                                                      \
    fprintf(stderr, "OUT OF MEMORY: %lu bytes at line %d\n",              \
            (unsigned long)(_s), __LINE__);                               \
    abort();                                                              \
} while ( 0 )
#endif
/* Allocate more empty chunks from the heap. */
#define CHUNKS_PER_ALLOC 1000
static chunk_t *alloc_more_chunks(void)
{
    int i;
    chunk_t *h, *p;

    ViceLog(11, ("GC: alloc_more_chunks alloc %lu chunks\n",
                 (unsigned long)CHUNKS_PER_ALLOC));

    h = p = ALIGNED_ALLOC(CHUNKS_PER_ALLOC * sizeof(*h));
    if ( h == NULL ) MEM_FAIL(CHUNKS_PER_ALLOC * sizeof(*h));

    for ( i = 1; i < CHUNKS_PER_ALLOC; i++ )
    {
        p->next = p + 1;
        p++;
    }

    p->next = h;

    return(h);
}
/* Put a chain of chunks onto a list. */
static void add_chunks_to_list(chunk_t *ch, chunk_t *head)
{
    chunk_t *h_next, *new_h_next, *ch_next;
    ch_next    = ch->next;
    new_h_next = head->next;
    do { ch->next = h_next = new_h_next; WMB_NEAR_CAS(); }
    while ( (new_h_next = CASPO(&head->next, h_next, ch_next)) != h_next );
}
/* Allocate a chain of @n empty chunks. Pointers may be garbage. */
static chunk_t *get_empty_chunks(int n)
{
    int i;
    chunk_t *new_rh, *rh, *rt, *head;

 retry:
    head = gc_global.free_chunks;
    new_rh = head->next;
    do {
        rh = new_rh;
        rt = head;
        WEAK_DEP_ORDER_RMB();
        for ( i = 0; i < n; i++ )
        {
            if ( (rt = rt->next) == head )
            {
                /* Allocate some more chunks. */
                add_chunks_to_list(alloc_more_chunks(), head);
                goto retry;
            }
        }
    }
    while ( (new_rh = CASPO(&head->next, rh, rt->next)) != rh );

    rt->next = rh;
    return(rh);
}
/* Get @n filled chunks, pointing at blocks of @sz bytes each. */
static chunk_t *get_filled_chunks(int n, int sz)
{
    chunk_t *h, *p;
    char *node;
    int i;

#ifdef PROFILE_GC
    ADD_TO(gc_global.total_size, n * BLKS_PER_CHUNK * sz);
    ADD_TO(gc_global.allocations, 1);
#endif

    node = ALIGNED_ALLOC(n * BLKS_PER_CHUNK * sz);
    if ( node == NULL ) MEM_FAIL(n * BLKS_PER_CHUNK * sz);
#ifdef WEAK_MEM_ORDER
    INITIALISE_NODES(node, n * BLKS_PER_CHUNK * sz);
#endif

    h = p = get_empty_chunks(n);
    do {
        p->i = BLKS_PER_CHUNK;
        for ( i = 0; i < BLKS_PER_CHUNK; i++ )
        {
            p->blk[i] = node;
            node += sz;
        }
    }
    while ( (p = p->next) != h );

    return(h);
}
/*
 * gc_async_barrier: Cause an asynchronous barrier in all other threads. We do
 * this by causing a TLB shootdown to be propagated to all other processors.
 * Each time such an action is required, this function calls:
 *   mprotect(async_page, <page size>, <new flags>)
 * Each thread's state contains a memory page dedicated for this purpose.
 */
#ifdef WEAK_MEM_ORDER
static void gc_async_barrier(gc_t *gc)
{
    mprotect(gc->async_page, gc_global.page_size,
             gc->async_page_state ? PROT_READ : PROT_NONE);
    gc->async_page_state = !gc->async_page_state;
}
#else
#define gc_async_barrier(_g) ((void)0)
#endif
/* Grab a level @i allocation chunk from main chain. */
static chunk_t *get_alloc_chunk(gc_t *gc, int i)
{
    chunk_t *alloc, *p, *new_p, *nh;
    unsigned int sz;

    alloc = gc_global.alloc[i];
    new_p = alloc->next;

    do {
        p = new_p;
        while ( p == alloc )
        {
            sz = gc_global.alloc_size[i];
            nh = get_filled_chunks(sz, gc_global.blk_sizes[i]);
            ADD_TO(gc_global.alloc_size[i], sz >> 3);
            gc_async_barrier(gc);
            add_chunks_to_list(nh, alloc);
            p = alloc->next;
        }
        WEAK_DEP_ORDER_RMB();
    }
    while ( (new_p = CASPO(&alloc->next, p, p->next)) != p );

    p->next = p;
    assert(p->i == BLKS_PER_CHUNK);
    return(p);
}
#ifndef MINIMAL_GC
/*
 * gc_reclaim: Scans the list of struct gc_perthread looking for the lowest
 * maximum epoch number seen by a thread that's in the list code. If it's the
 * current epoch, the "nearly-free" lists from the previous epoch are
 * reclaimed, and the epoch is incremented.
 */
static void gc_reclaim(void)
{
    ptst_t       *ptst, *first_ptst, *our_ptst = NULL;
    gc_t         *gc = NULL;
    unsigned long curr_epoch;
    chunk_t      *ch, *t;
    int           two_ago, three_ago, i, j;

    ViceLog(11, ("GC: gc_reclaim enter\n"));

    /* Barrier to entering the reclaim critical section. */
    if ( gc_global.inreclaim || CASIO(&gc_global.inreclaim, 0, 1) ) return;

    ViceLog(11, ("GC: gc_reclaim after inreclaim barrier\n"));

    /*
     * Grab first ptst structure *before* barrier -- prevent bugs
     * on weak-ordered architectures.
     */
    first_ptst = ptst_first();
    MB();
    curr_epoch = gc_global.current;

    /* Have all threads seen the current epoch, or not in mutator code? */
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        if ( (ptst->count > 1) && (ptst->gc->epoch != curr_epoch) ) goto out;
    }

    ViceLog(11, ("GC: gc_reclaim all-threads see current epoch\n"));

    /*
     * Three-epoch-old garbage lists move to allocation lists.
     * Two-epoch-old garbage lists are cleaned out.
     */
    two_ago   = (curr_epoch+2) % NR_EPOCHS;
    three_ago = (curr_epoch+1) % NR_EPOCHS;
    if ( gc_global.nr_hooks != 0 )
        our_ptst = (ptst_t *)pthread_getspecific(ptst_key);
    for ( ptst = first_ptst; ptst != NULL; ptst = ptst_next(ptst) )
    {
        gc = ptst->gc;

        for ( i = 0; i < gc_global.nr_sizes; i++ )
        {
#ifdef WEAK_MEM_ORDER
            int sz = gc_global.blk_sizes[i];
            if ( gc->garbage[two_ago][i] != NULL )
            {
                chunk_t *head = gc->garbage[two_ago][i];
                ch = head;
                do {
                    int j;
                    for ( j = 0; j < ch->i; j++ )
                        INITIALISE_NODES(ch->blk[j], sz);
                }
                while ( (ch = ch->next) != head );
            }
#endif

            /* NB. Leave one chunk behind, as it is probably not yet full. */
            t = gc->garbage[three_ago][i];
            if ( (t == NULL) || ((ch = t->next) == t) ) continue;
            gc->garbage_tail[three_ago][i]->next = ch;
            gc->garbage_tail[three_ago][i] = t;
            t->next = t;

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;
                r_size = 0;

                /* XXX: nonfatal, may be missing multiplier */
                /* Walk the detached ring with a cursor; ch itself is still
                 * needed by add_chunks_to_list() below. */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while ((ch_next = ch_next->next) && (ch_next != ch_head));

                ViceLog(11, ("GC: return %d chunks of size %d to "
                             "gc_global.alloc[%d]\n",
                             r_len,
                             gc_global.blk_sizes[i],
                             i));
            }

            add_chunks_to_list(ch, gc_global.alloc[i]);
        }

        for ( i = 0; i < gc_global.nr_hooks; i++ )
        {
            hook_fn_t fn = gc_global.hook_fns[i];
            ch = gc->hook[three_ago][i];
            if ( ch == NULL ) continue;
            gc->hook[three_ago][i] = NULL;

            t = ch;
            do { for ( j = 0; j < t->i; j++ ) fn(our_ptst, t->blk[j]); }
            while ( (t = t->next) != ch );

            /* gc inst: compute and log size of returned list */
            {
                chunk_t *ch_head, *ch_next;
                int r_ix, r_len, r_size;
                r_ix = 0;
                r_len = 0;

                /* XXX: nonfatal, may be missing multiplier */
                /* Same cursor walk as above: count chunks without
                 * disturbing ch. */
                ch_head = ch;
                ch_next = ch;
                do {
                    r_len++;
                } while ((ch_next = ch_next->next) && (ch_next != ch_head));

                ViceLog(11, ("GC: return %d chunks to gc_global.free_chunks\n",
                             r_len));
            }

            add_chunks_to_list(ch, gc_global.free_chunks);
        }
    }

    /* Update current epoch. */
    ViceLog(11, ("GC: gc_reclaim epoch transition (leaving %lu)\n",
                 curr_epoch));

    WMB();
    gc_global.current = (curr_epoch+1) % NR_EPOCHS;

 out:
    gc_global.inreclaim = 0;
}
#endif /* MINIMAL_GC */
void *gc_alloc(ptst_t *ptst, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i == 0 )
    {
        if ( gc->alloc_chunks[alloc_id]++ == 100 )
        {
            gc->alloc_chunks[alloc_id] = 0;
            add_chunks_to_list(ch, gc_global.free_chunks);
            gc->alloc[alloc_id] = ch = get_alloc_chunk(gc, alloc_id);
        }
        else
        {
            chunk_t *och = ch;
            ch = get_alloc_chunk(gc, alloc_id);
            ch->next  = och->next;
            och->next = ch;
            gc->alloc[alloc_id] = ch;
        }
    }

    return ch->blk[--ch->i];
}
int
gc_get_blocksize(int alloc_id)
{
    return (gc_global.blk_sizes[alloc_id]);
}

char *
gc_get_tag(int alloc_id)
{
    return (gc_global.tags[alloc_id]);
}
static chunk_t *chunk_from_cache(gc_t *gc)
{
    chunk_t *ch = gc->chunk_cache, *p = ch->next;

    if ( ch == p )
    {
        gc->chunk_cache = get_empty_chunks(100);
    }
    else
    {
        ch->next = p->next;
        p->next  = p;
    }

    p->i = 0;
    return(p);
}
void gc_free(ptst_t *ptst, void *p, int alloc_id)
{
#ifndef MINIMAL_GC
    gc_t *gc = ptst->gc;
    chunk_t *prev, *new, *ch = gc->garbage[gc->epoch][alloc_id];

    if ( ch == NULL )
    {
        gc->garbage[gc->epoch][alloc_id] = ch = chunk_from_cache(gc);
        gc->garbage_tail[gc->epoch][alloc_id] = ch;
    }
    else if ( ch->i == BLKS_PER_CHUNK )
    {
        prev = gc->garbage_tail[gc->epoch][alloc_id];
        new  = chunk_from_cache(gc);
        gc->garbage[gc->epoch][alloc_id] = new;
        new->next  = ch;
        prev->next = new;
        ch = new;
    }

    ch->blk[ch->i++] = p;
#endif
}
void gc_add_ptr_to_hook_list(ptst_t *ptst, void *ptr, int hook_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *och, *ch = gc->hook[gc->epoch][hook_id];

    if ( ch == NULL )
    {
        gc->hook[gc->epoch][hook_id] = ch = chunk_from_cache(gc);
    }
    else
    {
        ch = ch->next;
        if ( ch->i == BLKS_PER_CHUNK )
        {
            och       = gc->hook[gc->epoch][hook_id];
            ch        = chunk_from_cache(gc);
            ch->next  = och->next;
            och->next = ch;
        }
    }

    ch->blk[ch->i++] = ptr;
}
void gc_unsafe_free(ptst_t *ptst, void *p, int alloc_id)
{
    gc_t *gc = ptst->gc;
    chunk_t *ch;

    ch = gc->alloc[alloc_id];
    if ( ch->i < BLKS_PER_CHUNK )
    {
        ch->blk[ch->i++] = p;
    }
    else
    {
        gc_free(ptst, p, alloc_id);
    }
}
void gc_enter(ptst_t *ptst)
{
#ifdef MINIMAL_GC
    ptst->count++;
    MB();
#else
    gc_t *gc = ptst->gc;
    int new_epoch, cnt;

 retry:
    cnt = ptst->count++;
    MB();
    if ( cnt == 1 )
    {
        new_epoch = gc_global.current;
        if ( gc->epoch != new_epoch )
        {
            gc->epoch = new_epoch;
            gc->entries_since_reclaim        = 0;
#ifdef YIELD_TO_HELP_PROGRESS
            gc->reclaim_attempts_since_yield = 0;
#endif
        }
        else if ( gc->entries_since_reclaim++ == ENTRIES_PER_RECLAIM_ATTEMPT )
        {
            ptst->count--;
#ifdef YIELD_TO_HELP_PROGRESS
            if ( gc->reclaim_attempts_since_yield++ == 10000 )
            {
                gc->reclaim_attempts_since_yield = 0;
                sched_yield();
            }
#endif
            gc->entries_since_reclaim = 0;
            gc_reclaim();
            goto retry;
        }
    }
#endif
}
void gc_exit(ptst_t *ptst)
{
    MB();
    ptst->count--;
}
gc_t *gc_init(void)
{
    gc_t *gc;
    int   i;

    gc = ALIGNED_ALLOC(sizeof(*gc));
    if ( gc == NULL ) MEM_FAIL(sizeof(*gc));
    memset(gc, 0, sizeof(*gc));

#ifdef WEAK_MEM_ORDER
    /* Initialise shootdown state. */
    gc->async_page = mmap(NULL, gc_global.page_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ( gc->async_page == (void *)MAP_FAILED ) MEM_FAIL(gc_global.page_size);
    gc->async_page_state = 1;
#endif

    gc->chunk_cache = get_empty_chunks(100);

    /* Get ourselves a set of allocation chunks. */
    for ( i = 0; i < gc_global.nr_sizes; i++ )
    {
        gc->alloc[i] = get_alloc_chunk(gc, i);
    }
    for ( ; i < MAX_SIZES; i++ )
    {
        gc->alloc[i] = chunk_from_cache(gc);
    }

    return(gc);
}
int
gc_add_allocator(int alloc_size, char *tag)
{
    int ni, i;

    RMB();
    FASPO(&gc_global.n_allocators, gc_global.n_allocators + 1);
    if (gc_global.n_allocators > MAX_SIZES) {
        /* critical error */
#if !defined(KERNEL)
        printf("MCAS gc max allocators exceeded, aborting\n");
#endif
        abort();
    }

    i = gc_global.nr_sizes;
    while ((ni = CASIO(&gc_global.nr_sizes, i, i + 1)) != i)
        i = ni;
    gc_global.blk_sizes[i]  = alloc_size;
    gc_global.tags[i]       = strdup(tag);
    gc_global.alloc_size[i] = ALLOC_CHUNKS_PER_LIST;
    gc_global.alloc[i]      = get_filled_chunks(ALLOC_CHUNKS_PER_LIST,
                                                alloc_size);
    return i;
}
void gc_remove_allocator(int alloc_id)
{
    /* This is a no-op for now. */
}


int gc_add_hook(hook_fn_t fn)
{
    int ni, i = gc_global.nr_hooks;
    while ( (ni = CASIO(&gc_global.nr_hooks, i, i+1)) != i ) i = ni;
    gc_global.hook_fns[i] = fn;
    return i;
}


void gc_remove_hook(int hook_id)
{
    /* This is a no-op for now. */
}
void _destroy_gc_subsystem(void)
{
#ifdef PROFILE_GC
    printf("Total heap: %u bytes (%.2fMB) in %u allocations\n",
           gc_global.total_size, (double)gc_global.total_size / 1000000,
           gc_global.allocations);
#endif
}


void _init_gc_subsystem(void)
{
    memset(&gc_global, 0, sizeof(gc_global));

    gc_global.page_size   = (unsigned int)sysconf(_SC_PAGESIZE);
    gc_global.free_chunks = alloc_more_chunks();

    gc_global.nr_hooks = 0;
    gc_global.nr_sizes = 0;
}