/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/

#include "rts/PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"

// Note [Object unloading]
// ~~~~~~~~~~~~~~~~~~~~~~~
//
// Overview of object unloading:
//
// - In a major GC, for every static object we mark the object's object code and
//   its dependencies as 'live'. This is done by `markObjectCode`, called by
//   `evacuate`.
//
// - Marking object code is done using a global "section index table"
//   (global_s_indices below). When we load an object code we add its section
//   indices to the table. `markObjectCode` does a binary search on this table to
//   find the object code for the marked object, and marks it and its
//   dependencies.
//
//   A dependency of an object code is simply other object code that it refers
//   to in its code. We know these dependencies from the relocations present in
//   the referent. This is recorded by lookupSymbolDependent.
//
// - global_s_indices is updated as we load and unload objects. When we load an
//   object code we add its section indices to the table; we remove those
//   indices when we unload it.
//
//   The table is sorted and old indices are removed in `checkUnload`, instead of
//   on every load/unload, to avoid quadratic behavior when we load a list of
//   objects.
//
// - After a major GC `checkUnload` unloads objects that (1) were explicitly
//   asked for unloading (via `unloadObj`) and (2) were not marked during GC.
//
// Note that, crucially, we don't unload an object code even if it's not
// reachable from the heap, unless it's explicitly asked for unloading (via
// `unloadObj`). This is a feature and not a bug! Two use cases:
//
// - The user might request a symbol from a loaded object at any point with
//   lookupSymbol (e.g. GHCi might do this).
//
// - Sometimes we load objects that are not Haskell objects.
//
// To avoid unloading objects that are unreachable but have not been asked for
// unloading we maintain a "root set" of object code, `loaded_objects` below.
// `loadObj` adds the loaded object (and its dependencies) to the list;
// `unloadObj` removes it. After a major GC, `checkUnload` first marks the root
// set (`loaded_objects`) to avoid unloading objects that were not asked for
// unloading.
//
// Two other lists, `objects` and `old_objects`, are similar to the large object
// lists in the GC. Before a major GC we move `objects` to `old_objects`, and
// move marked objects back to `objects` during evacuation and when marking
// roots in `checkUnload`. Any objects left in `old_objects` after that are
// unloaded. (A usage sketch follows at the end of this Note.)
//
// TODO: We currently don't unload objects when non-moving GC is enabled. The
// implementation would be similar to `nonmovingGcCafs`:
//
// - Maintain a "snapshot":
//
//   - Copy `loaded_objects` as the root set of the snapshot
//
//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
//     list for this as `old_objects` won't be used by any other code when
//     non-moving GC is enabled.
//
//   - Copy the `global_s_indices` table to be able to mark objects while
//     mutators call `loadObj_` and `unloadObj_` concurrently.
//
//   - Don't mark object code in `evacuate`; marking will be done in the
//     non-moving collector.
//
// - After preparation, bump the object code mark bit (`object_code_mark_bit`
//   below) and mark static objects using a version of `markObjectCode` that
//   basically does the same thing but:
//
//   - Needs to update the `objects` list in a thread-safe way, as mutators will
//     be concurrently calling `loadObj_` and adding new stuff to `objects`.
//     (Alternatively we could have a new list for the non-moving GC's objects,
//     and then merge it into the global list in the pause before moving to the
//     concurrent sweep phase.)
//
//   - Needs to use the copied `global_s_indices`
//
// - After marking, anything left in `old_objects` is unreachable within the
//   snapshot; unload those objects. The unload loop will be the same as in
//   `checkUnload`. This step needs to happen in the final sync (before sweep
//   begins) to avoid races when updating `global_s_indices`.
//
// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
//   introduce a dependency from an already-loaded object to a newly loaded
//   object and we don't delete existing dependencies.
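//
// An illustrative usage sketch (not code from this file; it assumes the usual
// RTS linker entry points loadObj, resolveObjs and unloadObj, a major GC
// triggered via performMajorGC, and a hypothetical object file Foo.o):
//
//     loadObj("Foo.o");     // Foo.o joins the root set (`loaded_objects`)
//     resolveObjs();        // relocations resolved, dependencies recorded
//     // ... run code that refers to symbols in Foo.o ...
//     unloadObj("Foo.o");   // Foo.o leaves the root set; nothing is freed yet
//     performMajorGC();     // prepareUnloadCheck + GC marking + checkUnload;
//                           // Foo.o is freed only if no heap object still
//                           // refers into it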

uint8_t object_code_mark_bit = 0;

typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int capacity;  // Rounded up to a power of two on resize
                   // (see reserveOCSectionIndices).
    int n_sections;
    bool sorted;   // Invalidated on insertion. Sorted in `checkUnload`.
    bool unloaded; // Whether we removed anything from the table in
                   // `removeOCSectionIndices`. If this is set we "compact" the
                   // table (remove unused entries) in `removeRemovedOCSections`.
    OCSectionIndex *indices;
} OCSectionIndices;

// List of currently live objects. Moved to `old_objects` before the unload
// check. Marked objects are moved back to this list in `markObjectLive`.
// Remaining objects are freed at the end of `checkUnload`.
//
// Doubly-linked list, to be able to remove marked objects. List formed with the
// `next` and `prev` fields of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *objects = NULL;

// The `objects` list is moved here before the unload check. Marked objects are
// moved back to `objects`. Remaining objects are freed.
static ObjectCode *old_objects = NULL;

// Number of objects that we want to unload. When this value is 0 we skip static
// object marking during GC and `checkUnload`.
//
// Not static: we use this value to skip static object marking in evacuate when
// it is 0.
//
// Incremented in `unloadObj_`, decremented as we unload objects in
// `checkUnload`.
int n_unloaded_objects = 0;

// List of objects that we don't want to unload (i.e. we haven't called
// unloadObj on these yet). Used as the root set for the unload check in
// checkUnload. Objects are added with loadObj_ and removed with unloadObj_.
//
// List formed with the `next_loaded_object` field of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *loaded_objects;

// Section index table for currently loaded objects. New indices are added by
// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used to
// map static closures to their ObjectCode.
static OCSectionIndices *global_s_indices = NULL;

// Is it safe for us to unload code?
static bool tryToUnload(void)
{
    if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING) {
        // We mustn't unload anything as the heap census may contain
        // references into static data (e.g. cost centre names).
        // See #24512.
        return false;
    }

    return global_s_indices != NULL;
}
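
// createOCSectionIndices: allocate an empty section index table with an
// initial capacity of 1024 entries.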

static OCSectionIndices *createOCSectionIndices(void)
{
    // TODO (osa): Maybe initialize as empty (without allocation) and allocate
    // on first insertion?
    OCSectionIndices *s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    int capacity = 1024;
    s_indices->capacity = capacity;
    s_indices->n_sections = 0;
    s_indices->sorted = true;
    s_indices->unloaded = false;
    s_indices->indices = stgMallocBytes(capacity * sizeof(OCSectionIndex),
            "OCSectionIndices::indices");
    return s_indices;
}

static void freeOCSectionIndices(OCSectionIndices *s_indices)
{
    stgFree(s_indices->indices);
    stgFree(s_indices);
}

void initUnloadCheck(void)
{
    global_s_indices = createOCSectionIndices();
}

void exitUnloadCheck(void)
{
    freeOCSectionIndices(global_s_indices);
    global_s_indices = NULL;
}
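
// cmpSectionIndex: qsort comparator ordering section index entries by
// ascending start address.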

static int cmpSectionIndex(const void* indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}
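
// reserveOCSectionIndices: make sure the table can hold at least `len`
// additional entries, reallocating the backing array (rounded up to the next
// power of two) if necessary.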

static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
{
    int current_capacity = s_indices->capacity;
    int current_len = s_indices->n_sections;
    if (current_capacity - current_len >= len) {
        return;
    }

    // Round up to nearest power of 2
    int new_capacity = 1 << (int)ceil(log2(current_len + len));

    OCSectionIndex *old_indices = s_indices->indices;
    OCSectionIndex *new_indices = stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
            "reserveOCSectionIndices");

    for (int i = 0; i < current_len; ++i) {
        new_indices[i] = old_indices[i];
    }

    s_indices->capacity = new_capacity;
    s_indices->indices = new_indices;

    stgFree(old_indices);
}

// Insert object section indices of a single ObjectCode. Invalidates 'sorted'
// state.
void insertOCSectionIndices(ObjectCode *oc)
{
    // After we finish, the section table will no longer be sorted.
    global_s_indices->sorted = false;

    if (oc->type == DYNAMIC_OBJECT) {
        // First count the ranges
        int n_ranges = 0;
        for (NativeCodeRange *ncr = oc->nc_ranges; ncr != NULL; ncr = ncr->next) {
            n_ranges++;
        }

        // Next reserve the appropriate number of table entries...
        reserveOCSectionIndices(global_s_indices, n_ranges);

        // Now insert the new ranges...
        int s_i = global_s_indices->n_sections;
        for (NativeCodeRange *ncr = oc->nc_ranges; ncr != NULL; ncr = ncr->next) {
            OCSectionIndex *ent = &global_s_indices->indices[s_i];
            ent->start = (W_)ncr->start;
            ent->end = (W_)ncr->end;
            ent->oc = oc;
            s_i++;
        }
        global_s_indices->n_sections = s_i;
    } else {
        reserveOCSectionIndices(global_s_indices, oc->n_sections);
        int s_i = global_s_indices->n_sections;
        for (int i = 0; i < oc->n_sections; i++) {
            if (oc->sections[i].kind != SECTIONKIND_OTHER) {
                OCSectionIndex *ent = &global_s_indices->indices[s_i];
                ent->start = (W_)oc->sections[i].start;
                ent->end = (W_)oc->sections[i].start + oc->sections[i].size;
                ent->oc = oc;
                s_i++;
            }
        }
        global_s_indices->n_sections = s_i;
    }

    // Add object to 'objects' list
    if (objects != NULL) {
        objects->prev = oc;
    }
    oc->next = objects;
    objects = oc;
}

static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);

static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
{
    // To avoid quadratic behavior in checkUnload we set the `oc` fields of the
    // indices of unloaded objects to NULL here. Removing the unused entries is
    // done in `removeRemovedOCSections`.
    s_indices->unloaded = true;

    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
            if (section_idx != -1) {
                s_indices->indices[section_idx].oc = NULL;
            }
        }
    }
}
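
// sortOCSectionIndices: sort the table by section start address (unless it is
// already sorted) so that `findSectionIdx` can binary-search it.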

static void sortOCSectionIndices(OCSectionIndices *s_indices) {
    if (s_indices->sorted) {
        return;
    }

    qsort(s_indices->indices,
          s_indices->n_sections,
          sizeof(OCSectionIndex),
          cmpSectionIndex);

    s_indices->sorted = true;
}
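
// removeRemovedOCSections: compact the table in place, dropping entries whose
// `oc` field was cleared by `removeOCSectionIndices`. The relative order of
// the remaining entries is preserved, so a sorted table stays sorted.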

static void removeRemovedOCSections(OCSectionIndices *s_indices) {
    if (!s_indices->unloaded) {
        return;
    }

    int next_free_idx = 0;
    for (int i = 0; i < s_indices->n_sections; ++i) {
        if (s_indices->indices[i].oc == NULL) {
            // free entry, skip
        } else if (i == next_free_idx) {
            ++next_free_idx;
        } else {
            s_indices->indices[next_free_idx] = s_indices->indices[i];
            ++next_free_idx;
        }
    }
    s_indices->n_sections = next_free_idx;
    // All removed entries have been compacted away, so there is nothing left
    // to compact until the next removeOCSectionIndices call.
    s_indices->unloaded = false;
}

// Returns -1 if not found
static int findSectionIdx(OCSectionIndices *s_indices, const void *addr) {
    ASSERT(s_indices->sorted);

    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) {
        return -1;
    }
    if (w_addr < s_indices->indices[0].start) {
        return -1;
    }

    int left = 0, right = s_indices->n_sections;
    while (left + 1 < right) {
        int mid = (left + right)/2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return left;
    }
    return -1;
}
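
// findOC: map an address to the ObjectCode owning the section that contains
// it, or NULL if no loaded object contains that address.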

static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
    int oc_idx = findSectionIdx(s_indices, addr);

    if (oc_idx == -1) {
        return NULL;
    }

    return s_indices->indices[oc_idx].oc;
}
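
// markObjectLive: mark the ObjectCode passed as `key` as live for the current
// unload cycle, move it from `old_objects` back to `objects`, and recursively
// mark its dependencies. Shaped as a hash-table iteration callback so it can
// be passed to iterHashTable over `oc->dependencies`; it is also called
// directly with NULL `data`/`value`.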

static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED) {
    ObjectCode *oc = (ObjectCode*)key;

    // N.B. we may be called by the parallel GC and therefore this must be
    // thread-safe. To avoid taking the linker_mutex in the fast path
    // (when the object is already marked) we do an atomic exchange here and
    // only take the lock in the case that the object is unmarked.
    if (xchg(&oc->mark, object_code_mark_bit) == object_code_mark_bit) {
        return true; // for hash table iteration
    }

    ACQUIRE_LOCK(&linker_mutex);
    // Remove from 'old_objects' list
    if (oc->prev != NULL) {
        // TODO(osa): Maybe 'prev' should be a pointer to the referencing
        // *field* ? (instead of referencing *object*)
        oc->prev->next = oc->next;
    } else {
        old_objects = oc->next;
    }
    if (oc->next != NULL) {
        oc->next->prev = oc->prev;
    }

    // Add it to 'objects' list
    oc->prev = NULL;
    oc->next = objects;
    if (objects != NULL) {
        objects->prev = oc;
    }
    objects = oc;
    RELEASE_LOCK(&linker_mutex);

    // Mark its dependencies
    iterHashTable(oc->dependencies, NULL, markObjectLive);

    return true; // for hash table iteration
}
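
// markObjectCode: called via evacuate for static closures that live in
// dynamically loaded object code: look up the ObjectCode owning `addr` and
// mark it, together with its dependencies, as live.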

void markObjectCode(const void *addr)
{
    if (!tryToUnload()) {
        return;
    }

    // This should be checked at the call site
    ASSERT(!HEAP_ALLOCED(addr));

    ObjectCode *oc = findOC(global_s_indices, addr);
    if (oc != NULL) {
        // Mark the object code and its dependencies
        markObjectLive(NULL, (W_)oc, NULL);
    }
}

// Returns whether or not the GC that follows needs to mark code for potential
// unloading.
bool prepareUnloadCheck(void)
{
    if (!tryToUnload()) {
        return false;
    }

    removeRemovedOCSections(global_s_indices);
    sortOCSectionIndices(global_s_indices);

    ASSERT(old_objects == NULL);

    object_code_mark_bit = ~object_code_mark_bit;
    old_objects = objects;
    objects = NULL;
    return true;
}
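
// checkUnload: run at the end of a major GC. Marks the root set
// (`loaded_objects`), then frees every object left in `old_objects` that is
// unloadable; objects whose reachability we cannot determine are put back on
// `objects`.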

void checkUnload(void)
{
    // At this point we've marked all dynamically loaded static objects
    // (including their dependencies) during GC, but not the root set of object
    // code (loaded_objects). Mark the roots first, then unload any unmarked
    // objects.

    if (tryToUnload()) {
        OCSectionIndices *s_indices = global_s_indices;
        ASSERT(s_indices->sorted);

        // Mark roots
        for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
            markObjectLive(NULL, (W_)oc, NULL);
        }

        // Free unmarked objects
        ObjectCode *next = NULL;
        for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
            next = oc->next;
            ASSERT(oc->status == OBJECT_UNLOADED);

            // Symbols should be removed by unloadObj_.
            // NB (osa): If this assertion doesn't hold then freeObjectCode below
            // will corrupt symhash, as keys of that table live in ObjectCodes.
            // If you see a segfault in a hash table operation in the linker (in
            // a non-debug RTS) then it's probably because this assertion did
            // not hold.
            ASSERT(oc->symbols == NULL);

            if (oc->unloadable) {
                removeOCSectionIndices(s_indices, oc);
                freeObjectCode(oc);
                n_unloaded_objects -= 1;
            } else {
                // If we don't have enough information to accurately determine
                // the reachability of the object then hold onto it.
                oc->next = objects;
                objects = oc;
            }
        }
    }

    old_objects = NULL;
}