/* ----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2013-
 *
 * Check whether dynamically-loaded object code can be safely
 * unloaded, by searching for references to it from the heap and RTS
 * data structures.
 *
 * --------------------------------------------------------------------------*/
#include "rts/PosixSource.h"
#include "Rts.h"

#include "RtsUtils.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "CheckUnload.h"
#include "sm/Storage.h"
#include "sm/GCThread.h"
#include "sm/HeapUtils.h"

#include <math.h> // for ceil/log2 in reserveOCSectionIndices

// Note [Object unloading]
// ~~~~~~~~~~~~~~~~~~~~~~~
//
// Overview of object unloading:
//
// - In a major GC, for every static object we mark the object's object code
//   and its dependencies as 'live'. This is done by `markObjectCode`, called
//   by `evacuate`.
//
// - Marking object code is done using a global "section index table"
//   (global_s_indices below). When we load an object code we add its section
//   indices to the table. `markObjectCode` does a binary search on this table
//   to find the object code for the marked object, and marks it and its
//   dependencies.
//
//   A dependency of an object code is simply another object code that it
//   refers to in its code. We know these dependencies from the relocations
//   present in the referent. This is recorded by lookupSymbolDependent.
//
// - global_s_indices is updated as we load and unload objects. When we load
//   an object code we add its section indices to the table; we remove those
//   indices when we unload it.
//
//   The table is sorted and old indices are removed in `checkUnload`, instead
//   of on every load/unload, to avoid quadratic behavior when we load a list
//   of objects.
//
// - After a major GC, `checkUnload` unloads objects that (1) were explicitly
//   asked to be unloaded (via `unloadObj`) and (2) are not marked during GC.
//
// Note that, crucially, we don't unload an object code even if it's not
// reachable from the heap, unless it's explicitly asked for unloading (via
// `unloadObj`). This is a feature and not a bug! Two use cases:
//
// - The user might request a symbol from a loaded object at any point with
//   lookupSymbol (e.g. GHCi might do this).
//
// - Sometimes we load objects that are not Haskell objects.
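//
//   As an illustration (not part of this file), a client such as GHCi would
//   drive the linker roughly like this, using the public entry points
//   loadObj/resolveObjs/lookupSymbol/unloadObj (the object name and symbol
//   below are hypothetical):
//
//       loadObj("Foo.o");                    // Foo.o joins `loaded_objects`
//       resolveObjs();
//       void *f = lookupSymbol("f_closure"); // safe: Foo.o is a root
//       // ... use f ...
//       unloadObj("Foo.o");                  // leaves the root set; freed by
//                                            // the first checkUnload that
//                                            // finds no references to it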
//
// To avoid unloading objects that are unreachable but are not asked for
// unloading we maintain a "root set" of object code, `loaded_objects` below.
// `loadObj` adds each loaded object (and its dependencies) to the list;
// `unloadObj` removes it. After a major GC, `checkUnload` first marks the
// root set (`loaded_objects`) to avoid unloading objects that are not asked
// for unloading.
//
// Two other lists, `objects` and `old_objects`, are similar to the large
// object lists in the GC. Before a major GC we move `objects` to
// `old_objects`, and move marked objects back to `objects` during evacuation
// and when marking roots in `checkUnload`. Any objects left in `old_objects`
// after that are unloaded.
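//
//   As a sketch, the flow of one cycle in terms of the code below:
//
//       prepareUnloadCheck:  old_objects = objects; objects = NULL;
//       during GC:           markObjectLive moves each marked oc from
//                            old_objects back to objects
//       checkUnload:         marks the roots in loaded_objects, then frees
//                            every oc still left in old_objects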
//
// TODO: We currently don't unload objects when the non-moving GC is enabled.
// The implementation would be similar to `nonmovingGcCafs`:
//
// - Maintain a "snapshot":
//
//   - Copy `loaded_objects` as the root set of the snapshot.
//
//   - Stash `objects` to `old_objects` as the snapshot. We don't need a new
//     list for this as `old_objects` won't be used by any other code when
//     the non-moving GC is enabled.
//
//   - Copy the `global_s_indices` table to be able to mark objects while
//     mutators call `loadObj_` and `unloadObj_` concurrently.
//
// - Don't mark object code in `evacuate`; marking will be done in the
//   non-moving collector.
//
// - After preparation, bump the object code mark bit (`object_code_mark_bit`
//   below) and mark static objects using a version of `markObjectCode` that
//   basically does the same thing but:
//
//   - Needs to update the `objects` list in a thread-safe way, as mutators
//     will be concurrently calling `loadObj_` and adding new entries to
//     `objects`. (Alternatively we could have a new list for the non-moving
//     GC's objects, and then merge it into the global list in the pause
//     before moving to the concurrent sweep phase.)
//
//   - Needs to use the copied `global_s_indices`.
//
// - After marking, anything left in `old_objects` is unreachable within the
//   snapshot; unload those objects. The unload loop will be the same as in
//   `checkUnload`. This step needs to happen in the final sync (before sweep
//   begins) to avoid races when updating `global_s_indices`.
//
// - NOTE: We don't need write barriers in loadObj/unloadObj as we don't
//   introduce a dependency from an already-loaded object to a newly loaded
//   object and we don't delete existing dependencies.
//

uint8_t object_code_mark_bit = 0;
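
// The mark bit acts as an "epoch" rather than a flag that must be cleared
// after each GC: `prepareUnloadCheck` flips it, and an object counts as
// marked in the current cycle iff oc->mark == object_code_mark_bit. Unmarked
// objects therefore never need to be touched just to reset their marks.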

typedef struct {
    W_ start;
    W_ end;
    ObjectCode *oc;
} OCSectionIndex;

typedef struct {
    int capacity; // Doubled on resize
    int n_sections;
    bool sorted; // Invalidated on insertion. Sorted in checkUnload.
    bool unloaded; // Whether we removed anything from the table in
                   // removeOCSectionIndices. If this is set we "compact" the
                   // table (remove unused entries) in `removeRemovedOCSections`.
    OCSectionIndex *indices;
} OCSectionIndices;

// List of currently live objects. Moved to `old_objects` before unload check.
// Marked objects moved back to this list in `markObjectLive`. Remaining objects
// are freed at the end of `checkUnload`.
//
// Double-linked list to be able to remove marked objects. List formed with
// `next` and `prev` fields of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *objects = NULL;

// `objects` list is moved here before unload check. Marked objects are moved
// back to `objects`. Remaining objects are freed.
static ObjectCode *old_objects = NULL;

// Number of objects that we want to unload. When this value is 0 we skip
// static object marking during GC and `checkUnload`.
//
// Not static: we use this value to skip static object marking in evacuate
// when this is 0.
//
// Incremented in `unloadObj_`, decremented as we unload objects in
// `checkUnload`.
int n_unloaded_objects = 0;
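
// A sketch of the intended fast path on the GC side (illustrative only; the
// real check lives in evacuate and its surrounding code may differ):
//
//     if (major_gc && n_unloaded_objects > 0 && !HEAP_ALLOCED(p)) {
//         markObjectCode(p); // p: address of a static closure
//     }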

// List of objects that we don't want to unload (i.e. we haven't called
// unloadObj on these yet). Used as root set for unload check in checkUnload.
// Objects are added with loadObj_ and removed with unloadObj_.
//
// List formed with `next_loaded_object` field of `ObjectCode`.
//
// Not static: used in Linker.c.
ObjectCode *loaded_objects;

// Section index table for currently loaded objects. New indices are added by
// `loadObj_`, indices of unloaded objects are removed in `checkUnload`. Used to
// map static closures to their ObjectCode.
static OCSectionIndices *global_s_indices = NULL;

// Is it safe for us to unload code?
static bool tryToUnload(void)
{
    if (RtsFlags.ProfFlags.doHeapProfile != NO_HEAP_PROFILING) {
        // We mustn't unload anything as the heap census may contain
        // references into static data (e.g. cost centre names).
        return false;
    }

    return global_s_indices != NULL;
}

static OCSectionIndices *createOCSectionIndices(void)
{
    // TODO (osa): Maybe initialize as empty (without allocation) and allocate
    // on first insertion?
    OCSectionIndices *s_indices = stgMallocBytes(sizeof(OCSectionIndices), "OCSectionIndices");
    int capacity = 1024; // initial capacity; grown by reserveOCSectionIndices
    s_indices->capacity = capacity;
    s_indices->n_sections = 0;
    s_indices->sorted = true;
    s_indices->unloaded = false;
    s_indices->indices = stgMallocBytes(capacity * sizeof(OCSectionIndex),
        "OCSectionIndices::indices");
    return s_indices;
}

static void freeOCSectionIndices(OCSectionIndices *s_indices)
{
    stgFree(s_indices->indices);
    stgFree(s_indices);
}

void initUnloadCheck(void)
{
    global_s_indices = createOCSectionIndices();
}

void exitUnloadCheck(void)
{
    freeOCSectionIndices(global_s_indices);
    global_s_indices = NULL;
}

static int cmpSectionIndex(const void* indexa, const void *indexb)
{
    W_ s1 = ((OCSectionIndex*)indexa)->start;
    W_ s2 = ((OCSectionIndex*)indexb)->start;
    if (s1 < s2) {
        return -1;
    } else if (s1 > s2) {
        return 1;
    }
    return 0;
}

static void reserveOCSectionIndices(OCSectionIndices *s_indices, int len)
{
    int current_capacity = s_indices->capacity;
    int current_len = s_indices->n_sections;
    if (current_capacity - current_len >= len) {
        return;
    }

    // Round up to the nearest power of 2
    int new_capacity = 1 << (int)ceil(log2(current_len + len));
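    // (Worked example: current_len + len == 300 gives log2(300) ~= 8.23,
    // ceil rounds that to 9, so new_capacity == 1 << 9 == 512.)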

    OCSectionIndex *old_indices = s_indices->indices;
    OCSectionIndex *new_indices = stgMallocBytes(new_capacity * sizeof(OCSectionIndex),
        "reserveOCSectionIndices");

    for (int i = 0; i < current_len; ++i) {
        new_indices[i] = old_indices[i];
    }

    s_indices->capacity = new_capacity;
    s_indices->indices = new_indices;

    stgFree(old_indices);
}

// Insert object section indices of a single ObjectCode. Invalidates the
// 'sorted' state.
void insertOCSectionIndices(ObjectCode *oc)
{
    // After we finish, the section table will no longer be sorted.
    global_s_indices->sorted = false;

    if (oc->type == DYNAMIC_OBJECT) {
        // First count the ranges
        int n_ranges = 0;
        for (NativeCodeRange *ncr = oc->nc_ranges; ncr != NULL; ncr = ncr->next) {
            n_ranges++;
        }

        // Next reserve the appropriate number of table entries...
        reserveOCSectionIndices(global_s_indices, n_ranges);

        // Now insert the new ranges...
        int s_i = global_s_indices->n_sections;
        for (NativeCodeRange *ncr = oc->nc_ranges; ncr != NULL; ncr = ncr->next) {
            OCSectionIndex *ent = &global_s_indices->indices[s_i];
            ent->start = (W_)ncr->start;
            ent->end = (W_)ncr->end;
            ent->oc = oc;
            s_i++;
        }

        global_s_indices->n_sections = s_i;
    } else {
        reserveOCSectionIndices(global_s_indices, oc->n_sections);
        int s_i = global_s_indices->n_sections;
        for (int i = 0; i < oc->n_sections; i++) {
            if (oc->sections[i].kind != SECTIONKIND_OTHER) {
                OCSectionIndex *ent = &global_s_indices->indices[s_i];
                ent->start = (W_)oc->sections[i].start;
                ent->end = (W_)oc->sections[i].start + oc->sections[i].size;
                ent->oc = oc;
                s_i++;
            }
        }

        global_s_indices->n_sections = s_i;
    }

    // Add object to 'objects' list
    if (objects != NULL) {
        objects->prev = oc;
    }
    oc->next = objects;
    objects = oc;
}

static int findSectionIdx(OCSectionIndices *s_indices, const void *addr);

static void removeOCSectionIndices(OCSectionIndices *s_indices, ObjectCode *oc)
{
    // To avoid quadratic behavior in checkUnload we set the `oc` fields of
    // indices of unloaded objects to NULL here. Removing the unused entries
    // is done in `removeRemovedOCSections`.

    s_indices->unloaded = true;

    for (int i = 0; i < oc->n_sections; i++) {
        if (oc->sections[i].kind != SECTIONKIND_OTHER) {
            int section_idx = findSectionIdx(s_indices, oc->sections[i].start);
            if (section_idx != -1) {
                s_indices->indices[section_idx].oc = NULL;
            }
        }
    }
}

static void sortOCSectionIndices(OCSectionIndices *s_indices) {
    if (s_indices->sorted) {
        return;
    }

    qsort(s_indices->indices,
          s_indices->n_sections,
          sizeof(OCSectionIndex),
          cmpSectionIndex);

    s_indices->sorted = true;
}

static void removeRemovedOCSections(OCSectionIndices *s_indices) {
    if (!s_indices->unloaded) {
        return;
    }

    int next_free_idx = 0;
    for (int i = 0; i < s_indices->n_sections; ++i) {
        if (s_indices->indices[i].oc == NULL) {
            // Unloaded entry: skip it.
        } else if (i == next_free_idx) {
            // Entry is already in place.
            ++next_free_idx;
        } else {
            s_indices->indices[next_free_idx] = s_indices->indices[i];
            ++next_free_idx;
        }
    }

    s_indices->n_sections = next_free_idx;
    // Everything pending removal has now been compacted away.
    s_indices->unloaded = false;
}

// Returns -1 if not found
static int findSectionIdx(OCSectionIndices *s_indices, const void *addr) {
    ASSERT(s_indices->sorted);

    W_ w_addr = (W_)addr;
    if (s_indices->n_sections <= 0) {
        return -1;
    }
    if (w_addr < s_indices->indices[0].start) {
        return -1;
    }

    // Binary search for the entry with the greatest `start` <= w_addr.
    int left = 0, right = s_indices->n_sections;
    while (left + 1 < right) {
        int mid = (left + right)/2;
        W_ w_mid = s_indices->indices[mid].start;
        if (w_mid <= w_addr) {
            left = mid;
        } else {
            right = mid;
        }
    }
    ASSERT(w_addr >= s_indices->indices[left].start);
    if (w_addr < s_indices->indices[left].end) {
        return left;
    }

    return -1;
}
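
// Worked example (illustrative): with sorted entries
//   [ {start=0x1000, end=0x2000}, {start=0x3000, end=0x4000} ]
// findSectionIdx(s, (void*)0x1800) narrows to left == 0 and, since
// 0x1800 < 0x2000, returns 0. findSectionIdx(s, (void*)0x2800) also narrows
// to left == 0 but fails the end-bound check and returns -1.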

static ObjectCode *findOC(OCSectionIndices *s_indices, const void *addr) {
    int oc_idx = findSectionIdx(s_indices, addr);
    if (oc_idx == -1) {
        return NULL;
    }

    return s_indices->indices[oc_idx].oc;
}

static bool markObjectLive(void *data STG_UNUSED, StgWord key, const void *value STG_UNUSED) {
    ObjectCode *oc = (ObjectCode*)key;

    // N.B. we may be called by the parallel GC and therefore this must be
    // thread-safe. To avoid taking the linker_mutex in the fast path
    // (when the object is already marked) we do an atomic exchange here and
    // only take the lock in the case that the object is unmarked.
    if (xchg(&oc->mark, object_code_mark_bit) == object_code_mark_bit) {
        return true; // for hash table iteration
    }

    ACQUIRE_LOCK(&linker_mutex);
    // Remove from 'old_objects' list
    if (oc->prev != NULL) {
        // TODO(osa): Maybe 'prev' should be a pointer to the referencing
        // *field* ? (instead of referencing *object*)
        oc->prev->next = oc->next;
    } else {
        old_objects = oc->next;
    }
    if (oc->next != NULL) {
        oc->next->prev = oc->prev;
    }

    // Add it to 'objects' list
    oc->prev = NULL;
    oc->next = objects;
    if (objects != NULL) {
        objects->prev = oc;
    }
    objects = oc;
    RELEASE_LOCK(&linker_mutex);

    // Mark its dependencies
    iterHashTable(oc->dependencies, NULL, markObjectLive);

    return true; // for hash table iteration
}

void markObjectCode(const void *addr)
{
    if (!tryToUnload()) {
        return;
    }

    // This should be checked at the call site
    ASSERT(!HEAP_ALLOCED(addr));

    ObjectCode *oc = findOC(global_s_indices, addr);
    if (oc != NULL) {
        // Mark the object code and its dependencies
        markObjectLive(NULL, (W_)oc, NULL);
    }
}

// Returns whether or not the GC that follows needs to mark code for potential
// unloading.
bool prepareUnloadCheck(void)
{
    if (!tryToUnload()) {
        return false;
    }

    removeRemovedOCSections(global_s_indices);
    sortOCSectionIndices(global_s_indices);

    ASSERT(old_objects == NULL);

    // Flip the mark epoch and move the whole `objects` list into
    // `old_objects`; marking will move live objects back.
    object_code_mark_bit = ~object_code_mark_bit;
    old_objects = objects;
    objects = NULL;
    return true;
}

void checkUnload(void)
{
    // At this point we've marked all dynamically loaded static objects
    // (including their dependencies) during GC, but not the root set of
    // object code (loaded_objects). Mark the roots first, then unload any
    // unmarked objects.

    OCSectionIndices *s_indices = global_s_indices;
    ASSERT(s_indices->sorted);

    for (ObjectCode *oc = loaded_objects; oc != NULL; oc = oc->next_loaded_object) {
        markObjectLive(NULL, (W_)oc, NULL);
    }

    // Free unmarked objects
    ObjectCode *next = NULL;
    for (ObjectCode *oc = old_objects; oc != NULL; oc = next) {
        next = oc->next;
        ASSERT(oc->status == OBJECT_UNLOADED);

        // Symbols should be removed by unloadObj_.
        // NB (osa): If this assertion doesn't hold then freeObjectCode below
        // will corrupt symhash as keys of that table live in ObjectCodes. If
        // you see a segfault in a hash table operation in the linker (in a
        // non-debug RTS) then it's probably because this assertion did not
        // hold.
        ASSERT(oc->symbols == NULL);

        if (oc->unloadable) {
            removeOCSectionIndices(s_indices, oc);
            freeObjectCode(oc);
            n_unloaded_objects -= 1;
        } else {
            // If we don't have enough information to
            // accurately determine the reachability of
            // the object then hold onto it.