perf: Key the interpreter symbol cache by Name rather than FastString
[ghc.git] / rts / LinkerInternals.h
blob6977a93f413b1775f8443f5efea4fe6d2d5c5e0b
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 2000
 *
 * RTS Object Linker
 *
 * ---------------------------------------------------------------------------*/
9 #pragma once
11 #include "Rts.h"
12 #include "RtsSymbols.h"
13 #include "Hash.h"
14 #include "linker/M32Alloc.h"
16 #if RTS_LINKER_USE_MMAP
17 #include <sys/mman.h>
18 #endif
/* Print the currently loaded objects (implementation lives outside this header). */
void printLoadedObjects(void);
/* Which object file format are we targeting?
 *
 * Exactly one OBJFORMAT_* macro is defined, chosen from the host OS (or, for
 * wasm32, the host architecture).  If none of the conditions match, no macro
 * is defined at all.
 */
#if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
    || defined(linux_android_HOST_OS) \
    || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
    || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
    || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
#  define OBJFORMAT_ELF
#elif defined(mingw32_HOST_OS)
#  define OBJFORMAT_PEi386
#elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
#  define OBJFORMAT_MACHO
#elif defined(wasm32_HOST_ARCH)
#  define OBJFORMAT_WASM32
#endif
/* Forward declarations so the types can be referred to by pointer before
 * their full definitions appear further down. */
typedef struct _ObjectCode ObjectCode;
typedef struct _Section Section;
/*
 * Note [Processing overflowed relocations]
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * When processing relocations whose targets exceed the relocation's maximum
 * displacement, we can take advantage of knowledge of the symbol type to avoid
 * linker failures. In particular, if we know that a symbol is a code symbol
 * then we can handle the relocation by creating a "jump island", a small bit
 * of code which immediately jumps (with an instruction sequence capable of
 * larger displacement) to the target.
 *
 * This is not possible for data symbols (or, for that matter, Haskell symbols
 * when TNTC is in use). In these cases we instead have to fail and ask the
 * user to recompile their program as position-independent.
 */
55 #if defined(OBJFORMAT_ELF)
56 # include "linker/ElfTypes.h"
57 #elif defined(OBJFORMAT_PEi386)
58 # include "linker/PEi386Types.h"
59 #elif defined(OBJFORMAT_MACHO)
60 # include "linker/MachOTypes.h"
61 #elif defined(OBJFORMAT_WASM32)
62 # include "linker/Wasm32Types.h"
63 #else
64 # error "Unknown OBJECT_FORMAT for HOST_OS"
65 #endif
68 /* Hold extended information about a symbol in case we need to resolve it at a
69 late stage. */
70 typedef struct _Symbol
72 SymbolName *name;
73 SymbolAddr *addr;
74 SymType type;
75 } Symbol_t;
/* One [start, end) style address range, linkable into a list.
 * NOTE(review): whether `end` is inclusive or exclusive is not visible in this
 * header -- confirm against the code that fills these in. */
typedef struct NativeCodeRange_ {
    void *start, *end;

    /* Allow a chain of these things */
    struct NativeCodeRange_ *next;
} NativeCodeRange;
/* Indication of section kinds for loaded objects.  Needed by
   the GC for deciding whether or not a pointer on the stack
   is a code pointer.
   See Note [BFD import library].
*/
typedef
    enum { /* Section is code or readonly. e.g. .text or .r(o)data.  */
           SECTIONKIND_CODE_OR_RODATA,
           /* Section contains read/write data. e.g. .data.  */
           SECTIONKIND_RWDATA,
           /* Static initializer section. e.g. .ctors.  */
           SECTIONKIND_INIT_ARRAY,
           /* Static finalizer section. e.g. .dtors.  */
           SECTIONKIND_FINI_ARRAY,
           /* We don't know what the section is and don't care.  */
           SECTIONKIND_OTHER,

           /*
            * Windows-specific section kinds
            */

           /* Section contains debug information. e.g. .debug$.  */
           SECTIONKIND_DEBUG,
           /* Section contains exception table. e.g. .pdata.  */
           SECTIONKIND_EXCEPTION_TABLE,
           /* Section contains unwind info. e.g. .xdata.  */
           SECTIONKIND_EXCEPTION_UNWIND,
           /* Section belongs to an import section group. e.g. .idata$.  */
           SECTIONKIND_IMPORT,
           /* Section defines the head section of a BFD-style import library, e.g. idata$7.  */
           SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD,
           /* Section defines an import library entry, e.g. idata$7.  */
           SECTIONKIND_BFD_IMPORT_LIBRARY,
         }
    SectionKind;
/* How the memory backing a section was obtained.
 * NOTE(review): the precise meaning of each constant is inferred from its
 * name only -- confirm against the allocation code. */
typedef
    enum { SECTION_NOMEM,
           SECTION_M32,
           SECTION_MMAP,
           SECTION_MALLOC
         }
    SectionAlloc;
/* Indicates a desired memory protection for pages within a segment. Defined as
 * enum since it's more explicit and looks nicer in a debugger.
 *
 * Can be used directly as a substitution for a combination of PROT_X flags on
 * POSIX systems.
 */
typedef enum {
#if RTS_LINKER_USE_MMAP
    /* Values coincide with the mmap() PROT_* flag combinations. */
    SEGMENT_PROT_RO  = PROT_READ,
    SEGMENT_PROT_RX  = PROT_READ | PROT_EXEC,
    SEGMENT_PROT_RWO = PROT_READ | PROT_WRITE,
#else
    /* No mmap: plain sequential enumerators. */
    SEGMENT_PROT_RO,
    SEGMENT_PROT_RX,
    SEGMENT_PROT_RWO,
#endif
} SegmentProt;
/*
 * Note [No typedefs for customizable types]
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * Some pointer-to-struct types are defined opaquely
 * first, and customized later to architecture/ABI-specific
 * instantiations. Having the usual
 *   typedef struct _Foo {...} Foo;
 * wrappers is hard to get right with older versions of GCC,
 * so just have a
 *   struct Foo {...};
 * and always refer to it with the 'struct' qualifier.
 */
159 struct _Section {
160 void* start; /* actual start of section in memory */
161 StgWord size; /* actual size of section in memory */
162 SectionKind kind;
163 SectionAlloc alloc;
166 * The following fields are relevant for SECTION_MMAP sections only
168 StgWord mapped_offset; /* offset from the image of mapped_start */
169 void* mapped_start; /* start of mmap() block */
170 StgWord mapped_size; /* size of mmap() block */
172 /* A customizable type to augment the Section type.
173 * See Note [No typedefs for customizable types]
175 struct SectionFormatInfo* info;
/* One node of a singly linked list of memory regions, each given by a start
 * address and a size. */
typedef
    struct _ProddableBlock {
        void* start;
        int   size;
        struct _ProddableBlock* next;
    }
    ProddableBlock;
186 typedef struct _Segment {
187 void *start; /* page aligned start address of a segment */
188 size_t size; /* page rounded size of a segment */
189 SegmentProt prot; /* mem protection to set after all symbols were
190 * resolved */
192 int *sections_idx; /* an array of section indexes assigned to this segment */
193 int n_sections;
194 } Segment;
/* Architectures on which ObjectCode carries a table of SymbolExtra entries. */
#if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
    || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) \
    || defined(riscv64_HOST_ARCH)
#define NEED_SYMBOL_EXTRAS 1
#endif
/*
 * We use the m32 allocator for symbol extras on Windows and other mmap-using
 * platforms.
 *
 * N.B. mingw32 is a host *OS* (compare the OBJFORMAT_PEi386 selection, which
 * tests mingw32_HOST_OS), so the Windows arm must test mingw32_HOST_OS; a
 * mingw32_HOST_ARCH macro is never defined and would make this arm dead.
 */
#if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_OS)
#define NEED_M32 1
#endif
/* Jump Islands are snippets of machine code required for relative address
 * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbolextras
 * like a GOT for locals where SymbolExtra represents one entry.
 *
 * On architectures not listed below the struct has no members.
 */
typedef struct {
#if defined(powerpc_HOST_ARCH)
    struct {
        short lis_r12, hi_addr;
        short ori_r12_r12, lo_addr;
        long mtctr_r12;
        long bctr;
    } jumpIsland;
#elif defined(x86_64_HOST_ARCH)
    uint64_t    addr;
    // See Note [TLSGD relocation] in elf_tlsgd.c
    uint8_t     jumpIsland[8];
#elif defined(arm_HOST_ARCH)
    uint8_t     jumpIsland[16];
#elif defined(riscv64_HOST_ARCH)
    uint64_t    addr;
#endif
} SymbolExtra;
/* Records who loaded an object. */
typedef enum {
    /* Objects that were loaded by this linker */
    STATIC_OBJECT,

    /* Objects that were loaded by dlopen */
    DYNAMIC_OBJECT,
} ObjectType;
/* Pointer type used to hold the address of __cxa_finalize. */
typedef void (*cxa_finalize_fn)(void *);
241 /* Top-level structure for an object module. One of these is allocated
242 * for each object file in use.
244 struct _ObjectCode {
245 OStatus status;
246 pathchar *fileName;
247 int fileSize; /* also mapped image size when using mmap() */
248 char* formatName; /* e.g. "ELF32", "DLL", "COFF", etc. */
249 ObjectType type; /* who loaded this object? */
251 /* If this object is a member of an archive, archiveMemberName is
252 * like "libarchive.a(object.o)". Otherwise it's NULL.
254 pathchar* archiveMemberName;
256 /* An array containing ptrs to all the symbol names copied from
257 this object into the global symbol hash table. This is so that
258 we know which parts of the latter mapping to nuke when this
259 object is removed from the system. */
260 Symbol_t *symbols;
261 int n_symbols;
263 /* ptr to mem containing the object file image */
264 char* image;
266 /* A customizable type, that formats can use to augment ObjectCode
267 * See Note [No typedefs for customizable types]
269 struct ObjectCodeFormatInfo* info;
271 /* non-zero if the object file was mmap'd, otherwise malloc'd */
272 int imageMapped;
274 /* record by how much image has been deliberately misaligned
275 after allocation, so that we can use realloc */
276 int misalignment;
278 /* The address of __cxa_finalize; set when at least one finalizer was
279 * register and therefore we must call __cxa_finalize before unloading.
280 * See Note [Resolving __dso_handle]. */
281 cxa_finalize_fn cxa_finalize;
283 /* The section-kind entries for this object module. An array. */
284 int n_sections;
285 Section* sections;
287 int n_segments;
288 Segment *segments;
291 // Garbage collection fields
294 // Next object in `objects` list
295 struct _ObjectCode *next;
297 // Previous object in `objects` list
298 struct _ObjectCode *prev;
300 // Next object in `loaded_objects` list
301 struct _ObjectCode *next_loaded_object;
303 // Mark bit
304 // N.B. This is a full word as we CAS it.
305 StgWord mark;
307 // Can this object be safely unloaded? Not true for
308 // dynamic objects when dlinfo is not available as
309 // we cannot determine liveness.
310 bool unloadable;
312 // Set of dependencies (ObjectCode*) of the object file. Traverse
313 // dependencies using `iterHashTable`.
315 // New entries are added as we resolve symbols in an object file, in
316 // `lookupDependentSymbol`. When an object file uses multiple symbols from
317 // another object file we add the dependent multiple times, so we use a
318 // `HashTable` here rather than a list/array to avoid copies.
320 // Used when unloading object files. See Note [Object unloading] in
321 // CheckUnload.c.
322 HashSet *dependencies;
325 // End of garbage collection fields
328 /* SANITY CHECK ONLY: a list of the only memory regions which may
329 safely be prodded during relocation. Any attempt to prod
330 outside one of these is an error in the linker. */
331 ProddableBlock* proddables;
333 #if defined(NEED_SYMBOL_EXTRAS)
334 SymbolExtra *symbol_extras;
335 unsigned long first_symbol_extra;
336 unsigned long n_symbol_extras;
337 #endif
338 /* Additional memory that is preallocated and contiguous with image
339 which can be used to relocate bss sections. */
340 char* bssBegin;
341 char* bssEnd;
343 /* a list of all ForeignExportsLists owned by this object */
344 struct ForeignExportsList *foreign_exports;
346 /* Holds the list of symbols in the .o file which
347 require extra information.*/
348 StrHashTable *extraInfos;
350 #if defined(NEED_M32)
351 /* The m32 allocators used for allocating small sections and symbol extras
352 * during loading. We have two: one for (writeable) data and one for
353 * (read-only/executable) code. */
354 m32_allocator *rw_m32, *rx_m32;
355 #endif
357 #if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
358 /* Cached address of ELF's shndx table, or SHNDX_TABLE_UNINIT if not
359 * initialized yet. It would be better to put it info ELF-specific
360 * ObjectCodeFormatInfo, but unfortunately shndx table is needed in
361 * ocVerifyImage_ELF which runs before ObjectCodeFormatInfo is
362 * initialized by ocInit_ELF. */
363 Elf_Word *shndx_table;
364 #endif
367 * The following are only valid if .type == DYNAMIC_OBJECT
370 /* handle returned from dlopen */
371 void *dlopen_handle;
373 /* virtual memory ranges of loaded code. NULL if no range information is
374 * available (e.g. if dlinfo is unavailable on the current platform).
376 NativeCodeRange *nc_ranges;
#if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
/* We cannot simply use NULL to signal an uninitialised shndx_table because NULL
 * is a valid return value of get_shndx_table. Thus SHNDX_TABLE_UNINIT is defined
 * as the address of the global variable shndx_table_uninit_label, defined in
 * rts/linker/Elf.c, which is definitely unequal to any heap-allocated address */
extern Elf_Word shndx_table_uninit_label;
#define SHNDX_TABLE_UNINIT (&shndx_table_uninit_label)
#endif
/* Most descriptive name available for an object: the archive member name when
 * it is non-NULL, otherwise the plain file name.  OC is evaluated more than
 * once, so it must be free of side effects. */
#define OC_INFORMATIVE_FILENAME(OC)             \
    ( (OC)->archiveMemberName ?                 \
      (OC)->archiveMemberName :                 \
      (OC)->fileName                            \
    )
/* debugBelch wrapper that prefixes the message with "<function>(<object>): ".
 * `s` must be a string literal (it is pasted onto the prefix); any further
 * arguments are forwarded to debugBelch after the function and object names.
 * The prefix closes its parenthesis so the output reads "func(file): msg"
 * rather than the unbalanced "func(file: msg". */
#define ocDebugBelch(oc, s, ...) \
    debugBelch("%s(%" PATH_FMT "): " s, \
               __func__, \
               OC_INFORMATIVE_FILENAME(oc), \
               ##__VA_ARGS__)
#if defined(THREADED_RTS)
/* Mutex for the linker; only declared in the threaded RTS. */
extern Mutex linker_mutex;
#endif /* THREADED_RTS */
/* Type of an initializer */
typedef void (*init_t) (int argc, char **argv, char **env);

/* Type of a finalizer */
typedef void (*fini_t) (void);
411 /* SymbolInfo tracks a symbol's address, the object code from which
412 it originated, and whether or not it's weak.
414 RtsSymbolInfo is used to track the state of the symbols currently
415 loaded or to be loaded by the Linker.
417 Where the information in the `ObjectCode` is used to track the
418 original status of the symbol inside the `ObjectCode`.
420 A weak symbol that has been used will still be marked as weak
421 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
423 typedef struct _RtsSymbolInfo {
424 SymbolAddr* value;
425 ObjectCode *owner;
426 SymStrength strength;
427 SymType type;
428 } RtsSymbolInfo;
430 #include "BeginPrivate.h"
432 void exitLinker( void );
434 void freeObjectCode (ObjectCode *oc);
435 SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo);
437 void addProddableBlock ( ObjectCode* oc, void* start, int size );
438 void checkProddableBlock (ObjectCode *oc, void *addr, size_t size );
439 void freeProddableBlocks (ObjectCode *oc);
441 void addSection (Section *s, SectionKind kind, SectionAlloc alloc,
442 void* start, StgWord size, StgWord mapped_offset,
443 void* mapped_start, StgWord mapped_size);
445 HsBool ghciLookupSymbolInfo(StrHashTable *table,
446 const SymbolName* key, RtsSymbolInfo **result);
448 int ghciInsertSymbolTable(
449 pathchar* obj_name,
450 StrHashTable *table,
451 const SymbolName* key,
452 SymbolAddr* data,
453 SymStrength weak,
454 SymType type,
455 ObjectCode *owner);
457 /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a
458 * dependent to the owner of the symbol. The type of the symbol is stored in 'type'. */
459 SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type);
461 /* Perform TLSGD symbol lookup returning the address of the resulting GOT entry,
462 * which in this case holds the module id and the symbol offset. */
463 StgInt64 lookupTlsgdSymbol(const char *, unsigned long, ObjectCode *);
465 extern StrHashTable *symhash;
467 pathchar*
468 resolveSymbolAddr (pathchar* buffer, int size,
469 SymbolAddr* symbol, uintptr_t* top);
471 /* defined in LoadArchive.c */
472 bool isArchive (pathchar *path);
473 HsInt loadArchive_ (pathchar *path);
475 /*************************************************
476 * Various bits of configuration
477 *************************************************/
/* PowerPC and ARM have relative branch instructions with only 24 bit
 * displacements and therefore need jump islands contiguous with each object
 * code module.
 */
#if defined(powerpc_HOST_ARCH) || defined(arm_HOST_ARCH)
#define SHORT_REL_BRANCH 1
#endif
/* Always defined, to 1 or 0: 1 only for an mmap-using linker on Linux on an
 * architecture with short relative branches. */
#if RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS)
#define USE_CONTIGUOUS_MMAP 1
#else
#define USE_CONTIGUOUS_MMAP 0
#endif
496 HsInt isAlreadyLoaded( pathchar *path );
497 OStatus getObjectLoadStatus_ (pathchar *path);
498 ObjectCode *lookupObjectByPath(pathchar *path);
499 HsInt loadOc( ObjectCode* oc );
500 ObjectCode* mkOc( ObjectType type, pathchar *path, char *image, int imageSize,
501 bool mapped, pathchar *archiveMemberName,
502 int misalignment
505 void initSegment(Segment *s, void *start, size_t size, SegmentProt prot, int n_sections);
506 void freeSegments(ObjectCode *oc);
508 #include "EndPrivate.h"