1 /* -----------------------------------------------------------------------------
3 * (c) The GHC Team, 2000
7 * ---------------------------------------------------------------------------*/
12 #include "RtsSymbols.h"
14 #include "linker/M32Alloc.h"
16 #if RTS_LINKER_USE_MMAP
/* Takes no arguments and returns nothing; the definition is not part of
 * this header.
 * NOTE(review): the name suggests it prints the objects tracked by the
 * linker -- confirm at the definition. */
void printLoadedObjects(void);
22 /* Which object file format are we targeting? */
23 #if defined(linux_HOST_OS) || defined(solaris2_HOST_OS) \
24 || defined(linux_android_HOST_OS) \
25 || defined(freebsd_HOST_OS) || defined(kfreebsdgnu_HOST_OS) \
26 || defined(dragonfly_HOST_OS) || defined(netbsd_HOST_OS) \
27 || defined(openbsd_HOST_OS) || defined(gnu_HOST_OS)
28 # define OBJFORMAT_ELF
29 #elif defined(mingw32_HOST_OS)
30 # define OBJFORMAT_PEi386
31 #elif defined(darwin_HOST_OS) || defined(ios_HOST_OS)
32 # define OBJFORMAT_MACHO
33 #elif defined(wasm32_HOST_ARCH)
34 # define OBJFORMAT_WASM32
/* Forward typedefs so that pointers to these structs can be used in
 * declarations before the struct bodies have been seen. */
typedef struct _ObjectCode ObjectCode;
typedef struct _Section Section;
41 * Note [Processing overflowed relocations]
42 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
43 * When processing relocations whose targets exceed the relocation's maximum
44 * displacement, we can take advantage of knowledge of the symbol type to avoid
45 * linker failures. In particular, if we know that a symbol is a code symbol
46 * then we can handle the relocation by creating a "jump island", a small bit
47 * of code which immediately jumps (with an instruction sequence capable of
48 * larger displacement) to the target.
50 * This is not possible for data symbols (or, for that matter, Haskell symbols
51 * when TNTC is in use). In these cases we instead have to fail and ask the user
52 * to recompile their program as position-independent.
55 #if defined(OBJFORMAT_ELF)
56 # include "linker/ElfTypes.h"
57 #elif defined(OBJFORMAT_PEi386)
58 # include "linker/PEi386Types.h"
59 #elif defined(OBJFORMAT_MACHO)
60 # include "linker/MachOTypes.h"
61 #elif defined(OBJFORMAT_WASM32)
62 # include "linker/Wasm32Types.h"
64 # error "Unknown OBJECT_FORMAT for HOST_OS"
68 /* Hold extended information about a symbol in case we need to resolve it at a
70 typedef struct _Symbol
77 typedef struct NativeCodeRange_
{
80 /* Allow a chain of these things */
81 struct NativeCodeRange_
*next
;
84 /* Indication of section kinds for loaded objects. Needed by
85 the GC for deciding whether or not a pointer on the stack
87 See Note [BFD import library].
90 enum { /* Section is code or readonly. e.g. .text or .r(o)data. */
91 SECTIONKIND_CODE_OR_RODATA
,
92 /* Section contains read/write data. e.g. .data. */
94 /* Static initializer section. e.g. .ctors. */
95 SECTIONKIND_INIT_ARRAY
,
96 /* Static finalizer section. e.g. .dtors. */
97 SECTIONKIND_FINI_ARRAY
,
98 /* We don't know what the section is and don't care. */
102 * Windows-specific section kinds
105 /* Section contains debug information. e.g. .debug$. */
107 /* Section contains exception table. e.g. .pdata. */
108 SECTIONKIND_EXCEPTION_TABLE
,
109 /* Section contains unwind info. e.g. .xdata. */
110 SECTIONKIND_EXCEPTION_UNWIND
,
111 /* Section belongs to an import section group. e.g. .idata$. */
113 /* Section defines the head section of a BFD-style import library, e.g. idata$7. */
114 SECTIONKIND_BFD_IMPORT_LIBRARY_HEAD
,
115 /* Section defines an import library entry, e.g. idata$7. */
116 SECTIONKIND_BFD_IMPORT_LIBRARY
,
121 enum { SECTION_NOMEM
,
128 /* Indicates a desired memory protection for pages within a segment. Defined as
129 * enum since it's more explicit and looks nicer in a debugger.
131 * Can be used directly as a substitution for a combination of PROT_X flags on
135 #if RTS_LINKER_USE_MMAP
136 SEGMENT_PROT_RO
= PROT_READ
,
137 SEGMENT_PROT_RX
= PROT_READ
| PROT_EXEC
,
138 SEGMENT_PROT_RWO
= PROT_READ
| PROT_WRITE
,
147 * Note [No typedefs for customizable types]
148 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
149 * Some pointer-to-struct types are defined opaquely
150 * first, and customized later to architecture/ABI-specific
151 * instantiations. Having the usual
152 * typedef struct _Foo {...} Foo;
153 * wrappers is hard to get right with older versions of GCC,
156 * and always refer to it with the 'struct' qualifier.
160 void* start
; /* actual start of section in memory */
161 StgWord size
; /* actual size of section in memory */
166 * The following fields are relevant for SECTION_MMAP sections only
168 StgWord mapped_offset
; /* offset from the image of mapped_start */
169 void* mapped_start
; /* start of mmap() block */
170 StgWord mapped_size
; /* size of mmap() block */
172 /* A customizable type to augment the Section type.
173 * See Note [No typedefs for customizable types]
175 struct SectionFormatInfo
* info
;
179 struct _ProddableBlock
{
182 struct _ProddableBlock
* next
;
186 typedef struct _Segment
{
187 void *start
; /* page aligned start address of a segment */
188 size_t size
; /* page rounded size of a segment */
189 SegmentProt prot
; /* mem protection to set after all symbols were
192 int *sections_idx
; /* an array of section indexes assigned to this segment */
196 #if defined(powerpc_HOST_ARCH) || defined(x86_64_HOST_ARCH) || defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) || defined(riscv64_HOST_ARCH)
197 #define NEED_SYMBOL_EXTRAS 1
201 * We use the m32 allocator for symbol extras on Windows and other mmap-using
204 #if RTS_LINKER_USE_MMAP || defined(mingw32_HOST_ARCH)
208 /* Jump Islands are snippets of machine code required for relative address
209 * relocations on the PowerPC, x86_64 and ARM. On RISCV64 we use symbolextras
210 * like a GOT for locals where SymbolExtra represents one entry.
213 #if defined(powerpc_HOST_ARCH)
215 short lis_r12
, hi_addr
;
216 short ori_r12_r12
, lo_addr
;
220 #elif defined(x86_64_HOST_ARCH)
222 // See Note [TLSGD relocation] in elf_tlsgd.c
223 uint8_t jumpIsland
[8];
224 #elif defined(arm_HOST_ARCH)
225 uint8_t jumpIsland
[16];
226 #elif defined(riscv64_HOST_ARCH)
232 /* Objects that were loaded by this linker */
235 /* Objects that were loaded by dlopen */
/* Function-pointer type used to hold the address of __cxa_finalize:
 * one untyped pointer argument, no return value. */
typedef void (*cxa_finalize_fn)(void *);
241 /* Top-level structure for an object module. One of these is allocated
242 * for each object file in use.
247 int fileSize
; /* also mapped image size when using mmap() */
248 char* formatName
; /* e.g. "ELF32", "DLL", "COFF", etc. */
249 ObjectType type
; /* who loaded this object? */
251 /* If this object is a member of an archive, archiveMemberName is
252 * like "libarchive.a(object.o)". Otherwise it's NULL.
254 pathchar
* archiveMemberName
;
256 /* An array containing ptrs to all the symbol names copied from
257 this object into the global symbol hash table. This is so that
258 we know which parts of the latter mapping to nuke when this
259 object is removed from the system. */
263 /* ptr to mem containing the object file image */
266 /* A customizable type, that formats can use to augment ObjectCode
267 * See Note [No typedefs for customizable types]
269 struct ObjectCodeFormatInfo
* info
;
271 /* non-zero if the object file was mmap'd, otherwise malloc'd */
274 /* record by how much image has been deliberately misaligned
275 after allocation, so that we can use realloc */
278 /* The address of __cxa_finalize; set when at least one finalizer was
279 * registered and therefore we must call __cxa_finalize before unloading.
280 * See Note [Resolving __dso_handle]. */
281 cxa_finalize_fn cxa_finalize
;
283 /* The section-kind entries for this object module. An array. */
291 // Garbage collection fields
294 // Next object in `objects` list
295 struct _ObjectCode
*next
;
297 // Previous object in `objects` list
298 struct _ObjectCode
*prev
;
300 // Next object in `loaded_objects` list
301 struct _ObjectCode
*next_loaded_object
;
304 // N.B. This is a full word as we CAS it.
307 // Can this object be safely unloaded? Not true for
308 // dynamic objects when dlinfo is not available as
309 // we cannot determine liveness.
312 // Set of dependencies (ObjectCode*) of the object file. Traverse
313 // dependencies using `iterHashTable`.
315 // New entries are added as we resolve symbols in an object file, in
316 // `lookupDependentSymbol`. When an object file uses multiple symbols from
317 // another object file we add the dependency multiple times, so we use a
318 // `HashSet` here rather than a list/array to avoid duplicates.
320 // Used when unloading object files. See Note [Object unloading] in
322 HashSet
*dependencies
;
325 // End of garbage collection fields
328 /* SANITY CHECK ONLY: a list of the only memory regions which may
329 safely be prodded during relocation. Any attempt to prod
330 outside one of these is an error in the linker. */
331 ProddableBlock
* proddables
;
333 #if defined(NEED_SYMBOL_EXTRAS)
334 SymbolExtra
*symbol_extras
;
335 unsigned long first_symbol_extra
;
336 unsigned long n_symbol_extras
;
338 /* Additional memory that is preallocated and contiguous with image
339 which can be used to relocate bss sections. */
343 /* a list of all ForeignExportsLists owned by this object */
344 struct ForeignExportsList
*foreign_exports
;
346 /* Holds the list of symbols in the .o file which
347 require extra information.*/
348 StrHashTable
*extraInfos
;
350 #if defined(NEED_M32)
351 /* The m32 allocators used for allocating small sections and symbol extras
352 * during loading. We have two: one for (writeable) data and one for
353 * (read-only/executable) code. */
354 m32_allocator
*rw_m32
, *rx_m32
;
357 #if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
358 /* Cached address of ELF's shndx table, or SHNDX_TABLE_UNINIT if not
359 * initialized yet. It would be better to put it into the ELF-specific
360 * ObjectCodeFormatInfo, but unfortunately shndx table is needed in
361 * ocVerifyImage_ELF which runs before ObjectCodeFormatInfo is
362 * initialized by ocInit_ELF. */
363 Elf_Word
*shndx_table
;
367 * The following are only valid if .type == DYNAMIC_OBJECT
370 /* handle returned from dlopen */
373 /* virtual memory ranges of loaded code. NULL if no range information is
374 * available (e.g. if dlinfo is unavailable on the current platform).
376 NativeCodeRange
*nc_ranges
;
379 #if defined(OBJFORMAT_ELF) && defined(SHN_XINDEX)
380 /* We cannot simply use NULL to signal uninitialised shndx_table because NULL
381 * is a valid return value of get_shndx_table. Thus SHNDX_TABLE_UNINIT is defined
382 * as the address of global variable shndx_table_uninit_label, defined in
383 * rts/linker/Elf.c, which is definitely unequal to any heap-allocated address */
384 extern Elf_Word shndx_table_uninit_label
;
385 #define SHNDX_TABLE_UNINIT (&shndx_table_uninit_label)
388 #define OC_INFORMATIVE_FILENAME(OC) \
389 ( (OC)->archiveMemberName ? \
390 (OC)->archiveMemberName : \
394 #define ocDebugBelch(oc, s, ...) \
395 debugBelch("%s(%" PATH_FMT ": " s, \
397 OC_INFORMATIVE_FILENAME(oc), \
#ifdef THREADED_RTS
/* Only declared when the RTS is built threaded; defined elsewhere. */
extern Mutex linker_mutex;
#endif /* THREADED_RTS */
/* Type of an initializer: receives the argument count, the argument
 * vector and the environment vector, and returns nothing. */
typedef void (*init_t)(int argc, char **argv, char **env);
/* Type of a finalizer: takes no arguments and returns nothing. */
typedef void (*fini_t)(void);
411 /* SymbolInfo tracks a symbol's address, the object code from which
412 it originated, and whether or not it's weak.
414 RtsSymbolInfo is used to track the state of the symbols currently
415 loaded or to be loaded by the Linker.
417 The information in the `ObjectCode`, by contrast, is used to track the
418 original status of the symbol inside that `ObjectCode`.
420 A weak symbol that has been used will still be marked as weak
421 in the `ObjectCode` but in the `RtsSymbolInfo` it won't be.
423 typedef struct _RtsSymbolInfo
{
426 SymStrength strength
;
430 #include "BeginPrivate.h"
/* Takes no arguments and returns nothing.
 * NOTE(review): presumably tears down linker state at shutdown -- confirm
 * at the definition. */
void exitLinker(void);
434 void freeObjectCode (ObjectCode
*oc
);
435 SymbolAddr
* loadSymbol(SymbolName
*lbl
, RtsSymbolInfo
*pinfo
);
437 void addProddableBlock ( ObjectCode
* oc
, void* start
, int size
);
438 void checkProddableBlock (ObjectCode
*oc
, void *addr
, size_t size
);
439 void freeProddableBlocks (ObjectCode
*oc
);
441 void addSection (Section
*s
, SectionKind kind
, SectionAlloc alloc
,
442 void* start
, StgWord size
, StgWord mapped_offset
,
443 void* mapped_start
, StgWord mapped_size
);
445 HsBool
ghciLookupSymbolInfo(StrHashTable
*table
,
446 const SymbolName
* key
, RtsSymbolInfo
**result
);
448 int ghciInsertSymbolTable(
451 const SymbolName
* key
,
457 /* Lock-free version of lookupSymbol. When 'dependent' is not NULL, adds it as a
458 * dependent to the owner of the symbol. The type of the symbol is stored in 'type'. */
459 SymbolAddr
* lookupDependentSymbol (SymbolName
* lbl
, ObjectCode
*dependent
, SymType
*type
);
461 /* Perform TLSGD symbol lookup returning the address of the resulting GOT entry,
462 * which in this case holds the module id and the symbol offset. */
463 StgInt64
lookupTlsgdSymbol(const char *, unsigned long, ObjectCode
*);
465 extern StrHashTable
*symhash
;
468 resolveSymbolAddr (pathchar
* buffer
, int size
,
469 SymbolAddr
* symbol
, uintptr_t* top
);
471 /* defined in LoadArchive.c */
472 bool isArchive (pathchar
*path
);
473 HsInt
loadArchive_ (pathchar
*path
);
475 /*************************************************
476 * Various bits of configuration
477 *************************************************/
479 /* PowerPC and ARM have relative branch instructions with only 24 bit
480 * displacements and therefore need jump islands contiguous with each object
483 #if defined(powerpc_HOST_ARCH)
484 #define SHORT_REL_BRANCH 1
486 #if defined(arm_HOST_ARCH)
487 #define SHORT_REL_BRANCH 1
490 #if (RTS_LINKER_USE_MMAP && defined(SHORT_REL_BRANCH) && defined(linux_HOST_OS))
491 #define USE_CONTIGUOUS_MMAP 1
493 #define USE_CONTIGUOUS_MMAP 0
496 HsInt
isAlreadyLoaded( pathchar
*path
);
497 OStatus
getObjectLoadStatus_ (pathchar
*path
);
498 ObjectCode
*lookupObjectByPath(pathchar
*path
);
499 HsInt
loadOc( ObjectCode
* oc
);
500 ObjectCode
* mkOc( ObjectType type
, pathchar
*path
, char *image
, int imageSize
,
501 bool mapped
, pathchar
*archiveMemberName
,
505 void initSegment(Segment
*s
, void *start
, size_t size
, SegmentProt prot
, int n_sections
);
506 void freeSegments(ObjectCode
*oc
);
508 #include "EndPrivate.h"