1 //=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Nate Begeman and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the MachOWriter class.
12 //===----------------------------------------------------------------------===//
17 #include "llvm/Constants.h"
18 #include "llvm/DerivedTypes.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineRelocation.h"
21 #include "llvm/Target/TargetData.h"
22 #include "llvm/Target/TargetMachine.h"
23 #include "llvm/Target/TargetMachOWriterInfo.h"
// Forward declarations: in the visible part of this header both types are
// only named through pointers, references and a friend declaration, so
// their full definitions are not needed here.
class MachineCodeEmitter;
class MachOCodeEmitter;
32 /// MachOSym - This struct contains information about each symbol that is
33 /// added to logical symbol table for the module. This is eventually
34 /// turned into a real symbol table in the file.
36 const GlobalValue
*GV
; // The global value this corresponds to.
37 std::string GVName
; // The mangled name of the global value.
38 uint32_t n_strx
; // index into the string table
39 uint8_t n_type
; // type flag
40 uint8_t n_sect
; // section number or NO_SECT
41 int16_t n_desc
; // see <mach-o/stab.h>
42 uint64_t n_value
; // value for this symbol (or stab offset)
// Constants for the n_sect field
// see <mach-o/nlist.h>
enum { NO_SECT = 0 };   // symbol is not in any section
// Constants for the n_type field
// see <mach-o/nlist.h>
enum { N_UNDF  = 0x0,  // undefined, n_sect == NO_SECT
       N_ABS   = 0x2,  // absolute, n_sect == NO_SECT
       N_SECT  = 0xe,  // defined in section number n_sect
       N_PBUD  = 0xc,  // prebound undefined (defined in a dylib)
       N_INDR  = 0xa   // indirect
};
// The following bits are OR'd into the types above. For example, a type
// of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
enum { N_EXT  = 0x01,   // external symbol bit
       N_PEXT = 0x10    // private external symbol bit
};
// Constants for the n_desc field
// see <mach-o/loader.h>
enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY          = 0,
       REFERENCE_FLAG_UNDEFINED_LAZY              = 1,
       REFERENCE_FLAG_DEFINED                     = 2,
       REFERENCE_FLAG_PRIVATE_DEFINED             = 3,
       REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY  = 4,
       REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY      = 5
};
// Additional n_desc bits.
enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
       N_WEAK_REF      = 0x0040, // symbol is weak referenced
       N_WEAK_DEF      = 0x0080  // coalesced symbol is a weak definition
};
76 MachOSym(const GlobalValue
*gv
, std::string name
, uint8_t sect
,
80 /// MachOWriter - This class implements the common target-independent code for
81 /// writing Mach-O files. Targets should derive a class from this to
82 /// parameterize the output format.
84 class MachOWriter
: public MachineFunctionPass
{
85 friend class MachOCodeEmitter
;
88 MachineCodeEmitter
&getMachineCodeEmitter() const {
89 return *(MachineCodeEmitter
*)MCE
;
92 MachOWriter(std::ostream
&O
, TargetMachine
&TM
);
93 virtual ~MachOWriter();
95 virtual const char *getPassName() const {
96 return "Mach-O Writer";
99 typedef std::vector
<unsigned char> DataBuffer
;
101 /// Output stream to send the resultant object file to.
105 /// Target machine description.
109 /// Mang - The object used to perform name mangling for this module.
113 /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
114 /// code for functions to the .o file.
115 MachOCodeEmitter
*MCE
;
117 /// is64Bit/isLittleEndian - This information is inferred from the target
118 /// machine directly, indicating what header values and flags to set.
119 bool is64Bit
, isLittleEndian
;
121 /// doInitialization - Emit the file header and all of the global variables
122 /// for the module to the Mach-O file.
123 bool doInitialization(Module
&M
);
125 bool runOnMachineFunction(MachineFunction
&MF
);
127 /// doFinalization - Now that the module has been completely processed, emit
128 /// the Mach-O file to 'O'.
129 bool doFinalization(Module
&M
);
131 /// MachOHeader - This struct contains the header information about a
132 /// specific architecture type/subtype pair that is emitted to the file.
134 uint32_t magic
; // mach magic number identifier
135 uint32_t filetype
; // type of file
136 uint32_t ncmds
; // number of load commands
137 uint32_t sizeofcmds
; // the size of all the load commands
138 uint32_t flags
; // flags
139 uint32_t reserved
; // 64-bit only
141 /// HeaderData - The actual data for the header which we are building
142 /// up for emission to the file.
143 DataBuffer HeaderData
;
// Constants for the filetype field
// see <mach-o/loader.h> for additional info on the various types
enum { MH_OBJECT     = 1, // relocatable object file
       MH_EXECUTE    = 2, // demand paged executable file
       MH_FVMLIB     = 3, // fixed VM shared library file
       MH_CORE       = 4, // core file
       MH_PRELOAD    = 5, // preloaded executable file
       MH_DYLIB      = 6, // dynamically bound shared library
       MH_DYLINKER   = 7, // dynamic link editor
       MH_BUNDLE     = 8, // dynamically bound bundle file
       MH_DYLIB_STUB = 9, // shared library stub for static linking only
       MH_DSYM       = 10 // companion file with only debug sections
};
// Constants for the flags field
// see <mach-o/loader.h>; each flag occupies its own bit so that flags can
// be OR'd together.
enum { MH_NOUNDEFS                = 1 << 0,
          // the object file has no undefined references
       MH_INCRLINK                = 1 << 1,
          // the object file is the output of an incremental link against
          // a base file and cannot be link edited again
       MH_DYLDLINK                = 1 << 2,
          // the object file is input for the dynamic linker and cannot be
          // statically link edited again.
       MH_BINDATLOAD              = 1 << 3,
          // the object file's undefined references are bound by the
          // dynamic linker when loaded.
       MH_PREBOUND                = 1 << 4,
          // the file has its dynamic undefined references prebound
       MH_SPLIT_SEGS              = 1 << 5,
          // the file has its read-only and read-write segments split
          // see <mach/shared_memory_server.h>
       MH_LAZY_INIT               = 1 << 6,
          // the shared library init routine is to be run lazily via
          // catching memory faults to its writable segments (obsolete)
       MH_TWOLEVEL                = 1 << 7,
          // the image is using two-level namespace bindings
       MH_FORCE_FLAT              = 1 << 8,
          // the executable is forcing all images to use flat namespace
          // bindings.
       MH_NOMULTIDEFS             = 1 << 9,
          // this umbrella guarantees no multiple definitions of symbols
          // in its sub-images so the two-level namespace hints can
          // always be used.
       MH_NOFIXPREBINDING         = 1 << 10,
          // do not have dyld notify the prebinding agent about this
          // executable.
       MH_PREBINDABLE             = 1 << 11,
          // the binary is not prebound but can have its prebinding
          // redone.  only used when MH_PREBOUND is not set.
       MH_ALLMODSBOUND            = 1 << 12,
          // indicates that this binary binds to all two-level namespace
          // modules of its dependent libraries.  Only used when
          // MH_PREBINDABLE and MH_TWOLEVEL are both set.
       MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
          // safe to divide up the sections into sub-sections via symbols
          // for dead code stripping.
       MH_CANONICAL               = 1 << 14,
          // the binary has been canonicalized via the unprebind operation
       MH_WEAK_DEFINES            = 1 << 15,
          // the final linked image contains external weak symbols
       MH_BINDS_TO_WEAK           = 1 << 16,
          // the final linked image uses weak symbols
       MH_ALLOW_STACK_EXECUTION   = 1 << 17
          // When this bit is set, all stacks in the task will be given
          // stack execution privilege.  Only used in MH_EXECUTE filetype
};
212 MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
215 /// cmdSize - This routine returns the size of the MachOSection as written
216 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
217 unsigned cmdSize(bool is64Bit
) const {
219 return 8 * sizeof(uint32_t);
221 return 7 * sizeof(uint32_t);
224 /// setMagic - This routine sets the appropriate value for the 'magic'
225 /// field based on pointer size and endianness.
226 void setMagic(bool isLittleEndian
, bool is64Bit
) {
228 if (is64Bit
) magic
= 0xcffaedfe;
229 else magic
= 0xcefaedfe;
231 if (is64Bit
) magic
= 0xfeedfacf;
232 else magic
= 0xfeedface;
236 /// Header - An instance of MachOHeader that we will update while we build
237 /// the file, and then emit during finalization.
/// MachOSegment - This struct contains the necessary information to
/// emit the load commands for each section in the file.
struct MachOSegment {
  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
  uint32_t    cmdsize;  // Total size of this struct and section commands
  std::string segname;  // segment name
  uint64_t    vmaddr;   // address of this segment
  uint64_t    vmsize;   // size of this segment, may be larger than filesize
  uint64_t    fileoff;  // offset in file
  uint64_t    filesize; // amount to read from file
  uint32_t    maxprot;  // maximum VM protection
  uint32_t    initprot; // initial VM protection
  uint32_t    nsects;   // number of sections in this segment
  uint32_t    flags;    // flags

  // The following constants are getting pulled in by one of the
  // system headers, which creates a neat clash with the enum.
#if !defined(VM_PROT_NONE)
#define VM_PROT_NONE    0x00
#endif
#if !defined(VM_PROT_READ)
#define VM_PROT_READ    0x01
#endif
#if !defined(VM_PROT_WRITE)
#define VM_PROT_WRITE   0x02
#endif
#if !defined(VM_PROT_EXECUTE)
#define VM_PROT_EXECUTE 0x04
#endif
#if !defined(VM_PROT_ALL)
#define VM_PROT_ALL     0x07
#endif

  // Constants for the vm protection fields
  // see <mach/vm_prot.h>
  enum { SEG_VM_PROT_NONE     = VM_PROT_NONE,
         SEG_VM_PROT_READ     = VM_PROT_READ,    // read permission
         SEG_VM_PROT_WRITE    = VM_PROT_WRITE,   // write permission
         SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE,
         SEG_VM_PROT_ALL      = VM_PROT_ALL
  };

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
         LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
  };

  /// cmdSize - This routine returns the size of the MachOSegment as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
  /// The trailing 16 accounts for the fixed-width segment name.
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
    else
      return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
  }

  /// Create a segment named 'seg' with all sizes and offsets zeroed and
  /// both protection fields set to VM_PROT_ALL.  'is64Bit' selects between
  /// the LC_SEGMENT_64 and LC_SEGMENT command ids.
  MachOSegment(const std::string &seg, bool is64Bit)
    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
      vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
      initprot(VM_PROT_ALL), nsects(0), flags(0) { }
};
/// MachOSection - This struct contains information about each section in a
/// particular segment that is emitted to the file.  This is eventually
/// turned into the SectionCommand in the load command for a particular
/// segment.
307 struct MachOSection
{
308 std::string sectname
; // name of this section,
309 std::string segname
; // segment this section goes in
310 uint64_t addr
; // memory address of this section
311 uint64_t size
; // size in bytes of this section
312 uint32_t offset
; // file offset of this section
313 uint32_t align
; // section alignment (power of 2)
314 uint32_t reloff
; // file offset of relocation entries
315 uint32_t nreloc
; // number of relocation entries
316 uint32_t flags
; // flags (section type and attributes)
317 uint32_t reserved1
; // reserved (for offset or index)
318 uint32_t reserved2
; // reserved (for count or sizeof)
319 uint32_t reserved3
; // reserved (64 bit only)
321 /// A unique number for this section, which will be used to match symbols
322 /// to the correct section.
325 /// SectionData - The actual data for this section which we are building
326 /// up for emission to the file.
327 DataBuffer SectionData
;
329 /// RelocBuffer - A buffer to hold the mach-o relocations before we write
330 /// them out at the appropriate location in the file.
331 DataBuffer RelocBuffer
;
333 /// Relocations - The relocations that we have encountered so far in this
334 /// section that we will need to convert to MachORelocation entries when
335 /// the file is written.
336 std::vector
<MachineRelocation
> Relocations
;
// Constants for the section types (low 8 bits of flags field)
// see <mach-o/loader.h>
enum { S_REGULAR = 0,
          // regular section
       S_ZEROFILL = 1,
          // zero fill on demand section
       S_CSTRING_LITERALS = 2,
          // section with only literal C strings
       S_4BYTE_LITERALS = 3,
          // section with only 4 byte literals
       S_8BYTE_LITERALS = 4,
          // section with only 8 byte literals
       S_LITERAL_POINTERS = 5,
          // section with only pointers to literals
       S_NON_LAZY_SYMBOL_POINTERS = 6,
          // section with only non-lazy symbol pointers
       S_LAZY_SYMBOL_POINTERS = 7,
          // section with only lazy symbol pointers
       S_SYMBOL_STUBS = 8,
          // section with only symbol stubs
          // byte size of stub in the reserved2 field
       S_MOD_INIT_FUNC_POINTERS = 9,
          // section with only function pointers for initialization
       S_MOD_TERM_FUNC_POINTERS = 10,
          // section with only function pointers for termination
       S_COALESCED = 11,
          // section contains symbols that are coalesced
       S_GB_ZEROFILL = 12,
          // zero fill on demand section (that can be larger than 4GB)
       S_INTERPOSING = 13,
          // section with only pairs of function pointers for interposing
       S_16BYTE_LITERALS = 14
          // section with only 16 byte literals
};
// Constants for the section flags (high 24 bits of flags field)
// see <mach-o/loader.h>
// The top bit is written as an unsigned shift so that it does not shift
// into the sign bit of a signed int.
enum { S_ATTR_PURE_INSTRUCTIONS   = 1U << 31,
          // section contains only true machine instructions
       S_ATTR_NO_TOC              = 1 << 30,
          // section contains coalesced symbols that are not to be in a
          // ranlib table of contents
       S_ATTR_STRIP_STATIC_SYMS   = 1 << 29,
          // ok to strip static symbols in this section in files with the
          // MH_DYLDLINK flag
       S_ATTR_NO_DEAD_STRIP       = 1 << 28,
          // no dead stripping
       S_ATTR_LIVE_SUPPORT        = 1 << 27,
          // blocks are live if they reference live blocks
       S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
          // used with i386 code stubs written on by dyld
       S_ATTR_DEBUG               = 1 << 25,
          // a debug section
       S_ATTR_SOME_INSTRUCTIONS   = 1 << 10,
          // section contains some machine instructions
       S_ATTR_EXT_RELOC           = 1 << 9,
          // section has external relocation entries
       S_ATTR_LOC_RELOC           = 1 << 8
          // section has local relocation entries
};
399 /// cmdSize - This routine returns the size of the MachOSection as written
400 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
401 unsigned cmdSize(bool is64Bit
) const {
403 return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
405 return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
408 MachOSection(const std::string
&seg
, const std::string
§
)
409 : sectname(sect
), segname(seg
), addr(0), size(0), offset(0), align(2),
410 reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
416 /// SectionList - This is the list of sections that we have emitted to the
417 /// file. Once the file has been completely built, the segment load command
418 /// SectionCommands are constructed from this info.
419 std::vector
<MachOSection
*> SectionList
;
421 /// SectionLookup - This is a mapping from section name to SectionList entry
422 std::map
<std::string
, MachOSection
*> SectionLookup
;
424 /// GVSection - This is a mapping from a GlobalValue to a MachOSection,
425 /// to aid in emitting relocations.
426 std::map
<GlobalValue
*, MachOSection
*> GVSection
;
/// GVOffset - This is a mapping from a GlobalValue to an offset from the
/// start of the section in which the GV resides, to aid in emitting
/// relocations.
431 std::map
<GlobalValue
*, intptr_t> GVOffset
;
433 /// getSection - Return the section with the specified name, creating a new
434 /// section if one does not already exist.
435 MachOSection
*getSection(const std::string
&seg
, const std::string
§
,
436 unsigned Flags
= 0) {
437 MachOSection
*MOS
= SectionLookup
[seg
+sect
];
440 MOS
= new MachOSection(seg
, sect
);
441 SectionList
.push_back(MOS
);
442 MOS
->Index
= SectionList
.size();
443 MOS
->flags
= MachOSection::S_REGULAR
| Flags
;
444 SectionLookup
[seg
+sect
] = MOS
;
447 MachOSection
*getTextSection(bool isCode
= true) {
449 return getSection("__TEXT", "__text",
450 MachOSection::S_ATTR_PURE_INSTRUCTIONS
|
451 MachOSection::S_ATTR_SOME_INSTRUCTIONS
);
453 return getSection("__TEXT", "__text");
455 MachOSection
*getBSSSection() {
456 return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL
);
458 MachOSection
*getDataSection() {
459 return getSection("__DATA", "__data");
461 MachOSection
*getConstSection(Constant
*C
) {
462 const ConstantArray
*CVA
= dyn_cast
<ConstantArray
>(C
);
463 if (CVA
&& CVA
->isCString())
464 return getSection("__TEXT", "__cstring",
465 MachOSection::S_CSTRING_LITERALS
);
467 const Type
*Ty
= C
->getType();
468 if (Ty
->isPrimitiveType() || Ty
->isInteger()) {
469 unsigned Size
= TM
.getTargetData()->getTypeSize(Ty
);
471 default: break; // Fall through to __TEXT,__const
473 return getSection("__TEXT", "__literal4",
474 MachOSection::S_4BYTE_LITERALS
);
476 return getSection("__TEXT", "__literal8",
477 MachOSection::S_8BYTE_LITERALS
);
479 return getSection("__TEXT", "__literal16",
480 MachOSection::S_16BYTE_LITERALS
);
483 return getSection("__TEXT", "__const");
485 MachOSection
*getJumpTableSection() {
486 if (TM
.getRelocationModel() == Reloc::PIC_
)
487 return getTextSection(false);
489 return getSection("__TEXT", "__const");
/// MachOSymTab - This struct contains information about the offsets and
/// size of symbol table information.
struct MachOSymTab {
  uint32_t cmd;     // LC_SYMTAB
  uint32_t cmdsize; // sizeof( MachOSymTab )
  uint32_t symoff;  // symbol table offset
  uint32_t nsyms;   // number of symbol table entries
  uint32_t stroff;  // string table offset
  uint32_t strsize; // string table size in bytes

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_SYMTAB = 0x02  // link-edit stab symbol table info
  };

  /// The command size is the six 32-bit fields above; all offsets and
  /// counts start at zero.
  MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
    nsyms(0), stroff(0), strsize(0) { }
};
/// MachODySymTab - This struct contains information about the offsets and
/// size of the dynamic symbol table information (the LC_DYSYMTAB command).
struct MachODySymTab {
  uint32_t cmd;             // LC_DYSYMTAB
  uint32_t cmdsize;         // sizeof( MachODySymTab )
  uint32_t ilocalsym;       // index to local symbols
  uint32_t nlocalsym;       // number of local symbols
  uint32_t iextdefsym;      // index to externally defined symbols
  uint32_t nextdefsym;      // number of externally defined symbols
  uint32_t iundefsym;       // index to undefined symbols
  uint32_t nundefsym;       // number of undefined symbols
  uint32_t tocoff;          // file offset to table of contents
  uint32_t ntoc;            // number of entries in table of contents
  uint32_t modtaboff;       // file offset to module table
  uint32_t nmodtab;         // number of module table entries
  uint32_t extrefsymoff;    // offset to referenced symbol table
  uint32_t nextrefsyms;     // number of referenced symbol table entries
  uint32_t indirectsymoff;  // file offset to the indirect symbol table
  uint32_t nindirectsyms;   // number of indirect symbol table entries
  uint32_t extreloff;       // offset to external relocation entries
  uint32_t nextrel;         // number of external relocation entries
  uint32_t locreloff;       // offset to local relocation entries
  uint32_t nlocrel;         // number of local relocation entries

  // Constants for the cmd field
  // see <mach-o/loader.h>
  enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
  };

  /// The command size is the twenty 32-bit fields above; every index,
  /// offset and count starts at zero.
  MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
    ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
    iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
    nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
    nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
};
549 /// SymTab - The "stab" style symbol table information
551 /// DySymTab - symbol table info for the dynamic link editor
552 MachODySymTab DySymTab
;
555 // FIXME: this does not appear to be sorting 'f' after 'F'
556 bool operator()(const MachOSym
&LHS
, const MachOSym
&RHS
) {
557 return LHS
.GVName
< RHS
.GVName
;
561 /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
562 /// a local symbol rather than an external symbol.
563 static bool PartitionByLocal(const MachOSym
&Sym
);
565 /// PartitionByDefined - Simple boolean predicate that returns true if Sym
566 /// is defined in this module.
567 static bool PartitionByDefined(const MachOSym
&Sym
);
571 /// SymbolTable - This is the list of symbols we have emitted to the file.
572 /// This actually gets rearranged before emission to the file (to put the
573 /// local symbols first in the list).
574 std::vector
<MachOSym
> SymbolTable
;
576 /// SymT - A buffer to hold the symbol table before we write it out at the
577 /// appropriate location in the file.
580 /// StrT - A buffer to hold the string table before we write it out at the
581 /// appropriate location in the file.
/// PendingGlobals - This is a list of externally defined symbols that we
/// have been asked to emit, but have not seen a reference to.  When a
/// reference is seen, the symbol will move from this list to the SymbolTable.
587 std::vector
<GlobalValue
*> PendingGlobals
;
589 /// DynamicSymbolTable - This is just a vector of indices into
590 /// SymbolTable to aid in emitting the DYSYMTAB load command.
591 std::vector
<unsigned> DynamicSymbolTable
;
593 static void InitMem(const Constant
*C
, void *Addr
, intptr_t Offset
,
594 const TargetData
*TD
,
595 std::vector
<MachineRelocation
> &MRs
);
598 void AddSymbolToSection(MachOSection
*MOS
, GlobalVariable
*GV
);
599 void EmitGlobal(GlobalVariable
*GV
);
600 void EmitHeaderAndLoadCommands();
602 void BufferSymbolAndStringTable();
603 void CalculateRelocations(MachOSection
&MOS
);
605 MachineRelocation
GetJTRelocation(unsigned Offset
,
606 MachineBasicBlock
*MBB
) const {
607 return TM
.getMachOWriterInfo()->GetJTRelocation(Offset
, MBB
);
610 /// GetTargetRelocation - Returns the number of relocations.
611 unsigned GetTargetRelocation(MachineRelocation
&MR
,
615 OutputBuffer
&RelocOut
,
616 OutputBuffer
&SecOut
,
619 return TM
.getMachOWriterInfo()->GetTargetRelocation(MR
, FromIdx
, ToAddr
,