1 //=== MachO.h - Mach-O structures and constants -----------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// This file defines Mach-O structures and constants.
12 //===----------------------------------------------------------------------===//
17 #include "llvm/CodeGen/BinaryObject.h"
26 /// MachOSym - This struct contains information about each symbol that is
27 /// added to logical symbol table for the module. This is eventually
28 /// turned into a real symbol table in the file.
30 const GlobalValue
*GV
; // The global value this corresponds to.
31 std::string GVName
; // The mangled name of the global value.
32 uint32_t n_strx
; // index into the string table
33 uint8_t n_type
; // type flag
34 uint8_t n_sect
; // section number or NO_SECT
35 int16_t n_desc
; // see <mach-o/stab.h>
36 uint64_t n_value
; // value for this symbol (or stab offset)
// Values for the n_sect field; see <mach-o/nlist.h>.
enum {
  NO_SECT = 0 // symbol is not in any section
};
// Values for the n_type field; see <mach-o/nlist.h>.
enum {
  N_UNDF = 0x0, // undefined, n_sect == NO_SECT
  N_ABS = 0x2,  // absolute, n_sect == NO_SECT
  N_SECT = 0xe, // defined in section number n_sect
  N_PBUD = 0xc, // prebound undefined (defined in a dylib)
  N_INDR = 0xa  // indirect
};
// Modifier bits that are OR'd into the n_type values. For instance, an
// external N_SECT symbol has type 0x0f (0x0e | 0x01).
enum {
  N_EXT = 0x01, // external symbol bit
  N_PEXT = 0x10 // private external symbol bit
};
// Reference-type values stored in the n_desc field.
// see <mach-o/nlist.h>
enum {
  REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
  REFERENCE_FLAG_UNDEFINED_LAZY = 1,
  REFERENCE_FLAG_DEFINED = 2,
  REFERENCE_FLAG_PRIVATE_DEFINED = 3,
  REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
  REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
};
// Additional single-bit flags for a symbol's descriptor.
enum {
  N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
  N_WEAK_REF = 0x0040,      // symbol is weak referenced
  N_WEAK_DEF = 0x0080       // coalesced symbol is a weak definition
};
70 MachOSym(const GlobalValue
*gv
, std::string name
, uint8_t sect
,
71 const TargetAsmInfo
*TAI
);
74 // FIXME: this does not appear to be sorting 'f' after 'F'
75 bool operator()(const MachOSym
&LHS
, const MachOSym
&RHS
) {
76 return LHS
.GVName
< RHS
.GVName
;
81 /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
82 /// a local symbol rather than an external symbol.
84 static inline bool PartitionByLocal(const MachOSym
&Sym
) {
85 return (Sym
.n_type
& (MachOSym::N_EXT
| MachOSym::N_PEXT
)) == 0;
88 /// PartitionByDefined - Simple boolean predicate that returns true if Sym is
89 /// defined in this module.
91 static inline bool PartitionByDefined(const MachOSym
&Sym
) {
92 // FIXME: Do N_ABS or N_INDR count as defined?
93 return (Sym
.n_type
& MachOSym::N_SECT
) == MachOSym::N_SECT
;
96 }; // end struct MachOSym
/// MachOHeader - This struct contains the header information about a
/// specific architecture type/subtype pair that is emitted to the file.
struct MachOHeader {
  uint32_t magic;      // mach magic number identifier
  uint32_t filetype;   // type of file
  uint32_t ncmds;      // number of load commands
  uint32_t sizeofcmds; // the size of all the load commands
  uint32_t flags;      // flags
  uint32_t reserved;   // 64-bit only

  /// HeaderData - The actual data for the header which we are building
  /// up for emission to the file.
  std::vector<unsigned char> HeaderData;

  // Constants for the filetype field
  // see <mach-o/loader.h> for additional info on the various types
  enum {
    MH_OBJECT = 1,     // relocatable object file
    MH_EXECUTE = 2,    // demand paged executable file
    MH_FVMLIB = 3,     // fixed VM shared library file
    MH_CORE = 4,       // core file
    MH_PRELOAD = 5,    // preloaded executable file
    MH_DYLIB = 6,      // dynamically bound shared library
    MH_DYLINKER = 7,   // dynamic link editor
    MH_BUNDLE = 8,     // dynamically bound bundle file
    MH_DYLIB_STUB = 9, // shared library stub for static linking only
    MH_DSYM = 10       // companion file with only debug sections
  };

  // Constants for the flags field. Each flag occupies its own bit.
  enum {
    MH_NOUNDEFS = 1 << 0,
        // the object file has no undefined references
    MH_INCRLINK = 1 << 1,
        // the object file is the output of an incremental link against
        // a base file and cannot be link edited again
    MH_DYLDLINK = 1 << 2,
        // the object file is input for the dynamic linker and cannot be
        // statically link edited again.
    MH_BINDATLOAD = 1 << 3,
        // the object file's undefined references are bound by the
        // dynamic linker when loaded.
    MH_PREBOUND = 1 << 4,
        // the file has its dynamic undefined references prebound
    MH_SPLIT_SEGS = 1 << 5,
        // the file has its read-only and read-write segments split
        // see <mach/shared_memory_server.h>
    MH_LAZY_INIT = 1 << 6,
        // the shared library init routine is to be run lazily via
        // catching memory faults to its writable segments (obsolete)
    MH_TWOLEVEL = 1 << 7,
        // the image is using two-level namespace bindings
    MH_FORCE_FLAT = 1 << 8,
        // the executable is forcing all images to use flat namespace
        // bindings
    MH_NOMULTIDEFS = 1 << 9,
        // this umbrella guarantees no multiple definitions of symbols
        // in its sub-images so the two-level namespace hints can
        // always be used. (Bit 9; bit 8 belongs to MH_FORCE_FLAT.)
    MH_NOFIXPREBINDING = 1 << 10,
        // do not have dyld notify the prebinding agent about this
        // executable
    MH_PREBINDABLE = 1 << 11,
        // the binary is not prebound but can have its prebinding
        // redone. only used when MH_PREBOUND is not set.
    MH_ALLMODSBOUND = 1 << 12,
        // indicates that this binary binds to all two-level namespace
        // modules of its dependent libraries. Only used when
        // MH_PREBINDABLE and MH_TWOLEVEL are both set.
    MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
        // safe to divide up the sections into sub-sections via symbols
        // for dead code stripping.
    MH_CANONICAL = 1 << 14,
        // the binary has been canonicalized via the unprebind operation
    MH_WEAK_DEFINES = 1 << 15,
        // the final linked image contains external weak symbols
    MH_BINDS_TO_WEAK = 1 << 16,
        // the final linked image uses weak symbols
    MH_ALLOW_STACK_EXECUTION = 1 << 17
        // When this bit is set, all stacks in the task will be given
        // stack execution privilege. Only used with the MH_EXECUTE filetype.
  };

  /// Zero-initializes every scalar field; HeaderData starts out empty.
  MachOHeader()
      : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
        reserved(0) {}

  /// cmdSize - This routine returns the size of the MachOHeader as written
  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
  /// The 64-bit form is one 32-bit word larger (the 'reserved' field).
  unsigned cmdSize(bool is64Bit) const {
    if (is64Bit)
      return 8 * sizeof(uint32_t);
    return 7 * sizeof(uint32_t);
  }

  /// setMagic - This routine sets the appropriate value for the 'magic'
  /// field based on pointer size and endianness.
  /// NOTE(review): the branch conditions were missing from the listing this
  /// was restored from; the little-endian-first order follows the original
  /// line numbering -- confirm against upstream.
  void setMagic(bool isLittleEndian, bool is64Bit) {
    if (isLittleEndian) {
      if (is64Bit)
        magic = 0xcffaedfe;
      else
        magic = 0xcefaedfe;
    } else {
      if (is64Bit)
        magic = 0xfeedfacf;
      else
        magic = 0xfeedface;
    }
  }
}; // end struct MachOHeader
/// MachOSegment - Holds the fields of a segment load command (LC_SEGMENT or
/// LC_SEGMENT_64) for the file being emitted.
struct MachOSegment {
  uint32_t cmd;        // LC_SEGMENT or LC_SEGMENT_64
  uint32_t cmdsize;    // Total size of this struct and section commands
  std::string segname; // segment name
  uint64_t vmaddr;     // address of this segment
  uint64_t vmsize;     // size of this segment, may be larger than filesize
  uint64_t fileoff;    // offset in file
  uint64_t filesize;   // amount to read from file
  uint32_t maxprot;    // maximum VM protection
  uint32_t initprot;   // initial VM protection
  uint32_t nsects;     // number of sections in this segment
  uint32_t flags;      // flags

  // A system header may already provide these macros, which would clash
  // with the enum below; define each one only when it is absent.
#if !defined(VM_PROT_NONE)
#define VM_PROT_NONE 0x00
#endif
#if !defined(VM_PROT_READ)
#define VM_PROT_READ 0x01
#endif
#if !defined(VM_PROT_WRITE)
#define VM_PROT_WRITE 0x02
#endif
#if !defined(VM_PROT_EXECUTE)
#define VM_PROT_EXECUTE 0x04
#endif
#if !defined(VM_PROT_ALL)
#define VM_PROT_ALL 0x07
#endif

  // Values for the maxprot/initprot fields
  // see <mach-o/vm_prot.h>
  enum {
    SEG_VM_PROT_NONE = VM_PROT_NONE,
    SEG_VM_PROT_READ = VM_PROT_READ,   // read permission
    SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission
    SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE,
    SEG_VM_PROT_ALL = VM_PROT_ALL
  };

  // Values for the cmd field
  // see <mach-o/loader.h>
  enum {
    LC_SEGMENT = 0x01,   // segment of this file to be mapped
    LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
  };

  /// cmdSize - Returns the on-disk size of this segment command, which
  /// depends on whether the destination is a 64 bit Mach-O file.
  unsigned cmdSize(bool is64Bit) const {
    if (!is64Bit)
      return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits
    return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
  }

  /// Creates an empty segment named 'seg'. The command kind follows
  /// is64Bit, both protection fields start at VM_PROT_ALL, and every other
  /// numeric field starts at zero.
  MachOSegment(const std::string &seg, bool is64Bit)
      : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT),
        cmdsize(0),
        segname(seg),
        vmaddr(0),
        vmsize(0),
        fileoff(0),
        filesize(0),
        maxprot(VM_PROT_ALL),
        initprot(VM_PROT_ALL),
        nsects(0),
        flags(0) {}
};
/// MachOSection - This struct contains information about each section in a
/// particular segment that is emitted to the file. This is eventually
/// turned into the SectionCommand in the load command for a particular
/// segment.
273 struct MachOSection
: public BinaryObject
{
// Section fields; names and meanings follow the trailing comments.
std::string sectname; // name of this section,
std::string segname;  // segment this section goes in
uint64_t addr;        // memory address of this section
uint32_t offset;      // file offset of this section
uint32_t align;       // section alignment (power of 2)
uint32_t reloff;      // file offset of relocation entries
uint32_t nreloc;      // number of relocation entries
uint32_t flags;       // flags (section type and attributes)
uint32_t reserved1;   // reserved (for offset or index)
uint32_t reserved2;   // reserved (for count or sizeof)
uint32_t reserved3;   // reserved (64 bit only)

/// A unique number for this section, which will be used to match symbols
/// to the correct section.
// NOTE(review): the member this comment documents is missing from this
// listing; restore its declaration from the upstream header.

/// RelocBuffer - A buffer to hold the mach-o relocations before we write
/// them out at the appropriate location in the file.
std::vector<unsigned char> RelocBuffer;
// Constants for the section types (low 8 bits of flags field)
// see <mach-o/loader.h>
// The enumerators for values 1, 8, 11, 12 and 13 had been lost from this
// listing (only their comments survived); they are restored here with the
// names used by <mach-o/loader.h>.
enum {
  S_REGULAR = 0,
      // regular section
  S_ZEROFILL = 1,
      // zero fill on demand section
  S_CSTRING_LITERALS = 2,
      // section with only literal C strings
  S_4BYTE_LITERALS = 3,
      // section with only 4 byte literals
  S_8BYTE_LITERALS = 4,
      // section with only 8 byte literals
  S_LITERAL_POINTERS = 5,
      // section with only pointers to literals
  S_NON_LAZY_SYMBOL_POINTERS = 6,
      // section with only non-lazy symbol pointers
  S_LAZY_SYMBOL_POINTERS = 7,
      // section with only lazy symbol pointers
  S_SYMBOL_STUBS = 8,
      // section with only symbol stubs
      // byte size of stub in the reserved2 field
  S_MOD_INIT_FUNC_POINTERS = 9,
      // section with only function pointers for initialization
  S_MOD_TERM_FUNC_POINTERS = 10,
      // section with only function pointers for termination
  S_COALESCED = 11,
      // section contains symbols that are coalesced
  S_GB_ZEROFILL = 12,
      // zero fill on demand section (that can be larger than 4GB)
  S_INTERPOSING = 13,
      // section with only pairs of function pointers for interposing
  S_16BYTE_LITERALS = 14
      // section with only 16 byte literals
};
// Constants for the section flags (high 24 bits of flags field)
// see <mach-o/loader.h>
// The shifts are done on unsigned operands so that bit 31 yields the
// positive value 0x80000000 instead of shifting into the sign bit of int.
enum {
  S_ATTR_PURE_INSTRUCTIONS = 1u << 31,
      // section contains only true machine instructions
  S_ATTR_NO_TOC = 1u << 30,
      // section contains coalesced symbols that are not to be in a
      // ranlib table of contents
  S_ATTR_STRIP_STATIC_SYMS = 1u << 29,
      // ok to strip static symbols in this section in files with the
      // MH_DYLDLINK flag
  S_ATTR_NO_DEAD_STRIP = 1u << 28,
      // no dead stripping
  S_ATTR_LIVE_SUPPORT = 1u << 27,
      // blocks are live if they reference live blocks
  S_ATTR_SELF_MODIFYING_CODE = 1u << 26,
      // used with i386 code stubs written on by dyld
  S_ATTR_DEBUG = 1u << 25,
      // a debug section
  S_ATTR_SOME_INSTRUCTIONS = 1u << 10,
      // section contains some machine instructions
  S_ATTR_EXT_RELOC = 1u << 9,
      // section has external relocation entries
  S_ATTR_LOC_RELOC = 1u << 8
      // section has local relocation entries
};
355 /// cmdSize - This routine returns the size of the MachOSection as written
356 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
357 unsigned cmdSize(bool is64Bit
) const {
359 return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
361 return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
364 MachOSection(const std::string
&seg
, const std::string
§
)
365 : BinaryObject(), sectname(sect
), segname(seg
), addr(0), offset(0),
366 align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
369 }; // end struct MachOSection
/// MachODySymTab - Offsets and counts that make up the dynamic symbol table
/// (LC_DYSYMTAB) load command.
struct MachODySymTab {
  uint32_t cmd;            // LC_DYSYMTAB
  uint32_t cmdsize;        // sizeof(MachODySymTab)
  uint32_t ilocalsym;      // index to local symbols
  uint32_t nlocalsym;      // number of local symbols
  uint32_t iextdefsym;     // index to externally defined symbols
  uint32_t nextdefsym;     // number of externally defined symbols
  uint32_t iundefsym;      // index to undefined symbols
  uint32_t nundefsym;      // number of undefined symbols
  uint32_t tocoff;         // file offset to table of contents
  uint32_t ntoc;           // number of entries in table of contents
  uint32_t modtaboff;      // file offset to module table
  uint32_t nmodtab;        // number of module table entries
  uint32_t extrefsymoff;   // offset to referenced symbol table
  uint32_t nextrefsyms;    // number of referenced symbol table entries
  uint32_t indirectsymoff; // file offset to the indirect symbol table
  uint32_t nindirectsyms;  // number of indirect symbol table entries
  uint32_t extreloff;      // offset to external relocation entries
  uint32_t nextrel;        // number of external relocation entries
  uint32_t locreloff;      // offset to local relocation entries
  uint32_t nlocrel;        // number of local relocation entries

  // Value for the cmd field
  // see <mach-o/loader.h>
  enum {
    LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
  };

  /// Sets cmd to LC_DYSYMTAB and cmdsize to twenty 32-bit words (one per
  /// field above); all offsets, indices and counts start at zero.
  MachODySymTab()
      : cmd(LC_DYSYMTAB),
        cmdsize(20 * sizeof(uint32_t)),
        ilocalsym(0), nlocalsym(0),
        iextdefsym(0), nextdefsym(0),
        iundefsym(0), nundefsym(0),
        tocoff(0), ntoc(0),
        modtaboff(0), nmodtab(0),
        extrefsymoff(0), nextrefsyms(0),
        indirectsymoff(0), nindirectsyms(0),
        extreloff(0), nextrel(0),
        locreloff(0), nlocrel(0) {}

}; // end struct MachODySymTab
409 } // end namespace llvm