1 //===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the target-independent Mach-O writer. This file writes
11 // out the Mach-O file in the following order:
13 // #1 FatHeader (universal-only)
14 // #2 FatArch (universal-only, 1 per universal arch)
23 //===----------------------------------------------------------------------===//
25 #include "MachOWriter.h"
26 #include "llvm/Constants.h"
27 #include "llvm/DerivedTypes.h"
28 #include "llvm/Module.h"
29 #include "llvm/PassManager.h"
30 #include "llvm/CodeGen/FileWriters.h"
31 #include "llvm/CodeGen/MachineCodeEmitter.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineJumpTableInfo.h"
34 #include "llvm/Target/TargetAsmInfo.h"
35 #include "llvm/Target/TargetJITInfo.h"
36 #include "llvm/Support/Mangler.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Support/OutputBuffer.h"
39 #include "llvm/Support/Streams.h"
40 #include "llvm/Support/raw_ostream.h"
45 /// AddMachOWriter - Concrete function to add the Mach-O writer to the function
47 MachineCodeEmitter
*llvm::AddMachOWriter(PassManagerBase
&PM
,
50 MachOWriter
*MOW
= new MachOWriter(O
, TM
);
52 return &MOW
->getMachineCodeEmitter();
55 //===----------------------------------------------------------------------===//
56 // MachOCodeEmitter Implementation
57 //===----------------------------------------------------------------------===//
60 /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
61 /// for functions to the Mach-O file.
62 class MachOCodeEmitter
: public MachineCodeEmitter
{
65 /// Target machine description.
68 /// is64Bit/isLittleEndian - This information is inferred from the target
69 /// machine directly, indicating what header values and flags to set.
70 bool is64Bit
, isLittleEndian
;
72 /// Relocations - These are the relocations that the function needs, as
74 std::vector
<MachineRelocation
> Relocations
;
76 /// CPLocations - This is a map of constant pool indices to offsets from the
77 /// start of the section for that constant pool index.
78 std::vector
<uintptr_t> CPLocations
;
80 /// CPSections - This is a map of constant pool indices to the MachOSection
81 /// containing the constant pool entry for that index.
82 std::vector
<unsigned> CPSections
;
84 /// JTLocations - This is a map of jump table indices to offsets from the
85 /// start of the section for that jump table index.
86 std::vector
<uintptr_t> JTLocations
;
88 /// MBBLocations - This vector is a mapping from MBB ID's to their address.
89 /// It is filled in by the StartMachineBasicBlock callback and queried by
90 /// the getMachineBasicBlockAddress callback.
91 std::vector
<uintptr_t> MBBLocations
;
94 MachOCodeEmitter(MachOWriter
&mow
) : MOW(mow
), TM(MOW
.TM
) {
95 is64Bit
= TM
.getTargetData()->getPointerSizeInBits() == 64;
96 isLittleEndian
= TM
.getTargetData()->isLittleEndian();
99 virtual void startFunction(MachineFunction
&MF
);
100 virtual bool finishFunction(MachineFunction
&MF
);
102 virtual void addRelocation(const MachineRelocation
&MR
) {
103 Relocations
.push_back(MR
);
106 void emitConstantPool(MachineConstantPool
*MCP
);
107 void emitJumpTables(MachineJumpTableInfo
*MJTI
);
109 virtual uintptr_t getConstantPoolEntryAddress(unsigned Index
) const {
110 assert(CPLocations
.size() > Index
&& "CP not emitted!");
111 return CPLocations
[Index
];
113 virtual uintptr_t getJumpTableEntryAddress(unsigned Index
) const {
114 assert(JTLocations
.size() > Index
&& "JT not emitted!");
115 return JTLocations
[Index
];
118 virtual void StartMachineBasicBlock(MachineBasicBlock
*MBB
) {
119 if (MBBLocations
.size() <= (unsigned)MBB
->getNumber())
120 MBBLocations
.resize((MBB
->getNumber()+1)*2);
121 MBBLocations
[MBB
->getNumber()] = getCurrentPCOffset();
124 virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock
*MBB
) const {
125 assert(MBBLocations
.size() > (unsigned)MBB
->getNumber() &&
126 MBBLocations
[MBB
->getNumber()] && "MBB not emitted!");
127 return MBBLocations
[MBB
->getNumber()];
130 virtual uintptr_t getLabelAddress(uint64_t Label
) const {
131 assert(0 && "get Label not implemented");
136 virtual void emitLabel(uint64_t LabelID
) {
137 assert(0 && "emit Label not implemented");
142 virtual void setModuleInfo(llvm::MachineModuleInfo
* MMI
) { }
144 /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
145 virtual void startGVStub(const GlobalValue
* F
, unsigned StubSize
,
146 unsigned Alignment
= 1) {
147 assert(0 && "JIT specific function called!");
150 virtual void startGVStub(const GlobalValue
* F
, void *Buffer
,
152 assert(0 && "JIT specific function called!");
155 virtual void *finishGVStub(const GlobalValue
* F
) {
156 assert(0 && "JIT specific function called!");
163 /// startFunction - This callback is invoked when a new machine function is
164 /// about to be emitted.
165 void MachOCodeEmitter::startFunction(MachineFunction
&MF
) {
166 const TargetData
*TD
= TM
.getTargetData();
167 const Function
*F
= MF
.getFunction();
169 // Align the output buffer to the appropriate alignment, power of 2.
170 unsigned FnAlign
= F
->getAlignment();
171 unsigned TDAlign
= TD
->getPrefTypeAlignment(F
->getType());
172 unsigned Align
= Log2_32(std::max(FnAlign
, TDAlign
));
173 assert(!(Align
& (Align
-1)) && "Alignment is not a power of two!");
175 // Get the Mach-O Section that this function belongs in.
176 MachOWriter::MachOSection
*MOS
= MOW
.getTextSection();
178 // FIXME: better memory management
179 MOS
->SectionData
.reserve(4096);
180 BufferBegin
= &MOS
->SectionData
[0];
181 BufferEnd
= BufferBegin
+ MOS
->SectionData
.capacity();
183 // Upgrade the section alignment if required.
184 if (MOS
->align
< Align
) MOS
->align
= Align
;
186 // Round the size up to the correct alignment for starting the new function.
187 if ((MOS
->size
& ((1 << Align
) - 1)) != 0) {
188 MOS
->size
+= (1 << Align
);
189 MOS
->size
&= ~((1 << Align
) - 1);
192 // FIXME: Using MOS->size directly here instead of calculating it from the
193 // output buffer size (impossible because the code emitter deals only in raw
194 // bytes) forces us to manually synchronize size and write padding zero bytes
195 // to the output buffer for all non-text sections. For text sections, we do
196 // not synchonize the output buffer, and we just blow up if anyone tries to
197 // write non-code to it. An assert should probably be added to
198 // AddSymbolToSection to prevent calling it on the text section.
199 CurBufferPtr
= BufferBegin
+ MOS
->size
;
201 // Clear per-function data structures.
205 MBBLocations
.clear();
208 /// finishFunction - This callback is invoked after the function is completely
210 bool MachOCodeEmitter::finishFunction(MachineFunction
&MF
) {
211 // Get the Mach-O Section that this function belongs in.
212 MachOWriter::MachOSection
*MOS
= MOW
.getTextSection();
214 // Get a symbol for the function to add to the symbol table
215 // FIXME: it seems like we should call something like AddSymbolToSection
216 // in startFunction rather than changing the section size and symbol n_value
218 const GlobalValue
*FuncV
= MF
.getFunction();
219 MachOSym
FnSym(FuncV
, MOW
.Mang
->getValueName(FuncV
), MOS
->Index
, TM
);
220 FnSym
.n_value
= MOS
->size
;
221 MOS
->size
= CurBufferPtr
- BufferBegin
;
223 // Emit constant pool to appropriate section(s)
224 emitConstantPool(MF
.getConstantPool());
226 // Emit jump tables to appropriate section
227 emitJumpTables(MF
.getJumpTableInfo());
229 // If we have emitted any relocations to function-specific objects such as
230 // basic blocks, constant pools entries, or jump tables, record their
231 // addresses now so that we can rewrite them with the correct addresses
233 for (unsigned i
= 0, e
= Relocations
.size(); i
!= e
; ++i
) {
234 MachineRelocation
&MR
= Relocations
[i
];
237 if (MR
.isBasicBlock()) {
238 Addr
= getMachineBasicBlockAddress(MR
.getBasicBlock());
239 MR
.setConstantVal(MOS
->Index
);
240 MR
.setResultPointer((void*)Addr
);
241 } else if (MR
.isJumpTableIndex()) {
242 Addr
= getJumpTableEntryAddress(MR
.getJumpTableIndex());
243 MR
.setConstantVal(MOW
.getJumpTableSection()->Index
);
244 MR
.setResultPointer((void*)Addr
);
245 } else if (MR
.isConstantPoolIndex()) {
246 Addr
= getConstantPoolEntryAddress(MR
.getConstantPoolIndex());
247 MR
.setConstantVal(CPSections
[MR
.getConstantPoolIndex()]);
248 MR
.setResultPointer((void*)Addr
);
249 } else if (MR
.isGlobalValue()) {
250 // FIXME: This should be a set or something that uniques
251 MOW
.PendingGlobals
.push_back(MR
.getGlobalValue());
253 assert(0 && "Unhandled relocation type");
255 MOS
->Relocations
.push_back(MR
);
259 // Finally, add it to the symtab.
260 MOW
.SymbolTable
.push_back(FnSym
);
264 /// emitConstantPool - For each constant pool entry, figure out which section
265 /// the constant should live in, allocate space for it, and emit it to the
266 /// Section data buffer.
267 void MachOCodeEmitter::emitConstantPool(MachineConstantPool
*MCP
) {
268 const std::vector
<MachineConstantPoolEntry
> &CP
= MCP
->getConstants();
269 if (CP
.empty()) return;
271 // FIXME: handle PIC codegen
272 assert(TM
.getRelocationModel() != Reloc::PIC_
&&
273 "PIC codegen not yet handled for mach-o jump tables!");
275 // Although there is no strict necessity that I am aware of, we will do what
276 // gcc for OS X does and put each constant pool entry in a section of constant
277 // objects of a certain size. That means that float constants go in the
278 // literal4 section, and double objects go in literal8, etc.
280 // FIXME: revisit this decision if we ever do the "stick everything into one
281 // "giant object for PIC" optimization.
282 for (unsigned i
= 0, e
= CP
.size(); i
!= e
; ++i
) {
283 const Type
*Ty
= CP
[i
].getType();
284 unsigned Size
= TM
.getTargetData()->getTypeAllocSize(Ty
);
286 MachOWriter::MachOSection
*Sec
= MOW
.getConstSection(CP
[i
].Val
.ConstVal
);
287 OutputBuffer
SecDataOut(Sec
->SectionData
, is64Bit
, isLittleEndian
);
289 CPLocations
.push_back(Sec
->SectionData
.size());
290 CPSections
.push_back(Sec
->Index
);
292 // FIXME: remove when we have unified size + output buffer
295 // Allocate space in the section for the global.
296 // FIXME: need alignment?
297 // FIXME: share between here and AddSymbolToSection?
298 for (unsigned j
= 0; j
< Size
; ++j
)
299 SecDataOut
.outbyte(0);
301 MOW
.InitMem(CP
[i
].Val
.ConstVal
, &Sec
->SectionData
[0], CPLocations
[i
],
302 TM
.getTargetData(), Sec
->Relocations
);
306 /// emitJumpTables - Emit all the jump tables for a given jump table info
307 /// record to the appropriate section.
308 void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo
*MJTI
) {
309 const std::vector
<MachineJumpTableEntry
> &JT
= MJTI
->getJumpTables();
310 if (JT
.empty()) return;
312 // FIXME: handle PIC codegen
313 assert(TM
.getRelocationModel() != Reloc::PIC_
&&
314 "PIC codegen not yet handled for mach-o jump tables!");
316 MachOWriter::MachOSection
*Sec
= MOW
.getJumpTableSection();
317 unsigned TextSecIndex
= MOW
.getTextSection()->Index
;
318 OutputBuffer
SecDataOut(Sec
->SectionData
, is64Bit
, isLittleEndian
);
320 for (unsigned i
= 0, e
= JT
.size(); i
!= e
; ++i
) {
321 // For each jump table, record its offset from the start of the section,
322 // reserve space for the relocations to the MBBs, and add the relocations.
323 const std::vector
<MachineBasicBlock
*> &MBBs
= JT
[i
].MBBs
;
324 JTLocations
.push_back(Sec
->SectionData
.size());
325 for (unsigned mi
= 0, me
= MBBs
.size(); mi
!= me
; ++mi
) {
326 MachineRelocation
MR(MOW
.GetJTRelocation(Sec
->SectionData
.size(),
328 MR
.setResultPointer((void *)JTLocations
[i
]);
329 MR
.setConstantVal(TextSecIndex
);
330 Sec
->Relocations
.push_back(MR
);
331 SecDataOut
.outaddr(0);
334 // FIXME: remove when we have unified size + output buffer
335 Sec
->size
= Sec
->SectionData
.size();
338 //===----------------------------------------------------------------------===//
339 // MachOWriter Implementation
340 //===----------------------------------------------------------------------===//
342 char MachOWriter::ID
= 0;
343 MachOWriter::MachOWriter(raw_ostream
&o
, TargetMachine
&tm
)
344 : MachineFunctionPass(&ID
), O(o
), TM(tm
) {
345 is64Bit
= TM
.getTargetData()->getPointerSizeInBits() == 64;
346 isLittleEndian
= TM
.getTargetData()->isLittleEndian();
348 // Create the machine code emitter object for this target.
349 MCE
= new MachOCodeEmitter(*this);
352 MachOWriter::~MachOWriter() {
356 void MachOWriter::AddSymbolToSection(MachOSection
*Sec
, GlobalVariable
*GV
) {
357 const Type
*Ty
= GV
->getType()->getElementType();
358 unsigned Size
= TM
.getTargetData()->getTypeAllocSize(Ty
);
359 unsigned Align
= TM
.getTargetData()->getPreferredAlignment(GV
);
361 // Reserve space in the .bss section for this symbol while maintaining the
362 // desired section alignment, which must be at least as much as required by
364 OutputBuffer
SecDataOut(Sec
->SectionData
, is64Bit
, isLittleEndian
);
367 uint64_t OrigSize
= Sec
->size
;
368 Align
= Log2_32(Align
);
369 Sec
->align
= std::max(unsigned(Sec
->align
), Align
);
370 Sec
->size
= (Sec
->size
+ Align
- 1) & ~(Align
-1);
372 // Add alignment padding to buffer as well.
373 // FIXME: remove when we have unified size + output buffer
374 unsigned AlignedSize
= Sec
->size
- OrigSize
;
375 for (unsigned i
= 0; i
< AlignedSize
; ++i
)
376 SecDataOut
.outbyte(0);
378 // Globals without external linkage apparently do not go in the symbol table.
379 if (!GV
->hasLocalLinkage()) {
380 MachOSym
Sym(GV
, Mang
->getValueName(GV
), Sec
->Index
, TM
);
381 Sym
.n_value
= Sec
->size
;
382 SymbolTable
.push_back(Sym
);
385 // Record the offset of the symbol, and then allocate space for it.
386 // FIXME: remove when we have unified size + output buffer
389 // Now that we know what section the GlovalVariable is going to be emitted
390 // into, update our mappings.
391 // FIXME: We may also need to update this when outputting non-GlobalVariable
392 // GlobalValues such as functions.
394 GVOffset
[GV
] = Sec
->SectionData
.size();
396 // Allocate space in the section for the global.
397 for (unsigned i
= 0; i
< Size
; ++i
)
398 SecDataOut
.outbyte(0);
401 void MachOWriter::EmitGlobal(GlobalVariable
*GV
) {
402 const Type
*Ty
= GV
->getType()->getElementType();
403 unsigned Size
= TM
.getTargetData()->getTypeAllocSize(Ty
);
404 bool NoInit
= !GV
->hasInitializer();
406 // If this global has a zero initializer, it is part of the .bss or common
408 if (NoInit
|| GV
->getInitializer()->isNullValue()) {
409 // If this global is part of the common block, add it now. Variables are
410 // part of the common block if they are zero initialized and allowed to be
411 // merged with other symbols.
412 if (NoInit
|| GV
->hasLinkOnceLinkage() || GV
->hasWeakLinkage() ||
413 GV
->hasCommonLinkage()) {
414 MachOSym
ExtOrCommonSym(GV
, Mang
->getValueName(GV
), MachOSym::NO_SECT
,TM
);
415 // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in
416 // bytes of the symbol.
417 ExtOrCommonSym
.n_value
= Size
;
418 SymbolTable
.push_back(ExtOrCommonSym
);
419 // Remember that we've seen this symbol
423 // Otherwise, this symbol is part of the .bss section.
424 MachOSection
*BSS
= getBSSSection();
425 AddSymbolToSection(BSS
, GV
);
429 // Scalar read-only data goes in a literal section if the scalar is 4, 8, or
430 // 16 bytes, or a cstring. Other read only data goes into a regular const
431 // section. Read-write data goes in the data section.
432 MachOSection
*Sec
= GV
->isConstant() ? getConstSection(GV
->getInitializer()) :
434 AddSymbolToSection(Sec
, GV
);
435 InitMem(GV
->getInitializer(), &Sec
->SectionData
[0], GVOffset
[GV
],
436 TM
.getTargetData(), Sec
->Relocations
);
440 bool MachOWriter::runOnMachineFunction(MachineFunction
&MF
) {
441 // Nothing to do here, this is all done through the MCE object.
445 bool MachOWriter::doInitialization(Module
&M
) {
446 // Set the magic value, now that we know the pointer size and endianness
447 Header
.setMagic(isLittleEndian
, is64Bit
);
450 // FIXME: this only works for object files, we do not support the creation
451 // of dynamic libraries or executables at this time.
452 Header
.filetype
= MachOHeader::MH_OBJECT
;
454 Mang
= new Mangler(M
);
458 /// doFinalization - Now that the module has been completely processed, emit
459 /// the Mach-O file to 'O'.
460 bool MachOWriter::doFinalization(Module
&M
) {
461 // FIXME: we don't handle debug info yet, we should probably do that.
463 // Okay, the .text section has been completed, build the .data, .bss, and
464 // "common" sections next.
465 for (Module::global_iterator I
= M
.global_begin(), E
= M
.global_end();
469 // Emit the header and load commands.
470 EmitHeaderAndLoadCommands();
472 // Emit the various sections and their relocation info.
475 // Write the symbol table and the string table to the end of the file.
476 O
.write((char*)&SymT
[0], SymT
.size());
477 O
.write((char*)&StrT
[0], StrT
.size());
479 // We are done with the abstract symbols.
482 DynamicSymbolTable
.clear();
484 // Release the name mangler object.
485 delete Mang
; Mang
= 0;
489 void MachOWriter::EmitHeaderAndLoadCommands() {
490 // Step #0: Fill in the segment load command size, since we need it to figure
491 // out the rest of the header fields
492 MachOSegment
SEG("", is64Bit
);
493 SEG
.nsects
= SectionList
.size();
494 SEG
.cmdsize
= SEG
.cmdSize(is64Bit
) +
495 SEG
.nsects
* SectionList
[0]->cmdSize(is64Bit
);
497 // Step #1: calculate the number of load commands. We always have at least
498 // one, for the LC_SEGMENT load command, plus two for the normal
499 // and dynamic symbol tables, if there are any symbols.
500 Header
.ncmds
= SymbolTable
.empty() ? 1 : 3;
502 // Step #2: calculate the size of the load commands
503 Header
.sizeofcmds
= SEG
.cmdsize
;
504 if (!SymbolTable
.empty())
505 Header
.sizeofcmds
+= SymTab
.cmdsize
+ DySymTab
.cmdsize
;
507 // Step #3: write the header to the file
508 // Local alias to shortenify coming code.
509 DataBuffer
&FH
= Header
.HeaderData
;
510 OutputBuffer
FHOut(FH
, is64Bit
, isLittleEndian
);
512 FHOut
.outword(Header
.magic
);
513 FHOut
.outword(TM
.getMachOWriterInfo()->getCPUType());
514 FHOut
.outword(TM
.getMachOWriterInfo()->getCPUSubType());
515 FHOut
.outword(Header
.filetype
);
516 FHOut
.outword(Header
.ncmds
);
517 FHOut
.outword(Header
.sizeofcmds
);
518 FHOut
.outword(Header
.flags
);
520 FHOut
.outword(Header
.reserved
);
522 // Step #4: Finish filling in the segment load command and write it out
523 for (std::vector
<MachOSection
*>::iterator I
= SectionList
.begin(),
524 E
= SectionList
.end(); I
!= E
; ++I
)
525 SEG
.filesize
+= (*I
)->size
;
527 SEG
.vmsize
= SEG
.filesize
;
528 SEG
.fileoff
= Header
.cmdSize(is64Bit
) + Header
.sizeofcmds
;
530 FHOut
.outword(SEG
.cmd
);
531 FHOut
.outword(SEG
.cmdsize
);
532 FHOut
.outstring(SEG
.segname
, 16);
533 FHOut
.outaddr(SEG
.vmaddr
);
534 FHOut
.outaddr(SEG
.vmsize
);
535 FHOut
.outaddr(SEG
.fileoff
);
536 FHOut
.outaddr(SEG
.filesize
);
537 FHOut
.outword(SEG
.maxprot
);
538 FHOut
.outword(SEG
.initprot
);
539 FHOut
.outword(SEG
.nsects
);
540 FHOut
.outword(SEG
.flags
);
542 // Step #5: Finish filling in the fields of the MachOSections
543 uint64_t currentAddr
= 0;
544 for (std::vector
<MachOSection
*>::iterator I
= SectionList
.begin(),
545 E
= SectionList
.end(); I
!= E
; ++I
) {
546 MachOSection
*MOS
= *I
;
547 MOS
->addr
= currentAddr
;
548 MOS
->offset
= currentAddr
+ SEG
.fileoff
;
550 // FIXME: do we need to do something with alignment here?
551 currentAddr
+= MOS
->size
;
554 // Step #6: Emit the symbol table to temporary buffers, so that we know the
555 // size of the string table when we write the next load command. This also
556 // sorts and assigns indices to each of the symbols, which is necessary for
557 // emitting relocations to externally-defined objects.
558 BufferSymbolAndStringTable();
560 // Step #7: Calculate the number of relocations for each section and write out
561 // the section commands for each section
562 currentAddr
+= SEG
.fileoff
;
563 for (std::vector
<MachOSection
*>::iterator I
= SectionList
.begin(),
564 E
= SectionList
.end(); I
!= E
; ++I
) {
565 MachOSection
*MOS
= *I
;
566 // Convert the relocations to target-specific relocations, and fill in the
567 // relocation offset for this section.
568 CalculateRelocations(*MOS
);
569 MOS
->reloff
= MOS
->nreloc
? currentAddr
: 0;
570 currentAddr
+= MOS
->nreloc
* 8;
572 // write the finalized section command to the output buffer
573 FHOut
.outstring(MOS
->sectname
, 16);
574 FHOut
.outstring(MOS
->segname
, 16);
575 FHOut
.outaddr(MOS
->addr
);
576 FHOut
.outaddr(MOS
->size
);
577 FHOut
.outword(MOS
->offset
);
578 FHOut
.outword(MOS
->align
);
579 FHOut
.outword(MOS
->reloff
);
580 FHOut
.outword(MOS
->nreloc
);
581 FHOut
.outword(MOS
->flags
);
582 FHOut
.outword(MOS
->reserved1
);
583 FHOut
.outword(MOS
->reserved2
);
585 FHOut
.outword(MOS
->reserved3
);
588 // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands
589 SymTab
.symoff
= currentAddr
;
590 SymTab
.nsyms
= SymbolTable
.size();
591 SymTab
.stroff
= SymTab
.symoff
+ SymT
.size();
592 SymTab
.strsize
= StrT
.size();
593 FHOut
.outword(SymTab
.cmd
);
594 FHOut
.outword(SymTab
.cmdsize
);
595 FHOut
.outword(SymTab
.symoff
);
596 FHOut
.outword(SymTab
.nsyms
);
597 FHOut
.outword(SymTab
.stroff
);
598 FHOut
.outword(SymTab
.strsize
);
600 // FIXME: set DySymTab fields appropriately
601 // We should probably just update these in BufferSymbolAndStringTable since
602 // thats where we're partitioning up the different kinds of symbols.
603 FHOut
.outword(DySymTab
.cmd
);
604 FHOut
.outword(DySymTab
.cmdsize
);
605 FHOut
.outword(DySymTab
.ilocalsym
);
606 FHOut
.outword(DySymTab
.nlocalsym
);
607 FHOut
.outword(DySymTab
.iextdefsym
);
608 FHOut
.outword(DySymTab
.nextdefsym
);
609 FHOut
.outword(DySymTab
.iundefsym
);
610 FHOut
.outword(DySymTab
.nundefsym
);
611 FHOut
.outword(DySymTab
.tocoff
);
612 FHOut
.outword(DySymTab
.ntoc
);
613 FHOut
.outword(DySymTab
.modtaboff
);
614 FHOut
.outword(DySymTab
.nmodtab
);
615 FHOut
.outword(DySymTab
.extrefsymoff
);
616 FHOut
.outword(DySymTab
.nextrefsyms
);
617 FHOut
.outword(DySymTab
.indirectsymoff
);
618 FHOut
.outword(DySymTab
.nindirectsyms
);
619 FHOut
.outword(DySymTab
.extreloff
);
620 FHOut
.outword(DySymTab
.nextrel
);
621 FHOut
.outword(DySymTab
.locreloff
);
622 FHOut
.outword(DySymTab
.nlocrel
);
624 O
.write((char*)&FH
[0], FH
.size());
627 /// EmitSections - Now that we have constructed the file header and load
628 /// commands, emit the data for each section to the file.
629 void MachOWriter::EmitSections() {
630 for (std::vector
<MachOSection
*>::iterator I
= SectionList
.begin(),
631 E
= SectionList
.end(); I
!= E
; ++I
)
632 // Emit the contents of each section
633 O
.write((char*)&(*I
)->SectionData
[0], (*I
)->size
);
634 for (std::vector
<MachOSection
*>::iterator I
= SectionList
.begin(),
635 E
= SectionList
.end(); I
!= E
; ++I
)
636 // Emit the relocation entry data for each section.
637 O
.write((char*)&(*I
)->RelocBuffer
[0], (*I
)->RelocBuffer
.size());
640 /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
641 /// a local symbol rather than an external symbol.
642 bool MachOWriter::PartitionByLocal(const MachOSym
&Sym
) {
643 return (Sym
.n_type
& (MachOSym::N_EXT
| MachOSym::N_PEXT
)) == 0;
646 /// PartitionByDefined - Simple boolean predicate that returns true if Sym is
647 /// defined in this module.
648 bool MachOWriter::PartitionByDefined(const MachOSym
&Sym
) {
649 // FIXME: Do N_ABS or N_INDR count as defined?
650 return (Sym
.n_type
& MachOSym::N_SECT
) == MachOSym::N_SECT
;
653 /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them
654 /// each a string table index so that they appear in the correct order in the
656 void MachOWriter::BufferSymbolAndStringTable() {
657 // The order of the symbol table is:
659 // 2. defined external symbols (sorted by name)
660 // 3. undefined external symbols (sorted by name)
662 // Before sorting the symbols, check the PendingGlobals for any undefined
663 // globals that need to be put in the symbol table.
664 for (std::vector
<GlobalValue
*>::iterator I
= PendingGlobals
.begin(),
665 E
= PendingGlobals
.end(); I
!= E
; ++I
) {
666 if (GVOffset
[*I
] == 0 && GVSection
[*I
] == 0) {
667 MachOSym
UndfSym(*I
, Mang
->getValueName(*I
), MachOSym::NO_SECT
, TM
);
668 SymbolTable
.push_back(UndfSym
);
673 // Sort the symbols by name, so that when we partition the symbols by scope
674 // of definition, we won't have to sort by name within each partition.
675 std::sort(SymbolTable
.begin(), SymbolTable
.end(), MachOSymCmp());
677 // Parition the symbol table entries so that all local symbols come before
678 // all symbols with external linkage. { 1 | 2 3 }
679 std::partition(SymbolTable
.begin(), SymbolTable
.end(), PartitionByLocal
);
681 // Advance iterator to beginning of external symbols and partition so that
682 // all external symbols defined in this module come before all external
683 // symbols defined elsewhere. { 1 | 2 | 3 }
684 for (std::vector
<MachOSym
>::iterator I
= SymbolTable
.begin(),
685 E
= SymbolTable
.end(); I
!= E
; ++I
) {
686 if (!PartitionByLocal(*I
)) {
687 std::partition(I
, E
, PartitionByDefined
);
692 // Calculate the starting index for each of the local, extern defined, and
693 // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB
695 for (std::vector
<MachOSym
>::iterator I
= SymbolTable
.begin(),
696 E
= SymbolTable
.end(); I
!= E
; ++I
) {
697 if (PartitionByLocal(*I
)) {
698 ++DySymTab
.nlocalsym
;
699 ++DySymTab
.iextdefsym
;
700 ++DySymTab
.iundefsym
;
701 } else if (PartitionByDefined(*I
)) {
702 ++DySymTab
.nextdefsym
;
703 ++DySymTab
.iundefsym
;
705 ++DySymTab
.nundefsym
;
709 // Write out a leading zero byte when emitting string table, for n_strx == 0
710 // which means an empty string.
711 OutputBuffer
StrTOut(StrT
, is64Bit
, isLittleEndian
);
714 // The order of the string table is:
715 // 1. strings for external symbols
716 // 2. strings for local symbols
717 // Since this is the opposite order from the symbol table, which we have just
718 // sorted, we can walk the symbol table backwards to output the string table.
719 for (std::vector
<MachOSym
>::reverse_iterator I
= SymbolTable
.rbegin(),
720 E
= SymbolTable
.rend(); I
!= E
; ++I
) {
721 if (I
->GVName
== "") {
724 I
->n_strx
= StrT
.size();
725 StrTOut
.outstring(I
->GVName
, I
->GVName
.length()+1);
729 OutputBuffer
SymTOut(SymT
, is64Bit
, isLittleEndian
);
732 for (std::vector
<MachOSym
>::iterator I
= SymbolTable
.begin(),
733 E
= SymbolTable
.end(); I
!= E
; ++I
, ++index
) {
734 // Add the section base address to the section offset in the n_value field
735 // to calculate the full address.
736 // FIXME: handle symbols where the n_value field is not the address
737 GlobalValue
*GV
= const_cast<GlobalValue
*>(I
->GV
);
738 if (GV
&& GVSection
[GV
])
739 I
->n_value
+= GVSection
[GV
]->addr
;
740 if (GV
&& (GVOffset
[GV
] == -1))
741 GVOffset
[GV
] = index
;
743 // Emit nlist to buffer
744 SymTOut
.outword(I
->n_strx
);
745 SymTOut
.outbyte(I
->n_type
);
746 SymTOut
.outbyte(I
->n_sect
);
747 SymTOut
.outhalf(I
->n_desc
);
748 SymTOut
.outaddr(I
->n_value
);
752 /// CalculateRelocations - For each MachineRelocation in the current section,
753 /// calculate the index of the section containing the object to be relocated,
754 /// and the offset into that section. From this information, create the
755 /// appropriate target-specific MachORelocation type and buffer it to be
756 /// written out after we are finished writing out sections.
757 void MachOWriter::CalculateRelocations(MachOSection
&MOS
) {
758 for (unsigned i
= 0, e
= MOS
.Relocations
.size(); i
!= e
; ++i
) {
759 MachineRelocation
&MR
= MOS
.Relocations
[i
];
760 unsigned TargetSection
= MR
.getConstantVal();
761 unsigned TargetAddr
= 0;
762 unsigned TargetIndex
= 0;
764 // This is a scattered relocation entry if it points to a global value with
765 // a non-zero offset.
766 bool Scattered
= false;
769 // Since we may not have seen the GlobalValue we were interested in yet at
770 // the time we emitted the relocation for it, fix it up now so that it
771 // points to the offset into the correct section.
772 if (MR
.isGlobalValue()) {
773 GlobalValue
*GV
= MR
.getGlobalValue();
774 MachOSection
*MOSPtr
= GVSection
[GV
];
775 intptr_t Offset
= GVOffset
[GV
];
777 // If we have never seen the global before, it must be to a symbol
778 // defined in another module (N_UNDF).
780 // FIXME: need to append stub suffix
783 TargetIndex
= GVOffset
[GV
];
785 Scattered
= TargetSection
!= 0;
786 TargetSection
= MOSPtr
->Index
;
788 MR
.setResultPointer((void*)Offset
);
791 // If the symbol is locally defined, pass in the address of the section and
792 // the section index to the code which will generate the target relocation.
794 MachOSection
&To
= *SectionList
[TargetSection
- 1];
795 TargetAddr
= To
.addr
;
796 TargetIndex
= To
.Index
;
799 OutputBuffer
RelocOut(MOS
.RelocBuffer
, is64Bit
, isLittleEndian
);
800 OutputBuffer
SecOut(MOS
.SectionData
, is64Bit
, isLittleEndian
);
802 MOS
.nreloc
+= GetTargetRelocation(MR
, MOS
.Index
, TargetAddr
, TargetIndex
,
803 RelocOut
, SecOut
, Scattered
, Extern
);
807 // InitMem - Write the value of a Constant to the specified memory location,
808 // converting it into bytes and relocations.
809 void MachOWriter::InitMem(const Constant
*C
, void *Addr
, intptr_t Offset
,
810 const TargetData
*TD
,
811 std::vector
<MachineRelocation
> &MRs
) {
812 typedef std::pair
<const Constant
*, intptr_t> CPair
;
813 std::vector
<CPair
> WorkList
;
815 WorkList
.push_back(CPair(C
,(intptr_t)Addr
+ Offset
));
817 intptr_t ScatteredOffset
= 0;
819 while (!WorkList
.empty()) {
820 const Constant
*PC
= WorkList
.back().first
;
821 intptr_t PA
= WorkList
.back().second
;
824 if (isa
<UndefValue
>(PC
)) {
826 } else if (const ConstantVector
*CP
= dyn_cast
<ConstantVector
>(PC
)) {
827 unsigned ElementSize
=
828 TD
->getTypeAllocSize(CP
->getType()->getElementType());
829 for (unsigned i
= 0, e
= CP
->getNumOperands(); i
!= e
; ++i
)
830 WorkList
.push_back(CPair(CP
->getOperand(i
), PA
+i
*ElementSize
));
831 } else if (const ConstantExpr
*CE
= dyn_cast
<ConstantExpr
>(PC
)) {
833 // FIXME: Handle ConstantExpression. See EE::getConstantValue()
835 switch (CE
->getOpcode()) {
836 case Instruction::GetElementPtr
: {
837 SmallVector
<Value
*, 8> Indices(CE
->op_begin()+1, CE
->op_end());
838 ScatteredOffset
= TD
->getIndexedOffset(CE
->getOperand(0)->getType(),
839 &Indices
[0], Indices
.size());
840 WorkList
.push_back(CPair(CE
->getOperand(0), PA
));
843 case Instruction::Add
:
845 cerr
<< "ConstantExpr not handled as global var init: " << *CE
<< "\n";
849 } else if (PC
->getType()->isSingleValueType()) {
850 unsigned char *ptr
= (unsigned char *)PA
;
851 switch (PC
->getType()->getTypeID()) {
852 case Type::IntegerTyID
: {
853 unsigned NumBits
= cast
<IntegerType
>(PC
->getType())->getBitWidth();
854 uint64_t val
= cast
<ConstantInt
>(PC
)->getZExtValue();
857 else if (NumBits
<= 16) {
858 if (TD
->isBigEndian())
859 val
= ByteSwap_16(val
);
862 } else if (NumBits
<= 32) {
863 if (TD
->isBigEndian())
864 val
= ByteSwap_32(val
);
869 } else if (NumBits
<= 64) {
870 if (TD
->isBigEndian())
871 val
= ByteSwap_64(val
);
881 assert(0 && "Not implemented: bit widths > 64");
885 case Type::FloatTyID
: {
886 uint32_t val
= cast
<ConstantFP
>(PC
)->getValueAPF().bitcastToAPInt().
888 if (TD
->isBigEndian())
889 val
= ByteSwap_32(val
);
896 case Type::DoubleTyID
: {
897 uint64_t val
= cast
<ConstantFP
>(PC
)->getValueAPF().bitcastToAPInt().
899 if (TD
->isBigEndian())
900 val
= ByteSwap_64(val
);
911 case Type::PointerTyID
:
912 if (isa
<ConstantPointerNull
>(PC
))
913 memset(ptr
, 0, TD
->getPointerSize());
914 else if (const GlobalValue
* GV
= dyn_cast
<GlobalValue
>(PC
)) {
915 // FIXME: what about function stubs?
916 MRs
.push_back(MachineRelocation::getGV(PA
-(intptr_t)Addr
,
917 MachineRelocation::VANILLA
,
918 const_cast<GlobalValue
*>(GV
),
922 assert(0 && "Unknown constant pointer type!");
925 cerr
<< "ERROR: Constant unimp for type: " << *PC
->getType() << "\n";
928 } else if (isa
<ConstantAggregateZero
>(PC
)) {
929 memset((void*)PA
, 0, (size_t)TD
->getTypeAllocSize(PC
->getType()));
930 } else if (const ConstantArray
*CPA
= dyn_cast
<ConstantArray
>(PC
)) {
931 unsigned ElementSize
=
932 TD
->getTypeAllocSize(CPA
->getType()->getElementType());
933 for (unsigned i
= 0, e
= CPA
->getNumOperands(); i
!= e
; ++i
)
934 WorkList
.push_back(CPair(CPA
->getOperand(i
), PA
+i
*ElementSize
));
935 } else if (const ConstantStruct
*CPS
= dyn_cast
<ConstantStruct
>(PC
)) {
936 const StructLayout
*SL
=
937 TD
->getStructLayout(cast
<StructType
>(CPS
->getType()));
938 for (unsigned i
= 0, e
= CPS
->getNumOperands(); i
!= e
; ++i
)
939 WorkList
.push_back(CPair(CPS
->getOperand(i
),
940 PA
+SL
->getElementOffset(i
)));
942 cerr
<< "Bad Type: " << *PC
->getType() << "\n";
943 assert(0 && "Unknown constant type to initialize memory with!");
948 MachOSym::MachOSym(const GlobalValue
*gv
, std::string name
, uint8_t sect
,
950 GV(gv
), n_strx(0), n_type(sect
== NO_SECT
? N_UNDF
: N_SECT
), n_sect(sect
),
951 n_desc(0), n_value(0) {
953 const TargetAsmInfo
*TAI
= TM
.getTargetAsmInfo();
955 switch (GV
->getLinkage()) {
957 assert(0 && "Unexpected linkage type!");
959 case GlobalValue::WeakAnyLinkage
:
960 case GlobalValue::WeakODRLinkage
:
961 case GlobalValue::LinkOnceAnyLinkage
:
962 case GlobalValue::LinkOnceODRLinkage
:
963 case GlobalValue::CommonLinkage
:
964 assert(!isa
<Function
>(gv
) && "Unexpected linkage type for Function!");
965 case GlobalValue::ExternalLinkage
:
966 GVName
= TAI
->getGlobalPrefix() + name
;
967 n_type
|= GV
->hasHiddenVisibility() ? N_PEXT
: N_EXT
;
969 case GlobalValue::PrivateLinkage
:
970 GVName
= TAI
->getPrivateGlobalPrefix() + name
;
972 case GlobalValue::InternalLinkage
:
973 GVName
= TAI
->getGlobalPrefix() + name
;