Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Core / Exceptions.cpp
blob667f1757e13d7101859025ec9b67bd25a865b099
1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
11 // Some of the code is taken from examples/ExceptionDemo
13 //===----------------------------------------------------------------------===//
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
30 #undef DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
33 using namespace llvm::dwarf;
// Command-line options used by the exception-handling helpers in this file.
35 namespace opts {
// Option category defined in another translation unit; the flag below is
// registered under it.
37 extern llvm::cl::OptionCategory BoltCategory;
// Verbosity level defined in another translation unit; this file checks it
// before emitting BOLT-WARNING diagnostics.
39 extern llvm::cl::opt<unsigned> Verbosity;
// Hidden boolean flag "print-exceptions"; when set, the LSDA parser in this
// file writes the decoded exception handling data to outs().
41 static llvm::cl::opt<bool>
42 PrintExceptions("print-exceptions",
43 llvm::cl::desc("print exception handling data"),
44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
46 } // namespace opts
48 namespace llvm {
49 namespace bolt {
51 // Read and dump the .gcc_except_table section entry.
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size, because GCC uses uleb encodings for PC offsets.
100 // Note: some functions have LSDA entries with 0 call site entries.
101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102 uint64_t LSDASectionAddress) {
103 assert(CurrentState == State::Disassembled && "unexpected function state");
105 if (!getLSDAAddress())
106 return;
108 DWARFDataExtractor Data(
109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110 LSDASectionData.size()),
111 BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113 assert(Data.isValidOffset(Offset) && "wrong LSDA address");
115 uint8_t LPStartEncoding = Data.getU8(&Offset);
116 uint64_t LPStart = 0;
117 // Convert to offset if LPStartEncoding is typed absptr DW_EH_PE_absptr
118 if (std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119 &Offset, LPStartEncoding, Offset + LSDASectionAddress))
120 LPStart = (LPStartEncoding && 0xFF == 0) ? *MaybeLPStart
121 : *MaybeLPStart - Address;
123 const uint8_t TTypeEncoding = Data.getU8(&Offset);
124 LSDATypeEncoding = TTypeEncoding;
125 size_t TTypeEncodingSize = 0;
126 uintptr_t TTypeEnd = 0;
127 if (TTypeEncoding != DW_EH_PE_omit) {
128 TTypeEnd = Data.getULEB128(&Offset);
129 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
132 if (opts::PrintExceptions) {
133 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
134 << " for function " << *this << "]:\n";
135 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
136 << '\n';
137 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
138 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
139 outs() << "TType End = " << TTypeEnd << '\n';
142 // Table to store list of indices in type table. Entries are uleb128 values.
143 const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
145 // Offset past the last decoded index.
146 uint64_t MaxTypeIndexTableOffset = 0;
148 // Max positive index used in type table.
149 unsigned MaxTypeIndex = 0;
151 // The actual type info table starts at the same location, but grows in
152 // opposite direction. TTypeEncoding is used to encode stored values.
153 const uint64_t TypeTableStart = Offset + TTypeEnd;
155 uint8_t CallSiteEncoding = Data.getU8(&Offset);
156 uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
157 uint64_t CallSiteTableStart = Offset;
158 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
159 uint64_t CallSitePtr = CallSiteTableStart;
160 uint64_t ActionTableStart = CallSiteTableEnd;
162 if (opts::PrintExceptions) {
163 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
164 outs() << "CallSite table length = " << CallSiteTableLength << '\n';
165 outs() << '\n';
168 this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
169 const uint64_t RangeBase = getAddress();
170 while (CallSitePtr < CallSiteTableEnd) {
171 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
172 CallSitePtr + LSDASectionAddress);
173 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
174 CallSitePtr + LSDASectionAddress);
175 uint64_t LandingPad = *Data.getEncodedPointer(
176 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
177 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
179 uint64_t LPOffset = LPStart + LandingPad;
180 uint64_t LPAddress = Address + LPOffset;
182 // Verify if landing pad code is located outside current function
183 // Support landing pad to builtin_unreachable
184 if (LPAddress < Address || LPAddress > Address + getSize()) {
185 BinaryFunction *Fragment =
186 BC.getBinaryFunctionContainingAddress(LPAddress);
187 assert(Fragment != nullptr &&
188 "BOLT-ERROR: cannot find landing pad fragment");
189 BC.addInterproceduralReference(this, Fragment->getAddress());
190 BC.processInterproceduralReferences();
191 assert(isParentOrChildOf(*Fragment) &&
192 "BOLT-ERROR: cannot have landing pads in different functions");
193 setHasIndirectTargetToSplitFragment(true);
194 BC.addFragmentsToSkip(this);
195 return;
198 if (opts::PrintExceptions) {
199 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
200 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
201 << "); landing pad: 0x" << Twine::utohexstr(LPOffset)
202 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
203 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
204 << '\n';
207 // Create a handler entry if necessary.
208 MCSymbol *LPSymbol = nullptr;
209 if (LPOffset) {
210 if (!getInstructionAtOffset(LPOffset)) {
211 if (opts::Verbosity >= 1)
212 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset)
213 << " not pointing to an instruction in function " << *this
214 << " - ignoring.\n";
215 } else {
216 auto Label = Labels.find(LPOffset);
217 if (Label != Labels.end()) {
218 LPSymbol = Label->second;
219 } else {
220 LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
221 Labels[LPOffset] = LPSymbol;
226 // Mark all call instructions in the range.
227 auto II = Instructions.find(Start);
228 auto IE = Instructions.end();
229 assert(II != IE && "exception range not pointing to an instruction");
230 do {
231 MCInst &Instruction = II->second;
232 if (BC.MIB->isCall(Instruction) &&
233 !BC.MIB->getConditionalTailCall(Instruction)) {
234 assert(!BC.MIB->isInvoke(Instruction) &&
235 "overlapping exception ranges detected");
236 // Add extra operands to a call instruction making it an invoke from
237 // now on.
238 BC.MIB->addEHInfo(Instruction,
239 MCPlus::MCLandingPad(LPSymbol, ActionEntry));
241 ++II;
242 } while (II != IE && II->first < Start + Length);
244 if (ActionEntry != 0) {
245 auto printType = [&](int Index, raw_ostream &OS) {
246 assert(Index > 0 && "only positive indices are valid");
247 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
248 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
249 uint64_t TypeAddress =
250 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
251 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
252 TypeAddress = 0;
253 if (TypeAddress == 0) {
254 OS << "<all>";
255 return;
257 if (TTypeEncoding & DW_EH_PE_indirect) {
258 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
259 assert(PointerOrErr && "failed to decode indirect address");
260 TypeAddress = *PointerOrErr;
262 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
263 OS << TypeSymBD->getName();
264 else
265 OS << "0x" << Twine::utohexstr(TypeAddress);
267 if (opts::PrintExceptions)
268 outs() << " actions: ";
269 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
270 int64_t ActionType;
271 int64_t ActionNext;
272 const char *Sep = "";
273 do {
274 ActionType = Data.getSLEB128(&ActionPtr);
275 const uint32_t Self = ActionPtr;
276 ActionNext = Data.getSLEB128(&ActionPtr);
277 if (opts::PrintExceptions)
278 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
279 if (ActionType == 0) {
280 if (opts::PrintExceptions)
281 outs() << "cleanup";
282 } else if (ActionType > 0) {
283 // It's an index into a type table.
284 MaxTypeIndex =
285 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
286 if (opts::PrintExceptions) {
287 outs() << "catch type ";
288 printType(ActionType, outs());
290 } else { // ActionType < 0
291 if (opts::PrintExceptions)
292 outs() << "filter exception types ";
293 const char *TSep = "";
294 // ActionType is a negative *byte* offset into *uleb128-encoded* table
295 // of indices with base 1.
296 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
297 // encoded using uleb128 thus we cannot directly dereference them.
298 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
299 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
300 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
301 if (opts::PrintExceptions) {
302 outs() << TSep;
303 printType(Index, outs());
304 TSep = ", ";
307 MaxTypeIndexTableOffset = std::max(
308 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
311 Sep = "; ";
313 ActionPtr = Self + ActionNext;
314 } while (ActionNext);
315 if (opts::PrintExceptions)
316 outs() << '\n';
319 if (opts::PrintExceptions)
320 outs() << '\n';
322 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
323 Data.getData().size() &&
324 "LSDA entry has crossed section boundary");
326 if (TTypeEnd) {
327 LSDAActionTable = LSDASectionData.slice(
328 ActionTableStart, TypeIndexTableStart -
329 MaxTypeIndex * TTypeEncodingSize -
330 ActionTableStart);
331 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
332 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
333 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
334 uint64_t TypeAddress =
335 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
336 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
337 TypeAddress = 0;
338 if (TTypeEncoding & DW_EH_PE_indirect) {
339 LSDATypeAddressTable.emplace_back(TypeAddress);
340 if (TypeAddress) {
341 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
342 assert(PointerOrErr && "failed to decode indirect address");
343 TypeAddress = *PointerOrErr;
346 LSDATypeTable.emplace_back(TypeAddress);
348 LSDATypeIndexTable =
349 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
353 void BinaryFunction::updateEHRanges() {
354 if (getSize() == 0)
355 return;
357 assert(CurrentState == State::CFG_Finalized && "unexpected state");
359 // Build call sites table.
360 struct EHInfo {
361 const MCSymbol *LP; // landing pad
362 uint64_t Action;
365 // Sites to update.
366 CallSitesList Sites;
368 for (FunctionFragment &FF : getLayout().fragments()) {
369 // If previous call can throw, this is its exception handler.
370 EHInfo PreviousEH = {nullptr, 0};
372 // Marker for the beginning of exceptions range.
373 const MCSymbol *StartRange = nullptr;
375 for (BinaryBasicBlock *const BB : FF) {
376 for (auto II = BB->begin(); II != BB->end(); ++II) {
377 if (!BC.MIB->isCall(*II))
378 continue;
380 // Instruction can throw an exception that should be handled.
381 const bool Throws = BC.MIB->isInvoke(*II);
383 // Ignore the call if it's a continuation of a no-throw gap.
384 if (!Throws && !StartRange)
385 continue;
387 // Extract exception handling information from the instruction.
388 const MCSymbol *LP = nullptr;
389 uint64_t Action = 0;
390 if (const std::optional<MCPlus::MCLandingPad> EHInfo =
391 BC.MIB->getEHInfo(*II))
392 std::tie(LP, Action) = *EHInfo;
394 // No action if the exception handler has not changed.
395 if (Throws && StartRange && PreviousEH.LP == LP &&
396 PreviousEH.Action == Action)
397 continue;
399 // Same symbol is used for the beginning and the end of the range.
400 const MCSymbol *EHSymbol;
401 MCInst EHLabel;
403 std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
404 EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
405 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
408 II = std::next(BB->insertPseudoInstr(II, EHLabel));
410 // At this point we could be in one of the following states:
412 // I. Exception handler has changed and we need to close previous range
413 // and start a new one.
415 // II. Start a new exception range after the gap.
417 // III. Close current exception range and start a new gap.
418 const MCSymbol *EndRange;
419 if (StartRange) {
420 // I, III:
421 EndRange = EHSymbol;
422 } else {
423 // II:
424 StartRange = EHSymbol;
425 EndRange = nullptr;
428 // Close the previous range.
429 if (EndRange)
430 Sites.emplace_back(
431 FF.getFragmentNum(),
432 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
434 if (Throws) {
435 // I, II:
436 StartRange = EHSymbol;
437 PreviousEH = EHInfo{LP, Action};
438 } else {
439 StartRange = nullptr;
444 // Check if we need to close the range.
445 if (StartRange) {
446 const MCSymbol *EndRange = getFunctionEndLabel(FF.getFragmentNum());
447 Sites.emplace_back(
448 FF.getFragmentNum(),
449 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
453 addCallSites(Sites);
// Mask selecting the two most significant bits of a CFI opcode byte. The CFI
// decoder in this file reduces an opcode to just these bits whenever any of
// them is set.
456 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
458 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
459 // Prepare FDEs for fast lookup
460 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
461 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
462 // Skip CIEs.
463 if (!CurFDE)
464 continue;
465 // There could me multiple FDEs with the same initial address, and perhaps
466 // different sizes (address ranges). Use the first entry with non-zero size.
467 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
468 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
469 if (CurFDE->getAddressRange()) {
470 if (FDEI->second->getAddressRange() == 0) {
471 FDEI->second = CurFDE;
472 } else if (opts::Verbosity > 0) {
473 errs() << "BOLT-WARNING: different FDEs for function at 0x"
474 << Twine::utohexstr(FDEI->first)
475 << " detected; sizes: " << FDEI->second->getAddressRange()
476 << " and " << CurFDE->getAddressRange() << '\n';
479 } else {
480 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
485 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
486 uint64_t Address = Function.getAddress();
487 auto I = FDEs.find(Address);
488 // Ignore zero-length FDE ranges.
489 if (I == FDEs.end() || !I->second->getAddressRange())
490 return true;
492 const FDE &CurFDE = *I->second;
493 std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
494 Function.setLSDAAddress(LSDA ? *LSDA : 0);
496 uint64_t Offset = Function.getFirstInstructionOffset();
497 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
498 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
499 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
500 Function.setPersonalityFunction(
501 *CurFDE.getLinkedCIE()->getPersonalityAddress());
502 Function.setPersonalityEncoding(
503 *CurFDE.getLinkedCIE()->getPersonalityEncoding());
506 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
507 DataAlignment](
508 const CFIProgram::Instruction &Instr) {
509 uint8_t Opcode = Instr.Opcode;
510 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
511 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
512 switch (Instr.Opcode) {
513 case DW_CFA_nop:
514 break;
515 case DW_CFA_advance_loc4:
516 case DW_CFA_advance_loc2:
517 case DW_CFA_advance_loc1:
518 case DW_CFA_advance_loc:
519 // Advance our current address
520 Offset += CodeAlignment * int64_t(Instr.Ops[0]);
521 break;
522 case DW_CFA_offset_extended_sf:
523 Function.addCFIInstruction(
524 Offset,
525 MCCFIInstruction::createOffset(
526 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
527 break;
528 case DW_CFA_offset_extended:
529 case DW_CFA_offset:
530 Function.addCFIInstruction(
531 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
532 DataAlignment * Instr.Ops[1]));
533 break;
534 case DW_CFA_restore_extended:
535 case DW_CFA_restore:
536 Function.addCFIInstruction(
537 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
538 break;
539 case DW_CFA_set_loc:
540 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
541 assert(Instr.Ops[0] <= Address + Function.getSize() &&
542 "set_loc out of function bounds");
543 Offset = Instr.Ops[0] - Address;
544 break;
546 case DW_CFA_undefined:
547 Function.addCFIInstruction(
548 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
549 break;
550 case DW_CFA_same_value:
551 Function.addCFIInstruction(
552 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
553 break;
554 case DW_CFA_register:
555 Function.addCFIInstruction(
556 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
557 Instr.Ops[1]));
558 break;
559 case DW_CFA_remember_state:
560 Function.addCFIInstruction(
561 Offset, MCCFIInstruction::createRememberState(nullptr));
562 break;
563 case DW_CFA_restore_state:
564 Function.addCFIInstruction(Offset,
565 MCCFIInstruction::createRestoreState(nullptr));
566 break;
567 case DW_CFA_def_cfa:
568 Function.addCFIInstruction(
569 Offset,
570 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
571 break;
572 case DW_CFA_def_cfa_sf:
573 Function.addCFIInstruction(
574 Offset,
575 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
576 DataAlignment * int64_t(Instr.Ops[1])));
577 break;
578 case DW_CFA_def_cfa_register:
579 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
580 nullptr, Instr.Ops[0]));
581 break;
582 case DW_CFA_def_cfa_offset:
583 Function.addCFIInstruction(
584 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
585 break;
586 case DW_CFA_def_cfa_offset_sf:
587 Function.addCFIInstruction(
588 Offset, MCCFIInstruction::cfiDefCfaOffset(
589 nullptr, DataAlignment * int64_t(Instr.Ops[0])));
590 break;
591 case DW_CFA_GNU_args_size:
592 Function.addCFIInstruction(
593 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
594 Function.setUsesGnuArgsSize();
595 break;
596 case DW_CFA_val_offset_sf:
597 case DW_CFA_val_offset:
598 if (opts::Verbosity >= 1) {
599 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
601 return false;
602 case DW_CFA_def_cfa_expression:
603 case DW_CFA_val_expression:
604 case DW_CFA_expression: {
605 StringRef ExprBytes = Instr.Expression->getData();
606 std::string Str;
607 raw_string_ostream OS(Str);
608 // Manually encode this instruction using CFI escape
609 OS << Opcode;
610 if (Opcode != DW_CFA_def_cfa_expression)
611 encodeULEB128(Instr.Ops[0], OS);
612 encodeULEB128(ExprBytes.size(), OS);
613 OS << ExprBytes;
614 Function.addCFIInstruction(
615 Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
616 break;
618 case DW_CFA_MIPS_advance_loc8:
619 if (opts::Verbosity >= 1)
620 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
621 return false;
622 case DW_CFA_GNU_window_save:
623 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
624 // id but mean different things. The latter is used in AArch64.
625 if (Function.getBinaryContext().isAArch64()) {
626 Function.addCFIInstruction(
627 Offset, MCCFIInstruction::createNegateRAState(nullptr));
628 break;
630 if (opts::Verbosity >= 1)
631 errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
632 return false;
633 case DW_CFA_lo_user:
634 case DW_CFA_hi_user:
635 if (opts::Verbosity >= 1)
636 errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
637 return false;
638 default:
639 if (opts::Verbosity >= 1)
640 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
641 << '\n';
642 return false;
645 return true;
648 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
649 if (!decodeFrameInstruction(Instr))
650 return false;
652 for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
653 if (!decodeFrameInstruction(Instr))
654 return false;
656 return true;
659 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
660 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
661 uint64_t EHFrameHeaderAddress,
662 std::vector<uint64_t> &FailedAddresses) const {
663 // Common PC -> FDE map to be written into .eh_frame_hdr.
664 std::map<uint64_t, uint64_t> PCToFDE;
666 // Presort array for binary search.
667 llvm::sort(FailedAddresses);
669 // Initialize PCToFDE using NewEHFrame.
670 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
671 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
672 if (FDE == nullptr)
673 continue;
674 const uint64_t FuncAddress = FDE->getInitialLocation();
675 const uint64_t FDEAddress =
676 NewEHFrame.getEHFrameAddress() + FDE->getOffset();
678 // Ignore unused FDEs.
679 if (FuncAddress == 0)
680 continue;
682 // Add the address to the map unless we failed to write it.
683 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
684 FuncAddress)) {
685 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
686 << Twine::utohexstr(FuncAddress) << " is at 0x"
687 << Twine::utohexstr(FDEAddress) << '\n');
688 PCToFDE[FuncAddress] = FDEAddress;
692 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
693 << llvm::size(NewEHFrame.entries()) << " entries\n");
695 // Add entries from the original .eh_frame corresponding to the functions
696 // that we did not update.
697 for (const dwarf::FrameEntry &Entry : OldEHFrame) {
698 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
699 if (FDE == nullptr)
700 continue;
701 const uint64_t FuncAddress = FDE->getInitialLocation();
702 const uint64_t FDEAddress =
703 OldEHFrame.getEHFrameAddress() + FDE->getOffset();
705 // Add the address if we failed to write it.
706 if (PCToFDE.count(FuncAddress) == 0) {
707 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
708 << Twine::utohexstr(FuncAddress) << " is at 0x"
709 << Twine::utohexstr(FDEAddress) << '\n');
710 PCToFDE[FuncAddress] = FDEAddress;
714 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
715 << llvm::size(OldEHFrame.entries()) << " entries\n");
717 // Generate a new .eh_frame_hdr based on the new map.
719 // Header plus table of entries of size 8 bytes.
720 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
722 // Version is 1.
723 EHFrameHeader[0] = 1;
724 // Encoding of the eh_frame pointer.
725 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
726 // Encoding of the count field to follow.
727 EHFrameHeader[2] = DW_EH_PE_udata4;
728 // Encoding of the table entries - 4-byte offset from the start of the header.
729 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
731 // Address of eh_frame. Use the new one.
732 support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
733 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
735 // Number of entries in the table (FDE count).
736 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
738 // Write the table at offset 12.
739 char *Ptr = EHFrameHeader.data();
740 uint32_t Offset = 12;
741 for (const auto &PCI : PCToFDE) {
742 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
743 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
744 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
745 Offset += 4;
746 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
747 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
748 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
749 Offset += 4;
752 return EHFrameHeader;
755 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
756 uint8_t Version = Data.getU8(&Offset);
757 const char *Augmentation = Data.getCStr(&Offset);
758 StringRef AugmentationString(Augmentation ? Augmentation : "");
759 uint8_t AddressSize =
760 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
761 Data.setAddressSize(AddressSize);
762 // Skip segment descriptor size
763 if (Version >= 4)
764 Offset += 1;
765 // Skip code alignment factor
766 Data.getULEB128(&Offset);
767 // Skip data alignment
768 Data.getSLEB128(&Offset);
769 // Skip return address register
770 if (Version == 1)
771 Offset += 1;
772 else
773 Data.getULEB128(&Offset);
775 uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
776 uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
777 // Walk the augmentation string to get all the augmentation data.
778 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
779 switch (AugmentationString[i]) {
780 default:
781 return createStringError(
782 errc::invalid_argument,
783 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
784 case 'L':
785 LSDAPointerEncoding = Data.getU8(&Offset);
786 break;
787 case 'P': {
788 uint32_t PersonalityEncoding = Data.getU8(&Offset);
789 std::optional<uint64_t> Personality =
790 Data.getEncodedPointer(&Offset, PersonalityEncoding,
791 EHFrameAddress ? EHFrameAddress + Offset : 0);
792 // Patch personality address
793 if (Personality)
794 PatcherCallback(*Personality, Offset, PersonalityEncoding);
795 break;
797 case 'R':
798 FDEPointerEncoding = Data.getU8(&Offset);
799 break;
800 case 'z':
801 if (i)
802 return createStringError(
803 errc::invalid_argument,
804 "'z' must be the first character at 0x%" PRIx64, StartOffset);
805 // Skip augmentation length
806 Data.getULEB128(&Offset);
807 break;
808 case 'S':
809 case 'B':
810 break;
813 Entries.emplace_back(std::make_unique<CIEInfo>(
814 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
815 CIEs[StartOffset] = &*Entries.back();
816 return Error::success();
819 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
820 uint64_t StartStructureOffset) {
821 std::optional<uint64_t> LSDAAddress;
822 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
824 // The address size is encoded in the CIE we reference.
825 if (!Cie)
826 return createStringError(errc::invalid_argument,
827 "parsing FDE data at 0x%" PRIx64
828 " failed due to missing CIE",
829 StartStructureOffset);
830 // Patch initial location
831 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
832 EHFrameAddress + Offset)) {
833 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
835 // Skip address range
836 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
838 // Process augmentation data for this FDE.
839 StringRef AugmentationString = Cie->AugmentationString;
840 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
841 // Skip augmentation length
842 Data.getULEB128(&Offset);
843 LSDAAddress =
844 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
845 EHFrameAddress ? Offset + EHFrameAddress : 0);
846 // Patch LSDA address
847 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
849 return Error::success();
852 Error EHFrameParser::parse() {
853 while (Data.isValidOffset(Offset)) {
854 const uint64_t StartOffset = Offset;
856 uint64_t Length;
857 DwarfFormat Format;
858 std::tie(Length, Format) = Data.getInitialLength(&Offset);
860 // If the Length is 0, then this CIE is a terminator
861 if (Length == 0)
862 break;
864 const uint64_t StartStructureOffset = Offset;
865 const uint64_t EndStructureOffset = Offset + Length;
867 Error Err = Error::success();
868 const uint64_t Id = Data.getRelocatedValue(4, &Offset,
869 /*SectionIndex=*/nullptr, &Err);
870 if (Err)
871 return Err;
873 if (!Id) {
874 if (Error Err = parseCIE(StartOffset))
875 return Err;
876 } else {
877 if (Error Err = parseFDE(Id, StartStructureOffset))
878 return Err;
880 Offset = EndStructureOffset;
883 return Error::success();
886 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
887 PatcherCallbackTy PatcherCallback) {
888 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
889 return Parser.parse();
892 } // namespace bolt
893 } // namespace llvm