1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements functions for handling C++ exception meta data.
11 // Some of the code is taken from examples/ExceptionDemo
13 //===----------------------------------------------------------------------===//
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "bolt-exceptions"
33 using namespace llvm::dwarf
;
37 extern llvm::cl::OptionCategory BoltCategory
;
39 extern llvm::cl::opt
<unsigned> Verbosity
;
41 static llvm::cl::opt
<bool>
42 PrintExceptions("print-exceptions",
43 llvm::cl::desc("print exception handling data"),
44 llvm::cl::Hidden
, llvm::cl::cat(BoltCategory
));
// Read and dump the .gcc_except_table section entry.
//
// .gcc_except_table section contains a set of Language-Specific Data Areas -
// a fancy name for exception handling tables. There's one LSDA entry per
// function. However, we can't actually tell which function LSDA refers to
// unless we parse .eh_frame entry that refers to the LSDA.
// Then inside LSDA most addresses are encoded relative to the function start,
// so we need the function context in order to get to real addresses.
//
// The best visual representation of the tables comprising LSDA and
// relationships between them is illustrated at:
//   https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
// Keep in mind that GCC implementation deviates slightly from that document.
//
// To summarize, there are 4 tables in LSDA: call site table, actions table,
// types table, and types index table (for indirection). The main table contains
// call site entries. Each call site includes a PC range that can throw an
// exception, a handler (landing pad), and a reference to an entry in the action
// table. The handler and/or action could be 0. The action entry is a head
// of a list of actions associated with a call site. The action table contains
// all such lists (it could be optimized to share list tails). Each action could
// be either to catch an exception of a given type, to perform a cleanup, or to
// propagate the exception after filtering it out (e.g. to make sure function
// exception specification is not violated). Catch action contains a reference
// to an entry in the type table, and filter action refers to an entry in the
// type index table to encode a set of types to filter.
//
// Call site table follows LSDA header. Action table immediately follows the
// call site table.
//
// Both types table and type index table start at the same location, but they
// grow in opposite directions (types go up, indices go down). The beginning of
// these tables is encoded in LSDA header. Sizes for both of the tables are not
// included anywhere.
//
// We have to parse all of the tables to determine their sizes. Then we have
// to parse the call site table and associate discovered information with
// actual call instructions and landing pad blocks.
//
// For the purpose of rewriting exception handling tables, we can reuse action,
// and type index tables in their original binary format.
//
// Type table could be encoded using position-independent references, and thus
// may require relocation.
//
// Ideally we should be able to re-write LSDA in-place, without the need to
// allocate a new space for it. Sadly there's no guarantee that the new call
// site table will be the same size, as GCC uses uleb encodings for PC offsets.
//
// Note: some functions have LSDA entries with 0 call site entries.
101 Error
BinaryFunction::parseLSDA(ArrayRef
<uint8_t> LSDASectionData
,
102 uint64_t LSDASectionAddress
) {
103 assert(CurrentState
== State::Disassembled
&& "unexpected function state");
105 if (!getLSDAAddress())
106 return Error::success();
108 DWARFDataExtractor
Data(
109 StringRef(reinterpret_cast<const char *>(LSDASectionData
.data()),
110 LSDASectionData
.size()),
111 BC
.DwCtx
->getDWARFObj().isLittleEndian(),
112 BC
.DwCtx
->getDWARFObj().getAddressSize());
113 uint64_t Offset
= getLSDAAddress() - LSDASectionAddress
;
114 assert(Data
.isValidOffset(Offset
) && "wrong LSDA address");
116 const uint8_t LPStartEncoding
= Data
.getU8(&Offset
);
117 uint64_t LPStart
= Address
;
118 if (LPStartEncoding
!= dwarf::DW_EH_PE_omit
) {
119 std::optional
<uint64_t> MaybeLPStart
= Data
.getEncodedPointer(
120 &Offset
, LPStartEncoding
, Offset
+ LSDASectionAddress
);
122 BC
.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
123 << (unsigned)LPStartEncoding
<< '\n';
124 return createFatalBOLTError("");
126 LPStart
= *MaybeLPStart
;
129 const uint8_t TTypeEncoding
= Data
.getU8(&Offset
);
130 LSDATypeEncoding
= TTypeEncoding
;
131 size_t TTypeEncodingSize
= 0;
132 uintptr_t TTypeEnd
= 0;
133 if (TTypeEncoding
!= DW_EH_PE_omit
) {
134 TTypeEnd
= Data
.getULEB128(&Offset
);
135 TTypeEncodingSize
= BC
.getDWARFEncodingSize(TTypeEncoding
);
138 if (opts::PrintExceptions
) {
139 BC
.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
140 << " for function " << *this << "]:\n";
141 BC
.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding
)
143 BC
.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart
) << '\n';
144 BC
.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding
)
146 BC
.outs() << "TType End = " << TTypeEnd
<< '\n';
149 // Table to store list of indices in type table. Entries are uleb128 values.
150 const uint64_t TypeIndexTableStart
= Offset
+ TTypeEnd
;
152 // Offset past the last decoded index.
153 uint64_t MaxTypeIndexTableOffset
= 0;
155 // Max positive index used in type table.
156 unsigned MaxTypeIndex
= 0;
158 // The actual type info table starts at the same location, but grows in
159 // opposite direction. TTypeEncoding is used to encode stored values.
160 const uint64_t TypeTableStart
= Offset
+ TTypeEnd
;
162 uint8_t CallSiteEncoding
= Data
.getU8(&Offset
);
163 uint32_t CallSiteTableLength
= Data
.getULEB128(&Offset
);
164 uint64_t CallSiteTableStart
= Offset
;
165 uint64_t CallSiteTableEnd
= CallSiteTableStart
+ CallSiteTableLength
;
166 uint64_t CallSitePtr
= CallSiteTableStart
;
167 uint64_t ActionTableStart
= CallSiteTableEnd
;
169 if (opts::PrintExceptions
) {
170 BC
.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding
<< '\n';
171 BC
.outs() << "CallSite table length = " << CallSiteTableLength
<< '\n';
175 this->HasEHRanges
= CallSitePtr
< CallSiteTableEnd
;
176 const uint64_t RangeBase
= getAddress();
177 while (CallSitePtr
< CallSiteTableEnd
) {
178 uint64_t Start
= *Data
.getEncodedPointer(&CallSitePtr
, CallSiteEncoding
,
179 CallSitePtr
+ LSDASectionAddress
);
180 uint64_t Length
= *Data
.getEncodedPointer(&CallSitePtr
, CallSiteEncoding
,
181 CallSitePtr
+ LSDASectionAddress
);
182 uint64_t LandingPad
= *Data
.getEncodedPointer(
183 &CallSitePtr
, CallSiteEncoding
, CallSitePtr
+ LSDASectionAddress
);
184 uint64_t ActionEntry
= Data
.getULEB128(&CallSitePtr
);
186 LandingPad
+= LPStart
;
188 if (opts::PrintExceptions
) {
189 BC
.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase
+ Start
)
190 << ", 0x" << Twine::utohexstr(RangeBase
+ Start
+ Length
)
191 << "); landing pad: 0x" << Twine::utohexstr(LandingPad
)
192 << "; action entry: 0x" << Twine::utohexstr(ActionEntry
)
194 BC
.outs() << " current offset is " << (CallSitePtr
- CallSiteTableStart
)
198 // Create a handler entry if necessary.
199 MCSymbol
*LPSymbol
= nullptr;
201 // Verify if landing pad code is located outside current function
202 // Support landing pad to builtin_unreachable
203 if (LandingPad
< Address
|| LandingPad
> Address
+ getSize()) {
204 BinaryFunction
*Fragment
=
205 BC
.getBinaryFunctionContainingAddress(LandingPad
);
206 assert(Fragment
!= nullptr &&
207 "BOLT-ERROR: cannot find landing pad fragment");
208 BC
.addInterproceduralReference(this, Fragment
->getAddress());
209 BC
.processInterproceduralReferences();
210 assert(isParentOrChildOf(*Fragment
) &&
211 "BOLT-ERROR: cannot have landing pads in different functions");
212 setHasIndirectTargetToSplitFragment(true);
213 BC
.addFragmentsToSkip(this);
214 return Error::success();
217 const uint64_t LPOffset
= LandingPad
- getAddress();
218 if (!getInstructionAtOffset(LPOffset
)) {
219 if (opts::Verbosity
>= 1)
220 BC
.errs() << "BOLT-WARNING: landing pad "
221 << Twine::utohexstr(LPOffset
)
222 << " not pointing to an instruction in function " << *this
225 auto Label
= Labels
.find(LPOffset
);
226 if (Label
!= Labels
.end()) {
227 LPSymbol
= Label
->second
;
229 LPSymbol
= BC
.Ctx
->createNamedTempSymbol("LP");
230 Labels
[LPOffset
] = LPSymbol
;
235 // Mark all call instructions in the range.
236 auto II
= Instructions
.find(Start
);
237 auto IE
= Instructions
.end();
238 assert(II
!= IE
&& "exception range not pointing to an instruction");
240 MCInst
&Instruction
= II
->second
;
241 if (BC
.MIB
->isCall(Instruction
) &&
242 !BC
.MIB
->getConditionalTailCall(Instruction
)) {
243 assert(!BC
.MIB
->isInvoke(Instruction
) &&
244 "overlapping exception ranges detected");
245 // Add extra operands to a call instruction making it an invoke from
247 BC
.MIB
->addEHInfo(Instruction
,
248 MCPlus::MCLandingPad(LPSymbol
, ActionEntry
));
251 } while (II
!= IE
&& II
->first
< Start
+ Length
);
253 if (ActionEntry
!= 0) {
254 auto printType
= [&](int Index
, raw_ostream
&OS
) {
255 assert(Index
> 0 && "only positive indices are valid");
256 uint64_t TTEntry
= TypeTableStart
- Index
* TTypeEncodingSize
;
257 const uint64_t TTEntryAddress
= TTEntry
+ LSDASectionAddress
;
258 uint64_t TypeAddress
=
259 *Data
.getEncodedPointer(&TTEntry
, TTypeEncoding
, TTEntryAddress
);
260 if ((TTypeEncoding
& DW_EH_PE_pcrel
) && TypeAddress
== TTEntryAddress
)
262 if (TypeAddress
== 0) {
266 if (TTypeEncoding
& DW_EH_PE_indirect
) {
267 ErrorOr
<uint64_t> PointerOrErr
= BC
.getPointerAtAddress(TypeAddress
);
268 assert(PointerOrErr
&& "failed to decode indirect address");
269 TypeAddress
= *PointerOrErr
;
271 if (BinaryData
*TypeSymBD
= BC
.getBinaryDataAtAddress(TypeAddress
))
272 OS
<< TypeSymBD
->getName();
274 OS
<< "0x" << Twine::utohexstr(TypeAddress
);
276 if (opts::PrintExceptions
)
277 BC
.outs() << " actions: ";
278 uint64_t ActionPtr
= ActionTableStart
+ ActionEntry
- 1;
281 const char *Sep
= "";
283 ActionType
= Data
.getSLEB128(&ActionPtr
);
284 const uint32_t Self
= ActionPtr
;
285 ActionNext
= Data
.getSLEB128(&ActionPtr
);
286 if (opts::PrintExceptions
)
287 BC
.outs() << Sep
<< "(" << ActionType
<< ", " << ActionNext
<< ") ";
288 if (ActionType
== 0) {
289 if (opts::PrintExceptions
)
290 BC
.outs() << "cleanup";
291 } else if (ActionType
> 0) {
292 // It's an index into a type table.
294 std::max(MaxTypeIndex
, static_cast<unsigned>(ActionType
));
295 if (opts::PrintExceptions
) {
296 BC
.outs() << "catch type ";
297 printType(ActionType
, BC
.outs());
299 } else { // ActionType < 0
300 if (opts::PrintExceptions
)
301 BC
.outs() << "filter exception types ";
302 const char *TSep
= "";
303 // ActionType is a negative *byte* offset into *uleb128-encoded* table
304 // of indices with base 1.
305 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
306 // encoded using uleb128 thus we cannot directly dereference them.
307 uint64_t TypeIndexTablePtr
= TypeIndexTableStart
- ActionType
- 1;
308 while (uint64_t Index
= Data
.getULEB128(&TypeIndexTablePtr
)) {
309 MaxTypeIndex
= std::max(MaxTypeIndex
, static_cast<unsigned>(Index
));
310 if (opts::PrintExceptions
) {
312 printType(Index
, BC
.outs());
316 MaxTypeIndexTableOffset
= std::max(
317 MaxTypeIndexTableOffset
, TypeIndexTablePtr
- TypeIndexTableStart
);
322 ActionPtr
= Self
+ ActionNext
;
323 } while (ActionNext
);
324 if (opts::PrintExceptions
)
328 if (opts::PrintExceptions
)
331 assert(TypeIndexTableStart
+ MaxTypeIndexTableOffset
<=
332 Data
.getData().size() &&
333 "LSDA entry has crossed section boundary");
336 LSDAActionTable
= LSDASectionData
.slice(
337 ActionTableStart
, TypeIndexTableStart
-
338 MaxTypeIndex
* TTypeEncodingSize
-
340 for (unsigned Index
= 1; Index
<= MaxTypeIndex
; ++Index
) {
341 uint64_t TTEntry
= TypeTableStart
- Index
* TTypeEncodingSize
;
342 const uint64_t TTEntryAddress
= TTEntry
+ LSDASectionAddress
;
343 uint64_t TypeAddress
=
344 *Data
.getEncodedPointer(&TTEntry
, TTypeEncoding
, TTEntryAddress
);
345 if ((TTypeEncoding
& DW_EH_PE_pcrel
) && (TypeAddress
== TTEntryAddress
))
347 if (TTypeEncoding
& DW_EH_PE_indirect
) {
348 LSDATypeAddressTable
.emplace_back(TypeAddress
);
350 ErrorOr
<uint64_t> PointerOrErr
= BC
.getPointerAtAddress(TypeAddress
);
351 assert(PointerOrErr
&& "failed to decode indirect address");
352 TypeAddress
= *PointerOrErr
;
355 LSDATypeTable
.emplace_back(TypeAddress
);
358 LSDASectionData
.slice(TypeIndexTableStart
, MaxTypeIndexTableOffset
);
360 return Error::success();
363 void BinaryFunction::updateEHRanges() {
367 assert(CurrentState
== State::CFG_Finalized
&& "unexpected state");
369 // Build call sites table.
371 const MCSymbol
*LP
; // landing pad
378 for (FunctionFragment
&FF
: getLayout().fragments()) {
379 // If previous call can throw, this is its exception handler.
380 EHInfo PreviousEH
= {nullptr, 0};
382 // Marker for the beginning of exceptions range.
383 const MCSymbol
*StartRange
= nullptr;
385 for (BinaryBasicBlock
*const BB
: FF
) {
386 for (MCInst
&Instr
: *BB
) {
387 if (!BC
.MIB
->isCall(Instr
))
390 // Instruction can throw an exception that should be handled.
391 const bool Throws
= BC
.MIB
->isInvoke(Instr
);
393 // Ignore the call if it's a continuation of a no-throw gap.
394 if (!Throws
&& !StartRange
)
397 // Extract exception handling information from the instruction.
398 const MCSymbol
*LP
= nullptr;
400 if (const std::optional
<MCPlus::MCLandingPad
> EHInfo
=
401 BC
.MIB
->getEHInfo(Instr
))
402 std::tie(LP
, Action
) = *EHInfo
;
404 // No action if the exception handler has not changed.
405 if (Throws
&& StartRange
&& PreviousEH
.LP
== LP
&&
406 PreviousEH
.Action
== Action
)
409 // Same symbol is used for the beginning and the end of the range.
411 if (MCSymbol
*InstrLabel
= BC
.MIB
->getInstLabel(Instr
)) {
412 EHSymbol
= InstrLabel
;
414 std::unique_lock
<llvm::sys::RWMutex
> Lock(BC
.CtxMutex
);
415 EHSymbol
= BC
.MIB
->getOrCreateInstLabel(Instr
, "EH", BC
.Ctx
.get());
418 // At this point we could be in one of the following states:
420 // I. Exception handler has changed and we need to close previous range
421 // and start a new one.
423 // II. Start a new exception range after the gap.
425 // III. Close current exception range and start a new gap.
426 const MCSymbol
*EndRange
;
432 StartRange
= EHSymbol
;
436 // Close the previous range.
440 CallSite
{StartRange
, EndRange
, PreviousEH
.LP
, PreviousEH
.Action
});
444 StartRange
= EHSymbol
;
445 PreviousEH
= EHInfo
{LP
, Action
};
447 StartRange
= nullptr;
452 // Check if we need to close the range.
454 const MCSymbol
*EndRange
= getFunctionEndLabel(FF
.getFragmentNum());
457 CallSite
{StartRange
, EndRange
, PreviousEH
.LP
, PreviousEH
.Action
});
// Mask selecting the two most significant bits of a CFI opcode byte.
// fillCFIInfoFor() reduces an opcode to these bits whenever any of them is
// set.
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
466 CFIReaderWriter::CFIReaderWriter(BinaryContext
&BC
,
467 const DWARFDebugFrame
&EHFrame
)
469 // Prepare FDEs for fast lookup
470 for (const dwarf::FrameEntry
&Entry
: EHFrame
.entries()) {
471 const auto *CurFDE
= dyn_cast
<dwarf::FDE
>(&Entry
);
475 // There could me multiple FDEs with the same initial address, and perhaps
476 // different sizes (address ranges). Use the first entry with non-zero size.
477 auto FDEI
= FDEs
.lower_bound(CurFDE
->getInitialLocation());
478 if (FDEI
!= FDEs
.end() && FDEI
->first
== CurFDE
->getInitialLocation()) {
479 if (CurFDE
->getAddressRange()) {
480 if (FDEI
->second
->getAddressRange() == 0) {
481 FDEI
->second
= CurFDE
;
482 } else if (opts::Verbosity
> 0) {
483 BC
.errs() << "BOLT-WARNING: different FDEs for function at 0x"
484 << Twine::utohexstr(FDEI
->first
)
485 << " detected; sizes: " << FDEI
->second
->getAddressRange()
486 << " and " << CurFDE
->getAddressRange() << '\n';
490 FDEs
.emplace_hint(FDEI
, CurFDE
->getInitialLocation(), CurFDE
);
495 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction
&Function
) const {
496 uint64_t Address
= Function
.getAddress();
497 auto I
= FDEs
.find(Address
);
498 // Ignore zero-length FDE ranges.
499 if (I
== FDEs
.end() || !I
->second
->getAddressRange())
502 const FDE
&CurFDE
= *I
->second
;
503 std::optional
<uint64_t> LSDA
= CurFDE
.getLSDAAddress();
504 Function
.setLSDAAddress(LSDA
? *LSDA
: 0);
506 uint64_t Offset
= Function
.getFirstInstructionOffset();
507 uint64_t CodeAlignment
= CurFDE
.getLinkedCIE()->getCodeAlignmentFactor();
508 uint64_t DataAlignment
= CurFDE
.getLinkedCIE()->getDataAlignmentFactor();
509 if (CurFDE
.getLinkedCIE()->getPersonalityAddress()) {
510 Function
.setPersonalityFunction(
511 *CurFDE
.getLinkedCIE()->getPersonalityAddress());
512 Function
.setPersonalityEncoding(
513 *CurFDE
.getLinkedCIE()->getPersonalityEncoding());
516 auto decodeFrameInstruction
= [this, &Function
, &Offset
, Address
,
517 CodeAlignment
, DataAlignment
](
518 const CFIProgram::Instruction
&Instr
) {
519 uint8_t Opcode
= Instr
.Opcode
;
520 if (Opcode
& DWARF_CFI_PRIMARY_OPCODE_MASK
)
521 Opcode
&= DWARF_CFI_PRIMARY_OPCODE_MASK
;
522 switch (Instr
.Opcode
) {
525 case DW_CFA_advance_loc4
:
526 case DW_CFA_advance_loc2
:
527 case DW_CFA_advance_loc1
:
528 case DW_CFA_advance_loc
:
529 // Advance our current address
530 Offset
+= CodeAlignment
* int64_t(Instr
.Ops
[0]);
532 case DW_CFA_offset_extended_sf
:
533 Function
.addCFIInstruction(
535 MCCFIInstruction::createOffset(
536 nullptr, Instr
.Ops
[0], DataAlignment
* int64_t(Instr
.Ops
[1])));
538 case DW_CFA_offset_extended
:
540 Function
.addCFIInstruction(
541 Offset
, MCCFIInstruction::createOffset(nullptr, Instr
.Ops
[0],
542 DataAlignment
* Instr
.Ops
[1]));
544 case DW_CFA_restore_extended
:
546 Function
.addCFIInstruction(
547 Offset
, MCCFIInstruction::createRestore(nullptr, Instr
.Ops
[0]));
550 assert(Instr
.Ops
[0] >= Address
&& "set_loc out of function bounds");
551 assert(Instr
.Ops
[0] <= Address
+ Function
.getSize() &&
552 "set_loc out of function bounds");
553 Offset
= Instr
.Ops
[0] - Address
;
556 case DW_CFA_undefined
:
557 Function
.addCFIInstruction(
558 Offset
, MCCFIInstruction::createUndefined(nullptr, Instr
.Ops
[0]));
560 case DW_CFA_same_value
:
561 Function
.addCFIInstruction(
562 Offset
, MCCFIInstruction::createSameValue(nullptr, Instr
.Ops
[0]));
564 case DW_CFA_register
:
565 Function
.addCFIInstruction(
566 Offset
, MCCFIInstruction::createRegister(nullptr, Instr
.Ops
[0],
569 case DW_CFA_remember_state
:
570 Function
.addCFIInstruction(
571 Offset
, MCCFIInstruction::createRememberState(nullptr));
573 case DW_CFA_restore_state
:
574 Function
.addCFIInstruction(Offset
,
575 MCCFIInstruction::createRestoreState(nullptr));
578 Function
.addCFIInstruction(
580 MCCFIInstruction::cfiDefCfa(nullptr, Instr
.Ops
[0], Instr
.Ops
[1]));
582 case DW_CFA_def_cfa_sf
:
583 Function
.addCFIInstruction(
585 MCCFIInstruction::cfiDefCfa(nullptr, Instr
.Ops
[0],
586 DataAlignment
* int64_t(Instr
.Ops
[1])));
588 case DW_CFA_def_cfa_register
:
589 Function
.addCFIInstruction(Offset
, MCCFIInstruction::createDefCfaRegister(
590 nullptr, Instr
.Ops
[0]));
592 case DW_CFA_def_cfa_offset
:
593 Function
.addCFIInstruction(
594 Offset
, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr
.Ops
[0]));
596 case DW_CFA_def_cfa_offset_sf
:
597 Function
.addCFIInstruction(
598 Offset
, MCCFIInstruction::cfiDefCfaOffset(
599 nullptr, DataAlignment
* int64_t(Instr
.Ops
[0])));
601 case DW_CFA_GNU_args_size
:
602 Function
.addCFIInstruction(
603 Offset
, MCCFIInstruction::createGnuArgsSize(nullptr, Instr
.Ops
[0]));
604 Function
.setUsesGnuArgsSize();
606 case DW_CFA_val_offset_sf
:
607 case DW_CFA_val_offset
:
608 if (opts::Verbosity
>= 1) {
609 BC
.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
612 case DW_CFA_def_cfa_expression
:
613 case DW_CFA_val_expression
:
614 case DW_CFA_expression
: {
615 StringRef ExprBytes
= Instr
.Expression
->getData();
617 raw_string_ostream
OS(Str
);
618 // Manually encode this instruction using CFI escape
620 if (Opcode
!= DW_CFA_def_cfa_expression
)
621 encodeULEB128(Instr
.Ops
[0], OS
);
622 encodeULEB128(ExprBytes
.size(), OS
);
624 Function
.addCFIInstruction(
625 Offset
, MCCFIInstruction::createEscape(nullptr, OS
.str()));
628 case DW_CFA_MIPS_advance_loc8
:
629 if (opts::Verbosity
>= 1)
630 BC
.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
632 case DW_CFA_GNU_window_save
:
633 // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
634 // id but mean different things. The latter is used in AArch64.
635 if (Function
.getBinaryContext().isAArch64()) {
636 Function
.addCFIInstruction(
637 Offset
, MCCFIInstruction::createNegateRAState(nullptr));
640 if (opts::Verbosity
>= 1)
641 BC
.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
645 if (opts::Verbosity
>= 1)
646 BC
.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
649 if (opts::Verbosity
>= 1)
650 BC
.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
651 << Instr
.Opcode
<< '\n';
658 for (const CFIProgram::Instruction
&Instr
: CurFDE
.getLinkedCIE()->cfis())
659 if (!decodeFrameInstruction(Instr
))
662 for (const CFIProgram::Instruction
&Instr
: CurFDE
.cfis())
663 if (!decodeFrameInstruction(Instr
))
669 std::vector
<char> CFIReaderWriter::generateEHFrameHeader(
670 const DWARFDebugFrame
&OldEHFrame
, const DWARFDebugFrame
&NewEHFrame
,
671 uint64_t EHFrameHeaderAddress
,
672 std::vector
<uint64_t> &FailedAddresses
) const {
673 // Common PC -> FDE map to be written into .eh_frame_hdr.
674 std::map
<uint64_t, uint64_t> PCToFDE
;
676 // Presort array for binary search.
677 llvm::sort(FailedAddresses
);
679 // Initialize PCToFDE using NewEHFrame.
680 for (dwarf::FrameEntry
&Entry
: NewEHFrame
.entries()) {
681 const dwarf::FDE
*FDE
= dyn_cast
<dwarf::FDE
>(&Entry
);
684 const uint64_t FuncAddress
= FDE
->getInitialLocation();
685 const uint64_t FDEAddress
=
686 NewEHFrame
.getEHFrameAddress() + FDE
->getOffset();
688 // Ignore unused FDEs.
689 if (FuncAddress
== 0)
692 // Add the address to the map unless we failed to write it.
693 if (!std::binary_search(FailedAddresses
.begin(), FailedAddresses
.end(),
695 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
696 << Twine::utohexstr(FuncAddress
) << " is at 0x"
697 << Twine::utohexstr(FDEAddress
) << '\n');
698 PCToFDE
[FuncAddress
] = FDEAddress
;
702 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
703 << llvm::size(NewEHFrame
.entries()) << " entries\n");
705 // Add entries from the original .eh_frame corresponding to the functions
706 // that we did not update.
707 for (const dwarf::FrameEntry
&Entry
: OldEHFrame
) {
708 const dwarf::FDE
*FDE
= dyn_cast
<dwarf::FDE
>(&Entry
);
711 const uint64_t FuncAddress
= FDE
->getInitialLocation();
712 const uint64_t FDEAddress
=
713 OldEHFrame
.getEHFrameAddress() + FDE
->getOffset();
715 // Add the address if we failed to write it.
716 if (PCToFDE
.count(FuncAddress
) == 0) {
717 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
718 << Twine::utohexstr(FuncAddress
) << " is at 0x"
719 << Twine::utohexstr(FDEAddress
) << '\n');
720 PCToFDE
[FuncAddress
] = FDEAddress
;
724 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
725 << llvm::size(OldEHFrame
.entries()) << " entries\n");
727 // Generate a new .eh_frame_hdr based on the new map.
729 // Header plus table of entries of size 8 bytes.
730 std::vector
<char> EHFrameHeader(12 + PCToFDE
.size() * 8);
733 EHFrameHeader
[0] = 1;
734 // Encoding of the eh_frame pointer.
735 EHFrameHeader
[1] = DW_EH_PE_pcrel
| DW_EH_PE_sdata4
;
736 // Encoding of the count field to follow.
737 EHFrameHeader
[2] = DW_EH_PE_udata4
;
738 // Encoding of the table entries - 4-byte offset from the start of the header.
739 EHFrameHeader
[3] = DW_EH_PE_datarel
| DW_EH_PE_sdata4
;
741 // Address of eh_frame. Use the new one.
742 support::ulittle32_t::ref(EHFrameHeader
.data() + 4) =
743 NewEHFrame
.getEHFrameAddress() - (EHFrameHeaderAddress
+ 4);
745 // Number of entries in the table (FDE count).
746 support::ulittle32_t::ref(EHFrameHeader
.data() + 8) = PCToFDE
.size();
748 // Write the table at offset 12.
749 char *Ptr
= EHFrameHeader
.data();
750 uint32_t Offset
= 12;
751 for (const auto &PCI
: PCToFDE
) {
752 int64_t InitialPCOffset
= PCI
.first
- EHFrameHeaderAddress
;
753 assert(isInt
<32>(InitialPCOffset
) && "PC offset out of bounds");
754 support::ulittle32_t::ref(Ptr
+ Offset
) = InitialPCOffset
;
756 int64_t FDEOffset
= PCI
.second
- EHFrameHeaderAddress
;
757 assert(isInt
<32>(FDEOffset
) && "FDE offset out of bounds");
758 support::ulittle32_t::ref(Ptr
+ Offset
) = FDEOffset
;
762 return EHFrameHeader
;
765 Error
EHFrameParser::parseCIE(uint64_t StartOffset
) {
766 uint8_t Version
= Data
.getU8(&Offset
);
767 const char *Augmentation
= Data
.getCStr(&Offset
);
768 StringRef
AugmentationString(Augmentation
? Augmentation
: "");
769 uint8_t AddressSize
=
770 Version
< 4 ? Data
.getAddressSize() : Data
.getU8(&Offset
);
771 Data
.setAddressSize(AddressSize
);
772 // Skip segment descriptor size
775 // Skip code alignment factor
776 Data
.getULEB128(&Offset
);
777 // Skip data alignment
778 Data
.getSLEB128(&Offset
);
779 // Skip return address register
783 Data
.getULEB128(&Offset
);
785 uint32_t FDEPointerEncoding
= DW_EH_PE_absptr
;
786 uint32_t LSDAPointerEncoding
= DW_EH_PE_omit
;
787 // Walk the augmentation string to get all the augmentation data.
788 for (unsigned i
= 0, e
= AugmentationString
.size(); i
!= e
; ++i
) {
789 switch (AugmentationString
[i
]) {
791 return createStringError(
792 errc::invalid_argument
,
793 "unknown augmentation character in entry at 0x%" PRIx64
, StartOffset
);
795 LSDAPointerEncoding
= Data
.getU8(&Offset
);
798 uint32_t PersonalityEncoding
= Data
.getU8(&Offset
);
799 std::optional
<uint64_t> Personality
=
800 Data
.getEncodedPointer(&Offset
, PersonalityEncoding
,
801 EHFrameAddress
? EHFrameAddress
+ Offset
: 0);
802 // Patch personality address
804 PatcherCallback(*Personality
, Offset
, PersonalityEncoding
);
808 FDEPointerEncoding
= Data
.getU8(&Offset
);
812 return createStringError(
813 errc::invalid_argument
,
814 "'z' must be the first character at 0x%" PRIx64
, StartOffset
);
815 // Skip augmentation length
816 Data
.getULEB128(&Offset
);
823 Entries
.emplace_back(std::make_unique
<CIEInfo
>(
824 FDEPointerEncoding
, LSDAPointerEncoding
, AugmentationString
));
825 CIEs
[StartOffset
] = &*Entries
.back();
826 return Error::success();
829 Error
EHFrameParser::parseFDE(uint64_t CIEPointer
,
830 uint64_t StartStructureOffset
) {
831 std::optional
<uint64_t> LSDAAddress
;
832 CIEInfo
*Cie
= CIEs
[StartStructureOffset
- CIEPointer
];
834 // The address size is encoded in the CIE we reference.
836 return createStringError(errc::invalid_argument
,
837 "parsing FDE data at 0x%" PRIx64
838 " failed due to missing CIE",
839 StartStructureOffset
);
840 // Patch initial location
841 if (auto Val
= Data
.getEncodedPointer(&Offset
, Cie
->FDEPtrEncoding
,
842 EHFrameAddress
+ Offset
)) {
843 PatcherCallback(*Val
, Offset
, Cie
->FDEPtrEncoding
);
845 // Skip address range
846 Data
.getEncodedPointer(&Offset
, Cie
->FDEPtrEncoding
, 0);
848 // Process augmentation data for this FDE.
849 StringRef AugmentationString
= Cie
->AugmentationString
;
850 if (!AugmentationString
.empty() && Cie
->LSDAPtrEncoding
!= DW_EH_PE_omit
) {
851 // Skip augmentation length
852 Data
.getULEB128(&Offset
);
854 Data
.getEncodedPointer(&Offset
, Cie
->LSDAPtrEncoding
,
855 EHFrameAddress
? Offset
+ EHFrameAddress
: 0);
856 // Patch LSDA address
857 PatcherCallback(*LSDAAddress
, Offset
, Cie
->LSDAPtrEncoding
);
859 return Error::success();
862 Error
EHFrameParser::parse() {
863 while (Data
.isValidOffset(Offset
)) {
864 const uint64_t StartOffset
= Offset
;
868 std::tie(Length
, Format
) = Data
.getInitialLength(&Offset
);
870 // If the Length is 0, then this CIE is a terminator
874 const uint64_t StartStructureOffset
= Offset
;
875 const uint64_t EndStructureOffset
= Offset
+ Length
;
877 Error Err
= Error::success();
878 const uint64_t Id
= Data
.getRelocatedValue(4, &Offset
,
879 /*SectionIndex=*/nullptr, &Err
);
884 if (Error Err
= parseCIE(StartOffset
))
887 if (Error Err
= parseFDE(Id
, StartStructureOffset
))
890 Offset
= EndStructureOffset
;
893 return Error::success();
896 Error
EHFrameParser::parse(DWARFDataExtractor Data
, uint64_t EHFrameAddress
,
897 PatcherCallbackTy PatcherCallback
) {
898 EHFrameParser
Parser(Data
, EHFrameAddress
, PatcherCallback
);
899 return Parser
.parse();