1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "FileAnalysis.h"
10 #include "GraphBuilder.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCTargetOptions.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/ELFObjectFile.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Support/raw_ostream.h"
39 using Instr
= llvm::cfi_verify::FileAnalysis::Instr
;
40 using LLVMSymbolizer
= llvm::symbolize::LLVMSymbolizer
;
43 namespace cfi_verify
{
// Boolean command-line option whose value is stored externally in
// IgnoreDWARFFlag (cl::location) and which defaults to false (cl::init).
// Per its help text, enabling it ignores all DWARF data: statically linked
// libraries no longer need to be built with '-g', at the cost of false
// positives for 'CFI unprotected' instructions.
// NOTE(review): the option-name argument and the opener of the description
// argument are not visible in this copy of the file -- confirm against the
// original before editing.
47 static cl::opt
<bool, true> IgnoreDWARFArg(
50 "Ignore all DWARF data. This relaxes the requirements for all "
51 "statically linked libraries to have been compiled with '-g', but "
52 "will result in false positives for 'CFI unprotected' instructions."),
53 cl::location(IgnoreDWARFFlag
), cl::init(false));
// Converts a CFIProtectionStatus enumerator to a string. Every return that is
// visible here yields the enumerator's own spelling. If no case returns,
// control reaches llvm_unreachable.
// NOTE(review): the switch header and the return for the PROTECTED case are
// not visible in this copy -- presumably it returns "PROTECTED" like its
// siblings; confirm against the original.
55 StringRef
stringCFIProtectionStatus(CFIProtectionStatus Status
) {
57 case CFIProtectionStatus::PROTECTED
:
59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
:
60 return "FAIL_NOT_INDIRECT_CF";
61 case CFIProtectionStatus::FAIL_ORPHANS
:
62 return "FAIL_ORPHANS";
63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
:
64 return "FAIL_BAD_CONDITIONAL_BRANCH";
65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
:
66 return "FAIL_REGISTER_CLOBBERED";
67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
:
68 return "FAIL_INVALID_INSTRUCTION";
70 llvm_unreachable("Attempted to stringify an unknown enum value.");
// Factory: builds a FileAnalysis for the binary at Filename, or returns an
// Error. Visible steps, in order:
//   1. object::createBinary(Filename); its error is propagated.
//   2. The binary is moved into a FileAnalysis, and the Object member is set
//      by dyn_cast to object::ObjectFile ("Failed to cast object" on failure).
//   3. The architecture is switched on; an "Unsupported architecture." error
//      is returned on one path.
//   4. ObjectTriple and Features are filled from the object file.
//   5. initialiseDisassemblyMembers(), parseCodeSections() and
//      parseSymbolTable() run in that order; the first Error returned wins.
// NOTE(review): several guard conditions (before the takeError()/make_error
// returns) and most of the switch's case labels are not visible in this copy;
// only Triple::aarch64_be can be seen. Confirm against the original.
73 Expected
<FileAnalysis
> FileAnalysis::Create(StringRef Filename
) {
74 // Open the filename provided.
75 Expected
<object::OwningBinary
<object::Binary
>> BinaryOrErr
=
76 object::createBinary(Filename
);
78 return BinaryOrErr
.takeError();
80 // Construct the object and allow it to take ownership of the binary.
81 object::OwningBinary
<object::Binary
> Binary
= std::move(BinaryOrErr
.get());
82 FileAnalysis
Analysis(std::move(Binary
));
// Object is a view into the binary now owned by Analysis.Binary.
84 Analysis
.Object
= dyn_cast
<object::ObjectFile
>(Analysis
.Binary
.getBinary());
86 return make_error
<UnsupportedDisassembly
>("Failed to cast object");
88 switch (Analysis
.Object
->getArch()) {
92 case Triple::aarch64_be
:
95 return make_error
<UnsupportedDisassembly
>("Unsupported architecture.");
98 Analysis
.ObjectTriple
= Analysis
.Object
->makeTriple();
99 Expected
<SubtargetFeatures
> Features
= Analysis
.Object
->getFeatures();
101 return Features
.takeError();
103 Analysis
.Features
= *Features
;
105 // Init the rest of the object.
// Each step below returns an Error that is truthy on failure; it is moved
// into the Expected<FileAnalysis> return value.
106 if (auto InitResponse
= Analysis
.initialiseDisassemblyMembers())
107 return std::move(InitResponse
);
109 if (auto SectionParseResponse
= Analysis
.parseCodeSections())
110 return std::move(SectionParseResponse
);
112 if (auto SymbolTableParseResponse
= Analysis
.parseSymbolTable())
113 return std::move(SymbolTableParseResponse
);
115 return std::move(Analysis
);
118 FileAnalysis::FileAnalysis(object::OwningBinary
<object::Binary
> Binary
)
119 : Binary(std::move(Binary
)) {}
121 FileAnalysis::FileAnalysis(const Triple
&ObjectTriple
,
122 const SubtargetFeatures
&Features
)
123 : ObjectTriple(ObjectTriple
), Features(Features
) {}
// Looks up InstrMeta.VMAddress in the Instructions map (keyed by address) and
// examines the entry immediately before it. Two guards are visible:
//   - the address is not in the map, or it is the first entry (so there is no
//     predecessor);
//   - the predecessor entry is not marked Valid (note the pre-decrement of KV
//     inside the condition).
// NOTE(review): the return type and every return statement are not visible in
// this copy -- presumably a null result under each guard and a pointer to the
// predecessor otherwise; confirm against the original.
126 FileAnalysis::getPrevInstructionSequential(const Instr
&InstrMeta
) const {
127 std::map
<uint64_t, Instr
>::const_iterator KV
=
128 Instructions
.find(InstrMeta
.VMAddress
);
129 if (KV
== Instructions
.end() || KV
== Instructions
.begin())
132 if (!(--KV
)->second
.Valid
)
// Looks up InstrMeta.VMAddress in the Instructions map (keyed by address) and
// examines the entry immediately after it. Two guards are visible:
//   - the address is not in the map, or advancing KV reaches end() (so there
//     is no successor); the short-circuit || keeps ++KV from running on end();
//   - the successor entry is not marked Valid.
// NOTE(review): the return type and every return statement are not visible in
// this copy -- presumably a null result under each guard and a pointer to the
// successor otherwise; confirm against the original.
139 FileAnalysis::getNextInstructionSequential(const Instr
&InstrMeta
) const {
140 std::map
<uint64_t, Instr
>::const_iterator KV
=
141 Instructions
.find(InstrMeta
.VMAddress
);
142 if (KV
== Instructions
.end() || ++KV
== Instructions
.end())
145 if (!KV
->second
.Valid
)
// Iterates over the operands of InstrMeta.Instruction and reports a bool.
// NOTE(review): the loop body and the returns are not visible in this copy --
// presumably true as soon as any operand is a register and false otherwise;
// confirm against the original.
151 bool FileAnalysis::usesRegisterOperand(const Instr
&InstrMeta
) const {
152 for (const auto &Operand
: InstrMeta
.Instruction
) {
// Finds the instruction recorded at Address in the Instructions map. When the
// address is present, returns a pointer to the stored Instr; the pointee is
// owned by the map.
// NOTE(review): the statement under the not-found guard is not visible in
// this copy -- presumably it returns nullptr (callers such as
// getDefiniteNextInstruction test the result for null); confirm.
159 const Instr
*FileAnalysis::getInstruction(uint64_t Address
) const {
160 const auto &InstrKV
= Instructions
.find(Address
);
161 if (InstrKV
== Instructions
.end())
164 return &InstrKV
->second
;
167 const Instr
&FileAnalysis::getInstructionOrDie(uint64_t Address
) const {
168 const auto &InstrKV
= Instructions
.find(Address
);
169 assert(InstrKV
!= Instructions
.end() && "Address doesn't exist.");
170 return InstrKV
->second
;
173 bool FileAnalysis::isCFITrap(const Instr
&InstrMeta
) const {
174 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
175 return InstrDesc
.isTrap() || willTrapOnCFIViolation(InstrMeta
);
// Decides whether InstrMeta is a call whose statically evaluated target is
// one of the addresses in TrapOnFailFunctionAddresses. Visible logic:
//   - a guard on the instruction descriptor not being a call;
//   - a guard on MIA->evaluateBranch failing to compute Target;
//   - otherwise, the result is membership of Target in
//     TrapOnFailFunctionAddresses.
// NOTE(review): the statements under both guards and the declaration of
// Target are not visible in this copy -- presumably each guard returns false;
// confirm against the original.
178 bool FileAnalysis::willTrapOnCFIViolation(const Instr
&InstrMeta
) const {
179 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
180 if (!InstrDesc
.isCall())
183 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
184 InstrMeta
.InstructionSize
, Target
))
186 return TrapOnFailFunctionAddresses
.contains(Target
);
// Reports whether execution can continue from InstrMeta into the instruction
// that follows it. Visible logic:
//   - a guard on the instruction not being Valid;
//   - a guard on the instruction being a CFI trap (isCFITrap);
//   - for an instruction that may affect control flow, the answer is whether
//     it is a conditional branch.
// NOTE(review): the statements under the first two guards and the final
// return (for instructions that do not affect control flow) are not visible
// in this copy; confirm their values against the original.
189 bool FileAnalysis::canFallThrough(const Instr
&InstrMeta
) const {
190 if (!InstrMeta
.Valid
)
193 if (isCFITrap(InstrMeta
))
196 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
197 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
))
198 return InstrDesc
.isConditionalBranch();
// Determines the single instruction that must execute after InstrMeta, when
// there is one. Visible logic:
//   - guards on the instruction being invalid or being a CFI trap;
//   - for a control-flow instruction: a guard on it being a conditional
//     branch, a guard on MIA->evaluateBranch failing, and otherwise a lookup
//     of the instruction at the evaluated Target;
//   - a lookup of the instruction at VMAddress + InstructionSize, i.e. the
//     address directly after InstrMeta;
//   - a final guard on the looked-up pointer being null or not Valid.
// NOTE(review): the return type, the statements under every guard, the
// declaration of Target, the else-branch structure around the second lookup
// and the final return are not visible in this copy; confirm against the
// original.
204 FileAnalysis::getDefiniteNextInstruction(const Instr
&InstrMeta
) const {
205 if (!InstrMeta
.Valid
)
208 if (isCFITrap(InstrMeta
))
211 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
212 const Instr
*NextMetaPtr
;
213 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
)) {
214 if (InstrDesc
.isConditionalBranch())
218 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
219 InstrMeta
.InstructionSize
, Target
))
222 NextMetaPtr
= getInstruction(Target
);
225 getInstruction(InstrMeta
.VMAddress
+ InstrMeta
.InstructionSize
);
228 if (!NextMetaPtr
|| !NextMetaPtr
->Valid
)
// Collects the instructions that can transfer control directly to InstrMeta:
//   - the sequentially preceding instruction, when it exists and
//     canFallThrough() holds for it;
//   - every instruction whose address is listed for InstrMeta.VMAddress in
//     StaticBranchTargetings.
// Returns the set of pointers into the Instructions map. If no static branch
// targets this address, only the fall-through candidate (if any) is returned.
// NOTE(review): when a listed source address is missing from Instructions, a
// message is written to errs(); the statement that follows the message is not
// visible in this copy -- presumably the entry is skipped; confirm.
234 std::set
<const Instr
*>
235 FileAnalysis::getDirectControlFlowXRefs(const Instr
&InstrMeta
) const {
236 std::set
<const Instr
*> CFCrossReferences
;
237 const Instr
*PrevInstruction
= getPrevInstructionSequential(InstrMeta
);
239 if (PrevInstruction
&& canFallThrough(*PrevInstruction
))
240 CFCrossReferences
.insert(PrevInstruction
);
242 const auto &TargetRefsKV
= StaticBranchTargetings
.find(InstrMeta
.VMAddress
);
243 if (TargetRefsKV
== StaticBranchTargetings
.end())
244 return CFCrossReferences
;
246 for (uint64_t SourceInstrAddress
: TargetRefsKV
->second
) {
247 const auto &SourceInstrKV
= Instructions
.find(SourceInstrAddress
);
248 if (SourceInstrKV
== Instructions
.end()) {
249 errs() << "Failed to find source instruction at address "
250 << format_hex(SourceInstrAddress
, 2)
251 << " for the cross-reference to instruction at address "
252 << format_hex(InstrMeta
.VMAddress
, 2) << ".\n";
256 CFCrossReferences
.insert(&SourceInstrKV
->second
);
259 return CFCrossReferences
;
262 const std::set
<object::SectionedAddress
> &
263 FileAnalysis::getIndirectInstructions() const {
264 return IndirectInstructions
;
267 const MCRegisterInfo
*FileAnalysis::getRegisterInfo() const {
268 return RegisterInfo
.get();
271 const MCInstrInfo
*FileAnalysis::getMCInstrInfo() const { return MII
.get(); }
// Accessor for the instruction-analysis object.
// NOTE(review): the body is not visible in this copy -- presumably it returns
// MIA.get(), mirroring getRegisterInfo() and getMCInstrInfo(); confirm.
273 const MCInstrAnalysis
*FileAnalysis::getMCInstrAnalysis() const {
// Forwards to Symbolizer->symbolizeInlinedCode, passing the object file's
// name (copied into a std::string) as the first argument. Symbolizer must be
// non-null; this is checked only by the assert.
// NOTE(review): the remaining call argument(s) are not visible in this copy
// -- presumably Address is passed through; confirm against the original.
277 Expected
<DIInliningInfo
>
278 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address
) {
279 assert(Symbolizer
!= nullptr && "Symbolizer is invalid.");
281 return Symbolizer
->symbolizeInlinedCode(std::string(Object
->getFileName()),
// Classifies the indirect control-flow instruction at Graph.BaseAddress. The
// checks run in this order, and the first failing one decides the result:
//   1. FAIL_INVALID_INSTRUCTION  -- returned right after the instruction
//      lookup (the guarding condition is not visible in this copy;
//      presumably a null check on InstrMetaPtr -- confirm).
//   2. FAIL_NOT_INDIRECT_CF      -- the instruction does not affect control
//      flow, or it has no register operand.
//   3. FAIL_ORPHANS              -- Graph.OrphanedNodes is non-empty.
//   4. FAIL_BAD_CONDITIONAL_BRANCH -- some conditional branch node has a
//      false CFIProtection field.
//   5. FAIL_REGISTER_CLOBBERED   -- indirectCFOperandClobber() returns an
//      address other than Graph.BaseAddress.
// Otherwise the result is PROTECTED.
// NOTE(review): the return type line is not visible in this copy.
286 FileAnalysis::validateCFIProtection(const GraphResult
&Graph
) const {
287 const Instr
*InstrMetaPtr
= getInstruction(Graph
.BaseAddress
);
289 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
;
291 const auto &InstrDesc
= MII
->get(InstrMetaPtr
->Instruction
.getOpcode());
292 if (!InstrDesc
.mayAffectControlFlow(InstrMetaPtr
->Instruction
, *RegisterInfo
))
293 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
295 if (!usesRegisterOperand(*InstrMetaPtr
))
296 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
// The orphan check must precede indirectCFOperandClobber(), which asserts
// that there are no orphaned nodes.
298 if (!Graph
.OrphanedNodes
.empty())
299 return CFIProtectionStatus::FAIL_ORPHANS
;
301 for (const auto &BranchNode
: Graph
.ConditionalBranchNodes
) {
302 if (!BranchNode
.CFIProtection
)
303 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
;
306 if (indirectCFOperandClobber(Graph
) != Graph
.BaseAddress
)
307 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
;
309 return CFIProtectionStatus::PROTECTED
;
// Checks whether any register used by the indirect control-flow instruction
// at Graph.BaseAddress is redefined on the path between a conditional branch
// node and that instruction. Returns Graph.BaseAddress when the walk
// completes without reporting a clobber; validateCFIProtection() treats any
// other return value as a clobber.
// Precondition (assert only): Graph.OrphanedNodes is empty.
// Outline of the visible logic:
//   - gather register numbers from the operands of the indirect CF;
//   - for each conditional branch node, pick Target or Fallthrough depending
//     on IndirectCFIsOnTargetPath, flatten the path from that node, and walk
//     it in reverse, skipping the last element;
//   - for every tracked register that the current instruction defines
//     (hasDefOfPhysReg), apply the "one load allowed" rule governed by
//     canLoad, then stop tracking that register and start tracking the
//     operands the instruction reads.
// NOTE(review): several statements are not visible in this copy (the
// register-operand checks before each insert, the declaration/update of Node,
// the inner loop's condition and increment, the statements after the
// canLoad test, and the loop exits). Do not restructure without the original.
312 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult
&Graph
) const {
313 assert(Graph
.OrphanedNodes
.empty() && "Orphaned nodes should be empty.");
315 // Get the set of registers we must check to ensure they're not clobbered.
316 const Instr
&IndirectCF
= getInstructionOrDie(Graph
.BaseAddress
);
317 DenseSet
<unsigned> RegisterNumbers
;
318 for (const auto &Operand
: IndirectCF
.Instruction
) {
320 RegisterNumbers
.insert(Operand
.getReg());
322 assert(RegisterNumbers
.size() && "Zero register operands on indirect CF.");
324 // Now check all branches to indirect CFs and ensure no clobbering happens.
325 for (const auto &Branch
: Graph
.ConditionalBranchNodes
) {
327 if (Branch
.IndirectCFIsOnTargetPath
)
328 Node
= Branch
.Target
;
330 Node
= Branch
.Fallthrough
;
332 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
333 // we allow them one load.
// canLoad starts true only when the indirect CF itself does not load.
334 bool canLoad
= !MII
->get(IndirectCF
.Instruction
.getOpcode()).mayLoad();
336 // We walk backwards from the indirect CF. It is the last node returned by
337 // Graph.flattenAddress, so we skip it since we already handled it.
// Each branch starts from a fresh copy of the register set.
338 DenseSet
<unsigned> CurRegisterNumbers
= RegisterNumbers
;
339 std::vector
<uint64_t> Nodes
= Graph
.flattenAddress(Node
);
340 for (auto I
= Nodes
.rbegin() + 1, E
= Nodes
.rend(); I
!= E
; ++I
) {
342 const Instr
&NodeInstr
= getInstructionOrDie(Node
);
343 const auto &InstrDesc
= MII
->get(NodeInstr
.Instruction
.getOpcode());
345 for (auto RI
= CurRegisterNumbers
.begin(), RE
= CurRegisterNumbers
.end();
347 unsigned RegNum
= *RI
;
348 if (InstrDesc
.hasDefOfPhysReg(NodeInstr
.Instruction
, RegNum
,
350 if (!canLoad
|| !InstrDesc
.mayLoad())
// NOTE(review): erasing through RI while iterating CurRegisterNumbers is only
// safe if the loop is left before RI is used again -- the exit is not visible
// here; confirm.
353 CurRegisterNumbers
.erase(RI
);
354 // Add the registers this load reads to those we check for clobbers.
// Operands from index getNumDefs() onward are scanned, i.e. the definitions
// are skipped.
355 for (unsigned i
= InstrDesc
.getNumDefs(),
356 e
= InstrDesc
.getNumOperands(); i
!= e
; i
++) {
357 const auto &Operand
= NodeInstr
.Instruction
.getOperand(i
);
359 CurRegisterNumbers
.insert(Operand
.getReg());
367 return Graph
.BaseAddress
;
370 void FileAnalysis::printInstruction(const Instr
&InstrMeta
,
371 raw_ostream
&OS
) const {
372 Printer
->printInst(&InstrMeta
.Instruction
, 0, "", *SubtargetInfo
, OS
);
// Creates the helper objects this analysis needs, in dependency order:
// Symbolizer, the target lookup, RegisterInfo, the asm info, SubtargetInfo,
// MII, Context, the disassembler, MIA and Printer. Returns Error::success()
// when the end is reached; the visible failure paths return an
// UnsupportedDisassembly error naming the component that could not be built.
// The order matters: later objects are constructed from earlier ones (for
// example, Context takes the asm info, RegisterInfo and SubtargetInfo).
// NOTE(review): a number of lines are not visible in this copy -- the
// initialisation of ArchName and MCPU, the left-hand sides of the
// lookupTarget / createMCAsmInfo / createMCDisassembler calls, the conditions
// guarding each make_error return, the tail of the Twine expression and the
// last createMCInstPrinter argument. Confirm against the original.
375 Error
FileAnalysis::initialiseDisassemblyMembers() {
376 std::string TripleName
= ObjectTriple
.getTriple();
379 std::string ErrorString
;
// The symbolizer is configured not to use the symbol table.
381 LLVMSymbolizer::Options Opt
;
382 Opt
.UseSymbolTable
= false;
383 Symbolizer
.reset(new LLVMSymbolizer(Opt
));
386 TargetRegistry::lookupTarget(ArchName
, ObjectTriple
, ErrorString
);
388 return make_error
<UnsupportedDisassembly
>(
389 (Twine("Couldn't find target \"") + ObjectTriple
.getTriple() +
390 "\", failed with error: " + ErrorString
)
393 RegisterInfo
.reset(ObjectTarget
->createMCRegInfo(TripleName
));
395 return make_error
<UnsupportedDisassembly
>(
396 "Failed to initialise RegisterInfo.");
398 MCTargetOptions MCOptions
;
400 ObjectTarget
->createMCAsmInfo(*RegisterInfo
, TripleName
, MCOptions
));
402 return make_error
<UnsupportedDisassembly
>("Failed to initialise AsmInfo.");
404 SubtargetInfo
.reset(ObjectTarget
->createMCSubtargetInfo(
405 TripleName
, MCPU
, Features
.getString()));
407 return make_error
<UnsupportedDisassembly
>(
408 "Failed to initialise SubtargetInfo.");
410 MII
.reset(ObjectTarget
->createMCInstrInfo());
412 return make_error
<UnsupportedDisassembly
>("Failed to initialise MII.");
414 Context
.reset(new MCContext(Triple(TripleName
), AsmInfo
.get(),
415 RegisterInfo
.get(), SubtargetInfo
.get()));
418 ObjectTarget
->createMCDisassembler(*SubtargetInfo
, *Context
));
421 return make_error
<UnsupportedDisassembly
>(
422 "No disassembler available for target");
// No failure check is visible for MIA or Printer.
424 MIA
.reset(ObjectTarget
->createMCInstrAnalysis(MII
.get()));
426 Printer
.reset(ObjectTarget
->createMCInstPrinter(
427 ObjectTriple
, AsmInfo
->getAssemblerDialect(), *AsmInfo
, *MII
,
430 return Error::success();
// Walks the sections of Object and hands the bytes of each executable section
// to parseSectionContents(). Returns Error::success() when the end is
// reached.
// Unless IgnoreDWARFFlag is set, it first requires usable DWARF line
// information: a DWARFContext is created and its compile units are scanned
// for a line table with at least one row. The visible failure paths are
// "Could not create DWARF information." and "DWARF line information missing.
// Did you compile with '-g'?".
// Section filtering visible here: the SHF_EXECINSTR flag is tested, and a
// section named ".plt" is tested for; a failure to read the section contents
// is propagated.
// NOTE(review): the conditions guarding the make_error/takeError returns and
// the statements under the SHF_EXECINSTR and ".plt" tests are not visible in
// this copy -- presumably those sections are skipped; confirm.
433 Error
FileAnalysis::parseCodeSections() {
434 if (!IgnoreDWARFFlag
) {
435 std::unique_ptr
<DWARFContext
> DWARF
= DWARFContext::create(*Object
);
437 return make_error
<StringError
>("Could not create DWARF information.",
438 inconvertibleErrorCode());
440 bool LineInfoValid
= false;
442 for (auto &Unit
: DWARF
->compile_units()) {
443 const auto &LineTable
= DWARF
->getLineTableForUnit(Unit
.get());
444 if (LineTable
&& !LineTable
->Rows
.empty()) {
445 LineInfoValid
= true;
451 return make_error
<StringError
>(
452 "DWARF line information missing. Did you compile with '-g'?",
453 inconvertibleErrorCode());
// NOTE(review): each section is viewed through object::ELFSectionRef without
// a visible check that Object is an ELF file -- confirm this holds for every
// supported input.
456 for (const object::SectionRef
&Section
: Object
->sections()) {
457 // Ensure only executable sections get analysed.
458 if (!(object::ELFSectionRef(Section
).getFlags() & ELF::SHF_EXECINSTR
))
461 // Avoid checking the PLT since it produces spurious failures on AArch64
462 // when ignoring DWARF data.
463 Expected
<StringRef
> NameOrErr
= Section
.getName();
464 if (NameOrErr
&& *NameOrErr
== ".plt")
// A failure to read the section name is deliberately discarded.
466 consumeError(NameOrErr
.takeError());
468 Expected
<StringRef
> Contents
= Section
.getContents();
470 return Contents
.takeError();
471 ArrayRef
<uint8_t> SectionBytes
= arrayRefFromStringRef(*Contents
);
// The section's load address and index travel with the bytes.
473 parseSectionContents(SectionBytes
,
474 {Section
.getAddress(), Section
.getIndex()});
476 return Error::success();
// Disassembles SectionBytes from start to end and records what it finds:
//   - every decoded position (valid or not) is stored via addInstruction(),
//     with its address, size and validity;
//   - for control-flow instructions whose target MIA->evaluateBranch can
//     compute, the source address is appended to StaticBranchTargetings under
//     that target;
//   - the address of an instruction that passes the remaining tests is
//     inserted into IndirectInstructions together with the section index.
// Address supplies the section's base address and section index. Symbolizer
// must be initialised (assert only).
// Unless IgnoreDWARFFlag is set, the instruction's address is also passed to
// the symbolizer; a symbolizer error is reported on errs(), and the resulting
// file name is compared against DILineInfo::BadString.
// NOTE(review): the declarations of Instruction, InstrMeta, Target and
// LineInfo, and the statements under each filtering test, are not visible in
// this copy -- presumably each test skips to the next instruction; confirm.
479 void FileAnalysis::parseSectionContents(ArrayRef
<uint8_t> SectionBytes
,
480 object::SectionedAddress Address
) {
481 assert(Symbolizer
&& "Symbolizer is uninitialised.");
484 uint64_t InstructionSize
;
// The loop has no increment clause; Byte advances only by the size the
// disassembler reports.
// NOTE(review): if getInstruction can report a size of 0 for undecodable
// bytes, this loop would not make progress -- confirm the disassembler's
// contract.
486 for (uint64_t Byte
= 0; Byte
< SectionBytes
.size();) {
487 bool ValidInstruction
=
488 Disassembler
->getInstruction(Instruction
, InstructionSize
,
489 SectionBytes
.drop_front(Byte
), 0,
490 outs()) == MCDisassembler::Success
;
492 Byte
+= InstructionSize
;
// Byte already points past this instruction, hence the subtraction.
494 uint64_t VMAddress
= Address
.Address
+ Byte
- InstructionSize
;
495 InstrMeta
.Instruction
= Instruction
;
496 InstrMeta
.VMAddress
= VMAddress
;
497 InstrMeta
.InstructionSize
= InstructionSize
;
498 InstrMeta
.Valid
= ValidInstruction
;
500 addInstruction(InstrMeta
);
502 if (!ValidInstruction
)
505 // Skip additional parsing for instructions that do not affect the control
507 const auto &InstrDesc
= MII
->get(Instruction
.getOpcode());
508 if (!InstrDesc
.mayAffectControlFlow(Instruction
, *RegisterInfo
))
512 if (MIA
->evaluateBranch(Instruction
, VMAddress
, InstructionSize
, Target
)) {
513 // If the target can be evaluated, it's not indirect.
514 StaticBranchTargetings
[Target
].push_back(VMAddress
);
518 if (!usesRegisterOperand(InstrMeta
))
521 if (InstrDesc
.isReturn())
524 // Check if this instruction exists in the range of the DWARF metadata.
525 if (!IgnoreDWARFFlag
) {
527 Symbolizer
->symbolizeCode(std::string(Object
->getFileName()),
528 {VMAddress
, Address
.SectionIndex
});
530 handleAllErrors(LineInfo
.takeError(), [](const ErrorInfoBase
&E
) {
531 errs() << "Symbolizer failed to get line: " << E
.message() << "\n";
536 if (LineInfo
->FileName
== DILineInfo::BadString
)
540 IndirectInstructions
.insert({VMAddress
, Address
.SectionIndex
});
// Inserts a copy of Instruction into the Instructions map, keyed by its
// VMAddress. A message naming the address is written to errs() for the case
// where an instruction at that address already exists.
// NOTE(review): the variable receiving the insert result, the condition that
// selects the error path, and whatever follows the message are not visible in
// this copy -- confirm whether the duplicate case is fatal.
544 void FileAnalysis::addInstruction(const Instr
&Instruction
) {
546 Instructions
.insert(std::make_pair(Instruction
.VMAddress
, Instruction
));
548 errs() << "Failed to add instruction at address "
549 << format_hex(Instruction
.VMAddress
, 2)
550 << ": Instruction at this address already exists.\n";
// Fills TrapOnFailFunctionAddresses with the addresses of the functions named
// "__cfi_slowpath", "__cfi_slowpath_diag" and "abort". Two sources are
// scanned:
//   - the object's symbols: a matching symbol contributes its own address;
//   - when Object is an ELF object file, its PLT entries: a matching entry
//     contributes the PLT entry's address.
// Errors from reading a symbol's name or address are consumed rather than
// propagated, so the function returns Error::success() when the end is
// reached.
// NOTE(review): the conditions in front of the consumeError() calls, the
// else keyword before the symbol-address insert and the first statements of
// the PLT loop are not visible in this copy; confirm against the original.
555 Error
FileAnalysis::parseSymbolTable() {
556 // Functions that will trap on CFI violations.
557 SmallSet
<StringRef
, 4> TrapOnFailFunctions
;
558 TrapOnFailFunctions
.insert("__cfi_slowpath");
559 TrapOnFailFunctions
.insert("__cfi_slowpath_diag");
560 TrapOnFailFunctions
.insert("abort");
562 // Look through the list of symbols for functions that will trap on CFI
564 for (auto &Sym
: Object
->symbols()) {
565 auto SymNameOrErr
= Sym
.getName();
567 consumeError(SymNameOrErr
.takeError());
568 else if (TrapOnFailFunctions
.contains(*SymNameOrErr
)) {
569 auto AddrOrErr
= Sym
.getAddress();
571 consumeError(AddrOrErr
.takeError());
573 TrapOnFailFunctionAddresses
.insert(*AddrOrErr
);
// PLT entries are only available for ELF objects; other formats skip this.
576 if (auto *ElfObject
= dyn_cast
<object::ELFObjectFileBase
>(Object
)) {
577 for (const auto &Plt
: ElfObject
->getPltEntries()) {
580 object::SymbolRef
Sym(*Plt
.Symbol
, Object
);
581 auto SymNameOrErr
= Sym
.getName();
583 consumeError(SymNameOrErr
.takeError());
584 else if (TrapOnFailFunctions
.contains(*SymNameOrErr
))
585 TrapOnFailFunctionAddresses
.insert(Plt
.Address
);
588 return Error::success();
591 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text
)
592 : Text(std::string(Text
)) {}
594 char UnsupportedDisassembly::ID
;
595 void UnsupportedDisassembly::log(raw_ostream
&OS
) const {
596 OS
<< "Could not initialise disassembler: " << Text
;
599 std::error_code
UnsupportedDisassembly::convertToErrorCode() const {
600 return std::error_code();
603 } // namespace cfi_verify