1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "FileAnalysis.h"
10 #include "GraphBuilder.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrAnalysis.h"
20 #include "llvm/MC/MCInstrDesc.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCTargetOptions.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Object/Binary.h"
28 #include "llvm/Object/COFF.h"
29 #include "llvm/Object/ELFObjectFile.h"
30 #include "llvm/Object/ObjectFile.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/raw_ostream.h"
38 using Instr
= llvm::cfi_verify::FileAnalysis::Instr
;
39 using LLVMSymbolizer
= llvm::symbolize::LLVMSymbolizer
;
42 namespace cfi_verify
{
46 static cl::opt
<bool, true> IgnoreDWARFArg(
49 "Ignore all DWARF data. This relaxes the requirements for all "
50 "statically linked libraries to have been compiled with '-g', but "
51 "will result in false positives for 'CFI unprotected' instructions."),
52 cl::location(IgnoreDWARFFlag
), cl::init(false));
54 StringRef
stringCFIProtectionStatus(CFIProtectionStatus Status
) {
56 case CFIProtectionStatus::PROTECTED
:
58 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
:
59 return "FAIL_NOT_INDIRECT_CF";
60 case CFIProtectionStatus::FAIL_ORPHANS
:
61 return "FAIL_ORPHANS";
62 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
:
63 return "FAIL_BAD_CONDITIONAL_BRANCH";
64 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
:
65 return "FAIL_REGISTER_CLOBBERED";
66 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
:
67 return "FAIL_INVALID_INSTRUCTION";
69 llvm_unreachable("Attempted to stringify an unknown enum value.");
72 Expected
<FileAnalysis
> FileAnalysis::Create(StringRef Filename
) {
73 // Open the filename provided.
74 Expected
<object::OwningBinary
<object::Binary
>> BinaryOrErr
=
75 object::createBinary(Filename
);
77 return BinaryOrErr
.takeError();
79 // Construct the object and allow it to take ownership of the binary.
80 object::OwningBinary
<object::Binary
> Binary
= std::move(BinaryOrErr
.get());
81 FileAnalysis
Analysis(std::move(Binary
));
83 Analysis
.Object
= dyn_cast
<object::ObjectFile
>(Analysis
.Binary
.getBinary());
85 return make_error
<UnsupportedDisassembly
>("Failed to cast object");
87 switch (Analysis
.Object
->getArch()) {
91 case Triple::aarch64_be
:
94 return make_error
<UnsupportedDisassembly
>("Unsupported architecture.");
97 Analysis
.ObjectTriple
= Analysis
.Object
->makeTriple();
98 Analysis
.Features
= Analysis
.Object
->getFeatures();
100 // Init the rest of the object.
101 if (auto InitResponse
= Analysis
.initialiseDisassemblyMembers())
102 return std::move(InitResponse
);
104 if (auto SectionParseResponse
= Analysis
.parseCodeSections())
105 return std::move(SectionParseResponse
);
107 if (auto SymbolTableParseResponse
= Analysis
.parseSymbolTable())
108 return std::move(SymbolTableParseResponse
);
110 return std::move(Analysis
);
113 FileAnalysis::FileAnalysis(object::OwningBinary
<object::Binary
> Binary
)
114 : Binary(std::move(Binary
)) {}
116 FileAnalysis::FileAnalysis(const Triple
&ObjectTriple
,
117 const SubtargetFeatures
&Features
)
118 : ObjectTriple(ObjectTriple
), Features(Features
) {}
121 FileAnalysis::getPrevInstructionSequential(const Instr
&InstrMeta
) const {
122 std::map
<uint64_t, Instr
>::const_iterator KV
=
123 Instructions
.find(InstrMeta
.VMAddress
);
124 if (KV
== Instructions
.end() || KV
== Instructions
.begin())
127 if (!(--KV
)->second
.Valid
)
134 FileAnalysis::getNextInstructionSequential(const Instr
&InstrMeta
) const {
135 std::map
<uint64_t, Instr
>::const_iterator KV
=
136 Instructions
.find(InstrMeta
.VMAddress
);
137 if (KV
== Instructions
.end() || ++KV
== Instructions
.end())
140 if (!KV
->second
.Valid
)
146 bool FileAnalysis::usesRegisterOperand(const Instr
&InstrMeta
) const {
147 for (const auto &Operand
: InstrMeta
.Instruction
) {
154 const Instr
*FileAnalysis::getInstruction(uint64_t Address
) const {
155 const auto &InstrKV
= Instructions
.find(Address
);
156 if (InstrKV
== Instructions
.end())
159 return &InstrKV
->second
;
162 const Instr
&FileAnalysis::getInstructionOrDie(uint64_t Address
) const {
163 const auto &InstrKV
= Instructions
.find(Address
);
164 assert(InstrKV
!= Instructions
.end() && "Address doesn't exist.");
165 return InstrKV
->second
;
168 bool FileAnalysis::isCFITrap(const Instr
&InstrMeta
) const {
169 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
170 return InstrDesc
.isTrap() || willTrapOnCFIViolation(InstrMeta
);
173 bool FileAnalysis::willTrapOnCFIViolation(const Instr
&InstrMeta
) const {
174 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
175 if (!InstrDesc
.isCall())
178 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
179 InstrMeta
.InstructionSize
, Target
))
181 return TrapOnFailFunctionAddresses
.contains(Target
);
184 bool FileAnalysis::canFallThrough(const Instr
&InstrMeta
) const {
185 if (!InstrMeta
.Valid
)
188 if (isCFITrap(InstrMeta
))
191 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
192 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
))
193 return InstrDesc
.isConditionalBranch();
199 FileAnalysis::getDefiniteNextInstruction(const Instr
&InstrMeta
) const {
200 if (!InstrMeta
.Valid
)
203 if (isCFITrap(InstrMeta
))
206 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
207 const Instr
*NextMetaPtr
;
208 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
)) {
209 if (InstrDesc
.isConditionalBranch())
213 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
214 InstrMeta
.InstructionSize
, Target
))
217 NextMetaPtr
= getInstruction(Target
);
220 getInstruction(InstrMeta
.VMAddress
+ InstrMeta
.InstructionSize
);
223 if (!NextMetaPtr
|| !NextMetaPtr
->Valid
)
229 std::set
<const Instr
*>
230 FileAnalysis::getDirectControlFlowXRefs(const Instr
&InstrMeta
) const {
231 std::set
<const Instr
*> CFCrossReferences
;
232 const Instr
*PrevInstruction
= getPrevInstructionSequential(InstrMeta
);
234 if (PrevInstruction
&& canFallThrough(*PrevInstruction
))
235 CFCrossReferences
.insert(PrevInstruction
);
237 const auto &TargetRefsKV
= StaticBranchTargetings
.find(InstrMeta
.VMAddress
);
238 if (TargetRefsKV
== StaticBranchTargetings
.end())
239 return CFCrossReferences
;
241 for (uint64_t SourceInstrAddress
: TargetRefsKV
->second
) {
242 const auto &SourceInstrKV
= Instructions
.find(SourceInstrAddress
);
243 if (SourceInstrKV
== Instructions
.end()) {
244 errs() << "Failed to find source instruction at address "
245 << format_hex(SourceInstrAddress
, 2)
246 << " for the cross-reference to instruction at address "
247 << format_hex(InstrMeta
.VMAddress
, 2) << ".\n";
251 CFCrossReferences
.insert(&SourceInstrKV
->second
);
254 return CFCrossReferences
;
257 const std::set
<object::SectionedAddress
> &
258 FileAnalysis::getIndirectInstructions() const {
259 return IndirectInstructions
;
262 const MCRegisterInfo
*FileAnalysis::getRegisterInfo() const {
263 return RegisterInfo
.get();
266 const MCInstrInfo
*FileAnalysis::getMCInstrInfo() const { return MII
.get(); }
268 const MCInstrAnalysis
*FileAnalysis::getMCInstrAnalysis() const {
272 Expected
<DIInliningInfo
>
273 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address
) {
274 assert(Symbolizer
!= nullptr && "Symbolizer is invalid.");
276 return Symbolizer
->symbolizeInlinedCode(std::string(Object
->getFileName()),
281 FileAnalysis::validateCFIProtection(const GraphResult
&Graph
) const {
282 const Instr
*InstrMetaPtr
= getInstruction(Graph
.BaseAddress
);
284 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
;
286 const auto &InstrDesc
= MII
->get(InstrMetaPtr
->Instruction
.getOpcode());
287 if (!InstrDesc
.mayAffectControlFlow(InstrMetaPtr
->Instruction
, *RegisterInfo
))
288 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
290 if (!usesRegisterOperand(*InstrMetaPtr
))
291 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
293 if (!Graph
.OrphanedNodes
.empty())
294 return CFIProtectionStatus::FAIL_ORPHANS
;
296 for (const auto &BranchNode
: Graph
.ConditionalBranchNodes
) {
297 if (!BranchNode
.CFIProtection
)
298 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
;
301 if (indirectCFOperandClobber(Graph
) != Graph
.BaseAddress
)
302 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
;
304 return CFIProtectionStatus::PROTECTED
;
307 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult
&Graph
) const {
308 assert(Graph
.OrphanedNodes
.empty() && "Orphaned nodes should be empty.");
310 // Get the set of registers we must check to ensure they're not clobbered.
311 const Instr
&IndirectCF
= getInstructionOrDie(Graph
.BaseAddress
);
312 DenseSet
<unsigned> RegisterNumbers
;
313 for (const auto &Operand
: IndirectCF
.Instruction
) {
315 RegisterNumbers
.insert(Operand
.getReg());
317 assert(RegisterNumbers
.size() && "Zero register operands on indirect CF.");
319 // Now check all branches to indirect CFs and ensure no clobbering happens.
320 for (const auto &Branch
: Graph
.ConditionalBranchNodes
) {
322 if (Branch
.IndirectCFIsOnTargetPath
)
323 Node
= Branch
.Target
;
325 Node
= Branch
.Fallthrough
;
327 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
328 // we allow them one load.
329 bool canLoad
= !MII
->get(IndirectCF
.Instruction
.getOpcode()).mayLoad();
331 // We walk backwards from the indirect CF. It is the last node returned by
332 // Graph.flattenAddress, so we skip it since we already handled it.
333 DenseSet
<unsigned> CurRegisterNumbers
= RegisterNumbers
;
334 std::vector
<uint64_t> Nodes
= Graph
.flattenAddress(Node
);
335 for (auto I
= Nodes
.rbegin() + 1, E
= Nodes
.rend(); I
!= E
; ++I
) {
337 const Instr
&NodeInstr
= getInstructionOrDie(Node
);
338 const auto &InstrDesc
= MII
->get(NodeInstr
.Instruction
.getOpcode());
340 for (auto RI
= CurRegisterNumbers
.begin(), RE
= CurRegisterNumbers
.end();
342 unsigned RegNum
= *RI
;
343 if (InstrDesc
.hasDefOfPhysReg(NodeInstr
.Instruction
, RegNum
,
345 if (!canLoad
|| !InstrDesc
.mayLoad())
348 CurRegisterNumbers
.erase(RI
);
349 // Add the registers this load reads to those we check for clobbers.
350 for (unsigned i
= InstrDesc
.getNumDefs(),
351 e
= InstrDesc
.getNumOperands(); i
!= e
; i
++) {
352 const auto &Operand
= NodeInstr
.Instruction
.getOperand(i
);
354 CurRegisterNumbers
.insert(Operand
.getReg());
362 return Graph
.BaseAddress
;
365 void FileAnalysis::printInstruction(const Instr
&InstrMeta
,
366 raw_ostream
&OS
) const {
367 Printer
->printInst(&InstrMeta
.Instruction
, 0, "", *SubtargetInfo
.get(), OS
);
370 Error
FileAnalysis::initialiseDisassemblyMembers() {
371 std::string TripleName
= ObjectTriple
.getTriple();
374 std::string ErrorString
;
376 LLVMSymbolizer::Options Opt
;
377 Opt
.UseSymbolTable
= false;
378 Symbolizer
.reset(new LLVMSymbolizer(Opt
));
381 TargetRegistry::lookupTarget(ArchName
, ObjectTriple
, ErrorString
);
383 return make_error
<UnsupportedDisassembly
>(
384 (Twine("Couldn't find target \"") + ObjectTriple
.getTriple() +
385 "\", failed with error: " + ErrorString
)
388 RegisterInfo
.reset(ObjectTarget
->createMCRegInfo(TripleName
));
390 return make_error
<UnsupportedDisassembly
>(
391 "Failed to initialise RegisterInfo.");
393 MCTargetOptions MCOptions
;
395 ObjectTarget
->createMCAsmInfo(*RegisterInfo
, TripleName
, MCOptions
));
397 return make_error
<UnsupportedDisassembly
>("Failed to initialise AsmInfo.");
399 SubtargetInfo
.reset(ObjectTarget
->createMCSubtargetInfo(
400 TripleName
, MCPU
, Features
.getString()));
402 return make_error
<UnsupportedDisassembly
>(
403 "Failed to initialise SubtargetInfo.");
405 MII
.reset(ObjectTarget
->createMCInstrInfo());
407 return make_error
<UnsupportedDisassembly
>("Failed to initialise MII.");
409 Context
.reset(new MCContext(Triple(TripleName
), AsmInfo
.get(),
410 RegisterInfo
.get(), SubtargetInfo
.get()));
413 ObjectTarget
->createMCDisassembler(*SubtargetInfo
, *Context
));
416 return make_error
<UnsupportedDisassembly
>(
417 "No disassembler available for target");
419 MIA
.reset(ObjectTarget
->createMCInstrAnalysis(MII
.get()));
421 Printer
.reset(ObjectTarget
->createMCInstPrinter(
422 ObjectTriple
, AsmInfo
->getAssemblerDialect(), *AsmInfo
, *MII
,
425 return Error::success();
428 Error
FileAnalysis::parseCodeSections() {
429 if (!IgnoreDWARFFlag
) {
430 std::unique_ptr
<DWARFContext
> DWARF
= DWARFContext::create(*Object
);
432 return make_error
<StringError
>("Could not create DWARF information.",
433 inconvertibleErrorCode());
435 bool LineInfoValid
= false;
437 for (auto &Unit
: DWARF
->compile_units()) {
438 const auto &LineTable
= DWARF
->getLineTableForUnit(Unit
.get());
439 if (LineTable
&& !LineTable
->Rows
.empty()) {
440 LineInfoValid
= true;
446 return make_error
<StringError
>(
447 "DWARF line information missing. Did you compile with '-g'?",
448 inconvertibleErrorCode());
451 for (const object::SectionRef
&Section
: Object
->sections()) {
452 // Ensure only executable sections get analysed.
453 if (!(object::ELFSectionRef(Section
).getFlags() & ELF::SHF_EXECINSTR
))
456 // Avoid checking the PLT since it produces spurious failures on AArch64
457 // when ignoring DWARF data.
458 Expected
<StringRef
> NameOrErr
= Section
.getName();
459 if (NameOrErr
&& *NameOrErr
== ".plt")
461 consumeError(NameOrErr
.takeError());
463 Expected
<StringRef
> Contents
= Section
.getContents();
465 return Contents
.takeError();
466 ArrayRef
<uint8_t> SectionBytes
= arrayRefFromStringRef(*Contents
);
468 parseSectionContents(SectionBytes
,
469 {Section
.getAddress(), Section
.getIndex()});
471 return Error::success();
474 void FileAnalysis::parseSectionContents(ArrayRef
<uint8_t> SectionBytes
,
475 object::SectionedAddress Address
) {
476 assert(Symbolizer
&& "Symbolizer is uninitialised.");
479 uint64_t InstructionSize
;
481 for (uint64_t Byte
= 0; Byte
< SectionBytes
.size();) {
482 bool ValidInstruction
=
483 Disassembler
->getInstruction(Instruction
, InstructionSize
,
484 SectionBytes
.drop_front(Byte
), 0,
485 outs()) == MCDisassembler::Success
;
487 Byte
+= InstructionSize
;
489 uint64_t VMAddress
= Address
.Address
+ Byte
- InstructionSize
;
490 InstrMeta
.Instruction
= Instruction
;
491 InstrMeta
.VMAddress
= VMAddress
;
492 InstrMeta
.InstructionSize
= InstructionSize
;
493 InstrMeta
.Valid
= ValidInstruction
;
495 addInstruction(InstrMeta
);
497 if (!ValidInstruction
)
500 // Skip additional parsing for instructions that do not affect the control
502 const auto &InstrDesc
= MII
->get(Instruction
.getOpcode());
503 if (!InstrDesc
.mayAffectControlFlow(Instruction
, *RegisterInfo
))
507 if (MIA
->evaluateBranch(Instruction
, VMAddress
, InstructionSize
, Target
)) {
508 // If the target can be evaluated, it's not indirect.
509 StaticBranchTargetings
[Target
].push_back(VMAddress
);
513 if (!usesRegisterOperand(InstrMeta
))
516 if (InstrDesc
.isReturn())
519 // Check if this instruction exists in the range of the DWARF metadata.
520 if (!IgnoreDWARFFlag
) {
522 Symbolizer
->symbolizeCode(std::string(Object
->getFileName()),
523 {VMAddress
, Address
.SectionIndex
});
525 handleAllErrors(LineInfo
.takeError(), [](const ErrorInfoBase
&E
) {
526 errs() << "Symbolizer failed to get line: " << E
.message() << "\n";
531 if (LineInfo
->FileName
== DILineInfo::BadString
)
535 IndirectInstructions
.insert({VMAddress
, Address
.SectionIndex
});
539 void FileAnalysis::addInstruction(const Instr
&Instruction
) {
541 Instructions
.insert(std::make_pair(Instruction
.VMAddress
, Instruction
));
543 errs() << "Failed to add instruction at address "
544 << format_hex(Instruction
.VMAddress
, 2)
545 << ": Instruction at this address already exists.\n";
550 Error
FileAnalysis::parseSymbolTable() {
551 // Functions that will trap on CFI violations.
552 SmallSet
<StringRef
, 4> TrapOnFailFunctions
;
553 TrapOnFailFunctions
.insert("__cfi_slowpath");
554 TrapOnFailFunctions
.insert("__cfi_slowpath_diag");
555 TrapOnFailFunctions
.insert("abort");
557 // Look through the list of symbols for functions that will trap on CFI
559 for (auto &Sym
: Object
->symbols()) {
560 auto SymNameOrErr
= Sym
.getName();
562 consumeError(SymNameOrErr
.takeError());
563 else if (TrapOnFailFunctions
.contains(*SymNameOrErr
)) {
564 auto AddrOrErr
= Sym
.getAddress();
566 consumeError(AddrOrErr
.takeError());
568 TrapOnFailFunctionAddresses
.insert(*AddrOrErr
);
571 if (auto *ElfObject
= dyn_cast
<object::ELFObjectFileBase
>(Object
)) {
572 for (const auto &Addr
: ElfObject
->getPltAddresses()) {
575 object::SymbolRef
Sym(*Addr
.first
, Object
);
576 auto SymNameOrErr
= Sym
.getName();
578 consumeError(SymNameOrErr
.takeError());
579 else if (TrapOnFailFunctions
.contains(*SymNameOrErr
))
580 TrapOnFailFunctionAddresses
.insert(Addr
.second
);
583 return Error::success();
586 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text
)
587 : Text(std::string(Text
)) {}
589 char UnsupportedDisassembly::ID
;
590 void UnsupportedDisassembly::log(raw_ostream
&OS
) const {
591 OS
<< "Could not initialise disassembler: " << Text
;
594 std::error_code
UnsupportedDisassembly::convertToErrorCode() const {
595 return std::error_code();
598 } // namespace cfi_verify