1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "FileAnalysis.h"
10 #include "GraphBuilder.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrAnalysis.h"
20 #include "llvm/MC/MCInstrDesc.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Object/Binary.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Object/ELFObjectFile.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Support/raw_ostream.h"
// File-local shorthand for the per-instruction record type nested in
// FileAnalysis.
38 using Instr
= llvm::cfi_verify::FileAnalysis::Instr
;
// File-local shorthand for the symbolizer type from llvm::symbolize.
39 using LLVMSymbolizer
= llvm::symbolize::LLVMSymbolizer
;
42 namespace cfi_verify
{
// Boolean command-line option. Its value is stored externally in
// IgnoreDWARFFlag (through cl::location) and it is initialised to false.
// NOTE(review): the option-name argument and the opening of the description
// wrapper are not visible in this view.
46 static cl::opt
<bool, true> IgnoreDWARFArg(
49 "Ignore all DWARF data. This relaxes the requirements for all "
50 "statically linked libraries to have been compiled with '-g', but "
51 "will result in false positives for 'CFI unprotected' instructions."),
52 cl::location(IgnoreDWARFFlag
), cl::init(false));
// Maps a CFIProtectionStatus value to a printable string. Every visible case
// returns the spelling of its own enumerator; falling out of the cases is
// treated as unreachable.
// NOTE(review): the switch header and the value returned for PROTECTED are
// not visible in this view.
54 StringRef
stringCFIProtectionStatus(CFIProtectionStatus Status
) {
56 case CFIProtectionStatus::PROTECTED
:
58 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
:
59 return "FAIL_NOT_INDIRECT_CF";
60 case CFIProtectionStatus::FAIL_ORPHANS
:
61 return "FAIL_ORPHANS";
62 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
:
63 return "FAIL_BAD_CONDITIONAL_BRANCH";
64 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
:
65 return "FAIL_REGISTER_CLOBBERED";
66 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
:
67 return "FAIL_INVALID_INSTRUCTION";
// Only reached if Status holds a value none of the cases above matched.
69 llvm_unreachable("Attempted to stringify an unknown enum value.");
// Factory for FileAnalysis. Opens the file named by Filename as a binary,
// hands ownership of it to a new FileAnalysis, records the object's triple and
// features, and then runs the three initialisation stages in order:
// initialiseDisassemblyMembers, parseCodeSections, parseSymbolTable.
// Returns the analysis on success, or the first error produced along the way.
72 Expected
<FileAnalysis
> FileAnalysis::Create(StringRef Filename
) {
73 // Open the filename provided.
74 Expected
<object::OwningBinary
<object::Binary
>> BinaryOrErr
=
75 object::createBinary(Filename
);
// Propagates the error reported by createBinary.
// NOTE(review): the condition guarding this return is not visible in this view.
77 return BinaryOrErr
.takeError();
79 // Construct the object and allow it to take ownership of the binary.
80 object::OwningBinary
<object::Binary
> Binary
= std::move(BinaryOrErr
.get());
81 FileAnalysis
Analysis(std::move(Binary
));
// dyn_cast yields null when the binary is not an ObjectFile; that case is
// reported as UnsupportedDisassembly below.
// NOTE(review): the null check itself is not visible in this view.
83 Analysis
.Object
= dyn_cast
<object::ObjectFile
>(Analysis
.Binary
.getBinary());
85 return make_error
<UnsupportedDisassembly
>("Failed to cast object");
// Architecture gate: anything not accepted is rejected as unsupported.
// NOTE(review): only the aarch64_be label is visible here; the other labels
// and the default branch are missing from this view.
87 switch (Analysis
.Object
->getArch()) {
91 case Triple::aarch64_be
:
94 return make_error
<UnsupportedDisassembly
>("Unsupported architecture.");
97 Analysis
.ObjectTriple
= Analysis
.Object
->makeTriple();
98 Analysis
.Features
= Analysis
.Object
->getFeatures();
100 // Init the rest of the object.
// Each stage returns an Error; a failing Error is moved into the
// Expected<FileAnalysis> return value and later stages are skipped.
101 if (auto InitResponse
= Analysis
.initialiseDisassemblyMembers())
102 return std::move(InitResponse
);
104 if (auto SectionParseResponse
= Analysis
.parseCodeSections())
105 return std::move(SectionParseResponse
);
107 if (auto SymbolTableParseResponse
= Analysis
.parseSymbolTable())
108 return std::move(SymbolTableParseResponse
);
110 return std::move(Analysis
);
113 FileAnalysis::FileAnalysis(object::OwningBinary
<object::Binary
> Binary
)
114 : Binary(std::move(Binary
)) {}
116 FileAnalysis::FileAnalysis(const Triple
&ObjectTriple
,
117 const SubtargetFeatures
&Features
)
118 : ObjectTriple(ObjectTriple
), Features(Features
) {}
// Finds the entry for InstrMeta.VMAddress in Instructions and steps one entry
// backwards in address order. The visible guards reject a failed lookup, an
// entry that is already the first one, and a predecessor whose Valid flag is
// not set.
// NOTE(review): the return type and the return statements are not visible in
// this view; a caller in this file stores the result in a `const Instr *`.
121 FileAnalysis::getPrevInstructionSequential(const Instr
&InstrMeta
) const {
122 std::map
<uint64_t, Instr
>::const_iterator KV
=
123 Instructions
.find(InstrMeta
.VMAddress
);
124 if (KV
== Instructions
.end() || KV
== Instructions
.begin())
// The pre-decrement moves KV onto the predecessor before its Valid flag is read.
127 if (!(--KV
)->second
.Valid
)
// Finds the entry for InstrMeta.VMAddress in Instructions and steps one entry
// forwards in address order. The visible guards reject a failed lookup, an
// entry that has no successor, and a successor whose Valid flag is not set.
// NOTE(review): the return type and the return statements are not visible in
// this view.
134 FileAnalysis::getNextInstructionSequential(const Instr
&InstrMeta
) const {
135 std::map
<uint64_t, Instr
>::const_iterator KV
=
136 Instructions
.find(InstrMeta
.VMAddress
);
// The pre-increment only runs when the lookup succeeded (short-circuit ||), and
// leaves KV on the successor for the check below.
137 if (KV
== Instructions
.end() || ++KV
== Instructions
.end())
140 if (!KV
->second
.Valid
)
// Walks the operands of InstrMeta.Instruction and reports a boolean result.
// NOTE(review): the loop body and the return statements are not visible in
// this view; presumably the result is true when some operand is a register —
// confirm against the full source.
146 bool FileAnalysis::usesRegisterOperand(const Instr
&InstrMeta
) const {
147 for (const auto &Operand
: InstrMeta
.Instruction
) {
154 const Instr
*FileAnalysis::getInstruction(uint64_t Address
) const {
155 const auto &InstrKV
= Instructions
.find(Address
);
156 if (InstrKV
== Instructions
.end())
159 return &InstrKV
->second
;
162 const Instr
&FileAnalysis::getInstructionOrDie(uint64_t Address
) const {
163 const auto &InstrKV
= Instructions
.find(Address
);
164 assert(InstrKV
!= Instructions
.end() && "Address doesn't exist.");
165 return InstrKV
->second
;
168 bool FileAnalysis::isCFITrap(const Instr
&InstrMeta
) const {
169 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
170 return InstrDesc
.isTrap() || willTrapOnCFIViolation(InstrMeta
);
// Reports whether InstrMeta is a call whose statically evaluated target is one
// of the addresses in TrapOnFailFunctionAddresses. Two guards precede the final
// lookup: the instruction is not a call, and evaluateBranch fails to produce a
// target.
// NOTE(review): the values returned by the guards and the declaration of
// Target are not visible in this view.
173 bool FileAnalysis::willTrapOnCFIViolation(const Instr
&InstrMeta
) const {
174 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
175 if (!InstrDesc
.isCall())
// evaluateBranch writes the computed destination into Target.
178 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
179 InstrMeta
.InstructionSize
, Target
))
181 return TrapOnFailFunctionAddresses
.count(Target
) > 0;
// Decides whether execution can continue from InstrMeta into the instruction
// that follows it. Invalid instructions and CFI traps are handled by the first
// two guards; for an instruction that may affect control flow the answer is
// whether it is a conditional branch.
// NOTE(review): the values returned by the two guards and the final return for
// ordinary instructions are not visible in this view.
184 bool FileAnalysis::canFallThrough(const Instr
&InstrMeta
) const {
185 if (!InstrMeta
.Valid
)
188 if (isCFITrap(InstrMeta
))
191 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
192 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
))
193 return InstrDesc
.isConditionalBranch();
// Determines the single instruction that is executed after InstrMeta, when one
// can be determined. Invalid instructions and CFI traps are handled by the
// first two guards. For an instruction that may affect control flow, the
// conditional-branch case is handled separately and otherwise the statically
// evaluated branch target is looked up; for other instructions the lookup uses
// the address immediately after InstrMeta. The last guard rejects a missing or
// invalid successor.
// NOTE(review): the return type, every return statement, the declaration of
// Target and the else-branch structure are not visible in this view.
199 FileAnalysis::getDefiniteNextInstruction(const Instr
&InstrMeta
) const {
200 if (!InstrMeta
.Valid
)
203 if (isCFITrap(InstrMeta
))
206 const auto &InstrDesc
= MII
->get(InstrMeta
.Instruction
.getOpcode());
207 const Instr
*NextMetaPtr
;
208 if (InstrDesc
.mayAffectControlFlow(InstrMeta
.Instruction
, *RegisterInfo
)) {
209 if (InstrDesc
.isConditionalBranch())
// evaluateBranch writes the computed destination into Target.
213 if (!MIA
->evaluateBranch(InstrMeta
.Instruction
, InstrMeta
.VMAddress
,
214 InstrMeta
.InstructionSize
, Target
))
217 NextMetaPtr
= getInstruction(Target
);
// Sequential successor: the address directly after this instruction's bytes.
220 getInstruction(InstrMeta
.VMAddress
+ InstrMeta
.InstructionSize
);
223 if (!NextMetaPtr
|| !NextMetaPtr
->Valid
)
229 std::set
<const Instr
*>
230 FileAnalysis::getDirectControlFlowXRefs(const Instr
&InstrMeta
) const {
231 std::set
<const Instr
*> CFCrossReferences
;
232 const Instr
*PrevInstruction
= getPrevInstructionSequential(InstrMeta
);
234 if (PrevInstruction
&& canFallThrough(*PrevInstruction
))
235 CFCrossReferences
.insert(PrevInstruction
);
237 const auto &TargetRefsKV
= StaticBranchTargetings
.find(InstrMeta
.VMAddress
);
238 if (TargetRefsKV
== StaticBranchTargetings
.end())
239 return CFCrossReferences
;
241 for (uint64_t SourceInstrAddress
: TargetRefsKV
->second
) {
242 const auto &SourceInstrKV
= Instructions
.find(SourceInstrAddress
);
243 if (SourceInstrKV
== Instructions
.end()) {
244 errs() << "Failed to find source instruction at address "
245 << format_hex(SourceInstrAddress
, 2)
246 << " for the cross-reference to instruction at address "
247 << format_hex(InstrMeta
.VMAddress
, 2) << ".\n";
251 CFCrossReferences
.insert(&SourceInstrKV
->second
);
254 return CFCrossReferences
;
257 const std::set
<object::SectionedAddress
> &
258 FileAnalysis::getIndirectInstructions() const {
259 return IndirectInstructions
;
262 const MCRegisterInfo
*FileAnalysis::getRegisterInfo() const {
263 return RegisterInfo
.get();
266 const MCInstrInfo
*FileAnalysis::getMCInstrInfo() const { return MII
.get(); }
268 const MCInstrAnalysis
*FileAnalysis::getMCInstrAnalysis() const {
272 Expected
<DIInliningInfo
>
273 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address
) {
274 assert(Symbolizer
!= nullptr && "Symbolizer is invalid.");
276 return Symbolizer
->symbolizeInlinedCode(Object
->getFileName(), Address
);
280 FileAnalysis::validateCFIProtection(const GraphResult
&Graph
) const {
281 const Instr
*InstrMetaPtr
= getInstruction(Graph
.BaseAddress
);
283 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION
;
285 const auto &InstrDesc
= MII
->get(InstrMetaPtr
->Instruction
.getOpcode());
286 if (!InstrDesc
.mayAffectControlFlow(InstrMetaPtr
->Instruction
, *RegisterInfo
))
287 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
289 if (!usesRegisterOperand(*InstrMetaPtr
))
290 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF
;
292 if (!Graph
.OrphanedNodes
.empty())
293 return CFIProtectionStatus::FAIL_ORPHANS
;
295 for (const auto &BranchNode
: Graph
.ConditionalBranchNodes
) {
296 if (!BranchNode
.CFIProtection
)
297 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH
;
300 if (indirectCFOperandClobber(Graph
) != Graph
.BaseAddress
)
301 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED
;
303 return CFIProtectionStatus::PROTECTED
;
// Searches the paths leading to the indirect control-flow instruction at
// Graph.BaseAddress for an instruction that redefines one of the registers the
// indirect instruction uses. Returns Graph.BaseAddress when the search falls
// through to the end; callers in this file compare the result against
// Graph.BaseAddress to detect a clobber.
// Precondition (assert only): Graph.OrphanedNodes is empty.
// NOTE(review): several lines of this function (operand filters, the early
// returns, loop-control statements and closing braces) are not visible in this
// view, so the comments below describe only what the visible lines establish.
306 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult
&Graph
) const {
307 assert(Graph
.OrphanedNodes
.empty() && "Orphaned nodes should be empty.");
309 // Get the set of registers we must check to ensure they're not clobbered.
310 const Instr
&IndirectCF
= getInstructionOrDie(Graph
.BaseAddress
);
311 DenseSet
<unsigned> RegisterNumbers
;
312 for (const auto &Operand
: IndirectCF
.Instruction
) {
314 RegisterNumbers
.insert(Operand
.getReg());
316 assert(RegisterNumbers
.size() && "Zero register operands on indirect CF.");
318 // Now check all branches to indirect CFs and ensure no clobbering happens.
319 for (const auto &Branch
: Graph
.ConditionalBranchNodes
) {
// Start from whichever successor of the branch leads to the indirect CF.
321 if (Branch
.IndirectCFIsOnTargetPath
)
322 Node
= Branch
.Target
;
324 Node
= Branch
.Fallthrough
;
326 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
327 // we allow them one load.
328 bool canLoad
= !MII
->get(IndirectCF
.Instruction
.getOpcode()).mayLoad();
330 // We walk backwards from the indirect CF. It is the last node returned by
331 // Graph.flattenAddress, so we skip it since we already handled it.
// Each branch gets its own working copy of the register set.
332 DenseSet
<unsigned> CurRegisterNumbers
= RegisterNumbers
;
333 std::vector
<uint64_t> Nodes
= Graph
.flattenAddress(Node
);
334 for (auto I
= Nodes
.rbegin() + 1, E
= Nodes
.rend(); I
!= E
; ++I
) {
336 const Instr
&NodeInstr
= getInstructionOrDie(Node
);
337 const auto &InstrDesc
= MII
->get(NodeInstr
.Instruction
.getOpcode());
339 for (auto RI
= CurRegisterNumbers
.begin(), RE
= CurRegisterNumbers
.end();
341 unsigned RegNum
= *RI
;
342 if (InstrDesc
.hasDefOfPhysReg(NodeInstr
.Instruction
, RegNum
,
// A definition of a tracked register is tolerated only when it is a load and
// the single permitted load has not been used yet.
344 if (!canLoad
|| !InstrDesc
.mayLoad())
// NOTE(review): the set is modified while RI/RE iterate over it; confirm that
// the statements missing from this view leave the loop before RI is used again.
347 CurRegisterNumbers
.erase(RI
);
348 // Add the registers this load reads to those we check for clobbers.
349 for (unsigned i
= InstrDesc
.getNumDefs(),
350 e
= InstrDesc
.getNumOperands(); i
!= e
; i
++) {
351 const auto Operand
= NodeInstr
.Instruction
.getOperand(i
);
353 CurRegisterNumbers
.insert(Operand
.getReg());
361 return Graph
.BaseAddress
;
364 void FileAnalysis::printInstruction(const Instr
&InstrMeta
,
365 raw_ostream
&OS
) const {
366 Printer
->printInst(&InstrMeta
.Instruction
, OS
, "", *SubtargetInfo
.get());
// Creates the MC-layer objects this analysis relies on, in dependency order:
// symbolizer, target lookup, register info, asm info, subtarget info,
// instruction info, MC context, disassembler, instruction analysis and
// instruction printer. Each visible failure path returns an
// UnsupportedDisassembly error naming the component that could not be created;
// the function returns Error::success() at the end.
// NOTE(review): the declarations of ArchName, MCPU, MOFI and ObjectTarget, the
// null checks guarding each error return, and the trailing arguments of the
// printer construction are not visible in this view.
369 Error
FileAnalysis::initialiseDisassemblyMembers() {
370 std::string TripleName
= ObjectTriple
.getTriple();
373 std::string ErrorString
;
375 Symbolizer
.reset(new LLVMSymbolizer());
// lookupTarget reports the reason for a failed lookup through ErrorString,
// which is included in the error message below.
378 TargetRegistry::lookupTarget(ArchName
, ObjectTriple
, ErrorString
);
380 return make_error
<UnsupportedDisassembly
>(
381 (Twine("Couldn't find target \"") + ObjectTriple
.getTriple() +
382 "\", failed with error: " + ErrorString
)
385 RegisterInfo
.reset(ObjectTarget
->createMCRegInfo(TripleName
));
387 return make_error
<UnsupportedDisassembly
>(
388 "Failed to initialise RegisterInfo.");
390 AsmInfo
.reset(ObjectTarget
->createMCAsmInfo(*RegisterInfo
, TripleName
));
392 return make_error
<UnsupportedDisassembly
>("Failed to initialise AsmInfo.");
394 SubtargetInfo
.reset(ObjectTarget
->createMCSubtargetInfo(
395 TripleName
, MCPU
, Features
.getString()));
397 return make_error
<UnsupportedDisassembly
>(
398 "Failed to initialise SubtargetInfo.");
400 MII
.reset(ObjectTarget
->createMCInstrInfo());
402 return make_error
<UnsupportedDisassembly
>("Failed to initialise MII.");
// The context borrows AsmInfo, RegisterInfo and MOFI through raw pointers.
404 Context
.reset(new MCContext(AsmInfo
.get(), RegisterInfo
.get(), &MOFI
));
407 ObjectTarget
->createMCDisassembler(*SubtargetInfo
, *Context
));
410 return make_error
<UnsupportedDisassembly
>(
411 "No disassembler available for target");
413 MIA
.reset(ObjectTarget
->createMCInstrAnalysis(MII
.get()));
415 Printer
.reset(ObjectTarget
->createMCInstPrinter(
416 ObjectTriple
, AsmInfo
->getAssemblerDialect(), *AsmInfo
, *MII
,
419 return Error::success();
// Feeds the object's executable sections to parseSectionContents.
// Unless IgnoreDWARFFlag is set, the function first builds a DWARFContext for
// the object and looks for a compile unit with a non-empty line table; the two
// visible error returns report a context that could not be created and
// missing line information. It then iterates over all sections, filtering on
// the SHF_EXECINSTR flag and on the name ".plt", and passes each remaining
// section's bytes together with its address and index to parseSectionContents.
// A failure to read a section's contents is propagated to the caller.
// NOTE(review): the conditions guarding the error returns and the statements
// that skip a section are not visible in this view.
422 Error
FileAnalysis::parseCodeSections() {
423 if (!IgnoreDWARFFlag
) {
424 std::unique_ptr
<DWARFContext
> DWARF
= DWARFContext::create(*Object
);
426 return make_error
<StringError
>("Could not create DWARF information.",
427 inconvertibleErrorCode());
429 bool LineInfoValid
= false;
// One compile unit with at least one line-table row is enough.
431 for (auto &Unit
: DWARF
->compile_units()) {
432 const auto &LineTable
= DWARF
->getLineTableForUnit(Unit
.get());
433 if (LineTable
&& !LineTable
->Rows
.empty()) {
434 LineInfoValid
= true;
440 return make_error
<StringError
>(
441 "DWARF line information missing. Did you compile with '-g'?",
442 inconvertibleErrorCode());
445 for (const object::SectionRef
&Section
: Object
->sections()) {
446 // Ensure only executable sections get analysed.
// NOTE(review): every section is wrapped in ELFSectionRef here regardless of
// the object's format — confirm this is safe for non-ELF inputs.
447 if (!(object::ELFSectionRef(Section
).getFlags() & ELF::SHF_EXECINSTR
))
450 // Avoid checking the PLT since it produces spurious failures on AArch64
451 // when ignoring DWARF data.
452 StringRef SectionName
;
// getName is treated as successful when it converts to false.
453 if (!Section
.getName(SectionName
) && SectionName
== ".plt")
456 Expected
<StringRef
> Contents
= Section
.getContents();
458 return Contents
.takeError();
459 ArrayRef
<uint8_t> SectionBytes
= arrayRefFromStringRef(*Contents
);
461 parseSectionContents(SectionBytes
,
462 {Section
.getAddress(), Section
.getIndex()});
464 return Error::success();
// Disassembles SectionBytes from start to end and records what it finds.
// Every decoded position, valid or not, is stored through addInstruction with
// its address (Address.Address plus the byte offset), size and validity.
// For valid instructions that may affect control flow, a statically evaluable
// branch is recorded in StaticBranchTargetings under its target address; the
// visible checks after that concern register operands, returns and, unless
// IgnoreDWARFFlag is set, the symbolizer's line information. The final visible
// statement records the address in IndirectInstructions.
// Precondition (assert only): Symbolizer has been initialised.
// NOTE(review): the declarations of Instruction, InstrMeta and Target and the
// statements that follow each filtering condition are not visible in this view.
467 void FileAnalysis::parseSectionContents(ArrayRef
<uint8_t> SectionBytes
,
468 object::SectionedAddress Address
) {
469 assert(Symbolizer
&& "Symbolizer is uninitialised.");
472 uint64_t InstructionSize
;
474 for (uint64_t Byte
= 0; Byte
< SectionBytes
.size();) {
// The disassembler is handed the remaining bytes and an address argument of 0;
// the real address is computed below.
475 bool ValidInstruction
=
476 Disassembler
->getInstruction(Instruction
, InstructionSize
,
477 SectionBytes
.drop_front(Byte
), 0, nulls(),
478 outs()) == MCDisassembler::Success
;
// NOTE(review): the offset advances by the size the disassembler reported; if
// a failed decode can report a size of 0 this loop would not advance — confirm
// the disassembler's contract.
480 Byte
+= InstructionSize
;
// Byte already points past the instruction, so subtract its size again.
482 uint64_t VMAddress
= Address
.Address
+ Byte
- InstructionSize
;
483 InstrMeta
.Instruction
= Instruction
;
484 InstrMeta
.VMAddress
= VMAddress
;
485 InstrMeta
.InstructionSize
= InstructionSize
;
486 InstrMeta
.Valid
= ValidInstruction
;
488 addInstruction(InstrMeta
);
490 if (!ValidInstruction
)
493 // Skip additional parsing for instructions that do not affect control flow.
495 const auto &InstrDesc
= MII
->get(Instruction
.getOpcode());
496 if (!InstrDesc
.mayAffectControlFlow(Instruction
, *RegisterInfo
))
500 if (MIA
->evaluateBranch(Instruction
, VMAddress
, InstructionSize
, Target
)) {
501 // If the target can be evaluated, it's not indirect.
502 StaticBranchTargetings
[Target
].push_back(VMAddress
);
506 if (!usesRegisterOperand(InstrMeta
))
509 if (InstrDesc
.isReturn())
512 // Check if this instruction exists in the range of the DWARF metadata.
513 if (!IgnoreDWARFFlag
) {
514 auto LineInfo
= Symbolizer
->symbolizeCode(
515 Object
->getFileName(), {VMAddress
, Address
.SectionIndex
});
// A symbolizer failure is reported on errs() and consumed here.
517 handleAllErrors(LineInfo
.takeError(), [](const ErrorInfoBase
&E
) {
518 errs() << "Symbolizer failed to get line: " << E
.message() << "\n";
523 if (LineInfo
->FileName
== "<invalid>")
527 IndirectInstructions
.insert({VMAddress
, Address
.SectionIndex
});
531 void FileAnalysis::addInstruction(const Instr
&Instruction
) {
533 Instructions
.insert(std::make_pair(Instruction
.VMAddress
, Instruction
));
535 errs() << "Failed to add instruction at address "
536 << format_hex(Instruction
.VMAddress
, 2)
537 << ": Instruction at this address already exists.\n";
542 Error
FileAnalysis::parseSymbolTable() {
543 // Functions that will trap on CFI violations.
544 SmallSet
<StringRef
, 4> TrapOnFailFunctions
;
545 TrapOnFailFunctions
.insert("__cfi_slowpath");
546 TrapOnFailFunctions
.insert("__cfi_slowpath_diag");
547 TrapOnFailFunctions
.insert("abort");
549 // Look through the list of symbols for functions that will trap on CFI
551 for (auto &Sym
: Object
->symbols()) {
552 auto SymNameOrErr
= Sym
.getName();
554 consumeError(SymNameOrErr
.takeError());
555 else if (TrapOnFailFunctions
.count(*SymNameOrErr
) > 0) {
556 auto AddrOrErr
= Sym
.getAddress();
558 consumeError(AddrOrErr
.takeError());
560 TrapOnFailFunctionAddresses
.insert(*AddrOrErr
);
563 if (auto *ElfObject
= dyn_cast
<object::ELFObjectFileBase
>(Object
)) {
564 for (const auto &Addr
: ElfObject
->getPltAddresses()) {
565 object::SymbolRef
Sym(Addr
.first
, Object
);
566 auto SymNameOrErr
= Sym
.getName();
568 consumeError(SymNameOrErr
.takeError());
569 else if (TrapOnFailFunctions
.count(*SymNameOrErr
) > 0)
570 TrapOnFailFunctionAddresses
.insert(Addr
.second
);
573 return Error::success();
576 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text
) : Text(Text
) {}
// Out-of-class definition of the static ID member of UnsupportedDisassembly.
578 char UnsupportedDisassembly::ID
;
579 void UnsupportedDisassembly::log(raw_ostream
&OS
) const {
580 OS
<< "Could not initialise disassembler: " << Text
;
583 std::error_code
UnsupportedDisassembly::convertToErrorCode() const {
584 return std::error_code();
587 } // namespace cfi_verify