[MIParser] Set RegClassOrRegBank during instruction parsing
[llvm-complete.git] / tools / llvm-cfi-verify / lib / FileAnalysis.cpp
blob03f6e3f8a1141f2c2f5316301b8c345b9827acd1
1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "FileAnalysis.h"
10 #include "GraphBuilder.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrAnalysis.h"
20 #include "llvm/MC/MCInstrDesc.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Object/Binary.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Object/ELFObjectFile.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Support/raw_ostream.h"
38 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
39 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41 namespace llvm {
42 namespace cfi_verify {
44 bool IgnoreDWARFFlag;
46 static cl::opt<bool, true> IgnoreDWARFArg(
47 "ignore-dwarf",
48 cl::desc(
49 "Ignore all DWARF data. This relaxes the requirements for all "
50 "statically linked libraries to have been compiled with '-g', but "
51 "will result in false positives for 'CFI unprotected' instructions."),
52 cl::location(IgnoreDWARFFlag), cl::init(false));
54 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
55 switch (Status) {
56 case CFIProtectionStatus::PROTECTED:
57 return "PROTECTED";
58 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
59 return "FAIL_NOT_INDIRECT_CF";
60 case CFIProtectionStatus::FAIL_ORPHANS:
61 return "FAIL_ORPHANS";
62 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
63 return "FAIL_BAD_CONDITIONAL_BRANCH";
64 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
65 return "FAIL_REGISTER_CLOBBERED";
66 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
67 return "FAIL_INVALID_INSTRUCTION";
69 llvm_unreachable("Attempted to stringify an unknown enum value.");
72 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
73 // Open the filename provided.
74 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
75 object::createBinary(Filename);
76 if (!BinaryOrErr)
77 return BinaryOrErr.takeError();
79 // Construct the object and allow it to take ownership of the binary.
80 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
81 FileAnalysis Analysis(std::move(Binary));
83 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
84 if (!Analysis.Object)
85 return make_error<UnsupportedDisassembly>("Failed to cast object");
87 switch (Analysis.Object->getArch()) {
88 case Triple::x86:
89 case Triple::x86_64:
90 case Triple::aarch64:
91 case Triple::aarch64_be:
92 break;
93 default:
94 return make_error<UnsupportedDisassembly>("Unsupported architecture.");
97 Analysis.ObjectTriple = Analysis.Object->makeTriple();
98 Analysis.Features = Analysis.Object->getFeatures();
100 // Init the rest of the object.
101 if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
102 return std::move(InitResponse);
104 if (auto SectionParseResponse = Analysis.parseCodeSections())
105 return std::move(SectionParseResponse);
107 if (auto SymbolTableParseResponse = Analysis.parseSymbolTable())
108 return std::move(SymbolTableParseResponse);
110 return std::move(Analysis);
113 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
114 : Binary(std::move(Binary)) {}
116 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
117 const SubtargetFeatures &Features)
118 : ObjectTriple(ObjectTriple), Features(Features) {}
120 const Instr *
121 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
122 std::map<uint64_t, Instr>::const_iterator KV =
123 Instructions.find(InstrMeta.VMAddress);
124 if (KV == Instructions.end() || KV == Instructions.begin())
125 return nullptr;
127 if (!(--KV)->second.Valid)
128 return nullptr;
130 return &KV->second;
133 const Instr *
134 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
135 std::map<uint64_t, Instr>::const_iterator KV =
136 Instructions.find(InstrMeta.VMAddress);
137 if (KV == Instructions.end() || ++KV == Instructions.end())
138 return nullptr;
140 if (!KV->second.Valid)
141 return nullptr;
143 return &KV->second;
146 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
147 for (const auto &Operand : InstrMeta.Instruction) {
148 if (Operand.isReg())
149 return true;
151 return false;
154 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
155 const auto &InstrKV = Instructions.find(Address);
156 if (InstrKV == Instructions.end())
157 return nullptr;
159 return &InstrKV->second;
162 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
163 const auto &InstrKV = Instructions.find(Address);
164 assert(InstrKV != Instructions.end() && "Address doesn't exist.");
165 return InstrKV->second;
168 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
169 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
170 return InstrDesc.isTrap() || willTrapOnCFIViolation(InstrMeta);
173 bool FileAnalysis::willTrapOnCFIViolation(const Instr &InstrMeta) const {
174 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
175 if (!InstrDesc.isCall())
176 return false;
177 uint64_t Target;
178 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
179 InstrMeta.InstructionSize, Target))
180 return false;
181 return TrapOnFailFunctionAddresses.count(Target) > 0;
184 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
185 if (!InstrMeta.Valid)
186 return false;
188 if (isCFITrap(InstrMeta))
189 return false;
191 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
192 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
193 return InstrDesc.isConditionalBranch();
195 return true;
198 const Instr *
199 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
200 if (!InstrMeta.Valid)
201 return nullptr;
203 if (isCFITrap(InstrMeta))
204 return nullptr;
206 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
207 const Instr *NextMetaPtr;
208 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
209 if (InstrDesc.isConditionalBranch())
210 return nullptr;
212 uint64_t Target;
213 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
214 InstrMeta.InstructionSize, Target))
215 return nullptr;
217 NextMetaPtr = getInstruction(Target);
218 } else {
219 NextMetaPtr =
220 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
223 if (!NextMetaPtr || !NextMetaPtr->Valid)
224 return nullptr;
226 return NextMetaPtr;
229 std::set<const Instr *>
230 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
231 std::set<const Instr *> CFCrossReferences;
232 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
234 if (PrevInstruction && canFallThrough(*PrevInstruction))
235 CFCrossReferences.insert(PrevInstruction);
237 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
238 if (TargetRefsKV == StaticBranchTargetings.end())
239 return CFCrossReferences;
241 for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
242 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
243 if (SourceInstrKV == Instructions.end()) {
244 errs() << "Failed to find source instruction at address "
245 << format_hex(SourceInstrAddress, 2)
246 << " for the cross-reference to instruction at address "
247 << format_hex(InstrMeta.VMAddress, 2) << ".\n";
248 continue;
251 CFCrossReferences.insert(&SourceInstrKV->second);
254 return CFCrossReferences;
257 const std::set<object::SectionedAddress> &
258 FileAnalysis::getIndirectInstructions() const {
259 return IndirectInstructions;
262 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
263 return RegisterInfo.get();
266 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
268 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
269 return MIA.get();
272 Expected<DIInliningInfo>
273 FileAnalysis::symbolizeInlinedCode(object::SectionedAddress Address) {
274 assert(Symbolizer != nullptr && "Symbolizer is invalid.");
276 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
279 CFIProtectionStatus
280 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
281 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
282 if (!InstrMetaPtr)
283 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
285 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
286 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
287 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
289 if (!usesRegisterOperand(*InstrMetaPtr))
290 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
292 if (!Graph.OrphanedNodes.empty())
293 return CFIProtectionStatus::FAIL_ORPHANS;
295 for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
296 if (!BranchNode.CFIProtection)
297 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
300 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
301 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
303 return CFIProtectionStatus::PROTECTED;
306 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
307 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
309 // Get the set of registers we must check to ensure they're not clobbered.
310 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
311 DenseSet<unsigned> RegisterNumbers;
312 for (const auto &Operand : IndirectCF.Instruction) {
313 if (Operand.isReg())
314 RegisterNumbers.insert(Operand.getReg());
316 assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
318 // Now check all branches to indirect CFs and ensure no clobbering happens.
319 for (const auto &Branch : Graph.ConditionalBranchNodes) {
320 uint64_t Node;
321 if (Branch.IndirectCFIsOnTargetPath)
322 Node = Branch.Target;
323 else
324 Node = Branch.Fallthrough;
326 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
327 // we allow them one load.
328 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
330 // We walk backwards from the indirect CF. It is the last node returned by
331 // Graph.flattenAddress, so we skip it since we already handled it.
332 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
333 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
334 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
335 Node = *I;
336 const Instr &NodeInstr = getInstructionOrDie(Node);
337 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
339 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
340 RI != RE; ++RI) {
341 unsigned RegNum = *RI;
342 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
343 *RegisterInfo)) {
344 if (!canLoad || !InstrDesc.mayLoad())
345 return Node;
346 canLoad = false;
347 CurRegisterNumbers.erase(RI);
348 // Add the registers this load reads to those we check for clobbers.
349 for (unsigned i = InstrDesc.getNumDefs(),
350 e = InstrDesc.getNumOperands(); i != e; i++) {
351 const auto Operand = NodeInstr.Instruction.getOperand(i);
352 if (Operand.isReg())
353 CurRegisterNumbers.insert(Operand.getReg());
355 break;
361 return Graph.BaseAddress;
364 void FileAnalysis::printInstruction(const Instr &InstrMeta,
365 raw_ostream &OS) const {
366 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
369 Error FileAnalysis::initialiseDisassemblyMembers() {
370 std::string TripleName = ObjectTriple.getTriple();
371 ArchName = "";
372 MCPU = "";
373 std::string ErrorString;
375 Symbolizer.reset(new LLVMSymbolizer());
377 ObjectTarget =
378 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
379 if (!ObjectTarget)
380 return make_error<UnsupportedDisassembly>(
381 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
382 "\", failed with error: " + ErrorString)
383 .str());
385 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
386 if (!RegisterInfo)
387 return make_error<UnsupportedDisassembly>(
388 "Failed to initialise RegisterInfo.");
390 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
391 if (!AsmInfo)
392 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
394 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
395 TripleName, MCPU, Features.getString()));
396 if (!SubtargetInfo)
397 return make_error<UnsupportedDisassembly>(
398 "Failed to initialise SubtargetInfo.");
400 MII.reset(ObjectTarget->createMCInstrInfo());
401 if (!MII)
402 return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
404 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
406 Disassembler.reset(
407 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
409 if (!Disassembler)
410 return make_error<UnsupportedDisassembly>(
411 "No disassembler available for target");
413 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
415 Printer.reset(ObjectTarget->createMCInstPrinter(
416 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
417 *RegisterInfo));
419 return Error::success();
422 Error FileAnalysis::parseCodeSections() {
423 if (!IgnoreDWARFFlag) {
424 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
425 if (!DWARF)
426 return make_error<StringError>("Could not create DWARF information.",
427 inconvertibleErrorCode());
429 bool LineInfoValid = false;
431 for (auto &Unit : DWARF->compile_units()) {
432 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
433 if (LineTable && !LineTable->Rows.empty()) {
434 LineInfoValid = true;
435 break;
439 if (!LineInfoValid)
440 return make_error<StringError>(
441 "DWARF line information missing. Did you compile with '-g'?",
442 inconvertibleErrorCode());
445 for (const object::SectionRef &Section : Object->sections()) {
446 // Ensure only executable sections get analysed.
447 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
448 continue;
450 // Avoid checking the PLT since it produces spurious failures on AArch64
451 // when ignoring DWARF data.
452 Expected<StringRef> NameOrErr = Section.getName();
453 if (NameOrErr && *NameOrErr == ".plt")
454 continue;
455 consumeError(NameOrErr.takeError());
457 Expected<StringRef> Contents = Section.getContents();
458 if (!Contents)
459 return Contents.takeError();
460 ArrayRef<uint8_t> SectionBytes = arrayRefFromStringRef(*Contents);
462 parseSectionContents(SectionBytes,
463 {Section.getAddress(), Section.getIndex()});
465 return Error::success();
468 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
469 object::SectionedAddress Address) {
470 assert(Symbolizer && "Symbolizer is uninitialised.");
471 MCInst Instruction;
472 Instr InstrMeta;
473 uint64_t InstructionSize;
475 for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
476 bool ValidInstruction =
477 Disassembler->getInstruction(Instruction, InstructionSize,
478 SectionBytes.drop_front(Byte), 0, nulls(),
479 outs()) == MCDisassembler::Success;
481 Byte += InstructionSize;
483 uint64_t VMAddress = Address.Address + Byte - InstructionSize;
484 InstrMeta.Instruction = Instruction;
485 InstrMeta.VMAddress = VMAddress;
486 InstrMeta.InstructionSize = InstructionSize;
487 InstrMeta.Valid = ValidInstruction;
489 addInstruction(InstrMeta);
491 if (!ValidInstruction)
492 continue;
494 // Skip additional parsing for instructions that do not affect the control
495 // flow.
496 const auto &InstrDesc = MII->get(Instruction.getOpcode());
497 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
498 continue;
500 uint64_t Target;
501 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
502 // If the target can be evaluated, it's not indirect.
503 StaticBranchTargetings[Target].push_back(VMAddress);
504 continue;
507 if (!usesRegisterOperand(InstrMeta))
508 continue;
510 if (InstrDesc.isReturn())
511 continue;
513 // Check if this instruction exists in the range of the DWARF metadata.
514 if (!IgnoreDWARFFlag) {
515 auto LineInfo = Symbolizer->symbolizeCode(
516 Object->getFileName(), {VMAddress, Address.SectionIndex});
517 if (!LineInfo) {
518 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
519 errs() << "Symbolizer failed to get line: " << E.message() << "\n";
521 continue;
524 if (LineInfo->FileName == DILineInfo::BadString)
525 continue;
528 IndirectInstructions.insert({VMAddress, Address.SectionIndex});
532 void FileAnalysis::addInstruction(const Instr &Instruction) {
533 const auto &KV =
534 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
535 if (!KV.second) {
536 errs() << "Failed to add instruction at address "
537 << format_hex(Instruction.VMAddress, 2)
538 << ": Instruction at this address already exists.\n";
539 exit(EXIT_FAILURE);
543 Error FileAnalysis::parseSymbolTable() {
544 // Functions that will trap on CFI violations.
545 SmallSet<StringRef, 4> TrapOnFailFunctions;
546 TrapOnFailFunctions.insert("__cfi_slowpath");
547 TrapOnFailFunctions.insert("__cfi_slowpath_diag");
548 TrapOnFailFunctions.insert("abort");
550 // Look through the list of symbols for functions that will trap on CFI
551 // violations.
552 for (auto &Sym : Object->symbols()) {
553 auto SymNameOrErr = Sym.getName();
554 if (!SymNameOrErr)
555 consumeError(SymNameOrErr.takeError());
556 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0) {
557 auto AddrOrErr = Sym.getAddress();
558 if (!AddrOrErr)
559 consumeError(AddrOrErr.takeError());
560 else
561 TrapOnFailFunctionAddresses.insert(*AddrOrErr);
564 if (auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Object)) {
565 for (const auto &Addr : ElfObject->getPltAddresses()) {
566 object::SymbolRef Sym(Addr.first, Object);
567 auto SymNameOrErr = Sym.getName();
568 if (!SymNameOrErr)
569 consumeError(SymNameOrErr.takeError());
570 else if (TrapOnFailFunctions.count(*SymNameOrErr) > 0)
571 TrapOnFailFunctionAddresses.insert(Addr.second);
574 return Error::success();
577 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
579 char UnsupportedDisassembly::ID;
580 void UnsupportedDisassembly::log(raw_ostream &OS) const {
581 OS << "Could not initialise disassembler: " << Text;
584 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
585 return std::error_code();
588 } // namespace cfi_verify
589 } // namespace llvm