1 //===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
10 #define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
12 #include "llvm/ADT/DenseMap.h"
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
16 #include "llvm/MC/MCAsmInfo.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrAnalysis.h"
22 #include "llvm/MC/MCInstrDesc.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCObjectFileInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Object/Binary.h"
29 #include "llvm/Object/COFF.h"
30 #include "llvm/Object/ELFObjectFile.h"
31 #include "llvm/Object/ObjectFile.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/TargetSelect.h"
37 #include "llvm/Support/raw_ostream.h"
42 #include <unordered_map>
45 namespace cfi_verify
{
// Declaration only; the flag is defined in another translation unit.
// NOTE(review): presumably controls whether DWARF debug information is
// ignored during verification — confirm at the definition.
extern bool IgnoreDWARFFlag;
51 enum class CFIProtectionStatus
{
52 // This instruction is protected by CFI.
54 // The instruction is not an indirect control flow instruction, and thus
55 // shouldn't be protected.
57 // There is a path to the instruction that was unexpected.
59 // There is a path to the instruction from a conditional branch that does not
60 // properly check the destination for this vcall/icall.
61 FAIL_BAD_CONDITIONAL_BRANCH
,
62 // One of the operands of the indirect CF instruction is modified between the
63 // CFI-check and execution.
64 FAIL_REGISTER_CLOBBERED
,
65 // The instruction referenced does not exist. This normally indicates an
66 // error in the program, where you try and validate a graph that was created
67 // in a different FileAnalysis object.
68 FAIL_INVALID_INSTRUCTION
,
71 StringRef
stringCFIProtectionStatus(CFIProtectionStatus Status
);
// Disassembler and analysis tool for machine code files. Keeps track of non-
// sequential control flows, including indirect control flow instructions.
// A metadata struct for an instruction.
79 uint64_t VMAddress
; // Virtual memory address of this instruction.
80 MCInst Instruction
; // Instruction.
81 uint64_t InstructionSize
; // Size of this instruction.
82 bool Valid
; // Is this a valid instruction? If false, Instr::Instruction is
86 // Construct a FileAnalysis from a file path.
87 static Expected
<FileAnalysis
> Create(StringRef Filename
);
89 // Construct and take ownership of the supplied object. Do not use this
90 // constructor, prefer to use FileAnalysis::Create instead.
91 FileAnalysis(object::OwningBinary
<object::Binary
> Binary
);
92 FileAnalysis() = delete;
93 FileAnalysis(const FileAnalysis
&) = delete;
94 FileAnalysis(FileAnalysis
&&Other
) = default;
96 // Returns the instruction at the provided address. Returns nullptr if there
97 // is no instruction at the provided address.
98 const Instr
*getInstruction(uint64_t Address
) const;
100 // Returns the instruction at the provided adress, dying if the instruction is
102 const Instr
&getInstructionOrDie(uint64_t Address
) const;
104 // Returns a pointer to the previous/next instruction in sequence,
105 // respectively. Returns nullptr if the next/prev instruction doesn't exist,
106 // or if the provided instruction doesn't exist.
107 const Instr
*getPrevInstructionSequential(const Instr
&InstrMeta
) const;
108 const Instr
*getNextInstructionSequential(const Instr
&InstrMeta
) const;
110 // Returns whether this instruction is used by CFI to trap the program.
111 bool isCFITrap(const Instr
&InstrMeta
) const;
113 // Returns whether this instruction is a call to a function that will trap on
114 // CFI violations (i.e., it serves as a trap in this instance).
115 bool willTrapOnCFIViolation(const Instr
&InstrMeta
) const;
117 // Returns whether this function can fall through to the next instruction.
118 // Undefined (and bad) instructions cannot fall through, and instruction that
119 // modify the control flow can only fall through if they are conditional
120 // branches or calls.
121 bool canFallThrough(const Instr
&InstrMeta
) const;
123 // Returns the definitive next instruction. This is different from the next
124 // instruction sequentially as it will follow unconditional branches (assuming
125 // they can be resolved at compile time, i.e. not indirect). This method
126 // returns nullptr if the provided instruction does not transfer control flow
127 // to exactly one instruction that is known deterministically at compile time.
128 // Also returns nullptr if the deterministic target does not exist in this
130 const Instr
*getDefiniteNextInstruction(const Instr
&InstrMeta
) const;
132 // Get a list of deterministic control flows that lead to the provided
133 // instruction. This list includes all static control flow cross-references as
134 // well as the previous instruction if it can fall through.
135 std::set
<const Instr
*>
136 getDirectControlFlowXRefs(const Instr
&InstrMeta
) const;
138 // Returns whether this instruction uses a register operand.
139 bool usesRegisterOperand(const Instr
&InstrMeta
) const;
141 // Returns the list of indirect instructions.
142 const std::set
<object::SectionedAddress
> &getIndirectInstructions() const;
144 const MCRegisterInfo
*getRegisterInfo() const;
145 const MCInstrInfo
*getMCInstrInfo() const;
146 const MCInstrAnalysis
*getMCInstrAnalysis() const;
148 // Returns the inlining information for the provided address.
149 Expected
<DIInliningInfo
>
150 symbolizeInlinedCode(object::SectionedAddress Address
);
152 // Returns whether the provided Graph represents a protected indirect control
153 // flow instruction in this file.
154 CFIProtectionStatus
validateCFIProtection(const GraphResult
&Graph
) const;
156 // Returns the first place the operand register is clobbered between the CFI-
157 // check and the indirect CF instruction execution. We do this by walking
158 // backwards from the indirect CF and ensuring there is at most one load
159 // involving the operand register (which is the indirect CF itself on x86).
160 // If the register is not modified, returns the address of the indirect CF
161 // instruction. The result is undefined if the provided graph does not fall
162 // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
163 // CFIProtectionStatus).
164 uint64_t indirectCFOperandClobber(const GraphResult
& Graph
) const;
166 // Prints an instruction to the provided stream using this object's pretty-
168 void printInstruction(const Instr
&InstrMeta
, raw_ostream
&OS
) const;
171 // Construct a blank object with the provided triple and features. Used in
172 // testing, where a sub class will dependency inject protected methods to
173 // allow analysis of raw binary, without requiring a fully valid ELF file.
174 FileAnalysis(const Triple
&ObjectTriple
, const SubtargetFeatures
&Features
);
176 // Add an instruction to this object.
177 void addInstruction(const Instr
&Instruction
);
179 // Disassemble and parse the provided bytes into this object. Instruction
180 // address calculation is done relative to the provided SectionAddress.
181 void parseSectionContents(ArrayRef
<uint8_t> SectionBytes
,
182 object::SectionedAddress Address
);
184 // Constructs and initialises members required for disassembly.
185 Error
initialiseDisassemblyMembers();
187 // Parses code sections from the internal object file. Saves them into the
188 // internal members. Should only be called once by Create().
189 Error
parseCodeSections();
191 // Parses the symbol table to look for the addresses of functions that will
192 // trap on CFI violations.
193 Error
parseSymbolTable();
196 // Members that describe the input file.
197 object::OwningBinary
<object::Binary
> Binary
;
198 const object::ObjectFile
*Object
= nullptr;
200 std::string ArchName
;
202 const Target
*ObjectTarget
= nullptr;
203 SubtargetFeatures Features
;
205 // Members required for disassembly.
206 std::unique_ptr
<const MCRegisterInfo
> RegisterInfo
;
207 std::unique_ptr
<const MCAsmInfo
> AsmInfo
;
208 std::unique_ptr
<MCSubtargetInfo
> SubtargetInfo
;
209 std::unique_ptr
<const MCInstrInfo
> MII
;
210 std::unique_ptr
<MCContext
> Context
;
211 std::unique_ptr
<const MCDisassembler
> Disassembler
;
212 std::unique_ptr
<const MCInstrAnalysis
> MIA
;
213 std::unique_ptr
<MCInstPrinter
> Printer
;
215 // Symbolizer used for debug information parsing.
216 std::unique_ptr
<symbolize::LLVMSymbolizer
> Symbolizer
;
218 // A mapping between the virtual memory address to the instruction metadata
219 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
220 // insertion allocation.
221 std::map
<uint64_t, Instr
> Instructions
;
223 // Contains a mapping between a specific address, and a list of instructions
224 // that use this address as a branch target (including call instructions).
225 DenseMap
<uint64_t, std::vector
<uint64_t>> StaticBranchTargetings
;
227 // A list of addresses of indirect control flow instructions.
228 std::set
<object::SectionedAddress
> IndirectInstructions
;
230 // The addresses of functions that will trap on CFI violations.
231 SmallSet
<uint64_t, 4> TrapOnFailFunctionAddresses
;
234 class UnsupportedDisassembly
: public ErrorInfo
<UnsupportedDisassembly
> {
239 UnsupportedDisassembly(StringRef Text
);
241 void log(raw_ostream
&OS
) const override
;
242 std::error_code
convertToErrorCode() const override
;
245 } // namespace cfi_verify
248 #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H