//===- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own. Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
///
/// +--------------------+ 0x800000000000 (top of memory)
/// |    application 3   |
/// +--------------------+ 0x700000000000
/// |      invalid       |
/// +--------------------+ 0x610000000000
/// |      origin 1      |
/// +--------------------+ 0x600000000000
/// |    application 2   |
/// +--------------------+ 0x510000000000
/// |      shadow 1      |
/// +--------------------+ 0x500000000000
/// |      invalid       |
/// +--------------------+ 0x400000000000
/// |      origin 3      |
/// +--------------------+ 0x300000000000
/// |      shadow 3      |
/// +--------------------+ 0x200000000000
/// |      origin 2      |
/// +--------------------+ 0x110000000000
/// |      invalid       |
/// +--------------------+ 0x100000000000
/// |      shadow 2      |
/// +--------------------+ 0x010000000000
/// |    application 1   |
/// +--------------------+ 0x000000000000
///
/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
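///
/// For example (worked through with the constants above): an "application 3"
/// address such as mem = 0x700000000008 maps to shadow address
/// 0x700000000008 ^ 0x500000000000 = 0x200000000008 (in "shadow 3"), whose
/// origin slot is 0x200000000008 + 0x100000000000 = 0x300000000008
/// (in "origin 3").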
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the
// ones in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16. This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations are "functional" and "discard",
// which are described below under DataFlowSanitizer::WrapperKind.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
// functions (see DataFlowSanitizer::InstrumentedABI below).
static cl::opt<bool>
    ClArgsABI("dfsan-args-abi",
              cl::desc("Use the argument ABI rather than the TLS ABI"),
              cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
// TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

static StringRef getGlobalTypeString(const GlobalValue &G) {
  // Types of GlobalVariables are always pointer types.
  Type *GType = G.getValueType();
  // For now we support excluding struct types only.
  if (StructType *SGType = dyn_cast<StructType>(GType)) {
    if (!SGType->isLiteral())
      return SGType->getName();
  }
  return "<unknown type>";
}

namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  uint64_t AndMask;
  uint64_t XorMask;
  uint64_t ShadowBase;
  uint64_t OriginBase;
};

} // end anonymous namespace

// NOLINTNEXTLINE(readability-identifier-naming)
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
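
// With these parameters, Offset = Addr ^ 0x500000000000, so (for example) the
// "application 1" address 0x000000000010 has shadow address 0x500000000010 and
// origin address (0x100000000000 + 0x500000000010) & ~3ULL = 0x600000000010,
// matching the layout diagram at the top of this file.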

namespace {

class DFSanABIList {
  std::unique_ptr<SpecialCaseList> SCL;

public:
  DFSanABIList() = default;

  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }

  /// Returns whether either this function or its source file are listed in the
  /// given category.
  bool isIn(const Function &F, StringRef Category) const {
    return isIn(*F.getParent(), Category) ||
           SCL->inSection("dataflow", "fun", F.getName(), Category);
  }

  /// Returns whether this global alias is listed in the given category.
  ///
  /// If GA aliases a function, the alias's name is matched as a function name
  /// would be. Similarly, aliases of globals are matched like globals.
  bool isIn(const GlobalAlias &GA, StringRef Category) const {
    if (isIn(*GA.getParent(), Category))
      return true;

    if (isa<FunctionType>(GA.getValueType()))
      return SCL->inSection("dataflow", "fun", GA.getName(), Category);

    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
           SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
                          Category);
  }

  /// Returns whether this module is listed in the given category.
  bool isIn(const Module &M, StringRef Category) const {
    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
  }
};

/// TransformedFunction is used to express the result of transforming one
/// function type into another. This struct is immutable. It holds metadata
/// useful for updating calls of the old function to the new type.
struct TransformedFunction {
  TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
                      std::vector<unsigned> ArgumentIndexMapping)
      : OriginalType(OriginalType), TransformedType(TransformedType),
        ArgumentIndexMapping(ArgumentIndexMapping) {}

  // Disallow copies.
  TransformedFunction(const TransformedFunction &) = delete;
  TransformedFunction &operator=(const TransformedFunction &) = delete;

  // Allow moves.
  TransformedFunction(TransformedFunction &&) = default;
  TransformedFunction &operator=(TransformedFunction &&) = default;

  /// Type of the function before the transformation.
  FunctionType *OriginalType;

  /// Type of the function after the transformation.
  FunctionType *TransformedType;

  /// Transforming a function may change the position of arguments. This
  /// member records the mapping from each argument's old position to its new
  /// position. Argument positions are zero-indexed. If the transformation
  /// from F to F' made the first argument of F into the third argument of F',
  /// then ArgumentIndexMapping[0] will equal 2.
  std::vector<unsigned> ArgumentIndexMapping;
};

/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
static AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameter of the original function to the
  // transformed version. 'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
  }

  // Copy annotations on varargs arguments.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
  }

  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
                            CallSiteAttrs.getRetAttrs(),
                            llvm::makeArrayRef(ArgumentAttributes));
}

class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };

  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };

  /// Which ABI should be used for instrumented functions?
  enum InstrumentedABI {
    /// Argument and return value labels are passed through additional
    /// arguments and by modifying the return type.
    IA_Args,

    /// Argument and return value labels are passed through TLS variables
    /// __dfsan_arg_tls and __dfsan_retval_tls.
    IA_TLS
  };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled. Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function. This function may wrap the
    /// original function or provide its own implementation. This is similar to
    /// the IA_Args ABI, except that IA_Args uses a struct return type to
    /// pass the return value shadow in a register, while WK_Custom uses an
    /// extra pointer argument to return the shadow. This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  DFSanABIList ABIList;
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttrBuilder ReadOnlyNoneAttrs;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, Instruction *Pos);
  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  std::pair<Value *, Value *>
  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  FunctionType *getArgsFunctionType(FunctionType *T);
  FunctionType *getTrampolineFunctionType(FunctionType *T);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  InstrumentedABI getInstrumentedABI();
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  void injectMetadataGlobals(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns whether the pass tracks labels for struct fields and array
  /// indices. Supports only TLS ABI mode.
  bool shouldTrackFieldsAndIndices();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i8(0)
  ///
  /// Note that a zero shadow is always i8(0) when shouldTrackFieldsAndIndices
  /// returns false.
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i8
  ///
  /// Note that a shadow type is always i8 when shouldTrackFieldsAndIndices
  /// returns false.
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M);
};

struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  DominatorTree DT;
  DataFlowSanitizer::InstrumentedABI IA;
  bool IsNativeABI;
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint
  /// label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with
  /// a taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);

  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);

  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  Align getShadowAlign(Align InstAlignment);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 Instruction *Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate
  /// load is __dfsan_load_label_and_origin. This function returns the union of
  /// all labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// StoreOriginSize).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  int NumOriginStores = 0;
};

class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);

private:
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);
};

} // end anonymous namespace

DataFlowSanitizer::DataFlowSanitizer(
    const std::vector<std::string> &ABIListFiles) {
  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
  llvm::append_range(AllABIListFiles, ClABIListFiles);
  // FIXME: should we propagate vfs::FileSystem to this constructor?
  ABIList.set(
      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
}

FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
  ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    RetType = StructType::get(RetType, PrimitiveShadowTy);
  return FunctionType::get(RetType, ArgTypes, T->isVarArg());
}
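
// For illustration, with 8-bit shadows (ShadowWidthBits == 8): an IA_Args
// function of type i64 (i32, float) becomes {i64, i8} (i32, float, i8, i8) -
// one trailing i8 shadow argument per parameter, with the return shadow
// packed into a struct alongside the original return value.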

FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
  assert(!T->isVarArg());
  SmallVector<Type *, 4> ArgTypes;
  ArgTypes.push_back(T->getPointerTo());
  ArgTypes.append(T->param_begin(), T->param_end());
  ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    ArgTypes.append(T->getNumParams(), OriginTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return FunctionType::get(T->getReturnType(), ArgTypes, false);
}
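
// For illustration, without origin tracking: for T = i32 (i32) the trampoline
// type is i32 (i32 (i32)*, i32, i8, i8*) - the wrapped function pointer, the
// original argument, its i8 shadow, and a pointer through which the callee's
// return shadow is written.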

TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes;

  // Some parameters of the custom function being constructed are
  // parameters of T. Record the mapping from parameters of T to
  // parameters of the custom function, so that parameter attributes
  // at call sites can be updated.
  std::vector<unsigned> ArgumentIndexMapping;
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
    Type *ParamType = T->getParamType(I);
    FunctionType *FT;
    if (isa<PointerType>(ParamType) &&
        (FT = dyn_cast<FunctionType>(ParamType->getPointerElementType()))) {
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
    } else {
      ArgumentIndexMapping.push_back(ArgTypes.size());
      ArgTypes.push_back(ParamType);
    }
  }
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
    ArgTypes.push_back(PrimitiveShadowTy);
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
      ArgTypes.push_back(OriginTy);
    if (T->isVarArg())
      ArgTypes.push_back(OriginPtrTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return TransformedFunction(
      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
      ArgumentIndexMapping);
}
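
// For illustration: for T = i32 (i32) the custom function type is
// i32 (i32, i8, i8*) with ArgumentIndexMapping == {0}; a function-pointer
// parameter would instead expand into a trampoline pointer plus an extra i8*
// argument, shifting the indices recorded in the mapping.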

bool DataFlowSanitizer::isZeroShadow(Value *V) {
  if (!shouldTrackFieldsAndIndices())
    return ZeroPrimitiveShadow == V;

  Type *T = V->getType();
  if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
      return CI->isZero();
    return false;
  }

  return isa<ConstantAggregateZero>(V);
}

bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
  uint64_t ShadowSize = Size * ShadowWidthBytes;
  return ShadowSize % 8 == 0 || ShadowSize == 4;
}
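
// For example, with ShadowWidthBytes == 1 this accepts 4-byte loads and any
// load covering a multiple of 8 shadow bytes (8, 16, 24, ...); other sizes
// fall back to the __dfsan_union_load runtime call instead of the inlined
// fast path.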

bool DataFlowSanitizer::shouldTrackOrigins() {
  static const bool ShouldTrackOrigins =
      ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
  return ShouldTrackOrigins;
}

bool DataFlowSanitizer::shouldTrackFieldsAndIndices() {
  return getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
}

Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
  if (!shouldTrackFieldsAndIndices())
    return ZeroPrimitiveShadow;

  if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
    return ZeroPrimitiveShadow;
  Type *ShadowTy = getShadowTy(OrigTy);
  return ConstantAggregateZero::get(ShadowTy);
}

Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
  return getZeroShadow(V->getType());
}

static Value *expandFromPrimitiveShadowRecursive(
    Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
    Value *PrimitiveShadow, IRBuilder<> &IRB) {
  if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
    return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);

  if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }

  if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }
  llvm_unreachable("Unexpected shadow type");
}

bool DFSanFunction::shouldInstrumentWithCall() {
  return ClInstrumentWithCallThreshold >= 0 &&
         NumOriginStores >= ClInstrumentWithCallThreshold;
}

Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                Instruction *Pos) {
  Type *ShadowTy = DFS.getShadowTy(T);

  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return PrimitiveShadow;

  if (DFS.isZeroShadow(PrimitiveShadow))
    return DFS.getZeroShadow(ShadowTy);

  IRBuilder<> IRB(Pos);
  SmallVector<unsigned, 4> Indices;
  Value *Shadow = UndefValue::get(ShadowTy);
  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
                                              PrimitiveShadow, IRB);

  // Caches the primitive shadow value that built the shadow value.
  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
  return Shadow;
}

template <class AggregateType>
Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                              IRBuilder<> &IRB) {
  if (!AT->getNumElements())
    return DFS.ZeroPrimitiveShadow;

  Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
  Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);

  for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
    Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
    Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
    Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
  }
  return Aggregator;
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                IRBuilder<> &IRB) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;
  if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
    return collapseAggregateShadow<>(AT, Shadow, IRB);
  if (StructType *ST = dyn_cast<StructType>(ShadowTy))
    return collapseAggregateShadow<>(ST, Shadow, IRB);
  llvm_unreachable("Unexpected shadow type");
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                Instruction *Pos) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  assert(DFS.shouldTrackFieldsAndIndices());

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}

Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
  if (!shouldTrackFieldsAndIndices())
    return PrimitiveShadowTy;

  if (!OrigTy->isSized())
    return PrimitiveShadowTy;
  if (isa<IntegerType>(OrigTy))
    return PrimitiveShadowTy;
  if (isa<VectorType>(OrigTy))
    return PrimitiveShadowTy;
  if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
    return ArrayType::get(getShadowTy(AT->getElementType()),
                          AT->getNumElements());
  if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
    SmallVector<Type *, 4> Elements;
    for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
      Elements.push_back(getShadowTy(ST->getElementType(I)));
    return StructType::get(*Ctx, Elements);
  }
  return PrimitiveShadowTy;
}

Type *DataFlowSanitizer::getShadowTy(Value *V) {
  return getShadowTy(V->getType());
}

bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  if (TargetTriple.getArch() != Triple::x86_64)
    report_fatal_error("unsupported architecture");
  MapParams = &Linux_X86_64_MemoryMapParams;

  Mod = &M;
  Ctx = &M.getContext();
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  return true;
}

bool DataFlowSanitizer::isInstrumented(const Function *F) {
  return !ABIList.isIn(*F, "uninstrumented");
}

bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
  return !ABIList.isIn(*GA, "uninstrumented");
}

DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
  return ClArgsABI ? IA_Args : IA_TLS;
}

DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
  if (ABIList.isIn(*F, "functional"))
    return WK_Functional;
  if (ABIList.isIn(*F, "discard"))
    return WK_Discard;
  if (ABIList.isIn(*F, "custom"))
    return WK_Custom;

  return WK_Warning;
}

void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
  GV->setName(GVName + Suffix);

  // Try to change the name of the function in module inline asm. We only do
  // this for specific asm directives, currently only ".symver", to try to
  // avoid corrupting asm which happens to contain the symbol name as a
  // substring. Note that the substitution for .symver assumes that the
  // versioned symbol also has an instrumented name.
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
    Pos = Asm.find("@");

    if (Pos == std::string::npos)
      report_fatal_error("unsupported .symver: " + Asm);

    Asm.replace(Pos, 1, Suffix + "@");
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}
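
// For illustration (hypothetical directive): module asm containing
// ".symver foo,foo@@VERS_1" is rewritten to ".symver foo.dfsan,foo.dfsan@@VERS_1"
// once foo is suffixed, keeping the version binding on the instrumented symbol.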

Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  NewF->removeRetAttrs(
      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack"));
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                     BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());

    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}

Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
                                                          StringRef FName) {
  FunctionType *FTT = getTrampolineFunctionType(FT);
  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
  Function *F = dyn_cast<Function>(C.getCallee());
  if (F && F->isDeclaration()) {
    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
    std::vector<Value *> Args;
    Function::arg_iterator AI = F->arg_begin() + 1;
    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
      Args.push_back(&*AI);
    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
    Type *RetType = FT->getReturnType();
    ReturnInst *RI = RetType->isVoidTy() ? ReturnInst::Create(*Ctx, BB)
                                         : ReturnInst::Create(*Ctx, CI, BB);

    // F is called by a wrapped custom function with primitive shadows. So
    // its arguments and return value need conversion.
    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
    ++ValAI;
    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
      Value *Shadow =
          DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
      DFSF.ValShadowMap[&*ValAI] = Shadow;
    }
    Function::arg_iterator RetShadowAI = ShadowAI;
    const bool ShouldTrackOrigins = shouldTrackOrigins();
    if (ShouldTrackOrigins) {
      ValAI = F->arg_begin();
      ++ValAI;
      Function::arg_iterator OriginAI = ShadowAI;
      if (!RetType->isVoidTy())
        ++OriginAI;
      for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++OriginAI, --N) {
        DFSF.ValOriginMap[&*ValAI] = &*OriginAI;
      }
    }
    DFSanVisitor(DFSF).visitCallInst(*CI);
    if (!RetType->isVoidTy()) {
      Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
          DFSF.getShadow(RI->getReturnValue()), RI);
      new StoreInst(PrimitiveShadow, &*RetShadowAI, RI);
      if (ShouldTrackOrigins) {
        Value *Origin = DFSF.getOrigin(RI->getReturnValue());
        new StoreInst(Origin, &*std::prev(F->arg_end()), RI);
      }
    }
  }

  return cast<Constant>(C.getCallee());
}

// Initializes DataFlowSanitizer runtime functions and declares them in the
// module.
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
  {
    AttributeList AL;
    AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
    AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanUnionLoadFn =
        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
    AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
        "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
  }
  DFSanUnimplementedFn =
      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    DFSanSetLabelFn =
        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
  }
  DFSanNonzeroLabelFn =
      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
                                                  DFSanVarargWrapperFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
                                                  DFSanChainOriginFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
        "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
  }
  DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
      "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);

  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
    DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
        "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
  }

  DFSanRuntimeFunctions.insert(
      DFSanUnionLoadFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanUnimplementedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanSetLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
}

// Initializes event callback functions and declares them in the module.
void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
                                                 DFSanLoadStoreCallbackFnTy);
  DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
                                                  DFSanLoadStoreCallbackFnTy);
  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
  DFSanCmpCallbackFn =
      Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
}

void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
  // These variables can be used:
  // - by the runtime (to discover what the shadow width was, during
  //   compilation)
  // - in testing (to avoid hardcoding the shadow width and type but instead
  //   extract them by pattern matching)
  Type *IntTy = Type::getInt32Ty(*Ctx);
  (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
    return new GlobalVariable(
        M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
        ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
  });
  (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
    return new GlobalVariable(M, IntTy, /*isConstant=*/true,
                              GlobalValue::WeakODRLinkage,
                              ConstantInt::get(IntTy, ShadowWidthBytes),
                              "__dfsan_shadow_width_bytes");
  });
}

bool DataFlowSanitizer::runImpl(Module &M) {
  initializeModule(M);

  if (ABIList.isIn(M, "skip"))
    return false;

  const unsigned InitialGlobalSize = M.global_size();
  const unsigned InitialModuleSize = M.size();

  bool Changed = false;

  auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
                                            Type *Ty) -> Constant * {
    Constant *C = Mod->getOrInsertGlobal(Name, Ty);
    if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
      Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
    }
    return C;
  };

  // These globals must be kept in sync with the ones in dfsan.cpp.
  ArgTLS =
      GetOrInsertGlobal("__dfsan_arg_tls",
                        ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
  RetvalTLS = GetOrInsertGlobal(
      "__dfsan_retval_tls",
      ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
  ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
  ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
  RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);

  (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
    Changed = true;
    return new GlobalVariable(
        M, OriginTy, true, GlobalValue::WeakODRLinkage,
        ConstantInt::getSigned(OriginTy,
                               shouldTrackOrigins() ? ClTrackOrigins : 0),
        "__dfsan_track_origins");
  });

  injectMetadataGlobals(M);

  initializeCallbackFunctions(M);
  initializeRuntimeFunctions(M);

  std::vector<Function *> FnsToInstrument;
  SmallPtrSet<Function *, 2> FnsWithNativeABI;
  for (Function &F : M)
    if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
      FnsToInstrument.push_back(&F);

  // Give function aliases prefixes when necessary, and build wrappers where
  // the instrumentedness is inconsistent.
  for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
       AI != AE;) {
    GlobalAlias *GA = &*AI;
    ++AI;
    // Don't stop on weak. We assume people aren't playing games with the
    // instrumentedness of overridden weak aliases.
    auto *F = dyn_cast<Function>(GA->getBaseObject());
    if (!F)
      continue;

    bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
    if (GAInst && FInst) {
      addGlobalNameSuffix(GA);
    } else if (GAInst != FInst) {
      // Non-instrumented alias of an instrumented function, or vice versa.
      // Replace the alias with a native-ABI wrapper of the aliasee. The pass
      // below will take care of instrumenting it.
      Function *NewF =
          buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
      GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
      NewF->takeName(GA);
      GA->eraseFromParent();
      FnsToInstrument.push_back(NewF);
    }
  }

  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
      .addAttribute(Attribute::ReadNone);

  // First, change the ABI of every function in the module. ABI-listed
  // functions keep their original ABI and get a wrapper function.
  for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
                                         FE = FnsToInstrument.end();
       FI != FE; ++FI) {
    Function &F = **FI;
    FunctionType *FT = F.getFunctionType();

    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
                              FT->getReturnType()->isVoidTy());

    if (isInstrumented(&F)) {
      // Instrumented functions get a '.dfsan' suffix. This allows us to more
      // easily identify cases of mismatching ABIs. This naming scheme is
      // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
        FunctionType *NewFT = getArgsFunctionType(FT);
        Function *NewF = Function::Create(NewFT, F.getLinkage(),
                                          F.getAddressSpace(), "", &M);
        NewF->copyAttributesFrom(&F);
        NewF->removeRetAttrs(
            AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
        for (Function::arg_iterator FArg = F.arg_begin(),
                                    NewFArg = NewF->arg_begin(),
                                    FArgEnd = F.arg_end();
             FArg != FArgEnd; ++FArg, ++NewFArg) {
          FArg->replaceAllUsesWith(&*NewFArg);
        }
        NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());

        for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
             UI != UE;) {
          BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
          ++UI;
          if (BA) {
            BA->replaceAllUsesWith(
                BlockAddress::get(NewF, BA->getBasicBlock()));
            delete BA;
          }
        }
        F.replaceAllUsesWith(
            ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
        NewF->takeName(&F);
        F.eraseFromParent();
        *FI = NewF;
        addGlobalNameSuffix(NewF);
      } else {
        addGlobalNameSuffix(&F);
      }
    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
      // Build a wrapper function for F. The wrapper simply calls F, and is
      // added to FnsToInstrument so that any instrumentation according to its
      // WrapperKind is done in the second pass below.
      FunctionType *NewFT =
          getInstrumentedABI() == IA_Args ? getArgsFunctionType(FT) : FT;

      // If the function being wrapped has local linkage, then preserve the
      // function's linkage in the wrapper function.
      GlobalValue::LinkageTypes WrapperLinkage =
          F.hasLocalLinkage() ? F.getLinkage()
                              : GlobalValue::LinkOnceODRLinkage;

      Function *NewF = buildWrapperFunction(
          &F,
          (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
              std::string(F.getName()),
          WrapperLinkage, NewFT);
      if (getInstrumentedABI() == IA_TLS)
        NewF->removeFnAttrs(ReadOnlyNoneAttrs);

      Value *WrappedFnCst =
          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
      F.replaceAllUsesWith(WrappedFnCst);

      UnwrappedFnMap[WrappedFnCst] = &F;
      *FI = NewF;

      if (!F.isDeclaration()) {
        // This function is probably defining an interposition of an
        // uninstrumented function and hence needs to keep the original ABI.
        // But any functions it may call need to use the instrumented ABI, so
        // we instrument it in a mode which preserves the original ABI.
        FnsWithNativeABI.insert(&F);

        // This code needs to rebuild the iterators, as they may be invalidated
        // by the push_back, taking care that the new range does not include
        // any functions added by this code.
        size_t N = FI - FnsToInstrument.begin(),
               Count = FE - FnsToInstrument.begin();
        FnsToInstrument.push_back(&F);
        FI = FnsToInstrument.begin() + N;
        FE = FnsToInstrument.begin() + Count;
      }
      // Hopefully, nobody will try to indirectly call a vararg
      // function... yet.
    } else if (FT->isVarArg()) {
      UnwrappedFnMap[&F] = &F;
      *FI = nullptr;
    }
  }
  for (Function *F : FnsToInstrument) {
    if (!F || F->isDeclaration())
      continue;

    removeUnreachableBlocks(*F);

    DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F));

    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
    // Build a copy of the list before iterating over it.
    SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));

    for (BasicBlock *BB : BBList) {
      Instruction *Inst = &BB->front();
      while (true) {
        // DFSanVisitor may split the current basic block, changing the
        // current instruction's next pointer and moving the next instruction
        // to the tail block from which we should continue.
        Instruction *Next = Inst->getNextNode();
        // DFSanVisitor may delete Inst, so keep track of whether it was a
        // terminator.
        bool IsTerminator = Inst->isTerminator();
        if (!DFSF.SkipInsts.count(Inst))
          DFSanVisitor(DFSF).visit(Inst);
        if (IsTerminator)
          break;
        Inst = Next;
      }
    }

    // We will not necessarily be able to compute the shadow for every phi
    // node until we have visited every block. Therefore, the code that
    // handles phi nodes adds them to the PHIFixups list so that they can be
    // properly handled later.
    for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
      for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
           ++Val) {
        P.ShadowPhi->setIncomingValue(
            Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
        if (P.OriginPhi)
          P.OriginPhi->setIncomingValue(
              Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
      }
    }
    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
    // places (i.e. instructions in basic blocks we haven't even begun
    // visiting yet). To make our life easier, do this work in a pass after
    // the main instrumentation.
    if (ClDebugNonzeroLabels) {
      for (Value *V : DFSF.NonZeroChecks) {
        Instruction *Pos;
        if (Instruction *I = dyn_cast<Instruction>(V))
          Pos = I->getNextNode();
        else
          Pos = &DFSF.F->getEntryBlock().front();
        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
          Pos = Pos->getNextNode();
        IRBuilder<> IRB(Pos);
        Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
        Value *Ne =
            IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
        IRBuilder<> ThenIRB(BI);
        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
      }
    }
  }

  return Changed || !FnsToInstrument.empty() ||
         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
}
Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
  Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
  if (ArgOffset)
    Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
  return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
                            "_dfsarg");
}

Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
  return IRB.CreatePointerCast(
      DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
}

Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }

Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
  return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
                                "_dfsarg_o");
}
Value *DFSanFunction::getOrigin(Value *V) {
  assert(DFS.shouldTrackOrigins());
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroOrigin;
  Value *&Origin = ValOriginMap[V];
  if (!Origin) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.ZeroOrigin;
      switch (IA) {
      case DataFlowSanitizer::IA_TLS: {
        if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
          Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
          IRBuilder<> IRB(ArgOriginTLSPos);
          Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
          Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
        } else {
          // Overflow: the remaining arguments get a zero origin.
          Origin = DFS.ZeroOrigin;
        }
        break;
      }
      case DataFlowSanitizer::IA_Args: {
        Origin = DFS.ZeroOrigin;
        break;
      }
      }
    } else {
      Origin = DFS.ZeroOrigin;
    }
  }
  return Origin;
}

void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
  if (!DFS.shouldTrackOrigins())
    return;
  assert(!ValOriginMap.count(I));
  assert(Origin->getType() == DFS.OriginTy);
  ValOriginMap[I] = Origin;
}
Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
  unsigned ArgOffset = 0;
  const DataLayout &DL = F->getParent()->getDataLayout();
  for (auto &FArg : F->args()) {
    if (!FArg.getType()->isSized()) {
      if (A == &FArg)
        break;
      continue;
    }

    unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
    if (A != &FArg) {
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
      if (ArgOffset > ArgTLSSize)
        break; // ArgTLS overflows, uses a zero shadow.
      continue;
    }

    if (ArgOffset + Size > ArgTLSSize)
      break; // ArgTLS overflows, uses a zero shadow.

    Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
    IRBuilder<> IRB(ArgTLSPos);
    Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
    return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
                                 ShadowTLSAlignment);
  }

  return DFS.getZeroShadow(A);
}
Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.getZeroShadow(V);
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.getZeroShadow(V);
      switch (IA) {
      case DataFlowSanitizer::IA_TLS: {
        Shadow = getShadowForTLSArgument(A);
        break;
      }
      case DataFlowSanitizer::IA_Args: {
        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
        Function::arg_iterator Arg = F->arg_begin();
        std::advance(Arg, ArgIdx);
        Shadow = &*Arg;
        assert(Shadow->getType() == DFS.PrimitiveShadowTy);
        break;
      }
      }
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.getZeroShadow(V);
    }
  }
  return Shadow;
}

void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  assert(DFS.shouldTrackFieldsAndIndices() ||
         Shadow->getType() == DFS.PrimitiveShadowTy);
  ValShadowMap[I] = Shadow;
}
/// Compute the integer shadow offset that corresponds to a given
/// application address.
///
/// Offset = (Addr & ~AndMask) ^ XorMask
Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
  assert(Addr != RetvalTLS && "Reinstrumenting?");
  Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);

  uint64_t AndMask = MapParams->AndMask;
  if (AndMask)
    OffsetLong =
        IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));

  uint64_t XorMask = MapParams->XorMask;
  if (XorMask)
    OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
  return OffsetLong;
}
std::pair<Value *, Value *>
DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
                                          Instruction *Pos) {
  // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
  IRBuilder<> IRB(Pos);
  Value *ShadowOffset = getShadowOffset(Addr, IRB);
  Value *ShadowLong = ShadowOffset;
  uint64_t ShadowBase = MapParams->ShadowBase;
  if (ShadowBase != 0) {
    ShadowLong =
        IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
  }
  IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  Value *ShadowPtr =
      IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
  Value *OriginPtr = nullptr;
  if (shouldTrackOrigins()) {
    Value *OriginLong = ShadowOffset;
    uint64_t OriginBase = MapParams->OriginBase;
    if (OriginBase != 0)
      OriginLong =
          IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
    const Align Alignment = llvm::assumeAligned(InstAlignment.value());
    // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
    // So Mask is unnecessary.
    if (Alignment < MinOriginAlignment) {
      uint64_t Mask = MinOriginAlignment.value() - 1;
      OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
    }
    OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
  }
  return std::make_pair(ShadowPtr, OriginPtr);
}
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
                                           Value *ShadowOffset) {
  IRBuilder<> IRB(Pos);
  return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
}

Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  Value *ShadowOffset = getShadowOffset(Addr, IRB);
  return getShadowAddress(Addr, Pos, ShadowOffset);
}

Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                                Instruction *Pos) {
  Value *PrimitiveValue = combineShadows(V1, V2, Pos);
  return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
}

// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value has a primitive type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
  if (DFS.isZeroShadow(V1))
    return collapseToPrimitiveShadow(V2, Pos);
  if (DFS.isZeroShadow(V2))
    return collapseToPrimitiveShadow(V1, Pos);
  if (V1 == V2)
    return collapseToPrimitiveShadow(V1, Pos);
  auto V1Elems = ShadowElements.find(V1);
  auto V2Elems = ShadowElements.find(V2);
  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
                      V2Elems->second.begin(), V2Elems->second.end())) {
      return collapseToPrimitiveShadow(V1, Pos);
    }
    if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
                      V1Elems->second.begin(), V1Elems->second.end())) {
      return collapseToPrimitiveShadow(V2, Pos);
    }
  } else if (V1Elems != ShadowElements.end()) {
    if (V1Elems->second.count(V2))
      return collapseToPrimitiveShadow(V1, Pos);
  } else if (V2Elems != ShadowElements.end()) {
    if (V2Elems->second.count(V1))
      return collapseToPrimitiveShadow(V2, Pos);
  }
  auto Key = std::make_pair(V1, V2);
  if (V1 > V2)
    std::swap(Key.first, Key.second);
  CachedShadow &CCS = CachedShadows[Key];
  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
    return CCS.Shadow;

  // Convert input shadows to shadows with primitive types.
  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);

  IRBuilder<> IRB(Pos);
  CCS.Block = Pos->getParent();
  CCS.Shadow = IRB.CreateOr(PV1, PV2);

  std::set<Value *> UnionElems;
  if (V1Elems != ShadowElements.end()) {
    UnionElems = V1Elems->second;
  } else {
    UnionElems.insert(V1);
  }
  if (V2Elems != ShadowElements.end()) {
    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
  } else {
    UnionElems.insert(V2);
  }
  ShadowElements[CCS.Shadow] = std::move(UnionElems);

  return CCS.Shadow;
}
// A convenience function which folds the shadows of each of the operands
// of the provided instruction Inst, inserting the IR before Inst. Returns
// the computed union Value.
Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
  if (Inst->getNumOperands() == 0)
    return DFS.getZeroShadow(Inst);

  Value *Shadow = getShadow(Inst->getOperand(0));
  for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);

  return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
}

void DFSanVisitor::visitInstOperands(Instruction &I) {
  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
  DFSF.setShadow(&I, CombinedShadow);
  visitInstOperandOrigins(I);
}
Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
                                     const std::vector<Value *> &Origins,
                                     Instruction *Pos, ConstantInt *Zero) {
  assert(Shadows.size() == Origins.size());
  size_t Size = Origins.size();
  if (Size == 0)
    return DFS.ZeroOrigin;
  Value *Origin = nullptr;
  if (!Zero)
    Zero = DFS.ZeroPrimitiveShadow;
  for (size_t I = 0; I != Size; ++I) {
    Value *OpOrigin = Origins[I];
    Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
    if (ConstOpOrigin && ConstOpOrigin->isNullValue())
      continue;
    if (!Origin) {
      Origin = OpOrigin;
      continue;
    }
    Value *OpShadow = Shadows[I];
    Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
    IRBuilder<> IRB(Pos);
    Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
    Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
  }
  return Origin ? Origin : DFS.ZeroOrigin;
}
Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
  size_t Size = Inst->getNumOperands();
  std::vector<Value *> Shadows(Size);
  std::vector<Value *> Origins(Size);
  for (unsigned I = 0; I != Size; ++I) {
    Shadows[I] = getShadow(Inst->getOperand(I));
    Origins[I] = getOrigin(Inst->getOperand(I));
  }
  return combineOrigins(Shadows, Origins, Inst);
}

void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
  if (!DFSF.DFS.shouldTrackOrigins())
    return;
  Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
  DFSF.setOrigin(&I, CombinedOrigin);
}

Align DFSanFunction::getShadowAlign(Align InstAlignment) {
  const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
  return Align(Alignment.value() * DFS.ShadowWidthBytes);
}

Align DFSanFunction::getOriginAlign(Align InstAlignment) {
  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  return Align(std::max(MinOriginAlignment, Alignment));
}
bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                  Align InstAlignment) {
  // When enabling tracking load instructions, we always use
  // __dfsan_load_label_and_origin to reduce code size.
  if (ClTrackOrigins == 2)
    return true;

  assert(Size != 0);
  // * if Size == 1, it is sufficient to load its origin aligned at 4.
  // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient
  //   to load its origin aligned at 4. If not, although origins may be lost,
  //   it should not happen very often.
  // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
  //   Size % 4 == 0, it is more efficient to load origins without callbacks.
  // * Otherwise we use __dfsan_load_label_and_origin.
  // This should ensure that common cases run efficiently.
  if (Size <= 2)
    return false;

  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
}
Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
                                         Value **OriginAddr) {
  IRBuilder<> IRB(Pos);
  *OriginAddr =
      IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
  return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
}
std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;

  assert(Size >= 4 && "Not large enough load size for fast path!");

  // Used for origin tracking.
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
  // but this function is only used in a subset of cases that make it possible
  // to optimize the instrumentation.
  //
  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
  // bytes per byte) is either:
  // - a multiple of 8  (common)
  // - equal to 4       (only for load32)
  //
  // For the second case, we can fit the wide shadow in a 32-bit integer. In
  // all other cases, we use a 64-bit integer to hold the wide shadow.
  Type *WideShadowTy =
      ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
  IRBuilder<> IRB(Pos);
  Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
  Value *CombinedWideShadow =
      IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);

  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;

  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
    if (BytesPerWideShadow > 4) {
      assert(BytesPerWideShadow == 8);
      // The wide shadow relates to two origin pointers: one for the first
      // four application bytes, and one for the latest four. We use a left
      // shift to get just the shadow bytes that correspond to the first
      // origin pointer, and then the entire shadow for the second origin
      // pointer (which will be chosen by combineOrigins() iff the
      // least-significant half of the wide shadow was empty but the other
      // half was not).
      Value *WideShadowLo = IRB.CreateShl(
          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
      Shadows.push_back(WideShadow);
      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));

      Shadows.push_back(WideShadowLo);
      Origins.push_back(Origin);
    } else {
      Shadows.push_back(WideShadow);
      Origins.push_back(Origin);
    }
  };

  if (ShouldTrackOrigins)
    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);
  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks)
  // linearly; then OR individual shadows within the combined WideShadow by
  // binary ORing. This is fewer instructions than ORing shadows individually,
  // since it needs logN shift/or instructions (N being the bytes of the
  // combined wide shadow).
  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
       ByteOfs += BytesPerWideShadow) {
    WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
                             ConstantInt::get(DFS.IntptrTy, 1));
    Value *NextWideShadow =
        IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
    if (ShouldTrackOrigins) {
      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
    }
  }
  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
       Width >>= 1) {
    Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
  }
  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
          ShouldTrackOrigins
              ? combineOrigins(Shadows, Origins, Pos,
                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
              : DFS.ZeroOrigin};
}
std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();

  // Non-escaped loads.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
      const auto OI = AllocaOriginMap.find(AI);
      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
      return {ShadowLI, ShouldTrackOrigins
                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
                            : nullptr};
    }
  }

  // Load from constant addresses.
  SmallVector<const Value *, 2> Objs;
  getUnderlyingObjects(Addr, Objs);
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  if (AllConstants)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  if (Size == 0)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Use callback to load if this is not an optimizable case for origin
  // tracking.
  if (ShouldTrackOrigins &&
      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
    IRBuilder<> IRB(Pos);
    CallInst *Call =
        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                        ConstantInt::get(DFS.IntptrTy, Size)});
    Call->addRetAttr(Attribute::ZExt);
    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
                            DFS.PrimitiveShadowTy),
            IRB.CreateTrunc(Call, DFS.OriginTy)};
  }
  // Other cases that support loading shadows or origins in a fast way.
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  const Align OriginAlign = getOriginAlign(InstAlignment);
  Value *Origin = nullptr;
  if (ShouldTrackOrigins) {
    IRBuilder<> IRB(Pos);
    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
  }

  // When the byte size is small enough, we can load the shadow directly with
  // just a few instructions.
  switch (Size) {
  case 1: {
    LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return {LI, Origin};
  }
  case 2: {
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    Value *Load =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
    Value *Load1 =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
    return {combineShadows(Load, Load1, Pos), Origin};
  }
  }

  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);

  if (HasSizeForFastPath)
    return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
                          OriginAlign, Origin, Pos);

  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addRetAttr(Attribute::ZExt);
  return {FallbackCall, Origin};
}
std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
                                                            uint64_t Size,
                                                            Align InstAlignment,
                                                            Instruction *Pos) {
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
  if (DFS.shouldTrackOrigins()) {
    if (ClTrackOrigins == 2) {
      IRBuilder<> IRB(Pos);
      auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
      if (!ConstantShadow || !ConstantShadow->isZeroValue())
        Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
    }
  }
  return {PrimitiveShadow, Origin};
}

static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}
&LI
) {
2203 auto &DL
= LI
.getModule()->getDataLayout();
2204 uint64_t Size
= DL
.getTypeStoreSize(LI
.getType());
2206 DFSF
.setShadow(&LI
, DFSF
.DFS
.getZeroShadow(&LI
));
2207 DFSF
.setOrigin(&LI
, DFSF
.DFS
.ZeroOrigin
);
2211 // When an application load is atomic, increase atomic ordering between
2212 // atomic application loads and stores to ensure happen-before order; load
2213 // shadow data after application data; store zero shadow data before
2214 // application data. This ensure shadow loads return either labels of the
2215 // initial application data or zeros.
2217 LI
.setOrdering(addAcquireOrdering(LI
.getOrdering()));
2219 Instruction
*Pos
= LI
.isAtomic() ? LI
.getNextNode() : &LI
;
2220 std::vector
<Value
*> Shadows
;
2221 std::vector
<Value
*> Origins
;
2222 Value
*PrimitiveShadow
, *Origin
;
2223 std::tie(PrimitiveShadow
, Origin
) =
2224 DFSF
.loadShadowOrigin(LI
.getPointerOperand(), Size
, LI
.getAlign(), Pos
);
2225 const bool ShouldTrackOrigins
= DFSF
.DFS
.shouldTrackOrigins();
2226 if (ShouldTrackOrigins
) {
2227 Shadows
.push_back(PrimitiveShadow
);
2228 Origins
.push_back(Origin
);
2230 if (ClCombinePointerLabelsOnLoad
) {
2231 Value
*PtrShadow
= DFSF
.getShadow(LI
.getPointerOperand());
2232 PrimitiveShadow
= DFSF
.combineShadows(PrimitiveShadow
, PtrShadow
, Pos
);
2233 if (ShouldTrackOrigins
) {
2234 Shadows
.push_back(PtrShadow
);
2235 Origins
.push_back(DFSF
.getOrigin(LI
.getPointerOperand()));
2238 if (!DFSF
.DFS
.isZeroShadow(PrimitiveShadow
))
2239 DFSF
.NonZeroChecks
.push_back(PrimitiveShadow
);
2242 DFSF
.expandFromPrimitiveShadow(LI
.getType(), PrimitiveShadow
, Pos
);
2243 DFSF
.setShadow(&LI
, Shadow
);
2245 if (ShouldTrackOrigins
) {
2246 DFSF
.setOrigin(&LI
, DFSF
.combineOrigins(Shadows
, Origins
, Pos
));
2249 if (ClEventCallbacks
) {
2250 IRBuilder
<> IRB(Pos
);
2251 Value
*Addr8
= IRB
.CreateBitCast(LI
.getPointerOperand(), DFSF
.DFS
.Int8Ptr
);
2252 IRB
.CreateCall(DFSF
.DFS
.DFSanLoadCallbackFn
, {PrimitiveShadow
, Addr8
});
Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {
  assert(DFS.shouldTrackOrigins());
  return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
}

Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
  if (!DFS.shouldTrackOrigins())
    return V;
  return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
}

Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  if (IntptrSize == OriginSize)
    return Origin;
  assert(IntptrSize == OriginSize * 2);
  Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
  return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
}
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      CurrentAlignment = IntptrAlignment;
    }
  }

  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
                                    const Twine &Name) {
  Type *VTy = V->getType();
  assert(VTy->isIntegerTy());
  if (VTy->getIntegerBitWidth() == 1)
    // Just converting a bool to a bool, so do nothing.
    return V;
  return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
}

void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
                                Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {
  // Do not write origins for zero shadows because we do not trace origins
  // for untainted sinks.
  const Align OriginAlignment = getOriginAlign(InstAlignment);
  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
  IRBuilder<> IRB(Pos);
  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
    if (!ConstantShadow->isZeroValue())
      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
                  OriginAlignment);
    return;
  }

  if (shouldInstrumentWithCall()) {
    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
                   {CollapsedShadow,
                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
  } else {
    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
    IRBuilder<> IRBNew(CheckTerm);
    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
                OriginAlignment);
  }
}
void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
                                             Align ShadowAlign,
                                             Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  IntegerType *ShadowTy =
      IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
  Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
  Value *ExtShadowAddr =
      IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
  IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
  // Do not write origins for 0 shadows because we do not trace origins for
  // untainted sinks.
}

void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;

  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(PrimitiveShadow, SI->second);

      // Do not write origins for 0 shadows because we do not trace origins
      // for untainted sinks.
      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
        const auto OI = AllocaOriginMap.find(AI);
        assert(OI != AllocaOriginMap.end() && Origin);
        IRB.CreateStore(Origin, OI->second);
      }
      return;
    }
  }

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  if (DFS.isZeroShadow(PrimitiveShadow)) {
    storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
    return;
  }

  IRBuilder<> IRB(Pos);
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const unsigned ShadowVecSize = 8;
  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
         "Shadow vector is too large!");
= 0;
2410 uint64_t LeftSize
= Size
;
2411 if (LeftSize
>= ShadowVecSize
) {
2413 FixedVectorType::get(DFS
.PrimitiveShadowTy
, ShadowVecSize
);
2414 Value
*ShadowVec
= UndefValue::get(ShadowVecTy
);
2415 for (unsigned I
= 0; I
!= ShadowVecSize
; ++I
) {
2416 ShadowVec
= IRB
.CreateInsertElement(
2417 ShadowVec
, PrimitiveShadow
,
2418 ConstantInt::get(Type::getInt32Ty(*DFS
.Ctx
), I
));
2420 Value
*ShadowVecAddr
=
2421 IRB
.CreateBitCast(ShadowAddr
, PointerType::getUnqual(ShadowVecTy
));
2423 Value
*CurShadowVecAddr
=
2424 IRB
.CreateConstGEP1_32(ShadowVecTy
, ShadowVecAddr
, Offset
);
2425 IRB
.CreateAlignedStore(ShadowVec
, CurShadowVecAddr
, ShadowAlign
);
2426 LeftSize
-= ShadowVecSize
;
2428 } while (LeftSize
>= ShadowVecSize
);
2429 Offset
*= ShadowVecSize
;
2431 while (LeftSize
> 0) {
2432 Value
*CurShadowAddr
=
2433 IRB
.CreateConstGEP1_32(DFS
.PrimitiveShadowTy
, ShadowAddr
, Offset
);
2434 IRB
.CreateAlignedStore(PrimitiveShadow
, CurShadowAddr
, ShadowAlign
);
2439 if (ShouldTrackOrigins
) {
2440 storeOrigin(Pos
, Addr
, Size
, PrimitiveShadow
, Origin
, OriginAddr
,
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return AtomicOrdering::Release;
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}

void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  Value *Val = SI.getValueOperand();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // When an application store is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (SI.isAtomic())
    SI.setOrdering(addReleaseOrdering(SI.getOrdering()));

  const bool ShouldTrackOrigins =
      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  Value *Shadow =
      SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);

  if (ShouldTrackOrigins) {
    Shadows.push_back(Shadow);
    Origins.push_back(DFSF.getOrigin(Val));
  }

  Value *PrimitiveShadow;
  if (ClCombinePointerLabelsOnStore) {
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
    }
    PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  } else {
    PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
  }
  Value *Origin = nullptr;
  if (ShouldTrackOrigins)
    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
                                  PrimitiveShadow, Origin, &SI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&SI);
    Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
  }
}
void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
  assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

  Value *Val = I.getOperand(1);
  const auto &DL = I.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // Conservatively set data at stored addresses and return with zero shadow
  // to prevent shadow data races.
  IRBuilder<> IRB(&I);
  Value *Addr = I.getOperand(0);
  const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
  DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
  DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}

void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}

void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}

void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}

void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
    // Special case: if this is the bitcast (there is exactly 1 allowed)
    // between a musttail call and a ret, don't instrument. New instructions
    // are not allowed after a musttail call.
    if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
      if (CI->isMustTailCall())
        return;
  }
  // TODO: handle musttail call returns for IA_Args.
  visitInstOperands(BCI);
}

void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }

void DFSanVisitor::visitCmpInst(CmpInst &CI) {
  visitInstOperands(CI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&CI);
    Value *CombinedShadow = DFSF.getShadow(&CI);
    IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
  }
}
void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For the C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}

void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  if (ClCombineOffsetLabelsOnGEP) {
    visitInstOperands(GEPI);
    return;
  }

  // Only propagate shadow/origin of base pointer value but ignore those of
  // offset operands.
  Value *BasePointer = GEPI.getPointerOperand();
  DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
  if (DFSF.DFS.shouldTrackOrigins())
    DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
}

void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}

void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}

void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
  if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
    visitInstOperands(I);
    return;
  }

  IRBuilder<> IRB(&I);
  Value *Agg = I.getAggregateOperand();
  Value *AggShadow = DFSF.getShadow(Agg);
  Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
  DFSF.setShadow(&I, ResShadow);
  visitInstOperandOrigins(I);
}

void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
  if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
    visitInstOperands(I);
    return;
  }

  IRBuilder<> IRB(&I);
  Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
  Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
  Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
  DFSF.setShadow(&I, Res);
  visitInstOperandOrigins(I);
}

void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
  bool AllLoadsStores = true;
  for (User *U : I.users()) {
    if (isa<LoadInst>(U))
      continue;

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getPointerOperand() == &I)
        continue;
    }

    AllLoadsStores = false;
    break;
  }
  if (AllLoadsStores) {
    IRBuilder<> IRB(&I);
    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
    if (DFSF.DFS.shouldTrackOrigins()) {
      DFSF.AllocaOriginMap[&I] =
          IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
    }
  }
  DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  if (isa<VectorType>(I.getCondition()->getType())) {
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  Value *ValShadow = DFSF.getShadow(I.getValue());
  Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
                         ? DFSF.getOrigin(I.getValue())
                         : DFSF.DFS.ZeroOrigin;
  IRB.CreateCall(
      DFSF.DFS.DFSanSetLabelFn,
      {ValShadow, ValOrigin,
       IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
       IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}

void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  if (ClPreserveAlignment) {
    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
  } else {
    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
  }
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
static bool isAMustTailRetVal(Value *RetVal) {
  // A musttail call may have a bitcast between the call and the return.
  if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
    RetVal = I->getOperand(0);
  }
  if (auto *I = dyn_cast<CallInst>(RetVal)) {
    return I->isMustTailCall();
  }
  return false;
}

void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    switch (DFSF.IA) {
    case DataFlowSanitizer::IA_TLS: {
      // Don't emit the instrumentation for musttail call returns.
      if (isAMustTailRetVal(RI.getReturnValue()))
        return;

      Value *S = DFSF.getShadow(RI.getReturnValue());
      IRBuilder<> IRB(&RI);
      Type *RT = DFSF.F->getFunctionType()->getReturnType();
      unsigned Size =
          getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
      if (Size <= RetvalTLSSize) {
        // If the size overflows, store nothing. At the callsite, oversized
        // return shadows are set to zero.
        IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
                               ShadowTLSAlignment);
      }
      if (DFSF.DFS.shouldTrackOrigins()) {
        Value *O = DFSF.getOrigin(RI.getReturnValue());
        IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
      }
      break;
    }
    case DataFlowSanitizer::IA_Args: {
      // TODO: handle musttail call returns for IA_Args.

      IRBuilder<> IRB(&RI);
      Type *RT = DFSF.F->getFunctionType()->getReturnType();
      Value *InsVal =
          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
      Value *InsShadow =
          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
      RI.setOperand(0, InsShadow);
      break;
    }
    }
  }
}
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}

void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    CB.setCalledFunction(&F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    CB.setCalledFunction(&F);
    visitInstOperands(CB);
    return true;
  case DataFlowSanitizer::WK_Custom: {
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // function.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }
    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Type *T = (*I)->getType();
      FunctionType *ParamFT;
      if (isa<PointerType>(T) &&
          (ParamFT = dyn_cast<FunctionType>(T->getPointerElementType()))) {
        std::string TName = "dfst";
        TName += utostr(FT->getNumParams() - N);
        TName += "$";
        TName += F.getName();
        Constant *Trampoline =
            DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
        Args.push_back(Trampoline);
        Args.push_back(
            IRB.CreateBitCast(*I, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
      } else {
        Args.push_back(*I);
      }
    }
    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));
    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                   FT->getReturnType(), LabelLoad, &CB));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();
    return true;
  }
  }
  return false;
}
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
    // Stores argument shadows.
    unsigned ArgOffset = 0;
    const DataLayout &DL = getDataLayout();
    for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
      if (ShouldTrackOrigins) {
        // Ignore overflowed origins.
        Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
        if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
            !DFSF.DFS.isZeroShadow(ArgShadow))
          IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                          DFSF.getArgOriginTLS(I, IRB));
      }

      unsigned Size =
          DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
      // Stop storing if arguments' size overflows. Inside a function,
      // arguments after overflow have zero shadow values.
      if (ArgOffset + Size > ArgTLSSize)
        break;
      IRB.CreateAlignedStore(
          DFSF.getShadow(CB.getArgOperand(I)),
          DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
          ShadowTLSAlignment);
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
    }
  }
  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
      // Don't emit the epilogue for musttail call returns.
      if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
        return;

      // Loads the return value shadow.
      IRBuilder<> NextIRB(Next);
      const DataLayout &DL = getDataLayout();
      unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
      if (Size > RetvalTLSSize) {
        // Set overflowed return shadow to be zero.
        DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
      } else {
        LoadInst *LI = NextIRB.CreateAlignedLoad(
            DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
            ShadowTLSAlignment, "_dfsret");
        DFSF.SkipInsts.insert(LI);
        DFSF.setShadow(&CB, LI);
        DFSF.NonZeroChecks.push_back(LI);
      }

      if (ShouldTrackOrigins) {
        LoadInst *LI = NextIRB.CreateLoad(
            DFSF.DFS.OriginTy, DFSF.getRetvalOriginTLS(), "_dfsret_o");
        DFSF.SkipInsts.insert(LI);
        DFSF.setOrigin(&CB, LI);
      }
    }
  }
  // Do all instrumentation for IA_Args down here to defer tampering with the
  // CFG in a way that SplitEdge may be able to detect.
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
    // TODO: handle musttail call returns for IA_Args.

    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
    Value *Func =
        IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));
    const unsigned NumParams = FT->getNumParams();

    // Copy original arguments.
    auto *ArgIt = CB.arg_begin(), *ArgEnd = CB.arg_end();
    std::vector<Value *> Args(NumParams);
    std::copy_n(ArgIt, NumParams, Args.begin());

    // Add shadow arguments by transforming original arguments.
    std::generate_n(std::back_inserter(Args), NumParams,
                    [&]() { return DFSF.getShadow(*ArgIt++); });
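    // Variadic arguments cannot get individual shadow parameters, so their
    // shadows are spilled to a stack array whose address is passed as one
    // extra argument.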
    if (FT->isVarArg()) {
      unsigned VarArgSize = CB.arg_size() - NumParams;
      ArrayType *VarArgArrayTy =
          ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
      AllocaInst *VarArgShadow =
          new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
                         "", &DFSF.F->getEntryBlock().front());
      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));

      // Copy remaining var args.
      unsigned GepIndex = 0;
      std::for_each(ArgIt, ArgEnd, [&](Value *Arg) {
        IRB.CreateStore(
            DFSF.getShadow(Arg),
            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, GepIndex++));
        Args.push_back(Arg);
      });
    }
    CallBase *NewCB;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    } else {
      NewCB = IRB.CreateCall(NewFT, Func, Args);
    }
    NewCB->setCallingConv(CB.getCallingConv());
    NewCB->setAttributes(CB.getAttributes().removeRetAttributes(
        *DFSF.DFS.Ctx, AttributeFuncs::typeIncompatible(NewCB->getType())));
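    // For non-void calls the instrumented callee returns a {value, shadow}
    // aggregate; extract both halves and rewire the old call's users to the
    // plain value.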
    if (Next) {
      ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
      DFSF.SkipInsts.insert(ExVal);
      ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
      DFSF.SkipInsts.insert(ExShadow);
      DFSF.setShadow(ExVal, ExShadow);
      DFSF.NonZeroChecks.push_back(ExShadow);

      CB.replaceAllUsesWith(ExVal);
    }

    CB.eraseFromParent();
  }
}
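// Phi nodes are given shadow (and origin) phis of their own. Incoming shadow
// values are not all available yet at this point, so undef placeholders are
// installed here and replaced via DFSF.PHIFixups once all blocks have been
// instrumented.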
void DFSanVisitor::visitPHINode(PHINode &PN) {
  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  PHINode *ShadowPN =
      PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
  // Give the shadow phi node valid predecessors to fool SplitEdge into
  // working.
  Value *UndefShadow = UndefValue::get(ShadowTy);
  for (BasicBlock *BB : PN.blocks())
    ShadowPN->addIncoming(UndefShadow, BB);

  DFSF.setShadow(&PN, ShadowPN);
  PHINode *OriginPN = nullptr;
  if (DFSF.DFS.shouldTrackOrigins()) {
    OriginPN =
        PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
    Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
    for (BasicBlock *BB : PN.blocks())
      OriginPN->addIncoming(UndefOrigin, BB);
    DFSF.setOrigin(&PN, OriginPN);
  }

  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
}
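// Legacy pass-manager wrapper. The new pass-manager entry point is
// DataFlowSanitizerPass::run below; both simply delegate to
// DataFlowSanitizer::runImpl.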
namespace {
class DataFlowSanitizerLegacyPass : public ModulePass {
private:
  std::vector<std::string> ABIListFiles;

public:
  static char ID;

  DataFlowSanitizerLegacyPass(
      const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
      : ModulePass(ID), ABIListFiles(ABIListFiles) {}

  bool runOnModule(Module &M) override {
    return DataFlowSanitizer(ABIListFiles).runImpl(M);
  }
};
} // end anonymous namespace

char DataFlowSanitizerLegacyPass::ID;
INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
                "DataFlowSanitizer: dynamic data flow analysis.", false, false)

ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
    const std::vector<std::string> &ABIListFiles) {
  return new DataFlowSanitizerLegacyPass(ABIListFiles);
}
PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
    return PreservedAnalyses::none();
  }
  return PreservedAnalyses::all();
}
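// Example client usage (hypothetical program, not part of this pass): code
// built with -fsanitize=dataflow can attach labels and observe propagation
// through the dfsan runtime interface. A minimal sketch, assuming the 8-bit
// label ABI where a label is one of eight bits:
//
//   #include <sanitizer/dfsan_interface.h>
//   #include <assert.h>
//
//   int main(void) {
//     int i = 100;
//     dfsan_label i_label = 1; // pick one of the 8 label bits
//     dfsan_set_label(i_label, &i, sizeof(i));
//     int j = i + 1; // the label propagates through the addition
//     assert(dfsan_has_label(dfsan_get_label(j), i_label));
//     return 0;
//   }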