//===- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own. Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// Argument and return value labels are passed through TLS variables
/// __dfsan_arg_tls and __dfsan_retval_tls.
///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
///
///  +--------------------+ 0x800000000000 (top of memory)
///  |    application 3   |
///  +--------------------+ 0x700000000000
///  |      invalid       |
///  +--------------------+ 0x610000000000
///  |      origin 1      |
///  +--------------------+ 0x600000000000
///  |    application 2   |
///  +--------------------+ 0x510000000000
///  |      shadow 1      |
///  +--------------------+ 0x500000000000
///  |      invalid       |
///  +--------------------+ 0x400000000000
///  |      origin 3      |
///  +--------------------+ 0x300000000000
///  |      shadow 3      |
///  +--------------------+ 0x200000000000
///  |      origin 2      |
///  +--------------------+ 0x110000000000
///  |      invalid       |
///  +--------------------+ 0x100000000000
///  |      shadow 2      |
///  +--------------------+ 0x010000000000
///  |    application 1   |
///  +--------------------+ 0x000000000000
///
///  MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
///  SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
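///
/// Worked example (illustrative arithmetic only, using the masks above): an
/// application address such as 0x700000001234 maps to shadow address
/// 0x700000001234 ^ 0x500000000000 == 0x200000001234 (in "shadow 3"), whose
/// origin slot is 0x200000001234 + 0x100000000000 == 0x300000001234, rounded
/// down to a 4-byte boundary (in "origin 3").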
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//
62 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/DepthFirstIterator.h"
66 #include "llvm/ADT/SmallPtrSet.h"
67 #include "llvm/ADT/SmallVector.h"
68 #include "llvm/ADT/StringRef.h"
69 #include "llvm/ADT/StringSet.h"
70 #include "llvm/ADT/iterator.h"
71 #include "llvm/Analysis/DomTreeUpdater.h"
72 #include "llvm/Analysis/GlobalsModRef.h"
73 #include "llvm/Analysis/TargetLibraryInfo.h"
74 #include "llvm/Analysis/ValueTracking.h"
75 #include "llvm/IR/Argument.h"
76 #include "llvm/IR/AttributeMask.h"
77 #include "llvm/IR/Attributes.h"
78 #include "llvm/IR/BasicBlock.h"
79 #include "llvm/IR/Constant.h"
80 #include "llvm/IR/Constants.h"
81 #include "llvm/IR/DataLayout.h"
82 #include "llvm/IR/DerivedTypes.h"
83 #include "llvm/IR/Dominators.h"
84 #include "llvm/IR/Function.h"
85 #include "llvm/IR/GlobalAlias.h"
86 #include "llvm/IR/GlobalValue.h"
87 #include "llvm/IR/GlobalVariable.h"
88 #include "llvm/IR/IRBuilder.h"
89 #include "llvm/IR/InstVisitor.h"
90 #include "llvm/IR/InstrTypes.h"
91 #include "llvm/IR/Instruction.h"
92 #include "llvm/IR/Instructions.h"
93 #include "llvm/IR/IntrinsicInst.h"
94 #include "llvm/IR/MDBuilder.h"
95 #include "llvm/IR/Module.h"
96 #include "llvm/IR/PassManager.h"
97 #include "llvm/IR/Type.h"
98 #include "llvm/IR/User.h"
99 #include "llvm/IR/Value.h"
100 #include "llvm/Support/Alignment.h"
101 #include "llvm/Support/Casting.h"
102 #include "llvm/Support/CommandLine.h"
103 #include "llvm/Support/ErrorHandling.h"
104 #include "llvm/Support/SpecialCaseList.h"
105 #include "llvm/Support/VirtualFileSystem.h"
106 #include "llvm/TargetParser/Triple.h"
107 #include "llvm/Transforms/Instrumentation.h"
108 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
109 #include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct. For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16. This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations for uninstrumented functions are
// "functional" and "discard", which are described below under
// DataFlowSanitizer::WrapperKind.
// Functions will often be labelled with both "uninstrumented" and one of
// "functional" or "discard". This will leave the function unchanged by this
// pass, and create a wrapper function that will call the original.
//
// Instrumented functions can also be annotated as "force_zero_labels", which
// will make all shadow and return values set zero labels.
// Functions should never be labelled with both "force_zero_labels" and
// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// store instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

static cl::list<std::string> ClCombineTaintLookupTables(
    "dfsan-combine-taint-lookup-table",
    cl::desc(
        "When dfsan-combine-offset-labels-on-gep and/or "
        "dfsan-combine-pointer-labels-on-load are false, this flag can "
        "be used to re-enable combining offset and/or pointer taint when "
        "loading specific constant global variables (i.e. lookup tables)."),
    cl::Hidden);

static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Experimental feature that inserts callbacks for data reaching a function,
// either via function arguments or loads.
// This must be true for dfsan_set_reaches_function_callback() to have effect.
static cl::opt<bool> ClReachesFunctionCallbacks(
    "dfsan-reaches-function-callbacks",
    cl::desc("Insert calls to callback functions on data reaching a function."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));

static StringRef getGlobalTypeString(const GlobalValue &G) {
  // Types of GlobalVariables are always pointer types.
  Type *GType = G.getValueType();
  // For now we support excluding struct types only.
  if (StructType *SGType = dyn_cast<StructType>(GType)) {
    if (!SGType->isLiteral())
      return SGType->getName();
  }
  return "<unknown type>";
}

namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {
  const uint64_t AndMask;
  const uint64_t XorMask;
  const uint64_t ShadowBase;
  const uint64_t OriginBase;
};

} // end anonymous namespace

// NOLINTBEGIN(readability-identifier-naming)
// aarch64 Linux
const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    0,               // AndMask (not used)
    0x0B00000000000, // XorMask
    0,               // ShadowBase (not used)
    0x0200000000000, // OriginBase
};

// x86_64 Linux
const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
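
// Illustrative example (arithmetic only, not used by the pass): with the
// x86_64 parameters above, Offset = (Addr & ~0) ^ 0x500000000000, so for an
// application address of 0x000000401000:
//   Shadow = 0 + (0x401000 ^ 0x500000000000) = 0x500000401000
//   Origin = (0x100000000000 + 0x500000401000) & ~3ULL = 0x600000401000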
// NOLINTEND(readability-identifier-naming)

// loongarch64 Linux
const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};

namespace {

class DFSanABIList {
  std::unique_ptr<SpecialCaseList> SCL;

public:
  DFSanABIList() = default;

  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }

  /// Returns whether either this function or its source file are listed in the
  /// given category.
  bool isIn(const Function &F, StringRef Category) const {
    return isIn(*F.getParent(), Category) ||
           SCL->inSection("dataflow", "fun", F.getName(), Category);
  }

  /// Returns whether this global alias is listed in the given category.
  ///
  /// If GA aliases a function, the alias's name is matched as a function name
  /// would be. Similarly, aliases of globals are matched like globals.
  bool isIn(const GlobalAlias &GA, StringRef Category) const {
    if (isIn(*GA.getParent(), Category))
      return true;

    if (isa<FunctionType>(GA.getValueType()))
      return SCL->inSection("dataflow", "fun", GA.getName(), Category);

    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
           SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
                          Category);
  }

  /// Returns whether this module is listed in the given category.
  bool isIn(const Module &M, StringRef Category) const {
    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
  }
};

/// TransformedFunction is used to express the result of transforming one
/// function type into another. This struct is immutable. It holds metadata
/// useful for updating calls of the old function to the new type.
struct TransformedFunction {
  TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
                      std::vector<unsigned> ArgumentIndexMapping)
      : OriginalType(OriginalType), TransformedType(TransformedType),
        ArgumentIndexMapping(ArgumentIndexMapping) {}

  // Disallow copies.
  TransformedFunction(const TransformedFunction &) = delete;
  TransformedFunction &operator=(const TransformedFunction &) = delete;

  // Allow moves.
  TransformedFunction(TransformedFunction &&) = default;
  TransformedFunction &operator=(TransformedFunction &&) = default;

  /// Type of the function before the transformation.
  FunctionType *OriginalType;

  /// Type of the function after the transformation.
  FunctionType *TransformedType;

  /// Transforming a function may change the position of arguments. This
  /// member records the mapping from each argument's old position to its new
  /// position. Argument positions are zero-indexed. If the transformation
  /// from F to F' made the first argument of F into the third argument of F',
  /// then ArgumentIndexMapping[0] will equal 2.
  std::vector<unsigned> ArgumentIndexMapping;
};

/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
static AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameters of the original function to the
  // transformed version. 'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
  }

  // Copy annotations on varargs arguments.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
  }

  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
                            CallSiteAttrs.getRetAttrs(),
                            llvm::ArrayRef(ArgumentAttributes));
}

class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };

  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled. Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function. This function may wrap the
    /// original function or provide its own implementation. WK_Custom uses an
    /// extra pointer argument to return the shadow. This allows the wrapped
    /// form of the function type to be expressed in C.
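    ///
    /// For example (illustrative, assuming origin tracking is off), a function
    ///   int f(int x);
    /// gets a custom wrapper declared in C as
    ///   int __dfsw_f(int x, dfsan_label x_label, dfsan_label *ret_label);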
    WK_Custom
  };

  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanWrapperExternWeakNullFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanConditionalCallbackFnTy;
  FunctionType *DFSanConditionalCallbackOriginFnTy;
  FunctionType *DFSanReachesFunctionCallbackFnTy;
  FunctionType *DFSanReachesFunctionCallbackOriginFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginTransferFnTy;
  FunctionType *DFSanMemShadowOriginConditionalExchangeFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanWrapperExternWeakNullFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanConditionalCallbackFn;
  FunctionCallee DFSanConditionalCallbackOriginFn;
  FunctionCallee DFSanReachesFunctionCallbackFn;
  FunctionCallee DFSanReachesFunctionCallbackOriginFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginTransferFn;
  FunctionCallee DFSanMemShadowOriginConditionalExchangeFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  DFSanABIList ABIList;
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttributeMask ReadOnlyNoneAttrs;
  StringSet<> CombineTaintLookupTableNames;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, Instruction *Pos);
  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  std::pair<Value *, Value *>
  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  bool isForceZeroLabels(const Function *F);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i8(0)
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i8
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M,
               llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI);
};

struct DFSanFunction {
  DataFlowSanitizer &DFS;
  Function *F;
  DominatorTree DT;
  bool IsNativeABI;
  bool IsForceZeroLabels;
  TargetLibraryInfo &TLI;
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels, TargetLibraryInfo &TLI)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels), TLI(TLI) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with a
  /// taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where Addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);

  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);

  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

  // If ClReachesFunctionCallbacks is enabled, insert a callback for each
  // argument and load instruction.
  void addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB, Instruction &I,
                                            Value *Data);

  bool isLookupTableConstant(Value *P);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 Instruction *Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
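  ///
  /// For example (arithmetic only): a 32-bit origin 0x12345678 widens to the
  /// 64-bit pattern 0x1234567812345678, letting a single 64-bit store paint
  /// two adjacent 4-byte origin slots at once.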
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// StoreOriginSize).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where Addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  int NumOriginStores = 0;
};

class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitLibAtomicLoad(CallBase &CB);
  void visitLibAtomicStore(CallBase &CB);
  void visitLibAtomicExchange(CallBase &CB);
  void visitLibAtomicCompareExchange(CallBase &CB);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB);
  Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB);
};

bool LibAtomicFunction(const Function &F) {
  // This is a bit of a hack because TargetLibraryInfo is a function pass.
  // The DFSan pass would need to be refactored to be function pass oriented
  // (like MSan is) in order to fit together nicely with TargetLibraryInfo.
  // We need this check to prevent them from being instrumented, or wrapped.
  // Match on name and number of arguments.
  if (!F.hasName() || F.isVarArg())
    return false;
  switch (F.arg_size()) {
  case 4:
    return F.getName() == "__atomic_load" || F.getName() == "__atomic_store";
  case 5:
    return F.getName() == "__atomic_exchange";
  case 6:
    return F.getName() == "__atomic_compare_exchange";
  default:
    return false;
  }
}

} // end anonymous namespace

DataFlowSanitizer::DataFlowSanitizer(
    const std::vector<std::string> &ABIListFiles) {
  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
  llvm::append_range(AllABIListFiles, ClABIListFiles);
  // FIXME: should we propagate vfs::FileSystem to this constructor?
  ABIList.set(
      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));

  for (StringRef v : ClCombineTaintLookupTables)
    CombineTaintLookupTableNames.insert(v);
}

TransformedFunction
DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
  SmallVector<Type *, 4> ArgTypes;

  // Some parameters of the custom function being constructed are
  // parameters of T. Record the mapping from parameters of T to
  // parameters of the custom function, so that parameter attributes
  // at call sites can be updated.
  std::vector<unsigned> ArgumentIndexMapping;
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
    Type *ParamType = T->getParamType(I);
    ArgumentIndexMapping.push_back(ArgTypes.size());
    ArgTypes.push_back(ParamType);
  }
  for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
    ArgTypes.push_back(PrimitiveShadowTy);
  if (T->isVarArg())
    ArgTypes.push_back(PrimitiveShadowPtrTy);
  Type *RetType = T->getReturnType();
  if (!RetType->isVoidTy())
    ArgTypes.push_back(PrimitiveShadowPtrTy);

  if (shouldTrackOrigins()) {
    for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
      ArgTypes.push_back(OriginTy);
    if (T->isVarArg())
      ArgTypes.push_back(OriginPtrTy);
    if (!RetType->isVoidTy())
      ArgTypes.push_back(OriginPtrTy);
  }

  return TransformedFunction(
      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
      ArgumentIndexMapping);
}
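
// Illustrative example (not used by the pass): for T = i32 (i32, float),
// non-variadic and with origin tracking off, the custom function type is
//   i32 (i32, float, i8, i8, i8*)
// i.e. the original parameters, then one primitive shadow per parameter, then
// a pointer through which the wrapper returns the return value's shadow.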

bool DataFlowSanitizer::isZeroShadow(Value *V) {
  Type *T = V->getType();
  if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
      return CI->isZero();
    return false;
  }

  return isa<ConstantAggregateZero>(V);
}

bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
  uint64_t ShadowSize = Size * ShadowWidthBytes;
  return ShadowSize % 8 == 0 || ShadowSize == 4;
}
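
// For example, with ShadowWidthBytes == 1 this admits loads of 4, 8, 16,
// 24, ... bytes; other sizes fall back to the __dfsan_union_load runtime call.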

bool DataFlowSanitizer::shouldTrackOrigins() {
  static const bool ShouldTrackOrigins = ClTrackOrigins;
  return ShouldTrackOrigins;
}

Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
  if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
    return ZeroPrimitiveShadow;
  Type *ShadowTy = getShadowTy(OrigTy);
  return ConstantAggregateZero::get(ShadowTy);
}

Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
  return getZeroShadow(V->getType());
}

static Value *expandFromPrimitiveShadowRecursive(
    Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
    Value *PrimitiveShadow, IRBuilder<> &IRB) {
  if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
    return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);

  if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }

  if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
    for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
      Indices.push_back(Idx);
      Shadow = expandFromPrimitiveShadowRecursive(
          Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
      Indices.pop_back();
    }
    return Shadow;
  }
  llvm_unreachable("Unexpected shadow type");
}

bool DFSanFunction::shouldInstrumentWithCall() {
  return ClInstrumentWithCallThreshold >= 0 &&
         NumOriginStores >= ClInstrumentWithCallThreshold;
}

Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                Instruction *Pos) {
  Type *ShadowTy = DFS.getShadowTy(T);

  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return PrimitiveShadow;

  if (DFS.isZeroShadow(PrimitiveShadow))
    return DFS.getZeroShadow(ShadowTy);

  IRBuilder<> IRB(Pos);
  SmallVector<unsigned, 4> Indices;
  Value *Shadow = UndefValue::get(ShadowTy);
  Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
                                              PrimitiveShadow, IRB);

  // Caches the primitive shadow value that built the shadow value.
  CachedCollapsedShadows[Shadow] = PrimitiveShadow;
  return Shadow;
}

template <class AggregateType>
Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                              IRBuilder<> &IRB) {
  if (!AT->getNumElements())
    return DFS.ZeroPrimitiveShadow;

  Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
  Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);

  for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
    Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
    Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
    Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
  }
  return Aggregator;
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                IRBuilder<> &IRB) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;
  if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
    return collapseAggregateShadow<>(AT, Shadow, IRB);
  if (StructType *ST = dyn_cast<StructType>(ShadowTy))
    return collapseAggregateShadow<>(ST, Shadow, IRB);
  llvm_unreachable("Unexpected shadow type");
}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                Instruction *Pos) {
  Type *ShadowTy = Shadow->getType();
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}

void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
                                                     Value *Condition) {
  if (!ClConditionalCallbacks) {
    return;
  }
  IRBuilder<> IRB(&I);
  Value *CondShadow = getShadow(Condition);
  CallInst *CI;
  if (DFS.shouldTrackOrigins()) {
    Value *CondOrigin = getOrigin(Condition);
    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
                        {CondShadow, CondOrigin});
  } else {
    CI = IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
  }
  CI->addParamAttr(0, Attribute::ZExt);
}

void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
                                                         Instruction &I,
                                                         Value *Data) {
  if (!ClReachesFunctionCallbacks) {
    return;
  }
  const DebugLoc &dbgloc = I.getDebugLoc();
  Value *DataShadow = collapseToPrimitiveShadow(getShadow(Data), IRB);
  ConstantInt *CILine;
  llvm::Value *FilePathPtr;

  if (dbgloc.get() == nullptr) {
    CILine = llvm::ConstantInt::get(I.getContext(), llvm::APInt(32, 0));
    FilePathPtr = IRB.CreateGlobalStringPtr(
        I.getFunction()->getParent()->getSourceFileName());
  } else {
    CILine = llvm::ConstantInt::get(I.getContext(),
                                    llvm::APInt(32, dbgloc.getLine()));
    FilePathPtr = IRB.CreateGlobalStringPtr(dbgloc->getFilename());
  }

  llvm::Value *FunctionNamePtr =
      IRB.CreateGlobalStringPtr(I.getFunction()->getName());

  CallInst *CB;
  std::vector<Value *> args;

  if (DFS.shouldTrackOrigins()) {
    Value *DataOrigin = getOrigin(Data);
    args = {DataShadow, DataOrigin, FilePathPtr, CILine, FunctionNamePtr};
    CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackOriginFn, args);
  } else {
    args = {DataShadow, FilePathPtr, CILine, FunctionNamePtr};
    CB = IRB.CreateCall(DFS.DFSanReachesFunctionCallbackFn, args);
  }
  CB->addParamAttr(0, Attribute::ZExt);
  CB->setDebugLoc(dbgloc);
}

Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
  if (!OrigTy->isSized())
    return PrimitiveShadowTy;
  if (isa<IntegerType>(OrigTy))
    return PrimitiveShadowTy;
  if (isa<VectorType>(OrigTy))
    return PrimitiveShadowTy;
  if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
    return ArrayType::get(getShadowTy(AT->getElementType()),
                          AT->getNumElements());
  if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
    SmallVector<Type *, 4> Elements;
    for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
      Elements.push_back(getShadowTy(ST->getElementType(I)));
    return StructType::get(*Ctx, Elements);
  }
  return PrimitiveShadowTy;
}
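
// For example (illustrative): getShadowTy({i64, [2 x float]}) is
// {i8, [2 x i8]}, while integers, vectors, and unsized types all map to the
// primitive shadow type i8.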

Type *DataFlowSanitizer::getShadowTy(Value *V) {
  return getShadowTy(V->getType());
}

bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  switch (TargetTriple.getArch()) {
  case Triple::aarch64:
    MapParams = &Linux_AArch64_MemoryMapParams;
    break;
  case Triple::x86_64:
    MapParams = &Linux_X86_64_MemoryMapParams;
    break;
  case Triple::loongarch64:
    MapParams = &Linux_LoongArch64_MemoryMapParams;
    break;
  default:
    report_fatal_error("unsupported architecture");
  }

  Mod = &M;
  Ctx = &M.getContext();
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
  DFSanWrapperExternWeakNullFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
                        /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), std::nullopt,
                                            /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanConditionalCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
      /*isVarArg=*/false);
  Type *DFSanReachesFunctionCallbackArgs[4] = {PrimitiveShadowTy, Int8Ptr,
                                               OriginTy, Int8Ptr};
  DFSanReachesFunctionCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanReachesFunctionCallbackOriginArgs[5] = {
      PrimitiveShadowTy, OriginTy, Int8Ptr, OriginTy, Int8Ptr};
  DFSanReachesFunctionCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanReachesFunctionCallbackOriginArgs,
      /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanMemShadowOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemShadowOriginTransferFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemShadowOriginTransferArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemShadowOriginConditionalExchangeArgs[5] = {
      IntegerType::get(*Ctx, 8), Int8Ptr, Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemShadowOriginConditionalExchangeFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemShadowOriginConditionalExchangeArgs,
      /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);

  return true;
}

bool DataFlowSanitizer::isInstrumented(const Function *F) {
  return !ABIList.isIn(*F, "uninstrumented");
}

bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
  return !ABIList.isIn(*GA, "uninstrumented");
}

bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
  return ABIList.isIn(*F, "force_zero_labels");
}

DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
  if (ABIList.isIn(*F, "functional"))
    return WK_Functional;
  if (ABIList.isIn(*F, "discard"))
    return WK_Discard;
  if (ABIList.isIn(*F, "custom"))
    return WK_Custom;

  return WK_Warning;
}

void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
  std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
  GV->setName(GVName + Suffix);

  // Try to change the name of the function in module inline asm. We only do
  // this for specific asm directives, currently only ".symver", to try to avoid
  // corrupting asm which happens to contain the symbol name as a substring.
  // Note that the substitution for .symver assumes that the versioned symbol
  // also has an instrumented name.
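  //
  // For example (illustrative): ".symver foo_impl,foo@@VERS_1" becomes
  // ".symver foo_impl.dfsan,foo.dfsan@@VERS_1" after foo_impl has been
  // renamed to foo_impl.dfsan.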
  std::string Asm = GV->getParent()->getModuleInlineAsm();
  std::string SearchStr = ".symver " + GVName + ",";
  size_t Pos = Asm.find(SearchStr);
  if (Pos != std::string::npos) {
    Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
    Pos = Asm.find('@');

    if (Pos == std::string::npos)
      report_fatal_error(Twine("unsupported .symver: ", Asm));

    Asm.replace(Pos, 1, Suffix + "@");
    GV->getParent()->setModuleInlineAsm(Asm);
  }
}

void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
                                                     Function *F) {
  // If the function we are wrapping was ExternWeak, it may be null.
  // The original code before calling this wrapper may have checked for null,
  // but replacing with a known-to-not-be-null wrapper can break this check.
  // When replacing uses of the extern weak function with the wrapper we try
  // to avoid replacing uses in conditionals, but this is not perfect.
  // In the case where we fail, and accidentally optimize out a null check
  // for an extern weak function, add a check here to help identify the issue.
  if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
    std::vector<Value *> Args;
    Args.push_back(IRB.CreatePointerCast(F, IRB.getInt8PtrTy()));
    Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
    IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
  }
}

Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  NewF->removeRetAttrs(
      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    NewF->removeFnAttr("split-stack");
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                     BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());

    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}

// Initialize DataFlowSanitizer runtime functions and declare them in the module
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
  LLVMContext &C = M.getContext();
  {
    AttributeList AL;
    AL = AL.addFnAttribute(C, Attribute::NoUnwind);
    AL = AL.addFnAttribute(
        C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));
    AL = AL.addRetAttribute(C, Attribute::ZExt);
    DFSanUnionLoadFn =
        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addFnAttribute(C, Attribute::NoUnwind);
    AL = AL.addFnAttribute(
        C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly()));
    AL = AL.addRetAttribute(C, Attribute::ZExt);
    DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
        "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
  }
  DFSanUnimplementedFn =
      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
  DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
      "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    DFSanSetLabelFn =
        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
  }
  DFSanNonzeroLabelFn =
      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
                                                  DFSanVarargWrapperFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
                                                  DFSanChainOriginFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
    AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
    DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
        "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
  }
  DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
      "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);

  DFSanMemShadowOriginTransferFn = Mod->getOrInsertFunction(
      "__dfsan_mem_shadow_origin_transfer", DFSanMemShadowOriginTransferFnTy);

  DFSanMemShadowOriginConditionalExchangeFn =
      Mod->getOrInsertFunction("__dfsan_mem_shadow_origin_conditional_exchange",
                               DFSanMemShadowOriginConditionalExchangeFnTy);

  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
    DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
        "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
  }

  DFSanRuntimeFunctions.insert(
      DFSanUnionLoadFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanUnimplementedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanSetLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanReachesFunctionCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanReachesFunctionCallbackOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemShadowOriginTransferFn.getCallee()->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMemShadowOriginConditionalExchangeFn.getCallee()
          ->stripPointerCasts());
  DFSanRuntimeFunctions.insert(
      DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
}

// Initializes event callback functions and declares them in the module
void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanLoadCallbackFn = Mod->getOrInsertFunction(
        "__dfsan_load_callback", DFSanLoadStoreCallbackFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanStoreCallbackFn = Mod->getOrInsertFunction(
        "__dfsan_store_callback", DFSanLoadStoreCallbackFnTy, AL);
  }
  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
                                                  DFSanCmpCallbackFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
        "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanConditionalCallbackOriginFn =
        Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
                                 DFSanConditionalCallbackOriginFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanReachesFunctionCallbackFn =
        Mod->getOrInsertFunction("__dfsan_reaches_function_callback",
                                 DFSanReachesFunctionCallbackFnTy, AL);
  }
  {
    AttributeList AL;
    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
    DFSanReachesFunctionCallbackOriginFn =
        Mod->getOrInsertFunction("__dfsan_reaches_function_callback_origin",
                                 DFSanReachesFunctionCallbackOriginFnTy, AL);
  }
}

bool DataFlowSanitizer::runImpl(
    Module &M, llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  initializeModule(M);

  if (ABIList.isIn(M, "skip"))
    return false;

  const unsigned InitialGlobalSize = M.global_size();
  const unsigned InitialModuleSize = M.size();

  bool Changed = false;

  auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
                                            Type *Ty) -> Constant * {
    Constant *C = Mod->getOrInsertGlobal(Name, Ty);
    if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
      Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
    }
    return C;
  };

  // These globals must be kept in sync with the ones in dfsan.cpp.
  ArgTLS =
      GetOrInsertGlobal("__dfsan_arg_tls",
                        ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
  RetvalTLS = GetOrInsertGlobal(
      "__dfsan_retval_tls",
      ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
  ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
  ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
  RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);

  (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
    Changed = true;
    return new GlobalVariable(
        M, OriginTy, true, GlobalValue::WeakODRLinkage,
        ConstantInt::getSigned(OriginTy,
                               shouldTrackOrigins() ? ClTrackOrigins : 0),
        "__dfsan_track_origins");
  });

  initializeCallbackFunctions(M);
  initializeRuntimeFunctions(M);

  std::vector<Function *> FnsToInstrument;
  SmallPtrSet<Function *, 2> FnsWithNativeABI;
  SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
  SmallPtrSet<Constant *, 1> PersonalityFns;
  for (Function &F : M)
    if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F) &&
        !LibAtomicFunction(F)) {
      FnsToInstrument.push_back(&F);
      if (F.hasPersonalityFn())
        PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
    }

  if (ClIgnorePersonalityRoutine) {
    for (auto *C : PersonalityFns) {
      assert(isa<Function>(C) && "Personality routine is not a function!");
      Function *F = cast<Function>(C);
      if (!isInstrumented(F))
        llvm::erase_value(FnsToInstrument, F);
    }
  }

  // Give function aliases prefixes when necessary, and build wrappers where
  // the instrumentedness is inconsistent.
  for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
    // Don't stop on weak. We assume people aren't playing games with the
    // instrumentedness of overridden weak aliases.
    auto *F = dyn_cast<Function>(GA.getAliaseeObject());
    if (!F)
      continue;

    bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
    if (GAInst && FInst) {
      addGlobalNameSuffix(&GA);
    } else if (GAInst != FInst) {
      // Non-instrumented alias of an instrumented function, or vice versa.
      // Replace the alias with a native-ABI wrapper of the aliasee. The pass
      // below will take care of instrumenting it.
      Function *NewF =
          buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
      GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
      NewF->takeName(&GA);
      GA.eraseFromParent();
      FnsToInstrument.push_back(NewF);
    }
  }

  // TODO: This could be more precise.
  ReadOnlyNoneAttrs.addAttribute(Attribute::Memory);

  // First, change the ABI of every function in the module. ABI-listed
  // functions keep their original ABI and get a wrapper function.
  for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
                                         FE = FnsToInstrument.end();
       FI != FE; ++FI) {
    Function &F = **FI;
    FunctionType *FT = F.getFunctionType();

    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
                              FT->getReturnType()->isVoidTy());

    if (isInstrumented(&F)) {
      if (isForceZeroLabels(&F))
        FnsWithForceZeroLabel.insert(&F);

      // Instrumented functions get a '.dfsan' suffix. This allows us to more
      // easily identify cases of mismatching ABIs. This naming scheme is
      // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
      addGlobalNameSuffix(&F);
    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
      // Build a wrapper function for F. The wrapper simply calls F, and is
      // added to FnsToInstrument so that any instrumentation according to its
      // WrapperKind is done in the second pass below.

      // If the function being wrapped has local linkage, then preserve the
      // function's linkage in the wrapper function.
      GlobalValue::LinkageTypes WrapperLinkage =
          F.hasLocalLinkage() ? F.getLinkage()
                              : GlobalValue::LinkOnceODRLinkage;

      Function *NewF = buildWrapperFunction(
          &F,
          (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
              std::string(F.getName()),
          WrapperLinkage, FT);
      NewF->removeFnAttrs(ReadOnlyNoneAttrs);

      Value *WrappedFnCst =
          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));

      // Extern weak functions can sometimes be null at execution time.
      // Code will sometimes check if an extern weak function is null.
      // This could look something like:
      //   declare extern_weak i8 @my_func(i8)
      //   br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
      //   label %avoid_my_func
      // The @"dfsw$my_func" wrapper is never null, so if we replace this use
      // in the comparison, the icmp will simplify to false and we have
      // accidentally optimized away a null check that is necessary.
      // This can lead to a crash when the null extern_weak my_func is called.
      //
      // To prevent (the most common pattern of) this problem,
      // do not replace uses in comparisons with the wrapper.
      // We definitely want to replace uses in call instructions.
      // Other uses (e.g. store the function address somewhere) might be
      // called or compared or both - this case may not be handled correctly.
      // We will default to replacing with wrapper in cases we are unsure.
      auto IsNotCmpUse = [](Use &U) -> bool {
        User *Usr = U.getUser();
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
          // This is the most common case for icmp ne null.
          if (CE->getOpcode() == Instruction::ICmp) {
            return false;
          }
        }
        if (Instruction *I = dyn_cast<Instruction>(Usr)) {
          if (I->getOpcode() == Instruction::ICmp) {
            return false;
          }
        }
        return true;
      };
      F.replaceUsesWithIf(WrappedFnCst, IsNotCmpUse);

      UnwrappedFnMap[WrappedFnCst] = &F;
      *FI = NewF;

      if (!F.isDeclaration()) {
        // This function is probably defining an interposition of an
        // uninstrumented function and hence needs to keep the original ABI.
        // But any functions it may call need to use the instrumented ABI, so
        // we instrument it in a mode which preserves the original ABI.
        FnsWithNativeABI.insert(&F);

        // This code needs to rebuild the iterators, as they may be invalidated
        // by the push_back, taking care that the new range does not include
        // any functions added by this code.
        size_t N = FI - FnsToInstrument.begin(),
               Count = FE - FnsToInstrument.begin();
        FnsToInstrument.push_back(&F);
        FI = FnsToInstrument.begin() + N;
        FE = FnsToInstrument.begin() + Count;
      }
      // Hopefully, nobody will try to indirectly call a vararg
      // function... yet.
    } else if (FT->isVarArg()) {
      UnwrappedFnMap[&F] = &F;
      *FI = nullptr;
    }
  }

  for (Function *F : FnsToInstrument) {
    if (!F || F->isDeclaration())
      continue;

    removeUnreachableBlocks(*F);

    DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
                       FnsWithForceZeroLabel.count(F), GetTLI(*F));

    if (ClReachesFunctionCallbacks) {
      // Add callback for arguments reaching this function.
      for (auto &FArg : F->args()) {
        Instruction *Next = &F->getEntryBlock().front();
        Value *FArgShadow = DFSF.getShadow(&FArg);
        if (isZeroShadow(FArgShadow))
          continue;
        if (Instruction *FArgShadowInst = dyn_cast<Instruction>(FArgShadow)) {
          Next = FArgShadowInst->getNextNode();
        }
        if (shouldTrackOrigins()) {
          if (Instruction *Origin =
                  dyn_cast<Instruction>(DFSF.getOrigin(&FArg))) {
            // Ensure IRB insertion point is after loads for shadow and origin.
            Instruction *OriginNext = Origin->getNextNode();
            if (Next->comesBefore(OriginNext)) {
              Next = OriginNext;
            }
          }
        }
        IRBuilder<> IRB(Next);
        DFSF.addReachesFunctionCallbacksIfEnabled(IRB, *Next, &FArg);
      }
    }

    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
    // Build a copy of the list before iterating over it.
    SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));

    for (BasicBlock *BB : BBList) {
      Instruction *Inst = &BB->front();
      while (true) {
        // DFSanVisitor may split the current basic block, changing the
        // current instruction's next pointer and moving the next instruction
        // to the tail block from which we should continue.
        Instruction *Next = Inst->getNextNode();
        // DFSanVisitor may delete Inst, so keep track of whether it was a
        // terminator.
        bool IsTerminator = Inst->isTerminator();
        if (!DFSF.SkipInsts.count(Inst))
          DFSanVisitor(DFSF).visit(Inst);
        if (IsTerminator)
          break;
        Inst = Next;
      }
    }

    // We will not necessarily be able to compute the shadow for every phi node
    // until we have visited every block. Therefore, the code that handles phi
    // nodes adds them to the PHIFixups list so that they can be properly
    // fixed up once we have visited every block.
    for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
      for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
           ++Val) {
        P.ShadowPhi->setIncomingValue(
            Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
        if (P.OriginPhi)
          P.OriginPhi->setIncomingValue(
              Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
      }
    }

    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
    // places (i.e. instructions in basic blocks we haven't even begun visiting
    // yet). To make our life easier, do this work in a pass after the main
    // instrumentation.
    if (ClDebugNonzeroLabels) {
      for (Value *V : DFSF.NonZeroChecks) {
        Instruction *Pos;
        if (Instruction *I = dyn_cast<Instruction>(V))
          Pos = I->getNextNode();
        else
          Pos = &DFSF.F->getEntryBlock().front();
        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
          Pos = Pos->getNextNode();
        IRBuilder<> IRB(Pos);
        Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
        Value *Ne =
            IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
        IRBuilder<> ThenIRB(BI);
        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
      }
    }
  }

  return Changed || !FnsToInstrument.empty() ||
         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
}
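
// The helpers below yield typed pointers into the TLS buffers declared in
// runImpl() (__dfsan_arg_tls, __dfsan_retval_tls and their origin
// counterparts). Argument shadows occupy ShadowTLSAlignment-aligned slots
// inside __dfsan_arg_tls, selected by ArgOffset.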

Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
  Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
  if (ArgOffset)
    Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
  return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
                            "_dfsarg");
}

Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
  return IRB.CreatePointerCast(
      DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
}

Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }

Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
  return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
                                "_dfsarg_o");
}
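
// getOrigin() lazily caches per-value origins in ValOriginMap. Argument
// origins are loaded from __dfsan_arg_origin_tls at function entry; arguments
// beyond NumOfElementsInArgOrgTLS fall back to the zero origin.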

Value *DFSanFunction::getOrigin(Value *V) {
  assert(DFS.shouldTrackOrigins());
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroOrigin;
  Value *&Origin = ValOriginMap[V];
  if (!Origin) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.ZeroOrigin;
      if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
        Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
        IRBuilder<> IRB(ArgOriginTLSPos);
        Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
        Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
      } else {
        // Overflow.
        Origin = DFS.ZeroOrigin;
      }
    } else {
      Origin = DFS.ZeroOrigin;
    }
  }
  return Origin;
}

void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
  if (!DFS.shouldTrackOrigins())
    return;
  assert(!ValOriginMap.count(I));
  assert(Origin->getType() == DFS.OriginTy);
  ValOriginMap[I] = Origin;
}
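
// getShadowForTLSArgument() below re-derives the caller-side layout of
// __dfsan_arg_tls: every preceding sized parameter advances the offset by its
// shadow size rounded up to ShadowTLSAlignment, and any slot that would run
// past ArgTLSSize degrades to a zero shadow.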

Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
  unsigned ArgOffset = 0;
  const DataLayout &DL = F->getParent()->getDataLayout();
  for (auto &FArg : F->args()) {
    if (!FArg.getType()->isSized()) {
      if (A == &FArg)
        break;
      continue;
    }

    unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
    if (A != &FArg) {
      ArgOffset += alignTo(Size, ShadowTLSAlignment);
      if (ArgOffset > ArgTLSSize)
        break; // ArgTLS overflows, uses a zero shadow.
      continue;
    }

    if (ArgOffset + Size > ArgTLSSize)
      break; // ArgTLS overflows, uses a zero shadow.

    Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
    IRBuilder<> IRB(ArgTLSPos);
    Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
    return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
                                 ShadowTLSAlignment);
  }

  return DFS.getZeroShadow(A);
}

Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.getZeroShadow(V);
  if (IsForceZeroLabels)
    return DFS.getZeroShadow(V);
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      if (IsNativeABI)
        return DFS.getZeroShadow(V);
      Shadow = getShadowForTLSArgument(A);
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.getZeroShadow(V);
    }
  }
  return Shadow;
}

void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  ValShadowMap[I] = Shadow;
}

/// Compute the integer shadow offset that corresponds to a given
/// application address.
///
/// Offset = (Addr & ~AndMask) ^ XorMask
Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
  assert(Addr != RetvalTLS && "Reinstrumenting?");
  Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);

  uint64_t AndMask = MapParams->AndMask;
  if (AndMask)
    OffsetLong =
        IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));

  uint64_t XorMask = MapParams->XorMask;
  if (XorMask)
    OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
  return OffsetLong;
}

std::pair<Value *, Value *>
DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
                                          Instruction *Pos) {
  // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
  IRBuilder<> IRB(Pos);
  Value *ShadowOffset = getShadowOffset(Addr, IRB);
  Value *ShadowLong = ShadowOffset;
  uint64_t ShadowBase = MapParams->ShadowBase;
  if (ShadowBase != 0) {
    ShadowLong =
        IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
  }
  IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  Value *ShadowPtr =
      IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
  Value *OriginPtr = nullptr;
  if (shouldTrackOrigins()) {
    Value *OriginLong = ShadowOffset;
    uint64_t OriginBase = MapParams->OriginBase;
    if (OriginBase != 0)
      OriginLong =
          IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
    const Align Alignment = llvm::assumeAligned(InstAlignment.value());
    // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
    // So Mask is unnecessary.
    if (Alignment < MinOriginAlignment) {
      uint64_t Mask = MinOriginAlignment.value() - 1;
      OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
    }
    OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
  }
  return std::make_pair(ShadowPtr, OriginPtr);
}

Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
                                           Value *ShadowOffset) {
  IRBuilder<> IRB(Pos);
  return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
}

Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  Value *ShadowOffset = getShadowOffset(Addr, IRB);
  return getShadowAddress(Addr, Pos, ShadowOffset);
}

Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                                Instruction *Pos) {
  Value *PrimitiveValue = combineShadows(V1, V2, Pos);
  return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
}

// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value has primitive type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
  if (DFS.isZeroShadow(V1))
    return collapseToPrimitiveShadow(V2, Pos);
  if (DFS.isZeroShadow(V2))
    return collapseToPrimitiveShadow(V1, Pos);
  if (V1 == V2)
    return collapseToPrimitiveShadow(V1, Pos);

  auto V1Elems = ShadowElements.find(V1);
  auto V2Elems = ShadowElements.find(V2);
  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
                      V2Elems->second.begin(), V2Elems->second.end())) {
      return collapseToPrimitiveShadow(V1, Pos);
    }
    if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
                      V1Elems->second.begin(), V1Elems->second.end())) {
      return collapseToPrimitiveShadow(V2, Pos);
    }
  } else if (V1Elems != ShadowElements.end()) {
    if (V1Elems->second.count(V2))
      return collapseToPrimitiveShadow(V1, Pos);
  } else if (V2Elems != ShadowElements.end()) {
    if (V2Elems->second.count(V1))
      return collapseToPrimitiveShadow(V2, Pos);
  }

  auto Key = std::make_pair(V1, V2);
  if (V1 > V2)
    std::swap(Key.first, Key.second);
  CachedShadow &CCS = CachedShadows[Key];
  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
    return CCS.Shadow;

  // Converts input shadows to shadows with primitive types.
  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);

  IRBuilder<> IRB(Pos);
  CCS.Block = Pos->getParent();
  CCS.Shadow = IRB.CreateOr(PV1, PV2);

  std::set<Value *> UnionElems;
  if (V1Elems != ShadowElements.end()) {
    UnionElems = V1Elems->second;
  } else {
    UnionElems.insert(V1);
  }
  if (V2Elems != ShadowElements.end()) {
    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
  } else {
    UnionElems.insert(V2);
  }
  ShadowElements[CCS.Shadow] = std::move(UnionElems);

  return CCS.Shadow;
}
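
// Example: after combining S1 and S2 into S3 = or(S1, S2), ShadowElements
// maps S3 to {S1, S2}; a later request to combine S3 with S1 (or S2) is
// answered with S3 directly instead of emitting a redundant 'or'.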

// A convenience function which folds the shadows of each of the operands
// of the provided instruction Inst, inserting the IR before Inst. Returns
// the computed union Value.
Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
  if (Inst->getNumOperands() == 0)
    return DFS.getZeroShadow(Inst);

  Value *Shadow = getShadow(Inst->getOperand(0));
  for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);

  return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
}

void DFSanVisitor::visitInstOperands(Instruction &I) {
  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
  DFSF.setShadow(&I, CombinedShadow);
  visitInstOperandOrigins(I);
}

Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
                                     const std::vector<Value *> &Origins,
                                     Instruction *Pos, ConstantInt *Zero) {
  assert(Shadows.size() == Origins.size());
  size_t Size = Origins.size();
  if (Size == 0)
    return DFS.ZeroOrigin;
  Value *Origin = nullptr;
  if (!Zero)
    Zero = DFS.ZeroPrimitiveShadow;
  for (size_t I = 0; I != Size; ++I) {
    Value *OpOrigin = Origins[I];
    Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
    if (ConstOpOrigin && ConstOpOrigin->isNullValue())
      continue;
    if (!Origin) {
      Origin = OpOrigin;
      continue;
    }
    Value *OpShadow = Shadows[I];
    Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
    IRBuilder<> IRB(Pos);
    Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
    Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
  }
  return Origin ? Origin : DFS.ZeroOrigin;
}
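
// Note: operands are folded left to right, so at run time the origin of the
// last operand whose shadow is nonzero wins; operands with constant-zero
// origins are skipped entirely.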

Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
  size_t Size = Inst->getNumOperands();
  std::vector<Value *> Shadows(Size);
  std::vector<Value *> Origins(Size);
  for (unsigned I = 0; I != Size; ++I) {
    Shadows[I] = getShadow(Inst->getOperand(I));
    Origins[I] = getOrigin(Inst->getOperand(I));
  }
  return combineOrigins(Shadows, Origins, Inst);
}

void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
  if (!DFSF.DFS.shouldTrackOrigins())
    return;
  Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
  DFSF.setOrigin(&I, CombinedOrigin);
}

Align DFSanFunction::getShadowAlign(Align InstAlignment) {
  const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
  return Align(Alignment.value() * DFS.ShadowWidthBytes);
}

Align DFSanFunction::getOriginAlign(Align InstAlignment) {
  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  return Align(std::max(MinOriginAlignment, Alignment));
}

bool DFSanFunction::isLookupTableConstant(Value *P) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
    if (GV->isConstant() && GV->hasName())
      return DFS.CombineTaintLookupTableNames.count(GV->getName());

  return false;
}

bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                  Align InstAlignment) {
  // When enabling tracking load instructions, we always use
  // __dfsan_load_label_and_origin to reduce code size.
  if (ClTrackOrigins == 2)
    return true;

  assert(Size != 0);
  // * if Size == 1, it is sufficient to load its origin aligned at 4.
  // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
  //   load its origin aligned at 4. If not, although origins may be lost, it
  //   should not happen very often.
  // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
  //   Size % 4 == 0, it is more efficient to load origins without callbacks.
  // * Otherwise we use __dfsan_load_label_and_origin.
  // This should ensure that common cases run efficiently.
  if (Size <= 2)
    return false;

  const Align Alignment = llvm::assumeAligned(InstAlignment.value());
  return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
}

Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
                                         Value **OriginAddr) {
  IRBuilder<> IRB(Pos);
  *OriginAddr =
      IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
  return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
}

std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;

  assert(Size >= 4 && "Not large enough load size for fast path!");

  // Used for origin tracking.
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
  // but this function is only used in a subset of cases that make it possible
  // to optimize the instrumentation.
  //
  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
  // per byte) is either:
  // - a multiple of 8 (common)
  // - equal to 4 (only for load32)
  //
  // For the second case, we can fit the wide shadow in a 32-bit integer. In
  // all other cases, we use a 64-bit integer to hold the wide shadow.
  Type *WideShadowTy =
      ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);

  IRBuilder<> IRB(Pos);
  Value *CombinedWideShadow =
      IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);

  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;

  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
    if (BytesPerWideShadow > 4) {
      assert(BytesPerWideShadow == 8);
      // The wide shadow relates to two origin pointers: one for the first four
      // application bytes, and one for the latest four. We use a left shift to
      // get just the shadow bytes that correspond to the first origin pointer,
      // and then the entire shadow for the second origin pointer (which will
      // be chosen by combineOrigins() iff the least-significant half of the
      // wide shadow was empty but the other half was not).
      Value *WideShadowLo = IRB.CreateShl(
          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
      Shadows.push_back(WideShadow);
      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));

      Shadows.push_back(WideShadowLo);
      Origins.push_back(Origin);
    } else {
      Shadows.push_back(WideShadow);
      Origins.push_back(Origin);
    }
  };

  if (ShouldTrackOrigins)
    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);

  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks)
  // linearly; then OR individual shadows within the combined WideShadow by
  // binary ORing. This is fewer instructions than ORing shadows individually,
  // since it needs logN shift/or instructions (N being the bytes of the
  // combined wide shadow).
  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
       ByteOfs += BytesPerWideShadow) {
    ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
                               ConstantInt::get(DFS.IntptrTy, 1));
    Value *NextWideShadow =
        IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
    if (ShouldTrackOrigins) {
      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
    }
  }
  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
       Width >>= 1) {
    Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
  }
  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
          ShouldTrackOrigins
              ? combineOrigins(Shadows, Origins, Pos,
                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
              : nullptr};
}
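
// For example, an 8-byte load with 8-bit shadow labels reads a single i64
// wide shadow; the shift/or reduction above then folds it at widths 32, 16
// and 8 before truncating to the primitive shadow type.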

std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();

  // Non-escaped loads.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
      const auto OI = AllocaOriginMap.find(AI);
      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
      return {ShadowLI, ShouldTrackOrigins
                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
                            : nullptr};
    }
  }

  // Load from constant addresses.
  SmallVector<const Value *, 2> Objs;
  getUnderlyingObjects(Addr, Objs);
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  if (AllConstants)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  if (Size == 0)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Use callback to load if this is not an optimizable case for origin
  // tracking.
  if (ShouldTrackOrigins &&
      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
    IRBuilder<> IRB(Pos);
    CallInst *Call =
        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                        ConstantInt::get(DFS.IntptrTy, Size)});
    Call->addRetAttr(Attribute::ZExt);
    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
                            DFS.PrimitiveShadowTy),
            IRB.CreateTrunc(Call, DFS.OriginTy)};
  }

  // Other cases that support loading shadows or origins in a fast way.
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  const Align OriginAlign = getOriginAlign(InstAlignment);
  Value *Origin = nullptr;
  if (ShouldTrackOrigins) {
    IRBuilder<> IRB(Pos);
    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
  }

  // When the byte size is small enough, we can load the shadow directly with
  // just a few instructions.
  switch (Size) {
  case 1: {
    LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return {LI, Origin};
  }
  case 2: {
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    Value *Load =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
    Value *Load1 =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
    return {combineShadows(Load, Load1, Pos), Origin};
  }
  }

  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);
  if (HasSizeForFastPath)
    return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
                          OriginAlign, Origin, Pos);

  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addRetAttr(Attribute::ZExt);
  return {FallbackCall, Origin};
}
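
// loadShadowOrigin() wraps the variant above: with -dfsan-track-origins=2 it
// additionally chains the loaded origin through
// __dfsan_chain_origin_if_tainted unless the shadow is known to be zero.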

std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
                                                            uint64_t Size,
                                                            Align InstAlignment,
                                                            Instruction *Pos) {
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
  if (DFS.shouldTrackOrigins()) {
    if (ClTrackOrigins == 2) {
      IRBuilder<> IRB(Pos);
      auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
      if (!ConstantShadow || !ConstantShadow->isZeroValue())
        Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
    }
  }
  return {PrimitiveShadow, Origin};
}

static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}

Value *StripPointerGEPsAndCasts(Value *V) {
  if (!V->getType()->isPointerTy())
    return V;

  // DFSan pass should be running on valid IR, but we'll
  // keep a seen set to ensure there are no issues.
  SmallPtrSet<const Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
      if (!V->getType()->isPointerTy())
        return V;
    } else if (isa<GlobalAlias>(V)) {
      V = cast<GlobalAlias>(V)->getAliasee();
    }
  } while (Visited.insert(V).second);

  return V;
}

void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
    return;
  }

  // When an application load is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (LI.isAtomic())
    LI.setOrdering(addAcquireOrdering(LI.getOrdering()));

  Instruction *AfterLi = LI.getNextNode();
  Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  if (ShouldTrackOrigins) {
    Shadows.push_back(PrimitiveShadow);
    Origins.push_back(Origin);
  }
  if (ClCombinePointerLabelsOnLoad ||
      DFSF.isLookupTableConstant(
          StripPointerGEPsAndCasts(LI.getPointerOperand()))) {
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
    }
  }
  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
    DFSF.NonZeroChecks.push_back(PrimitiveShadow);

  Value *Shadow =
      DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
  DFSF.setShadow(&LI, Shadow);

  if (ShouldTrackOrigins) {
    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
  }

  if (ClEventCallbacks) {
    IRBuilder<> IRB(Pos);
    Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    CallInst *CI =
        IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
    CI->addParamAttr(0, Attribute::ZExt);
  }

  IRBuilder<> IRB(AfterLi);
  DFSF.addReachesFunctionCallbacksIfEnabled(IRB, LI, &LI);
}

Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {
  assert(DFS.shouldTrackOrigins());
  return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
}

Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
  if (!DFS.shouldTrackOrigins())
    return V;
  return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
}

Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  if (IntptrSize == OriginSize)
    return Origin;
  assert(IntptrSize == OriginSize * 2);
  Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
  return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
}
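
// When the target's intptr is twice the origin width, the 32-bit origin is
// duplicated into both halves of the word so paintOrigin() can fill two
// origin slots per store.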

void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      CurrentAlignment = IntptrAlignment;
    }
  }

  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}

Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
                                    const Twine &Name) {
  Type *VTy = V->getType();
  assert(VTy->isIntegerTy());
  if (VTy->getIntegerBitWidth() == 1)
    // Just converting a bool to a bool, so do nothing.
    return V;
  return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
}

void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
                                Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {
  // Do not write origins for zero shadows because we do not trace origins for
  // untainted sinks.
  const Align OriginAlignment = getOriginAlign(InstAlignment);
  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
  IRBuilder<> IRB(Pos);
  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
    if (!ConstantShadow->isZeroValue())
      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
                  OriginAlignment);
    return;
  }

  if (shouldInstrumentWithCall()) {
    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
                   {CollapsedShadow,
                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
  } else {
    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
    IRBuilder<> IRBNew(CheckTerm);
    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
                OriginAlignment);
  }
}

void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
                                             Align ShadowAlign,
                                             Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  IntegerType *ShadowTy =
      IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
  Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
  Value *ExtShadowAddr =
      IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
  IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
  // Do not write origins for 0 shadows because we do not trace origins for
  // untainted sinks.
}

void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;

  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(PrimitiveShadow, SI->second);

      // Do not write origins for 0 shadows because we do not trace origins
      // for untainted sinks.
      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
        const auto OI = AllocaOriginMap.find(AI);
        assert(OI != AllocaOriginMap.end() && Origin);
        IRB.CreateStore(Origin, OI->second);
      }
      return;
    }
  }

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  if (DFS.isZeroShadow(PrimitiveShadow)) {
    storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
    return;
  }

  IRBuilder<> IRB(Pos);
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const unsigned ShadowVecSize = 8;
  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
         "Shadow vector is too large!");

  uint64_t Offset = 0;
  uint64_t LeftSize = Size;
  if (LeftSize >= ShadowVecSize) {
    auto *ShadowVecTy =
        FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
    Value *ShadowVec = PoisonValue::get(ShadowVecTy);
    for (unsigned I = 0; I != ShadowVecSize; ++I) {
      ShadowVec = IRB.CreateInsertElement(
          ShadowVec, PrimitiveShadow,
          ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
    }
    Value *ShadowVecAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
    do {
      Value *CurShadowVecAddr =
          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
      LeftSize -= ShadowVecSize;
      ++Offset;
    } while (LeftSize >= ShadowVecSize);
    Offset *= ShadowVecSize;
  }
  while (LeftSize > 0) {
    Value *CurShadowAddr =
        IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
    IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
    --LeftSize;
    ++Offset;
  }

  if (ShouldTrackOrigins) {
    storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
                InstAlignment);
  }
}
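
// The vectorized path above splats the primitive shadow into an 8-lane shadow
// vector and writes ShadowVecSize application bytes' worth of shadow per
// store; any trailing bytes are written one shadow at a time.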

static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return AtomicOrdering::Release;
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}

void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  Value *Val = SI.getValueOperand();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // When an application store is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensures shadow loads return either labels of the
  // initial application data or zeros.
  if (SI.isAtomic())
    SI.setOrdering(addReleaseOrdering(SI.getOrdering()));

  const bool ShouldTrackOrigins =
      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  Value *Shadow =
      SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);

  if (ShouldTrackOrigins) {
    Shadows.push_back(Shadow);
    Origins.push_back(DFSF.getOrigin(Val));
  }

  Value *PrimitiveShadow;
  if (ClCombinePointerLabelsOnStore) {
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
    }
    PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  } else {
    PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
  }
  Value *Origin = nullptr;
  if (ShouldTrackOrigins)
    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
                                  PrimitiveShadow, Origin, &SI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&SI);
    Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    CallInst *CI =
        IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
    CI->addParamAttr(0, Attribute::ZExt);
  }
}

void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
  assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));

  Value *Val = I.getOperand(1);
  const auto &DL = I.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  if (Size == 0)
    return;

  // Conservatively set data at stored addresses and return with zero shadow to
  // prevent shadow data races.
  IRBuilder<> IRB(&I);
  Value *Addr = I.getOperand(0);
  const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
  DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
  DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}

void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}

void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}

void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}

void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}

void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
  // Special case: if this is the bitcast (there is exactly 1 allowed) between
  // a musttail call and a ret, don't instrument. New instructions are not
  // allowed after a musttail call.
  if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
    if (CI->isMustTailCall())
      return;
  visitInstOperands(BCI);
}

void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }

void DFSanVisitor::visitCmpInst(CmpInst &CI) {
  visitInstOperands(CI);
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&CI);
    Value *CombinedShadow = DFSF.getShadow(&CI);
    CallInst *CallI =
        IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
    CallI->addParamAttr(0, Attribute::ZExt);
  }
}

void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For the C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}

void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  if (ClCombineOffsetLabelsOnGEP ||
      DFSF.isLookupTableConstant(
          StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {
    visitInstOperands(GEPI);
    return;
  }

  // Only propagate shadow/origin of base pointer value but ignore those of
  // offset operands.
  Value *BasePointer = GEPI.getPointerOperand();
  DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
  if (DFSF.DFS.shouldTrackOrigins())
    DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
}

void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}

void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}

void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}

void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
  IRBuilder<> IRB(&I);
  Value *Agg = I.getAggregateOperand();
  Value *AggShadow = DFSF.getShadow(Agg);
  Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
  DFSF.setShadow(&I, ResShadow);
  visitInstOperandOrigins(I);
}

void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
  IRBuilder<> IRB(&I);
  Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
  Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
  Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
  DFSF.setShadow(&I, Res);
  visitInstOperandOrigins(I);
}

void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
  bool AllLoadsStores = true;
  for (User *U : I.users()) {
    if (isa<LoadInst>(U))
      continue;

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getPointerOperand() == &I)
        continue;
    }

    AllLoadsStores = false;
    break;
  }
  if (AllLoadsStores) {
    IRBuilder<> IRB(&I);
    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
    if (DFSF.DFS.shouldTrackOrigins()) {
      DFSF.AllocaOriginMap[&I] =
          IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
    }
  }
  DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
  DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
}

void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());

  if (isa<VectorType>(I.getCondition()->getType())) {
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}

void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  Value *ValShadow = DFSF.getShadow(I.getValue());
  Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
                         ? DFSF.getOrigin(I.getValue())
                         : DFSF.DFS.ZeroOrigin;
  IRB.CreateCall(
      DFSF.DFS.DFSanSetLabelFn,
      {ValShadow, ValOrigin,
       IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
       IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}
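
// Note: memset instrumentation is delegated entirely to the __dfsan_set_label
// runtime function, which paints the value's label (and origin) across the
// destination range.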

void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
  MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}

void DFSanVisitor::visitBranchInst(BranchInst &BR) {
  if (!BR.isConditional())
    return;

  DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
}

void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}

static bool isAMustTailRetVal(Value *RetVal) {
  // A musttail call may have a bitcast between the call and the return.
  if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
    RetVal = I->getOperand(0);
  }
  if (auto *I = dyn_cast<CallInst>(RetVal)) {
    return I->isMustTailCall();
  }
  return false;
}
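
// No instructions may be inserted between a musttail call and its return, so
// visitReturnInst() below skips instrumentation for return values that come
// (possibly through a single bitcast) from a musttail call.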

void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    // Don't emit the instrumentation for musttail call returns.
    if (isAMustTailRetVal(RI.getReturnValue()))
      return;

    Value *S = DFSF.getShadow(RI.getReturnValue());
    IRBuilder<> IRB(&RI);
    Type *RT = DFSF.F->getFunctionType()->getReturnType();
    unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
    if (Size <= RetvalTLSSize) {
      // If the size overflows, store nothing; at the callsite, oversized
      // return shadows are set to zero.
      IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
    }
    if (DFSF.DFS.shouldTrackOrigins()) {
      Value *O = DFSF.getOrigin(RI.getReturnValue());
      IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
    }
  }
}

void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}

void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin.
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
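
// When origin tracking is enabled the wrapper is named __dfso_ instead and
// additionally receives the origin arguments built here; for the example
// above this is roughly
//   int __dfso_f(int a, int b, dfsan_label a_label, dfsan_label b_label,
//                dfsan_label *ret_label, dfsan_origin a_origin,
//                dfsan_origin b_origin, dfsan_origin *ret_origin);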

bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
  IRBuilder<> IRB(&CB);
  switch (DFSF.DFS.getWrapperKind(&F)) {
  case DataFlowSanitizer::WK_Warning:
    CB.setCalledFunction(&F);
    IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                   IRB.CreateGlobalStringPtr(F.getName()));
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Discard:
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
    return true;
  case DataFlowSanitizer::WK_Functional:
    CB.setCalledFunction(&F);
    DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
    visitInstOperands(CB);
    return true;
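
  // (The wrapper kind is driven by the ABI list: functions marked "discard",
  // "functional" or "custom" get the corresponding kind, and uninstrumented
  // functions with no annotation fall back to WK_Warning; see the design
  // document referenced at the top of this file.)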
  case DataFlowSanitizer::WK_Custom: {
    // Don't try to handle invokes of custom functions, it's too complicated.
    // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
    // function.
    CallInst *CI = dyn_cast<CallInst>(&CB);
    if (!CI)
      return false;

    const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
    FunctionType *FT = F.getFunctionType();
    TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
    std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
    CustomFName += F.getName();
    FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
        CustomFName, CustomFn.TransformedType);
    if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
      CustomFn->copyAttributesFrom(&F);

      // Custom functions returning non-void will write to the return label.
      if (!FT->getReturnType()->isVoidTy()) {
        CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
      }
    }

    std::vector<Value *> Args;

    // Adds non-variable arguments.
    auto *I = CB.arg_begin();
    for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
      Args.push_back(*I);
    }

    // Adds shadow arguments.
    const unsigned ShadowArgStart = Args.size();
    addShadowArguments(F, CB, Args, IRB);

    // Adds origin arguments.
    const unsigned OriginArgStart = Args.size();
    if (ShouldTrackOrigins)
      addOriginArguments(F, CB, Args, IRB);

    // Adds variable arguments.
    append_range(Args, drop_begin(CB.args(), FT->getNumParams()));

    CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
    CustomCI->setCallingConv(CI->getCallingConv());
    CustomCI->setAttributes(transformFunctionAttributes(
        CustomFn, CI->getContext(), CI->getAttributes()));
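
    // LabelReturnAlloca and OriginReturnAlloca are created at most once per
    // caller (in its entry block) and shared by all custom call sites; the
    // wrapper writes the return label/origin through these pointers and the
    // code below loads them back out.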

    // Update the parameter attributes of the custom call instruction to
    // zero extend the shadow parameters. This is required for targets
    // which consider PrimitiveShadowTy an illegal type.
    for (unsigned N = 0; N < FT->getNumParams(); N++) {
      const unsigned ArgNo = ShadowArgStart + N;
      if (CustomCI->getArgOperand(ArgNo)->getType() ==
          DFSF.DFS.PrimitiveShadowTy)
        CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
      if (ShouldTrackOrigins) {
        const unsigned OriginArgNo = OriginArgStart + N;
        if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
            DFSF.DFS.OriginTy)
          CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
      }
    }

    // Loads the return value shadow and origin.
    if (!FT->getReturnType()->isVoidTy()) {
      LoadInst *LabelLoad =
          IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
      DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                   FT->getReturnType(), LabelLoad, &CB));
      if (ShouldTrackOrigins) {
        LoadInst *OriginLoad =
            IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
        DFSF.setOrigin(CustomCI, OriginLoad);
      }
    }

    CI->replaceAllUsesWith(CustomCI);
    CI->eraseFromParent();

    return true;
  }
  }
  return false;
}
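
// End to end, the WK_Custom path rewrites a call such as
//   %r = call i32 @f(i32 %a, i32 %b)
// into a call of the wrapper assembled above, schematically
//   %r = call i32 @__dfsw_f(i32 %a, i32 %b, i8 zeroext %a_label,
//                           i8 zeroext %b_label, ptr %labelreturn)
// (types shown are illustrative; the exact signature comes from
// getCustomFunctionType).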

Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
  constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
  uint32_t OrderingTable[NumOrderings] = {};

  OrderingTable[(int)AtomicOrderingCABI::relaxed] =
      OrderingTable[(int)AtomicOrderingCABI::acquire] =
          OrderingTable[(int)AtomicOrderingCABI::consume] =
              (int)AtomicOrderingCABI::acquire;
  OrderingTable[(int)AtomicOrderingCABI::release] =
      OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
          (int)AtomicOrderingCABI::acq_rel;
  OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
      (int)AtomicOrderingCABI::seq_cst;

  return ConstantDataVector::get(IRB.getContext(),
                                 ArrayRef(OrderingTable, NumOrderings));
}
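
// The table strengthens each C ABI ordering to at least acquire:
// relaxed/consume/acquire map to acquire, release/acq_rel map to acq_rel,
// and seq_cst stays seq_cst. For example, a caller passing
// memory_order_relaxed (0 in the C ABI) ends up with memory_order_acquire
// (2) after the CreateExtractElement rewrite at the call site.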

void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {
  // Since we use getNextNode here, we can't have CB terminate the BB.
  assert(isa<CallInst>(CB));

  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *SrcPtr = CB.getArgOperand(1);
  Value *DstPtr = CB.getArgOperand(2);
  Value *Ordering = CB.getArgOperand(3);
  // Convert the call to have at least Acquire ordering to make sure
  // the shadow operations aren't reordered before it.
  Value *NewOrdering =
      IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
  CB.setArgOperand(3, NewOrdering);

  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  // TODO: Support ClCombinePointerLabelsOnLoad
  // TODO: Support ClEventCallbacks

  NextIRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
                     {NextIRB.CreatePointerCast(DstPtr, NextIRB.getInt8PtrTy()),
                      NextIRB.CreatePointerCast(SrcPtr, NextIRB.getInt8PtrTy()),
                      NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
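
// The operand positions above assume the generic libatomic entry point
//   void __atomic_load(size_t size, void *ptr, void *ret, int ordering);
// so after the call completes, labels are copied from the atomic object
// (SrcPtr) to the result buffer (DstPtr), mirroring the data movement.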

Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
  constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
  uint32_t OrderingTable[NumOrderings] = {};

  OrderingTable[(int)AtomicOrderingCABI::relaxed] =
      OrderingTable[(int)AtomicOrderingCABI::release] =
          (int)AtomicOrderingCABI::release;
  OrderingTable[(int)AtomicOrderingCABI::consume] =
      OrderingTable[(int)AtomicOrderingCABI::acquire] =
          OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
              (int)AtomicOrderingCABI::acq_rel;
  OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
      (int)AtomicOrderingCABI::seq_cst;

  return ConstantDataVector::get(IRB.getContext(),
                                 ArrayRef(OrderingTable, NumOrderings));
}

void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {
  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *SrcPtr = CB.getArgOperand(1);
  Value *DstPtr = CB.getArgOperand(2);
  Value *Ordering = CB.getArgOperand(3);
  // Convert the call to have at least Release ordering to make sure
  // the shadow operations aren't reordered after it.
  Value *NewOrdering =
      IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
  CB.setArgOperand(3, NewOrdering);

  // TODO: Support ClCombinePointerLabelsOnStore
  // TODO: Support ClEventCallbacks

  IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
                 {IRB.CreatePointerCast(DstPtr, IRB.getInt8PtrTy()),
                  IRB.CreatePointerCast(SrcPtr, IRB.getInt8PtrTy()),
                  IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
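
// Here the transfer is emitted before the (release-or-stronger) store so the
// labels reach shadow memory no later than the data itself; the operand
// positions assume a generic __atomic_store(size_t, void *, void *, int)
// entry point analogous to the load case above.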

void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {
  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int
  // ordering)
  IRBuilder<> IRB(&CB);
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *SrcPtr = CB.getArgOperand(2);
  Value *DstPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  // Current Target to Dest
  IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
                 {IRB.CreatePointerCast(DstPtr, IRB.getInt8PtrTy()),
                  IRB.CreatePointerCast(TargetPtr, IRB.getInt8PtrTy()),
                  IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});

  // Current Src to Target (overriding)
  IRB.CreateCall(DFSF.DFS.DFSanMemShadowOriginTransferFn,
                 {IRB.CreatePointerCast(TargetPtr, IRB.getInt8PtrTy()),
                  IRB.CreatePointerCast(SrcPtr, IRB.getInt8PtrTy()),
                  IRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}

void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {
  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  // void *desired, int success_order, int failure_order)
  Value *Size = CB.getArgOperand(0);
  Value *TargetPtr = CB.getArgOperand(1);
  Value *ExpectedPtr = CB.getArgOperand(2);
  Value *DesiredPtr = CB.getArgOperand(3);

  // This operation is not atomic for the shadow and origin memory.
  // This could result in DFSan false positives or false negatives.
  // For now we will assume these operations are rare, and
  // the additional complexity to address this is not warranted.

  IRBuilder<> NextIRB(CB.getNextNode());
  NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());

  DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));

  // If original call returned true, copy Desired to Target.
  // If original call returned false, copy Target to Expected.
  NextIRB.CreateCall(
      DFSF.DFS.DFSanMemShadowOriginConditionalExchangeFn,
      {NextIRB.CreateIntCast(&CB, NextIRB.getInt8Ty(), false),
       NextIRB.CreatePointerCast(TargetPtr, NextIRB.getInt8PtrTy()),
       NextIRB.CreatePointerCast(ExpectedPtr, NextIRB.getInt8PtrTy()),
       NextIRB.CreatePointerCast(DesiredPtr, NextIRB.getInt8PtrTy()),
       NextIRB.CreateIntCast(Size, DFSF.DFS.IntptrTy, false)});
}
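
// The first operand passes the call's boolean result to the runtime so it
// can replay the exchange on shadow/origin memory; assuming the dfsan
// runtime interface, the helper looks roughly like
//   void __dfsan_mem_shadow_origin_conditional_exchange(
//       uint8_t Exchanged, void *Target, void *Expected, void *Desired,
//       size_t Size);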

void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  LibFunc LF;
  if (DFSF.TLI.getLibFunc(CB, LF)) {
    // libatomic.a functions need to have special handling because there isn't
    // a good way to intercept them or compile the library with
    // instrumentation.
    switch (LF) {
    case LibFunc_atomic_load:
      if (!isa<CallInst>(CB)) {
        llvm::errs() << "DFSAN -- cannot instrument invoke of libatomic load. "
                        "Ignoring!\n";
        break;
      }
      visitLibAtomicLoad(CB);
      return;
    case LibFunc_atomic_store:
      visitLibAtomicStore(CB);
      return;
    default:
      break;
    }
  }

  // TODO: These are not supported by TLI? They are not in the enum.
  if (F && F->hasName() && !F->isVarArg()) {
    if (F->getName() == "__atomic_exchange") {
      visitLibAtomicExchange(CB);
      return;
    }
    if (F->getName() == "__atomic_compare_exchange") {
      visitLibAtomicCompareExchange(CB);
      return;
    }
  }

  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);

  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  const DataLayout &DL = getDataLayout();

  // Stores argument shadows.
  unsigned ArgOffset = 0;
  for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
    if (ShouldTrackOrigins) {
      // Ignore overflowed origins.
      Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
      if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
          !DFSF.DFS.isZeroShadow(ArgShadow))
        IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                        DFSF.getArgOriginTLS(I, IRB));
    }

    unsigned Size =
        DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
    // Stop storing if arguments' size overflows. Inside a function, arguments
    // after overflow have zero shadow values.
    if (ArgOffset + Size > ArgTLSSize)
      break;
    IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
                           DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
                           ShadowTLSAlignment);
    ArgOffset += alignTo(Size, ShadowTLSAlignment);
  }
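
  // Argument shadows are packed into __dfsan_arg_tls at offsets rounded up
  // to ShadowTLSAlignment. For example, with the 8-bit primitive shadow and
  // the 2-byte ShadowTLSAlignment defined earlier in this file, a call
  // f(i64, i32) stores a one-byte shadow at offset 0 and the next one at
  // offset 2.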

  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // Loads the return value shadow.
    IRBuilder<> NextIRB(Next);
    unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
    if (Size > RetvalTLSSize) {
      // Set overflowed return shadow to be zero.
      DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    } else {
      LoadInst *LI = NextIRB.CreateAlignedLoad(
          DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
          ShadowTLSAlignment, "_dfsret");
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(&CB, LI);
      DFSF.NonZeroChecks.push_back(LI);
    }

    if (ShouldTrackOrigins) {
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
                                        DFSF.getRetvalOriginTLS(), "_dfsret_o");
      DFSF.SkipInsts.insert(LI);
      DFSF.setOrigin(&CB, LI);
    }

    DFSF.addReachesFunctionCallbacksIfEnabled(NextIRB, CB, &CB);
  }
}
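
// The "_dfsret" loads created in visitCallBase are recorded in SkipInsts so
// the visitor does not instrument its own instrumentation, and in
// NonZeroChecks so that -dfsan-debug-nonzero-labels can flag unexpected
// taint on return values.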

void DFSanVisitor::visitPHINode(PHINode &PN) {
  Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
  PHINode *ShadowPN =
      PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);

  // Give the shadow phi node valid predecessors to fool SplitEdge into
  // working.
  Value *UndefShadow = UndefValue::get(ShadowTy);
  for (BasicBlock *BB : PN.blocks())
    ShadowPN->addIncoming(UndefShadow, BB);

  DFSF.setShadow(&PN, ShadowPN);

  PHINode *OriginPN = nullptr;
  if (DFSF.DFS.shouldTrackOrigins()) {
    OriginPN =
        PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
    Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
    for (BasicBlock *BB : PN.blocks())
      OriginPN->addIncoming(UndefOrigin, BB);
    DFSF.setOrigin(&PN, OriginPN);
  }

  DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
}
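
// The {PN, ShadowPN, OriginPN} triples pushed onto PHIFixups are revisited
// once every instruction has a shadow; the undef placeholders installed
// above are then replaced with the real shadow (and origin) of each
// incoming value.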

PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  auto GetTLI = [&](Function &F) -> TargetLibraryInfo & {
    auto &FAM =
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };
  if (!DataFlowSanitizer(ABIListFiles).runImpl(M, GetTLI))
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  // GlobalsAA is considered stateless and does not get invalidated unless
  // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
  // make changes that require GlobalsAA to be invalidated.
  PA.abandon<GlobalsAA>();
  return PA;
}
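
// For reference, this pass is normally reached via clang's
// -fsanitize=dataflow driver flag, or directly through the new pass manager,
// e.g. `opt -passes=dfsan` (pass name as registered in PassRegistry.def).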