//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the instrumentation pass for the numerical sanitizer.
// Conceptually the pass injects shadow computations using higher precision
// types and inserts consistency checks. For details see the paper
// https://arxiv.org/abs/2102.12782.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cstdint>
using namespace llvm;

#define DEBUG_TYPE "nsan"

STATISTIC(NumInstrumentedFTLoads,
          "Number of instrumented floating-point loads");
STATISTIC(NumInstrumentedFTCalls,
          "Number of instrumented floating-point calls");
STATISTIC(NumInstrumentedFTRets,
          "Number of instrumented floating-point returns");
STATISTIC(NumInstrumentedFTStores,
          "Number of instrumented floating-point stores");
STATISTIC(NumInstrumentedNonFTStores,
          "Number of instrumented non floating-point stores");
STATISTIC(
    NumInstrumentedNonFTMemcpyStores,
    "Number of instrumented non floating-point stores with memcpy semantics");
STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
// Using smaller shadow types can help improve speed. For example, `dlq` is 3x
// slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
// `dqq`.
static cl::opt<std::string> ClShadowMapping(
    "nsan-shadow-type-mapping", cl::init("dqq"),
    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
             "ppc_fp128 (extended double) respectively. The default is to "
             "shadow `float` as `double`, and `double` and `x86_fp80` as "
             "`fp128`"),
    cl::Hidden);
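// For example, the mapping `dlq` shadows `float` as `double`, `double` as
// `x86_fp80`, and `long double` as `fp128`.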
static cl::opt<bool>
    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
                     cl::desc("Instrument floating-point comparisons"),
                     cl::Hidden);

static cl::opt<std::string> ClCheckFunctionsFilter(
    "check-functions-filter",
    cl::desc("Only emit checks for arguments of functions "
             "whose names match the given regular expression"),
    cl::value_desc("regex"));
static cl::opt<bool> ClTruncateFCmpEq(
    "nsan-truncate-fcmp-eq", cl::init(true),
    cl::desc(
        "This flag controls the behaviour of fcmp equality comparisons. "
        "For equality comparisons such as `x == 0.0f`, we can perform the "
        "shadow check in the shadow (`(x_shadow == 0.0) == (x == 0.0f)`) or "
        "app domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
        "catch the case when `x_shadow` is accurate enough (and therefore "
        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
        "both `x` and `x_shadow` are not"),
    cl::Hidden);
// When there is external, uninstrumented code writing to memory, the shadow
// memory can get out of sync with the application memory. Enabling this flag
// emits consistency checks for loads to catch this situation.
// When everything is instrumented, this is not strictly necessary because any
// load should have a corresponding store, but it can help debug cases when the
// framework did a bad job at tracking shadow memory modifications by failing on
// load rather than store.
// TODO: provide a way to resume computations from the FT value when the load
// is inconsistent. This ensures that further computations are not polluted.
static cl::opt<bool> ClCheckLoads("nsan-check-loads",
                                  cl::desc("Check floating-point loads"),
                                  cl::Hidden);

static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
                                   cl::desc("Check floating-point stores"),
                                   cl::Hidden);

static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
                                cl::desc("Check floating-point return values"),
                                cl::Hidden);
// LLVM may store constant floats as bitcasted ints.
// It's not really necessary to shadow such stores:
// if the shadow value is unknown, the framework will re-extend it on load
// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
// impossible to determine the floating-point type based on the size.
// However, for debugging purposes it can be useful to model such stores.
static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
    "nsan-propagate-non-ft-const-stores-as-ft",
    cl::desc(
        "Propagate non floating-point const stores as floating point values. "
        "For debugging purposes only"),
    cl::Hidden);

constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
constexpr StringLiteral kNsanInitName("__nsan_init");

// The following values must be kept in sync with the runtime.
constexpr int kShadowScale = 2;
constexpr int kMaxVectorWidth = 8;
constexpr int kMaxNumArgs = 128;
constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
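// Note: with `kShadowScale == 2`, each application FP byte is backed by two
// shadow bytes; MappingConfig below rejects any mapping whose shadow type
// would not fit in that budget (e.g. fp128 exactly fills the slot of an
// 8-byte double).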
namespace {

// Defines the characteristics (type id, type, and floating-point semantics)
// attached for all possible shadow types.
class ShadowTypeConfig {
public:
  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);

  // The LLVM Type corresponding to the shadow type.
  virtual Type *getType(LLVMContext &Context) const = 0;

  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
  virtual char getNsanTypeId() const = 0;

  virtual ~ShadowTypeConfig() = default;
};
template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig {
public:
  char getNsanTypeId() const override { return NsanTypeId; }
  static constexpr const char kNsanTypeId = NsanTypeId;
};

// `double` (`d`) shadow type.
class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getDoubleTy(Context);
  }
};

// `x86_fp80` (`l`) shadow type: X86 long double.
class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getX86_FP80Ty(Context);
  }
};

// `fp128` (`q`) shadow type.
class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getFP128Ty(Context);
  }
};

// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getPPC_FP128Ty(Context);
  }
};
// Creates a ShadowTypeConfig given its type id.
std::unique_ptr<ShadowTypeConfig>
ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
  switch (TypeId) {
  case F64ShadowConfig::kNsanTypeId:
    return std::make_unique<F64ShadowConfig>();
  case F80ShadowConfig::kNsanTypeId:
    return std::make_unique<F80ShadowConfig>();
  case F128ShadowConfig::kNsanTypeId:
    return std::make_unique<F128ShadowConfig>();
  case PPC128ShadowConfig::kNsanTypeId:
    return std::make_unique<PPC128ShadowConfig>();
  }
  report_fatal_error("nsan: invalid shadow type id '" + Twine(TypeId) + "'");
}
// An enum corresponding to shadow value types. Used as indices in arrays, so
// not an `enum class`.
enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };

// If `FT` corresponds to a primitive FTValueType, return it.
static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
  if (FT->isFloatTy())
    return kFloat;
  if (FT->isDoubleTy())
    return kDouble;
  if (FT->isX86_FP80Ty())
    return kLongDouble;
  return {};
}
// Returns the LLVM type for an FTValueType.
static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
  switch (VT) {
  case kFloat:
    return Type::getFloatTy(Context);
  case kDouble:
    return Type::getDoubleTy(Context);
  case kLongDouble:
    return Type::getX86_FP80Ty(Context);
  case kNumValueTypes:
    return nullptr;
  }
  llvm_unreachable("Unhandled FTValueType enum");
}

// Returns the type name for an FTValueType.
static const char *typeNameFromFTValueType(FTValueType VT) {
  switch (VT) {
  case kFloat:
    return "float";
  case kDouble:
    return "double";
  case kLongDouble:
    return "longdouble";
  case kNumValueTypes:
    return nullptr;
  }
  llvm_unreachable("Unhandled FTValueType enum");
}
// A specific mapping configuration of application type to shadow type for nsan
// (see -nsan-shadow-type-mapping flag).
class MappingConfig {
public:
  explicit MappingConfig(LLVMContext &C) : Context(C) {
    if (ClShadowMapping.size() != 3)
      report_fatal_error("Invalid nsan mapping: " + Twine(ClShadowMapping));
    unsigned ShadowTypeSizeBits[kNumValueTypes];
    for (int VT = 0; VT < kNumValueTypes; ++VT) {
      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
      if (!Config)
        report_fatal_error("Failed to get ShadowTypeConfig for " +
                           Twine(ClShadowMapping[VT]));
      const unsigned AppTypeSize =
          typeFromFTValueType(static_cast<FTValueType>(VT), Context)
              ->getScalarSizeInBits();
      const unsigned ShadowTypeSize =
          Config->getType(Context)->getScalarSizeInBits();
      // Check that the shadow type size is at most kShadowScale times the
      // application type size, so that shadow memory computations are valid.
      if (ShadowTypeSize > kShadowScale * AppTypeSize)
        report_fatal_error("Invalid nsan mapping f" + Twine(AppTypeSize) +
                           "->f" + Twine(ShadowTypeSize) +
                           ": The shadow type size should be at most " +
                           Twine(kShadowScale) +
                           " times the application type size");
      ShadowTypeSizeBits[VT] = ShadowTypeSize;
      Configs[VT] = std::move(Config);
    }

    // Check that the mapping is monotonous. This is required because if one
    // does an fpextend of `float->long double` in application code, nsan is
    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
    // shadow code. This will fail in `qql` mode, since nsan would be
    // fpextending `f128->long double`, which is invalid.
    // TODO: Relax this.
    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
      report_fatal_error("Invalid nsan mapping: { float->f" +
                         Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
                         Twine(ShadowTypeSizeBits[kDouble]) +
                         "; long double->f" +
                         Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
  }
  const ShadowTypeConfig &byValueType(FTValueType VT) const {
    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
    return *Configs[VT];
  }

  // Returns the extended shadow type for a given application type.
  Type *getExtendedFPType(Type *FT) const {
    if (const auto VT = ftValueTypeFromType(FT))
      return Configs[*VT]->getType(Context);
    if (FT->isVectorTy()) {
      auto *VecTy = cast<VectorType>(FT);
      // TODO: add support for scalable vector types.
      if (VecTy->isScalableTy())
        return nullptr;
      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
      return ExtendedScalar
                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
                 : nullptr;
    }
    return nullptr;
  }
private:
  LLVMContext &Context;
  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
};

// The memory extents of a type specify how many elements of a given
// FTValueType need to be stored when storing this type.
struct MemoryExtents {
  FTValueType ValueType;
  uint64_t NumElts;
};

static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
  if (const auto VT = ftValueTypeFromType(FT))
    return {*VT, 1};
  if (auto *VecTy = dyn_cast<VectorType>(FT)) {
    const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
    return {ScalarExtents.ValueType,
            ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
  }
  llvm_unreachable("invalid value type");
}
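// For example, a `<4 x float>` store has extents {kFloat, 4}: the runtime is
// asked for shadow memory covering four shadowed floats.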
// The location of a check. Passed as parameters to runtime checking functions.
class CheckLoc {
public:
  // Creates a location that references an application memory location.
  static CheckLoc makeStore(Value *Address) {
    CheckLoc Result(kStore);
    Result.Address = Address;
    return Result;
  }
  static CheckLoc makeLoad(Value *Address) {
    CheckLoc Result(kLoad);
    Result.Address = Address;
    return Result;
  }

  // Creates a location that references an argument, given by id.
  static CheckLoc makeArg(int ArgId) {
    CheckLoc Result(kArg);
    Result.ArgId = ArgId;
    return Result;
  }

  // Creates a location that references the return value of a function.
  static CheckLoc makeRet() { return CheckLoc(kRet); }

  // Creates a location that references a vector insert.
  static CheckLoc makeInsert() { return CheckLoc(kInsert); }

  // Returns the CheckType of location this refers to, as an integer-typed LLVM
  // IR value.
  Value *getType(LLVMContext &C) const {
    return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
  }

  // Returns a CheckType-specific value representing details of the location
  // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
  // IR value.
  Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
    switch (CheckTy) {
    case kUnknown:
      llvm_unreachable("unknown type");
    case kRet:
    case kInsert:
      return ConstantInt::get(IntptrTy, 0);
    case kArg:
      return ConstantInt::get(IntptrTy, ArgId);
    case kLoad:
    case kStore:
      return Builder.CreatePtrToInt(Address, IntptrTy);
    }
    llvm_unreachable("Unhandled CheckType enum");
  }
private:
  // Must be kept in sync with the runtime,
  // see compiler-rt/lib/nsan/nsan_stats.h
  enum CheckType {
    kUnknown = 0,
    kRet,
    kArg,
    kLoad,
    kStore,
    kInsert,
  };
  explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}

  Value *Address = nullptr;
  const CheckType CheckTy;
  int ArgId = -1;
};
// A map of LLVM IR values to shadow LLVM IR values.
class ValueToShadowMap {
public:
  explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}

  ValueToShadowMap(const ValueToShadowMap &) = delete;
  ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;

  // Sets the shadow value for a value. Asserts that the value does not already
  // have a shadow.
  void setShadow(Value &V, Value &Shadow) {
    [[maybe_unused]] const bool Inserted = Map.try_emplace(&V, &Shadow).second;
    LLVM_DEBUG({
      if (!Inserted) {
        if (auto *I = dyn_cast<Instruction>(&V))
          errs() << I->getFunction()->getName() << ": ";
        errs() << "duplicate shadow (" << &V << "): ";
        V.dump();
      }
    });
    assert(Inserted && "duplicate shadow");
  }

  // Returns true if the value already has a shadow (including if the value is
  // a constant). If true, calling getShadow() is valid.
  bool hasShadow(Value *V) const {
    return isa<Constant>(V) || (Map.find(V) != Map.end());
  }

  // Returns the shadow value for a given value. Asserts that the value has
  // a shadow value. Lazily creates shadows for constant values.
  Value *getShadow(Value *V) const {
    if (Constant *C = dyn_cast<Constant>(V))
      return getShadowConstant(C);
    return Map.find(V)->second;
  }

  bool empty() const { return Map.empty(); }
private:
  // Extends a constant application value to its shadow counterpart.
  APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
    bool LosesInfo = false;
    CV.convert(To, APFloatBase::rmTowardZero, &LosesInfo);
    return CV;
  }

  // Returns the shadow constant for the given application constant.
  Constant *getShadowConstant(Constant *C) const {
    if (UndefValue *U = dyn_cast<UndefValue>(C)) {
      return UndefValue::get(Config.getExtendedFPType(U->getType()));
    }
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
      // Floating-point constants.
      Type *Ty = Config.getExtendedFPType(CFP->getType());
      return ConstantFP::get(
          Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
    }
    // Vector, array, or aggregate constants.
    if (C->getType()->isVectorTy()) {
      SmallVector<Constant *, 8> Elements;
      for (int I = 0, E = cast<VectorType>(C->getType())
                              ->getElementCount()
                              .getFixedValue();
           I < E; ++I)
        Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
      return ConstantVector::get(Elements);
    }
    llvm_unreachable("unimplemented");
  }

  const MappingConfig &Config;
  DenseMap<Value *, Value *> Map;
};
class NsanMemOpFn {
public:
  NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized, StringRef Fallback,
              size_t NumArgs);
  FunctionCallee getFunctionFor(uint64_t MemOpSize) const;
  FunctionCallee getFallback() const;

private:
  SmallVector<FunctionCallee> Funcs;
  size_t NumSizedFuncs;
};
NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
                         StringRef Fallback, size_t NumArgs) {
  LLVMContext &Ctx = M.getContext();
  AttributeList Attr;
  Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
  Type *PtrTy = PointerType::getUnqual(Ctx);
  Type *VoidTy = Type::getVoidTy(Ctx);
  IntegerType *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
  FunctionType *SizedFnTy = nullptr;

  NumSizedFuncs = Sized.size();

  // The first entry is the fallback function.
  if (NumArgs == 3) {
    Funcs.push_back(
        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, PtrTy, IntptrTy));
    SizedFnTy = FunctionType::get(VoidTy, {PtrTy, PtrTy}, false);
  } else if (NumArgs == 2) {
    Funcs.push_back(
        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, IntptrTy));
    SizedFnTy = FunctionType::get(VoidTy, {PtrTy}, false);
  } else {
    llvm_unreachable("Unexpected number of arguments for sized functions");
  }

  for (size_t i = 0; i < NumSizedFuncs; ++i)
    Funcs.push_back(M.getOrInsertFunction(Sized[i], SizedFnTy, Attr));
}

FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const {
  // `Funcs` holds the fallback at index 0 followed by the sized variants, so
  // the indexing below assumes at least four entries.
  assert(NumSizedFuncs >= 3 && "Unexpected number of sized functions");

  size_t Idx =
      MemOpSize == 4 ? 1 : (MemOpSize == 8 ? 2 : (MemOpSize == 16 ? 3 : 0));

  return Funcs[Idx];
}

FunctionCallee NsanMemOpFn::getFallback() const { return Funcs[0]; }
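// For example, NsanCopyFns below registers "__nsan_copy_4/8/16" as the sized
// variants and "__nsan_copy_values" as the fallback: getFunctionFor(16)
// resolves to __nsan_copy_16, while any other size falls back to
// __nsan_copy_values, which takes the size as a trailing argument.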
/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
/// API function declarations into the module if they don't exist already.
/// Instantiating ensures the __nsan_init function is in the list of global
/// constructors for the module.
class NumericalStabilitySanitizer {
public:
  NumericalStabilitySanitizer(Module &M);
  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);

private:
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  void maybeAddSuffixForNsanInterface(CallBase *CI);
  bool addrPointsToConstantData(Value *Addr);
  void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
                              ValueToShadowMap &Map);
  Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
                                                const TargetLibraryInfo &TLI,
                                                const ValueToShadowMap &Map);
  PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
  void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
                             ValueToShadowMap &Map);

  void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
                           const ValueToShadowMap &Map);

  void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
                             const ValueToShadowMap &Map);
  Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                   CheckLoc Loc);
  Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                           CheckLoc Loc);
  void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);

  // Value creation handlers.
  Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
  Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                        const TargetLibraryInfo &TLI,
                        const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                                  const TargetLibraryInfo &TLI,
                                  const ValueToShadowMap &Map,
                                  IRBuilder<> &Builder);
  Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
                     const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
                   const ValueToShadowMap &Map, IRBuilder<> &Builder);

  // Value propagation handlers.
  void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
                        const ValueToShadowMap &Map);
  void propagateNonFTStore(StoreInst &Store, Type *VT,
                           const ValueToShadowMap &Map);

  const DataLayout &DL;
  LLVMContext &Context;
  MappingConfig Config;
  IntegerType *IntptrTy = nullptr;

  // TODO: Use std::array instead?
  FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};

  NsanMemOpFn NsanCopyFns;
  NsanMemOpFn NsanSetUnknownFns;

  FunctionCallee NsanGetRawShadowTypePtr;
  FunctionCallee NsanGetRawShadowPtr;
  GlobalValue *NsanShadowRetTag = nullptr;

  Type *NsanShadowRetType = nullptr;
  GlobalValue *NsanShadowRetPtr = nullptr;

  GlobalValue *NsanShadowArgsTag = nullptr;

  Type *NsanShadowArgsType = nullptr;
  GlobalValue *NsanShadowArgsPtr = nullptr;

  std::optional<Regex> CheckFunctionsFilter;
};
} // end anonymous namespace
PreservedAnalyses
NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
  getOrCreateSanitizerCtorAndInitFunctions(
      M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
      /*InitArgs=*/{},
      // This callback is invoked when the functions are created the first
      // time. Hook them into the global ctors list in that case:
      [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });

  NumericalStabilitySanitizer Nsan(M);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));

  return PreservedAnalyses::none();
}
static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
  return dyn_cast<GlobalValue>(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
                              nullptr, Name, nullptr,
                              GlobalVariable::InitialExecTLSModel);
  }));
}
NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
    : DL(M.getDataLayout()), Context(M.getContext()), Config(Context),
      NsanCopyFns(M, {"__nsan_copy_4", "__nsan_copy_8", "__nsan_copy_16"},
                  "__nsan_copy_values", /*NumArgs=*/3),
      NsanSetUnknownFns(M,
                        {"__nsan_set_value_unknown_4",
                         "__nsan_set_value_unknown_8",
                         "__nsan_set_value_unknown_16"},
                        "__nsan_set_value_unknown", /*NumArgs=*/2) {
  IntptrTy = DL.getIntPtrType(Context);
  Type *PtrTy = PointerType::getUnqual(Context);
  Type *Int32Ty = Type::getInt32Ty(Context);
  Type *Int1Ty = Type::getInt1Ty(Context);
  Type *VoidTy = Type::getVoidTy(Context);

  AttributeList Attr;
  Attr = Attr.addFnAttribute(Context, Attribute::NoUnwind);
  // Initialize the runtime values (functions and global variables).
  for (int I = 0; I < kNumValueTypes; ++I) {
    const FTValueType VT = static_cast<FTValueType>(I);
    const char *VTName = typeNameFromFTValueType(VT);
    Type *VTTy = typeFromFTValueType(VT, Context);

    // Load/store.
    const std::string GetterPrefix =
        std::string("__nsan_get_shadow_ptr_for_") + VTName;
    NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
        GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
    NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
        GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);

    // Check.
    const auto &ShadowConfig = Config.byValueType(VT);
    Type *ShadowTy = ShadowConfig.getType(Context);
    NsanCheckValue[VT] =
        M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
                                  "_" + ShadowConfig.getNsanTypeId(),
                              Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
    NsanFCmpFail[VT] = M.getOrInsertFunction(
        std::string("__nsan_fcmp_fail_") + VTName + "_" +
            ShadowConfig.getNsanTypeId(),
        Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
  }

  // TODO: Add attributes nofree, nosync, readnone, readonly.
  NsanGetRawShadowTypePtr = M.getOrInsertFunction(
      "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
  NsanGetRawShadowPtr = M.getOrInsertFunction(
      "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);

  NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);

  NsanShadowRetType = ArrayType::get(Type::getInt8Ty(Context),
                                     kMaxVectorWidth * kMaxShadowTypeSizeBytes);
  NsanShadowRetPtr =
      createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);

  NsanShadowArgsTag =
      createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);

  NsanShadowArgsType =
      ArrayType::get(Type::getInt8Ty(Context),
                     kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);

  NsanShadowArgsPtr =
      createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);

  if (!ClCheckFunctionsFilter.empty()) {
    Regex R = Regex(ClCheckFunctionsFilter);
    std::string RegexError;
    assert(R.isValid(RegexError));
    CheckFunctionsFilter = std::move(R);
  }
}
// Returns true if the given LLVM Value points to constant data (typically, a
// global variable reference).
bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
  // If this is a GEP, just analyze its pointer operand.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
    Addr = GEP->getPointerOperand();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr))
    return GV->isConstant();
  return false;
}
// This instruments the function entry to create shadow arguments.
// Pseudocode:
//   if (this_fn_ptr == __nsan_shadow_args_tag) {
//     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
//     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
//     ...
//     __nsan_shadow_args_tag = 0;
//   } else {
//     s(arg0) = fext(arg0);
//     s(arg1) = fext(arg1);
//     ...
//   }
void NumericalStabilitySanitizer::createShadowArguments(
    Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
  assert(!F.getIntrinsicID() && "found a definition of an intrinsic");

  // Do not bother if there are no FP args.
  if (all_of(F.args(), [this](const Argument &Arg) {
        return Config.getExtendedFPType(Arg.getType()) == nullptr;
      }))
    return;

  IRBuilder<> Builder(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt());
  // The function has shadow args if the shadow args tag matches the function
  // address.
  Value *HasShadowArgs = Builder.CreateICmpEQ(
      Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
      Builder.CreatePtrToInt(&F, IntptrTy));

  unsigned ShadowArgsOffsetBytes = 0;
  for (Argument &Arg : F.args()) {
    Type *VT = Arg.getType();
    Type *ExtendedVT = Config.getExtendedFPType(VT);
    if (ExtendedVT == nullptr)
      continue; // Not an FT value.
    Value *L = Builder.CreateAlignedLoad(
        ExtendedVT,
        Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
                                   ShadowArgsOffsetBytes),
        Align(1), /*isVolatile=*/false);
    Value *Shadow = Builder.CreateSelect(HasShadowArgs, L,
                                         Builder.CreateFPExt(&Arg, ExtendedVT));
    Map.setShadow(Arg, *Shadow);
    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
    assert(!SlotSize.isScalable() && "unsupported");
    ShadowArgsOffsetBytes += SlotSize;
  }
  Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
}
// Returns true if the instrumentation should emit code to check arguments
// before a function call.
static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
                            const std::optional<Regex> &CheckFunctionsFilter) {
  Function *Fn = CI.getCalledFunction();

  if (CheckFunctionsFilter) {
    // Skip checking args of indirect calls.
    if (Fn == nullptr)
      return false;
    if (CheckFunctionsFilter->match(Fn->getName()))
      return true;
    return false;
  }

  if (Fn == nullptr)
    return true; // Always check args of indirect calls.

  // Never check nsan functions, the user called them for a reason.
  if (Fn->getName().starts_with("__nsan_"))
    return false;

  const auto ID = Fn->getIntrinsicID();
  LibFunc LFunc = LibFunc::NumLibFuncs;
  // Always check args of unknown functions.
  if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
    return true;

  // Do not check args of an `fabs` call that is used for a comparison.
  // This is typically used for `fabs(a-b) < tolerance`, where what matters is
  // the result of the comparison, which is already caught by the fcmp checks.
  if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
      LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
    for (const auto &U : CI.users())
      if (isa<CmpInst>(U))
        return false;

  return true; // Default is check.
}
// Populates the shadow call stack (which contains shadow values for every
// floating-point parameter to the function).
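// The protocol mirrors createShadowArguments above: the caller stores the
// shadow values into __nsan_shadow_args and tags them with the callee address,
// so the callee can tell whether the slots were populated for this call.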
void NumericalStabilitySanitizer::populateShadowStack(
    CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
  // Do not create a shadow stack for inline asm.
  if (CI.isInlineAsm())
    return;

  // Do not bother if there are no FP args.
  if (all_of(CI.operands(), [this](const Value *Arg) {
        return Config.getExtendedFPType(Arg->getType()) == nullptr;
      }))
    return;

  IRBuilder<> Builder(&CI);
  SmallVector<Value *, 8> ArgShadows;
  const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
  for (auto [ArgIdx, Arg] : enumerate(CI.operands())) {
    if (Config.getExtendedFPType(Arg->getType()) == nullptr)
      continue; // Not an FT value.
    Value *ArgShadow = Map.getShadow(Arg);
    ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder,
                                                     CheckLoc::makeArg(ArgIdx))
                                         : ArgShadow);
  }

  // Do not create shadow stacks for intrinsics/known lib funcs.
  if (Function *Fn = CI.getCalledFunction()) {
    LibFunc LFunc;
    if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc))
      return;
  }

  // Set the shadow stack tag.
  Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag);
  TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0);

  unsigned ShadowArgId = 0;
  for (const Value *Arg : CI.operands()) {
    Type *VT = Arg->getType();
    Type *ExtendedVT = Config.getExtendedFPType(VT);
    if (ExtendedVT == nullptr)
      continue; // Not an FT value.
    Builder.CreateAlignedStore(
        ArgShadows[ShadowArgId++],
        Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
                                   ShadowArgsOffsetBytes),
        Align(1), /*isVolatile=*/false);
    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
    assert(!SlotSize.isScalable() && "unsupported");
    ShadowArgsOffsetBytes += SlotSize;
  }
}
// Internal part of emitCheck(). Returns a value that indicates whether
// computation should continue with the shadow or resume by re-fextending the
// value.
enum class ContinuationType { // Keep in sync with runtime.
  ContinueWithShadow = 0,
  ResumeFromValue = 1,
};
Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
                                                      IRBuilder<> &Builder,
                                                      CheckLoc Loc) {
  // Do not emit checks for constant values, this is redundant.
  if (isa<Constant>(V))
    return ConstantInt::get(
        Builder.getInt32Ty(),
        static_cast<int>(ContinuationType::ContinueWithShadow));

  Type *Ty = V->getType();
  if (const auto VT = ftValueTypeFromType(Ty))
    return Builder.CreateCall(
        NsanCheckValue[*VT],
        {V, ShadowV, Loc.getType(Context), Loc.getValue(IntptrTy, Builder)});

  if (Ty->isVectorTy()) {
    auto *VecTy = cast<VectorType>(Ty);
    // We currently skip scalable vector types in MappingConfig,
    // thus we should not encounter any such types here.
    assert(!VecTy->isScalableTy() &&
           "Scalable vector types are not supported yet");
    Value *CheckResult = nullptr;
    for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
      // We resume if any element resumes. Another option would be to create a
      // vector shuffle with the array of ContinueWithShadow, but that is too
      // complex.
      Value *ExtractV = Builder.CreateExtractElement(V, I);
      Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    return CheckResult;
  }
  if (Ty->isArrayTy()) {
    Value *CheckResult = nullptr;
    for (auto I : seq(Ty->getArrayNumElements())) {
      Value *ExtractV = Builder.CreateExtractValue(V, I);
      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    return CheckResult;
  }
  if (Ty->isStructTy()) {
    Value *CheckResult = nullptr;
    for (auto I : seq(Ty->getStructNumElements())) {
      if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
        continue; // Only check FT values.
      Value *ExtractV = Builder.CreateExtractValue(V, I);
      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    if (!CheckResult)
      return ConstantInt::get(
          Builder.getInt32Ty(),
          static_cast<int>(ContinuationType::ContinueWithShadow));
    return CheckResult;
  }

  llvm_unreachable("not implemented");
}
// Inserts a runtime check of V against its shadow value ShadowV.
// We check values whenever they escape: on return, call, stores, and
// insertvalue.
// Returns the shadow value that should be used to continue the computations,
// depending on the answer from the runtime.
// TODO: Should we check on select? phi?
Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
                                              IRBuilder<> &Builder,
                                              CheckLoc Loc) {
  // Do not emit checks for constant values, this is redundant.
  if (isa<Constant>(V))
    return ShadowV;

  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    Function *F = Inst->getFunction();
    if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) {
      return ShadowV;
    }
  }

  Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
  Value *ICmpEQ = Builder.CreateICmpEQ(
      CheckResult,
      ConstantInt::get(Builder.getInt32Ty(),
                       static_cast<int>(ContinuationType::ResumeFromValue)));
  return Builder.CreateSelect(
      ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())),
      ShadowV);
}
// Inserts a check that fcmps on shadow values are consistent with those on
// base values.
void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
                                                const ValueToShadowMap &Map) {
  if (!ClInstrumentFCmp)
    return;

  Function *F = FCmp.getFunction();
  if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
    return;

  Value *LHS = FCmp.getOperand(0);
  if (Config.getExtendedFPType(LHS->getType()) == nullptr)
    return;
  Value *RHS = FCmp.getOperand(1);

  // Split the basic block. On mismatch, we'll jump to the new basic block with
  // a call to the runtime for error reporting.
  BasicBlock *FCmpBB = FCmp.getParent();
  BasicBlock *NextBB = FCmpBB->splitBasicBlock(FCmp.getNextNode());
  // Remove the newly created terminator unconditional branch.
  FCmpBB->back().eraseFromParent();
  BasicBlock *FailBB =
      BasicBlock::Create(Context, "", FCmpBB->getParent(), NextBB);

  // Create the shadow fcmp and comparison between the fcmps.
  IRBuilder<> FCmpBuilder(FCmpBB);
  FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
  Value *ShadowLHS = Map.getShadow(LHS);
  Value *ShadowRHS = Map.getShadow(RHS);
  // See comment on ClTruncateFCmpEq.
  if (FCmp.isEquality() && ClTruncateFCmpEq) {
    Type *Ty = ShadowLHS->getType();
    ShadowLHS = FCmpBuilder.CreateFPExt(
        FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
    ShadowRHS = FCmpBuilder.CreateFPExt(
        FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
  }
  Value *ShadowFCmp =
      FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
  Value *OriginalAndShadowFcmpMatch =
      FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);

  if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
    // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
    // where an element is true if the corresponding elements in original and
    // shadow are the same. We want all elements to be 1.
    OriginalAndShadowFcmpMatch =
        FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
  }

  // Use MDBuilder(Context).createLikelyBranchWeights() because "match" is the
  // common case.
  FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB,
                           MDBuilder(Context).createLikelyBranchWeights());

  // Fill in FailBB.
  IRBuilder<> FailBuilder(FailBB);
  FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());

  const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
                             &FailBuilder](Value *L, Value *R, Value *ShadowL,
                                           Value *ShadowR, Value *Result,
                                           Value *ShadowResult) {
    Type *FT = L->getType();
    FunctionCallee *Callee = nullptr;
    if (FT->isFloatTy()) {
      Callee = &(NsanFCmpFail[kFloat]);
    } else if (FT->isDoubleTy()) {
      Callee = &(NsanFCmpFail[kDouble]);
    } else if (FT->isX86_FP80Ty()) {
      // TODO: make NsanFCmpFailLongDouble work.
      Callee = &(NsanFCmpFail[kDouble]);
      L = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
      R = FailBuilder.CreateFPTrunc(R, Type::getDoubleTy(Context));
    } else {
      llvm_unreachable("not implemented");
    }
    FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
                                     ConstantInt::get(FCmpBuilder.getInt32Ty(),
                                                      FCmp.getPredicate()),
                                     Result, ShadowResult});
  };
  if (LHS->getType()->isVectorTy()) {
    for (int I = 0, E = cast<VectorType>(LHS->getType())
                            ->getElementCount()
                            .getFixedValue();
         I < E; ++I) {
      Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
      Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
      Value *ExtractShadowLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
      Value *ExtractShadowRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
      Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
      Value *ExtractShadowFCmp =
          FailBuilder.CreateExtractElement(ShadowFCmp, I);
      EmitFailCall(ExtractLHS, ExtractRHS, ExtractShadowLHS, ExtractShadowRHS,
                   ExtractFCmp, ExtractShadowFCmp);
    }
  } else {
    EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
  }
  FailBuilder.CreateBr(NextBB);

  ++NumInstrumentedFCmp;
}
// Creates a shadow phi value for any phi that defines a value of FT type.
PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
    PHINode &Phi, const TargetLibraryInfo &TLI) {
  Type *VT = Phi.getType();
  Type *ExtendedVT = Config.getExtendedFPType(VT);
  if (ExtendedVT == nullptr)
    return nullptr; // Not an FT value.
  // The phi operands are shadow values and are not available when the phi is
  // created. They will be populated in a final phase, once all shadow values
  // have been created.
  PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
  Shadow->insertAfter(Phi.getIterator());
  return Shadow;
}
Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
                                               Type *ExtendedVT) {
  IRBuilder<> Builder(Load.getNextNode());
  Builder.SetCurrentDebugLocation(Load.getDebugLoc());
  if (addrPointsToConstantData(Load.getPointerOperand())) {
    // No need to look into the shadow memory, the value is a constant. Just
    // convert from FT to 2FT.
    return Builder.CreateFPExt(&Load, ExtendedVT);
  }

  // if (%shadowptr == null)
  //   %shadow = fpext %v
  // else
  //   %shadow = load (ptrcast %shadowptr)
  // Considered options here:
  //  - Have `NsanGetShadowPtrForLoad` return a fixed address
  //    &__nsan_unknown_value_shadow_address that is valid to load from, and
  //    use a select. This has the advantage that the generated IR is simpler.
  //  - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
  //    not short-circuit, dereferencing the returned pointer is no longer an
  //    option, we have to split and create a separate basic block. This has
  //    the advantage of being easier to debug because it crashes if we ever
  //    mess up.

  const auto Extents = getMemoryExtentsOrDie(VT);
  Value *ShadowPtr = Builder.CreateCall(
      NsanGetShadowPtrForLoad[Extents.ValueType],
      {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
  ++NumInstrumentedFTLoads;

  // Split the basic block.
  BasicBlock *LoadBB = Load.getParent();
  BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
  // Create the two options for creating the shadow value.
  BasicBlock *ShadowLoadBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
  BasicBlock *FExtBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);

  // Replace the newly created terminator unconditional branch by a conditional
  // branch to one of the options.
  LoadBB->back().eraseFromParent();
  IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
  LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
                             ShadowLoadBB);

  // Fill in ShadowLoadBB.
  IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
  ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
      ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
  if (ClCheckLoads) {
    ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
                           CheckLoc::makeLoad(Load.getPointerOperand()));
  }
  ShadowLoadBBBuilder.CreateBr(NextBB);

  // Fill in FExtBB.
  IRBuilder<> FExtBBBuilder(FExtBB);
  FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  Value *FExt = FExtBBBuilder.CreateFPExt(&Load, ExtendedVT);
  FExtBBBuilder.CreateBr(NextBB);

  // The shadow value comes from either of the options.
  IRBuilder<> NextBBBuilder(&*NextBB->begin());
  NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
  ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
  ShadowPhi->addIncoming(FExt, FExtBB);
  return ShadowPhi;
}
Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
                                                Type *VT, Type *ExtendedVT,
                                                const ValueToShadowMap &Map,
                                                IRBuilder<> &Builder) {
  Value *OrigSource = Trunc.getOperand(0);
  Type *OrigSourceTy = OrigSource->getType();
  Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);

  // When truncating:
  //  - (A) If the source has a shadow, we truncate from the shadow, else we
  //    truncate from the original source.
  //  - (B) If the shadow of the source is larger than the shadow of the dest,
  //    we still need a truncate. Else, the shadow of the source is the same
  //    type as the shadow of the dest (because mappings are non-decreasing),
  //    so we don't need to emit a truncate.
  // Examples,
  //   with a mapping of {f32->f64;f64->f80;f80->f128}
  //     fptrunc double %1 to float -> fptrunc x86_fp80 s(%1) to double
  //     fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
  //     fptrunc x86_fp80 %1 to double -> x86_fp80 s(%1)
  //     fptrunc fp128 %1 to double -> fptrunc fp128 %1 to x86_fp80
  //     fptrunc fp128 %1 to x86_fp80 -> fp128 %1
  //   with a mapping of {f32->f64;f64->f128;f80->f128}
  //     fptrunc double %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
  //     fptrunc x86_fp80 %1 to double -> fp128 s(%1)
  //     fptrunc fp128 %1 to double -> fp128 %1
  //     fptrunc fp128 %1 to x86_fp80 -> fp128 %1
  //   with a mapping of {f32->f32;f64->f32;f80->f64}
  //     fptrunc double %1 to float -> float s(%1)
  //     fptrunc x86_fp80 %1 to float -> fptrunc double s(%1) to float
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to float
  //     fptrunc x86_fp80 %1 to double -> fptrunc double s(%1) to float
  //     fptrunc fp128 %1 to double -> fptrunc fp128 %1 to float
  //     fptrunc fp128 %1 to x86_fp80 -> fptrunc fp128 %1 to double

  // See (A) above.
  Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
  Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
  // See (B) above.
  if (SourceTy == ExtendedVT)
    return Source;

  return Builder.CreateFPTrunc(Source, ExtendedVT);
}
Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
                                              Type *ExtendedVT,
                                              const ValueToShadowMap &Map,
                                              IRBuilder<> &Builder) {
  Value *OrigSource = Ext.getOperand(0);
  Type *OrigSourceTy = OrigSource->getType();
  Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
  // When extending:
  //  - (A) If the source has a shadow, we extend from the shadow, else we
  //    extend from the original source.
  //  - (B) If the shadow of the dest is larger than the shadow of the source,
  //    we still need an extend. Else, the shadow of the source is the same
  //    type as the shadow of the dest (because mappings are non-decreasing),
  //    so we don't need to emit an extend.
  // Examples,
  //   with a mapping of {f32->f64;f64->f80;f80->f128}
  //     fpext half %1 to float -> fpext half %1 to double
  //     fpext half %1 to double -> fpext half %1 to x86_fp80
  //     fpext half %1 to x86_fp80 -> fpext half %1 to fp128
  //     fpext float %1 to double -> double s(%1)
  //     fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
  //     fpext double %1 to x86_fp80 -> fpext x86_fp80 s(%1) to fp128
  //   with a mapping of {f32->f64;f64->f128;f80->f128}
  //     fpext half %1 to float -> fpext half %1 to double
  //     fpext half %1 to double -> fpext half %1 to fp128
  //     fpext half %1 to x86_fp80 -> fpext half %1 to fp128
  //     fpext float %1 to double -> fpext double s(%1) to fp128
  //     fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
  //     fpext double %1 to x86_fp80 -> fp128 s(%1)
  //   with a mapping of {f32->f32;f64->f32;f80->f64}
  //     fpext half %1 to float -> fpext half %1 to float
  //     fpext half %1 to double -> fpext half %1 to float
  //     fpext half %1 to x86_fp80 -> fpext half %1 to double
  //     fpext float %1 to double -> s(%1)
  //     fpext float %1 to x86_fp80 -> fpext float s(%1) to double
  //     fpext double %1 to x86_fp80 -> fpext float s(%1) to double

  // See (A) above.
  Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
  Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
  // See (B) above.
  if (SourceTy == ExtendedVT)
    return Source;

  return Builder.CreateFPExt(Source, ExtendedVT);
}
namespace {
// TODO: This should be tablegen-ed.
struct KnownIntrinsic {
  struct WidenedIntrinsic {
    const char *NarrowName;
    Intrinsic::ID ID; // wide id.
    using FnTypeFactory = FunctionType *(*)(LLVMContext &);
    FnTypeFactory MakeFnTy;
  };

  static const char *get(LibFunc LFunc);

  // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
  // that applies the same operation on the shadow argument.
  // Options are:
  //  - pass in the ID and full function type,
  //  - pass in the name, which includes the function type through mangling.
  static const WidenedIntrinsic *widen(StringRef Name);

private:
  struct LFEntry {
    LibFunc LFunc;
    const char *IntrinsicName;
  };
  static const LFEntry kLibfuncIntrinsics[];

  static const WidenedIntrinsic kWidenedIntrinsics[];
};
} // namespace
static FunctionType *makeDoubleDouble(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false);
}

static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C),
                           {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false);
}

static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C),
                           {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C),
                           {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false);
}

static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C),
                           {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
  return FunctionType::get(
      Type::getDoubleTy(C),
      {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
      false);
}

static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(
      Type::getX86_FP80Ty(C),
      {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
      false);
}
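// Naming convention for the factories above: the first type is the return
// type and the remaining ones are parameter types, e.g. makeDoubleDoubleI32
// builds `double (double, i32)`.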
1362 const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1363 // TODO: Right now we ignore vector intrinsics.
1364 // This is hard because we have to model the semantics of the intrinsics,
1365 // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1366 // Intrinsics that take any non-vector FT types:
1367 // NOTE: Right now because of
1368 // https://github.com/llvm/llvm-project/issues/44744
1369 // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1370 // come back).
1371 {"llvm.sqrt.f32", Intrinsic::sqrt, makeDoubleDouble},
1372 {"llvm.sqrt.f64", Intrinsic::sqrt, makeX86FP80X86FP80},
1373 {"llvm.sqrt.f80", Intrinsic::sqrt, makeX86FP80X86FP80},
1374 {"llvm.powi.f32", Intrinsic::powi, makeDoubleDoubleI32},
1375 {"llvm.powi.f64", Intrinsic::powi, makeX86FP80X86FP80I32},
1376 {"llvm.powi.f80", Intrinsic::powi, makeX86FP80X86FP80I32},
1377 {"llvm.sin.f32", Intrinsic::sin, makeDoubleDouble},
1378 {"llvm.sin.f64", Intrinsic::sin, makeX86FP80X86FP80},
1379 {"llvm.sin.f80", Intrinsic::sin, makeX86FP80X86FP80},
1380 {"llvm.cos.f32", Intrinsic::cos, makeDoubleDouble},
1381 {"llvm.cos.f64", Intrinsic::cos, makeX86FP80X86FP80},
1382 {"llvm.cos.f80", Intrinsic::cos, makeX86FP80X86FP80},
1383 {"llvm.pow.f32", Intrinsic::pow, makeDoubleDoubleDouble},
1384 {"llvm.pow.f64", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1385 {"llvm.pow.f80", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1386 {"llvm.exp.f32", Intrinsic::exp, makeDoubleDouble},
1387 {"llvm.exp.f64", Intrinsic::exp, makeX86FP80X86FP80},
1388 {"llvm.exp.f80", Intrinsic::exp, makeX86FP80X86FP80},
1389 {"llvm.exp2.f32", Intrinsic::exp2, makeDoubleDouble},
1390 {"llvm.exp2.f64", Intrinsic::exp2, makeX86FP80X86FP80},
1391 {"llvm.exp2.f80", Intrinsic::exp2, makeX86FP80X86FP80},
1392 {"llvm.log.f32", Intrinsic::log, makeDoubleDouble},
1393 {"llvm.log.f64", Intrinsic::log, makeX86FP80X86FP80},
1394 {"llvm.log.f80", Intrinsic::log, makeX86FP80X86FP80},
1395 {"llvm.log10.f32", Intrinsic::log10, makeDoubleDouble},
1396 {"llvm.log10.f64", Intrinsic::log10, makeX86FP80X86FP80},
1397 {"llvm.log10.f80", Intrinsic::log10, makeX86FP80X86FP80},
1398 {"llvm.log2.f32", Intrinsic::log2, makeDoubleDouble},
1399 {"llvm.log2.f64", Intrinsic::log2, makeX86FP80X86FP80},
1400 {"llvm.log2.f80", Intrinsic::log2, makeX86FP80X86FP80},
1401 {"llvm.fma.f32", Intrinsic::fma, makeDoubleDoubleDoubleDouble},
1403 {"llvm.fmuladd.f32", Intrinsic::fmuladd, makeDoubleDoubleDoubleDouble},
1405 {"llvm.fma.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1407 {"llvm.fmuladd.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1409 {"llvm.fma.f80", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1410 {"llvm.fabs.f32", Intrinsic::fabs, makeDoubleDouble},
1411 {"llvm.fabs.f64", Intrinsic::fabs, makeX86FP80X86FP80},
1412 {"llvm.fabs.f80", Intrinsic::fabs, makeX86FP80X86FP80},
1413 {"llvm.minnum.f32", Intrinsic::minnum, makeDoubleDoubleDouble},
1414 {"llvm.minnum.f64", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1415 {"llvm.minnum.f80", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1416 {"llvm.maxnum.f32", Intrinsic::maxnum, makeDoubleDoubleDouble},
1417 {"llvm.maxnum.f64", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1418 {"llvm.maxnum.f80", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1419 {"llvm.minimum.f32", Intrinsic::minimum, makeDoubleDoubleDouble},
1420 {"llvm.minimum.f64", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1421 {"llvm.minimum.f80", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1422 {"llvm.maximum.f32", Intrinsic::maximum, makeDoubleDoubleDouble},
1423 {"llvm.maximum.f64", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1424 {"llvm.maximum.f80", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1425 {"llvm.copysign.f32", Intrinsic::copysign, makeDoubleDoubleDouble},
1426 {"llvm.copysign.f64", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1427 {"llvm.copysign.f80", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1428 {"llvm.floor.f32", Intrinsic::floor, makeDoubleDouble},
1429 {"llvm.floor.f64", Intrinsic::floor, makeX86FP80X86FP80},
1430 {"llvm.floor.f80", Intrinsic::floor, makeX86FP80X86FP80},
1431 {"llvm.ceil.f32", Intrinsic::ceil, makeDoubleDouble},
1432 {"llvm.ceil.f64", Intrinsic::ceil, makeX86FP80X86FP80},
1433 {"llvm.ceil.f80", Intrinsic::ceil, makeX86FP80X86FP80},
1434 {"llvm.trunc.f32", Intrinsic::trunc, makeDoubleDouble},
1435 {"llvm.trunc.f64", Intrinsic::trunc, makeX86FP80X86FP80},
1436 {"llvm.trunc.f80", Intrinsic::trunc, makeX86FP80X86FP80},
1437 {"llvm.rint.f32", Intrinsic::rint, makeDoubleDouble},
1438 {"llvm.rint.f64", Intrinsic::rint, makeX86FP80X86FP80},
1439 {"llvm.rint.f80", Intrinsic::rint, makeX86FP80X86FP80},
1440 {"llvm.nearbyint.f32", Intrinsic::nearbyint, makeDoubleDouble},
1441 {"llvm.nearbyint.f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1442 {"llvm.nearbyin80f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1443 {"llvm.round.f32", Intrinsic::round, makeDoubleDouble},
1444 {"llvm.round.f64", Intrinsic::round, makeX86FP80X86FP80},
1445 {"llvm.round.f80", Intrinsic::round, makeX86FP80X86FP80},
1446 {"llvm.lround.f32", Intrinsic::lround, makeDoubleDouble},
1447 {"llvm.lround.f64", Intrinsic::lround, makeX86FP80X86FP80},
1448 {"llvm.lround.f80", Intrinsic::lround, makeX86FP80X86FP80},
1449 {"llvm.llround.f32", Intrinsic::llround, makeDoubleDouble},
1450 {"llvm.llround.f64", Intrinsic::llround, makeX86FP80X86FP80},
1451 {"llvm.llround.f80", Intrinsic::llround, makeX86FP80X86FP80},
1452 {"llvm.lrint.f32", Intrinsic::lrint, makeDoubleDouble},
1453 {"llvm.lrint.f64", Intrinsic::lrint, makeX86FP80X86FP80},
1454 {"llvm.lrint.f80", Intrinsic::lrint, makeX86FP80X86FP80},
1455 {"llvm.llrint.f32", Intrinsic::llrint, makeDoubleDouble},
1456 {"llvm.llrint.f64", Intrinsic::llrint, makeX86FP80X86FP80},
1457 {"llvm.llrint.f80", Intrinsic::llrint, makeX86FP80X86FP80},
1460 const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1461 {LibFunc_sqrtf, "llvm.sqrt.f32"},
1462 {LibFunc_sqrt, "llvm.sqrt.f64"},
1463 {LibFunc_sqrtl, "llvm.sqrt.f80"},
1464 {LibFunc_sinf, "llvm.sin.f32"},
1465 {LibFunc_sin, "llvm.sin.f64"},
1466 {LibFunc_sinl, "llvm.sin.f80"},
1467 {LibFunc_cosf, "llvm.cos.f32"},
1468 {LibFunc_cos, "llvm.cos.f64"},
1469 {LibFunc_cosl, "llvm.cos.f80"},
1470 {LibFunc_powf, "llvm.pow.f32"},
1471 {LibFunc_pow, "llvm.pow.f64"},
1472 {LibFunc_powl, "llvm.pow.f80"},
1473 {LibFunc_expf, "llvm.exp.f32"},
1474 {LibFunc_exp, "llvm.exp.f64"},
1475 {LibFunc_expl, "llvm.exp.f80"},
1476 {LibFunc_exp2f, "llvm.exp2.f32"},
1477 {LibFunc_exp2, "llvm.exp2.f64"},
1478 {LibFunc_exp2l, "llvm.exp2.f80"},
1479 {LibFunc_logf, "llvm.log.f32"},
1480 {LibFunc_log, "llvm.log.f64"},
1481 {LibFunc_logl, "llvm.log.f80"},
1482 {LibFunc_log10f, "llvm.log10.f32"},
1483 {LibFunc_log10, "llvm.log10.f64"},
1484 {LibFunc_log10l, "llvm.log10.f80"},
1485 {LibFunc_log2f, "llvm.log2.f32"},
1486 {LibFunc_log2, "llvm.log2.f64"},
1487 {LibFunc_log2l, "llvm.log2.f80"},
1488 {LibFunc_fabsf, "llvm.fabs.f32"},
1489 {LibFunc_fabs, "llvm.fabs.f64"},
1490 {LibFunc_fabsl, "llvm.fabs.f80"},
1491 {LibFunc_copysignf, "llvm.copysign.f32"},
1492 {LibFunc_copysign, "llvm.copysign.f64"},
1493 {LibFunc_copysignl, "llvm.copysign.f80"},
1494 {LibFunc_floorf, "llvm.floor.f32"},
1495 {LibFunc_floor, "llvm.floor.f64"},
1496 {LibFunc_floorl, "llvm.floor.f80"},
1497 {LibFunc_fmaxf, "llvm.maxnum.f32"},
1498 {LibFunc_fmax, "llvm.maxnum.f64"},
1499 {LibFunc_fmaxl, "llvm.maxnum.f80"},
1500 {LibFunc_fminf, "llvm.minnum.f32"},
1501 {LibFunc_fmin, "llvm.minnum.f64"},
1502 {LibFunc_fminl, "llvm.minnum.f80"},
1503 {LibFunc_ceilf, "llvm.ceil.f32"},
1504 {LibFunc_ceil, "llvm.ceil.f64"},
1505 {LibFunc_ceill, "llvm.ceil.f80"},
1506 {LibFunc_truncf, "llvm.trunc.f32"},
1507 {LibFunc_trunc, "llvm.trunc.f64"},
1508 {LibFunc_truncl, "llvm.trunc.f80"},
1509 {LibFunc_rintf, "llvm.rint.f32"},
1510 {LibFunc_rint, "llvm.rint.f64"},
1511 {LibFunc_rintl, "llvm.rint.f80"},
1512 {LibFunc_nearbyintf, "llvm.nearbyint.f32"},
1513 {LibFunc_nearbyint, "llvm.nearbyint.f64"},
1514 {LibFunc_nearbyintl, "llvm.nearbyint.f80"},
1515 {LibFunc_roundf, "llvm.round.f32"},
1516 {LibFunc_round, "llvm.round.f64"},
1517 {LibFunc_roundl, "llvm.round.f80"},
1518 };
1520 const char *KnownIntrinsic::get(LibFunc LFunc) {
1521 for (const auto &E : kLibfuncIntrinsics) {
1522 if (E.LFunc == LFunc)
1523 return E.IntrinsicName;
1524 }
1525 return nullptr;
1526 }
1528 const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1529 for (const auto &E : kWidenedIntrinsics) {
1530 if (E.NarrowName == Name)
1531 return &E;
1533 return nullptr;
1536 // Returns the name of the LLVM intrinsic corresponding to the given function.
1537 static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1538 const TargetLibraryInfo &TLI) {
1539 LibFunc LFunc;
1540 if (!TLI.getLibFunc(Fn, LFunc))
1541 return nullptr;
1543 if (const char *Name = KnownIntrinsic::get(LFunc))
1544 return Name;
1546 LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1547 return nullptr;
1548 }
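// For example, a direct call to the libm function `sinf` resolves to
// LibFunc_sinf and maps to "llvm.sin.f32" via the table above, after which it
// can be widened like any other known intrinsic.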
1550 // Try to handle a known function call.
1551 Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1552 CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1553 const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1554 Function *Fn = Call.getCalledFunction();
1555 if (Fn == nullptr)
1556 return nullptr;
1558 Intrinsic::ID WidenedId = Intrinsic::ID();
1559 FunctionType *WidenedFnTy = nullptr;
1560 if (const auto ID = Fn->getIntrinsicID()) {
1561 const auto *Widened = KnownIntrinsic::widen(Fn->getName());
1562 if (Widened) {
1563 WidenedId = Widened->ID;
1564 WidenedFnTy = Widened->MakeFnTy(Context);
1565 } else {
1566 // If we don't know how to widen the intrinsic, we have no choice but to
1567 // call the non-wide version on a truncated shadow and extend again
1568 // afterwards.
1569 WidenedId = ID;
1570 WidenedFnTy = Fn->getFunctionType();
1571 }
1572 } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
1573 // We might have a call to a library function that we can replace with a
1574 // wider Intrinsic.
1575 const auto *Widened = KnownIntrinsic::widen(Name);
1576 assert(Widened && "make sure KnownIntrinsic entries are consistent");
1577 WidenedId = Widened->ID;
1578 WidenedFnTy = Widened->MakeFnTy(Context);
1579 } else {
1580 // This is not a known library function or intrinsic.
1581 return nullptr;
1582 }
1584 // Check that the widened intrinsic is valid.
1585 SmallVector<Intrinsic::IITDescriptor, 8> Table;
1586 getIntrinsicInfoTableEntries(WidenedId, Table);
1587 SmallVector<Type *, 4> ArgTys;
1588 ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1589 [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1590 Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
1591 assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1592 "invalid widened intrinsic");
1593 // For known intrinsic functions, we create a second call to the same
1594 // intrinsic with a different type.
1595 SmallVector<Value *, 4> Args;
1596 // The last operand is the intrinsic itself, skip it.
1597 for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1598 Value *Arg = Call.getOperand(I);
1599 Type *OrigArgTy = Arg->getType();
1600 Type *IntrinsicArgTy = WidenedFnTy->getParamType(I);
1601 if (OrigArgTy == IntrinsicArgTy) {
1602 Args.push_back(Arg); // The arg is passed as is.
1603 continue;
1604 }
1605 Type *ShadowArgTy = Config.getExtendedFPType(Arg->getType());
1606 assert(ShadowArgTy &&
1607 "don't know how to get the shadow value for a non-FT");
1608 Value *Shadow = Map.getShadow(Arg);
1609 if (ShadowArgTy == IntrinsicArgTy) {
1610 // The shadow is the right type for the intrinsic.
1611 assert(Shadow->getType() == ShadowArgTy);
1612 Args.push_back(Shadow);
1613 continue;
1614 }
1615 // There is no intrinsic with this level of precision, truncate the shadow.
1616 Args.push_back(Builder.CreateFPTrunc(Shadow, IntrinsicArgTy));
1617 }
1618 Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
1619 return WidenedFnTy->getReturnType() == ExtendedVT
1620 ? IntrinsicCall
1621 : Builder.CreateFPExt(IntrinsicCall, ExtendedVT);
1622 }
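// Illustrative IR for the machinery above (default `dqq` mapping, so the
// shadow of a double lives in fp128): for
//   %r = call double @llvm.exp.f64(double %x)
// the widest known overload is x86_fp80, so the pass emits roughly
//   %t = fptrunc fp128 %sx to x86_fp80
//   %c = call x86_fp80 @llvm.exp.f80(x86_fp80 %t)
//   %s = fpext x86_fp80 %c to fp128
// where %sx is the shadow of %x.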
1624 // Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1625 // invoke.
1626 Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1627 Type *ExtendedVT,
1628 const TargetLibraryInfo &TLI,
1629 const ValueToShadowMap &Map,
1630 IRBuilder<> &Builder) {
1631 // We cannot look inside inline asm, just extend the result again.
1632 if (Call.isInlineAsm())
1633 return Builder.CreateFPExt(&Call, ExtendedVT);
1635 // Intrinsics and library functions (e.g. sin, exp) are handled
1636 // specially, because we know their semantics and can do better than
1637 // blindly calling them (e.g. compute the sine in the actual shadow domain).
1638 if (Value *V =
1639 maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1640 return V;
1642 // If the return tag matches that of the called function, read the extended
1643 // return value from the shadow ret ptr. Else, just extend the return value.
1644 Value *L =
1645 Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
1646 Value *HasShadowRet = Builder.CreateICmpEQ(
1647 L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
1649 Value *ShadowRetVal = Builder.CreateLoad(
1650 ExtendedVT,
1651 Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
1652 /*isVolatile=*/false);
1653 Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
1654 Builder.CreateFPExt(&Call, ExtendedVT));
1655 ++NumInstrumentedFTCalls;
1656 return Shadow;
1657 }
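// For example (illustrative): for `%r = call float @f()` where @f is
// instrumented, @f leaves &f in __nsan_shadow_ret_tag and its wide result in
// __nsan_shadow_ret_val; the select above picks that value when the tag
// matches the called operand, and falls back to fpext(%r) otherwise (e.g.
// when @f was not instrumented).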
1659 // Creates a shadow value for the given FT value. At that point all operands are
1660 // guaranteed to be available.
1661 Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1662 Instruction &Inst, const TargetLibraryInfo &TLI,
1663 const ValueToShadowMap &Map) {
1664 Type *VT = Inst.getType();
1665 Type *ExtendedVT = Config.getExtendedFPType(VT);
1666 assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1668 if (auto *Load = dyn_cast<LoadInst>(&Inst))
1669 return handleLoad(*Load, VT, ExtendedVT);
1671 if (auto *Call = dyn_cast<CallInst>(&Inst)) {
1672 // Insert after the call.
1673 BasicBlock::iterator It(Inst);
1674 IRBuilder<> Builder(Call->getParent(), ++It);
1675 Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1676 return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
1677 }
1679 if (auto *Invoke = dyn_cast<InvokeInst>(&Inst)) {
1680 // The Invoke terminates the basic block; create a new basic block in
1681 // between the successful invoke and the next block.
1682 BasicBlock *InvokeBB = Invoke->getParent();
1683 BasicBlock *NextBB = Invoke->getNormalDest();
1684 BasicBlock *NewBB =
1685 BasicBlock::Create(Context, "", NextBB->getParent(), NextBB);
1686 Inst.replaceSuccessorWith(NextBB, NewBB);
1688 IRBuilder<> Builder(NewBB);
1689 Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1690 Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
1691 Builder.CreateBr(NextBB);
1692 NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
1693 return Shadow;
1694 }
1696 IRBuilder<> Builder(Inst.getNextNode());
1697 Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1699 if (auto *Trunc = dyn_cast<FPTruncInst>(&Inst))
1700 return handleTrunc(*Trunc, VT, ExtendedVT, Map, Builder);
1701 if (auto *Ext = dyn_cast<FPExtInst>(&Inst))
1702 return handleExt(*Ext, VT, ExtendedVT, Map, Builder);
1704 if (auto *UnaryOp = dyn_cast<UnaryOperator>(&Inst))
1705 return Builder.CreateUnOp(UnaryOp->getOpcode(),
1706 Map.getShadow(UnaryOp->getOperand(0)));
1708 if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
1709 return Builder.CreateBinOp(BinOp->getOpcode(),
1710 Map.getShadow(BinOp->getOperand(0)),
1711 Map.getShadow(BinOp->getOperand(1)));
1713 if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
1714 auto *Cast = cast<CastInst>(&Inst);
1715 return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
1716 ExtendedVT);
1717 }
1719 if (auto *S = dyn_cast<SelectInst>(&Inst))
1720 return Builder.CreateSelect(S->getCondition(),
1721 Map.getShadow(S->getTrueValue()),
1722 Map.getShadow(S->getFalseValue()));
1724 if (auto *Freeze = dyn_cast<FreezeInst>(&Inst))
1725 return Builder.CreateFreeze(Map.getShadow(Freeze->getOperand(0)));
1727 if (auto *Extract = dyn_cast<ExtractElementInst>(&Inst))
1728 return Builder.CreateExtractElement(
1729 Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
1731 if (auto *Insert = dyn_cast<InsertElementInst>(&Inst))
1732 return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
1733 Map.getShadow(Insert->getOperand(1)),
1734 Insert->getOperand(2));
1736 if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst))
1737 return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
1738 Map.getShadow(Shuffle->getOperand(1)),
1739 Shuffle->getShuffleMask());
1740 // TODO: We could make aggregate objects first-class citizens. For now we
1741 // just extend the extracted value.
1742 if (auto *Extract = dyn_cast<ExtractValueInst>(&Inst))
1743 return Builder.CreateFPExt(Extract, ExtendedVT);
1745 if (auto *BC = dyn_cast<BitCastInst>(&Inst))
1746 return Builder.CreateFPExt(BC, ExtendedVT);
1748 report_fatal_error("Unimplemented support for " +
1749 Twine(Inst.getOpcodeName()));
1750 }
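// As a sketch of the simple cases above (illustrative IR, default `dqq`
// mapping): for
//   %sum = fadd float %a, %b
// the pass inserts
//   %s.sum = fadd double %s.a, %s.b
// right after the original instruction, reusing the opcode on the already
// available shadow operands.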
1752 // Creates a shadow value for an instruction that defines a value of FT type.
1753 // FT operands that do not already have shadow values are created recursively.
1754 // The DFS is guaranteed to not loop as phis and arguments already have
1755 // shadows.
1756 void NumericalStabilitySanitizer::maybeCreateShadowValue(
1757 Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1758 Type *VT = Root.getType();
1759 Type *ExtendedVT = Config.getExtendedFPType(VT);
1760 if (ExtendedVT == nullptr)
1761 return; // Not an FT value.
1763 if (Map.hasShadow(&Root))
1764 return; // Shadow already exists.
1766 assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1768 std::vector<Instruction *> DfsStack(1, &Root);
1769 while (!DfsStack.empty()) {
1770 // Ensure that all operands to the instruction have shadows before
1771 // proceeding.
1772 Instruction *I = DfsStack.back();
1773 // The shadow for the instruction might have been created deeper in the DFS,
1774 // see `forward_use_with_two_uses` test.
1775 if (Map.hasShadow(I)) {
1776 DfsStack.pop_back();
1777 continue;
1778 }
1780 bool MissingShadow = false;
1781 for (Value *Op : I->operands()) {
1782 Type *VT = Op->getType();
1783 if (!Config.getExtendedFPType(VT))
1784 continue; // Not an FT value.
1785 if (Map.hasShadow(Op))
1786 continue; // Shadow is already available.
1787 MissingShadow = true;
1788 DfsStack.push_back(cast<Instruction>(Op));
1789 }
1790 if (MissingShadow)
1791 continue; // Process operands and come back to this instruction later.
1793 // All operands have shadows. Create a shadow for the current value.
1794 Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
1795 Map.setShadow(*I, *Shadow);
1796 DfsStack.pop_back();
1797 }
1798 }
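// Sketch of the traversal (hypothetical IR): given
//   %x = fmul float %a, %b
//   %y = fadd float %x, %a
// visiting %y first pushes %x (its shadow is missing), creates s(%x), then
// revisits %y and creates s(%y) = fadd double s(%x), s(%a). Arguments like %a
// already have shadows, so the walk terminates.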
1800 // A floating-point store needs its value and type written to shadow memory.
1801 void NumericalStabilitySanitizer::propagateFTStore(
1802 StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1803 Value *StoredValue = Store.getValueOperand();
1804 IRBuilder<> Builder(&Store);
1805 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1806 const auto Extents = getMemoryExtentsOrDie(VT);
1807 Value *ShadowPtr = Builder.CreateCall(
1808 NsanGetShadowPtrForStore[Extents.ValueType],
1809 {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1811 Value *StoredShadow = Map.getShadow(StoredValue);
1812 if (!Store.getParent()->getParent()->hasOptNone()) {
1813 // Only check stores when optimizing, because non-optimized code generates
1814 // too many stores to the stack, creating false positives.
1815 if (ClCheckStores) {
1816 StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
1817 CheckLoc::makeStore(Store.getPointerOperand()));
1818 ++NumInstrumentedFTStores;
1819 }
1820 }
1822 Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
1823 Store.isVolatile());
1824 }
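// E.g. for `store float %v, ptr %p`, the double shadow of %v is written to
// the shadow memory obtained for %p (after an emitCheck comparison of %v
// against its shadow, when store checking is enabled and the function is
// optimized).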
1826 // A non-FT store needs to invalidate shadow memory. Exceptions are:
1827 // - memory transfers of floating-point data through other pointer types (LLVM
1828 // optimization passes transform `*(float*)a = *(float*)b` into
1829 // `*(i32*)a = *(i32*)b`). These have the same semantics as memcpy.
1830 // - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1831 // ints. Note that this is not really necessary because if the value is
1832 // unknown the framework will re-extend it on load anyway. It just felt
1833 // easier to debug tests with vectors of FTs.
1834 void NumericalStabilitySanitizer::propagateNonFTStore(
1835 StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1836 Value *PtrOp = Store.getPointerOperand();
1837 IRBuilder<> Builder(Store.getNextNode());
1838 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1839 Value *Dst = PtrOp;
1840 TypeSize SlotSize = DL.getTypeStoreSize(VT);
1841 assert(!SlotSize.isScalable() && "unsupported");
1842 const auto LoadSizeBytes = SlotSize.getFixedValue();
1843 Value *ValueSize = Constant::getIntegerValue(
1844 IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1846 ++NumInstrumentedNonFTStores;
1847 Value *StoredValue = Store.getValueOperand();
1848 if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
1849 // TODO: Handle the case when the value is from a phi.
1850 // This is a memory transfer with memcpy semantics. Copy the type and
1851 // value from the source. Note that we cannot use __nsan_copy_values()
1852 // here, because that will not work when there is a write to memory in
1853 // between the load and the store, e.g. in the case of a swap.
1854 Type *ShadowTypeIntTy = Type::getIntNTy(Context, 8 * LoadSizeBytes);
1855 Type *ShadowValueIntTy =
1856 Type::getIntNTy(Context, 8 * kShadowScale * LoadSizeBytes);
1857 IRBuilder<> LoadBuilder(Load->getNextNode());
1858 LoadBuilder.SetCurrentDebugLocation(Store.getDebugLoc());
1859 Value *LoadSrc = Load->getPointerOperand();
1860 // Read the shadow type and value at load time. The type has the same size
1861 // as the FT value; the value has twice its size.
1862 // TODO: cache them to avoid re-creating them when a load is used by
1863 // several stores. Maybe create them like the FT shadows when a load is
1864 // encountered.
1865 Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1866 ShadowTypeIntTy,
1867 LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
1868 /*isVolatile=*/false);
1869 Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1870 ShadowValueIntTy,
1871 LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
1872 /*isVolatile=*/false);
1874 // Write back the shadow type and value at store time.
1875 Builder.CreateAlignedStore(
1876 RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
1877 Align(1),
1878 /*isVolatile=*/false);
1879 Builder.CreateAlignedStore(RawShadowValue,
1880 Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
1881 Align(1),
1882 /*isVolatile=*/false);
1884 ++NumInstrumentedNonFTMemcpyStores;
1885 return;
1886 }
1887 // ClPropagateNonFTConstStoresAsFT is false by default.
1888 if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1889 (C = dyn_cast<Constant>(StoredValue))) {
1890 // This might be an FP constant stored as an int. Bitcast and store if it
1891 // has the appropriate size.
1892 Type *BitcastTy = nullptr; // The FT type to bitcast to.
1893 if (auto *CInt = dyn_cast<ConstantInt>(C)) {
1894 switch (CInt->getType()->getScalarSizeInBits()) {
1895 case 32:
1896 BitcastTy = Type::getFloatTy(Context);
1897 break;
1898 case 64:
1899 BitcastTy = Type::getDoubleTy(Context);
1900 break;
1901 case 80:
1902 BitcastTy = Type::getX86_FP80Ty(Context);
1903 break;
1904 default:
1905 break;
1906 }
1907 } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
1908 const int NumElements =
1909 cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
1910 switch (CDV->getType()->getScalarSizeInBits()) {
1911 case 32:
1912 BitcastTy =
1913 VectorType::get(Type::getFloatTy(Context), NumElements, false);
1914 break;
1915 case 64:
1916 BitcastTy =
1917 VectorType::get(Type::getDoubleTy(Context), NumElements, false);
1918 break;
1919 case 80:
1920 BitcastTy =
1921 VectorType::get(Type::getX86_FP80Ty(Context), NumElements, false);
1922 break;
1923 default:
1924 break;
1925 }
1926 }
1927 if (BitcastTy) {
1928 const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
1929 Value *ShadowPtr = Builder.CreateCall(
1930 NsanGetShadowPtrForStore[Extents.ValueType],
1931 {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
1932 // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1933 Type *ExtVT = Config.getExtendedFPType(BitcastTy);
1934 Value *Shadow =
1935 Builder.CreateFPExt(Builder.CreateBitCast(C, BitcastTy), ExtVT);
1936 Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
1937 Store.isVolatile());
1938 return;
1939 }
1940 }
1941 // All other stores just reset the shadow value to unknown.
1942 Builder.CreateCall(NsanSetUnknownFns.getFallback(), {Dst, ValueSize});
1943 }
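// For example (hypothetical IR): if `store i32 %v, ptr %p` stores a value
// coming from `load i32, ptr %q`, the shadow type and shadow value bytes are
// copied from %q's shadow to %p's shadow; any other i32 store simply marks
// the 4 bytes at %p as unknown via the fallback above.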
1945 void NumericalStabilitySanitizer::propagateShadowValues(
1946 Instruction &Inst, const TargetLibraryInfo &TLI,
1947 const ValueToShadowMap &Map) {
1948 if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
1949 Value *StoredValue = Store->getValueOperand();
1950 Type *VT = StoredValue->getType();
1951 Type *ExtendedVT = Config.getExtendedFPType(VT);
1952 if (ExtendedVT == nullptr)
1953 return propagateNonFTStore(*Store, VT, Map);
1954 return propagateFTStore(*Store, VT, ExtendedVT, Map);
1955 }
1957 if (auto *FCmp = dyn_cast<FCmpInst>(&Inst)) {
1958 emitFCmpCheck(*FCmp, Map);
1959 return;
1960 }
1962 if (auto *CB = dyn_cast<CallBase>(&Inst)) {
1963 maybeAddSuffixForNsanInterface(CB);
1964 if (CallInst *CI = dyn_cast<CallInst>(&Inst))
1965 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
1966 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
1967 instrumentMemIntrinsic(MI);
1968 return;
1969 }
1970 populateShadowStack(*CB, TLI, Map);
1971 return;
1972 }
1974 if (auto *RetInst = dyn_cast<ReturnInst>(&Inst)) {
1975 if (!ClCheckRet)
1976 return;
1978 Value *RV = RetInst->getReturnValue();
1979 if (RV == nullptr)
1980 return; // This is a `ret void`.
1981 Type *VT = RV->getType();
1982 Type *ExtendedVT = Config.getExtendedFPType(VT);
1983 if (ExtendedVT == nullptr)
1984 return; // Not an FT ret.
1985 Value *RVShadow = Map.getShadow(RV);
1986 IRBuilder<> Builder(RetInst);
1988 RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
1989 ++NumInstrumentedFTRets;
1990 // Store tag.
1991 Value *FnAddr =
1992 Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
1993 Builder.CreateStore(FnAddr, NsanShadowRetTag);
1994 // Store value.
1995 Value *ShadowRetValPtr =
1996 Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
1997 Builder.CreateStore(RVShadow, ShadowRetValPtr);
1998 return;
1999 }
2001 if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
2002 Value *V = Insert->getOperand(1);
2003 Type *VT = V->getType();
2004 Type *ExtendedVT = Config.getExtendedFPType(VT);
2005 if (ExtendedVT == nullptr)
2006 return;
2007 IRBuilder<> Builder(Insert);
2008 emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
2009 return;
2010 }
2011 }
2013 // Moves fast math flags from the function to individual instructions, and
2014 // removes the attribute from the function.
2015 // TODO: Make this controllable with a flag.
2016 static void moveFastMathFlags(Function &F,
2017 std::vector<Instruction *> &Instructions) {
2018 FastMathFlags FMF;
2019 #define MOVE_FLAG(attr, setter) \
2020 if (F.getFnAttribute(attr).getValueAsString() == "true") { \
2021 F.removeFnAttr(attr); \
2022 FMF.set##setter(); \
2023 }
2024 MOVE_FLAG("unsafe-fp-math", Fast)
2025 MOVE_FLAG("no-infs-fp-math", NoInfs)
2026 MOVE_FLAG("no-nans-fp-math", NoNaNs)
2027 MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
2028 #undef MOVE_FLAG
2030 for (Instruction *I : Instructions)
2031 if (isa<FPMathOperator>(I))
2032 I->setFastMathFlags(FMF);
2033 }
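// For instance, a function carrying "unsafe-fp-math"="true" loses that
// attribute, and every FPMathOperator in it gets the `fast` flag set on the
// instruction itself, preserving the information at instruction granularity.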
2035 bool NumericalStabilitySanitizer::sanitizeFunction(
2036 Function &F, const TargetLibraryInfo &TLI) {
2037 if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability) ||
2038 F.isDeclaration())
2039 return false;
2041 // This is required to prevent instrumenting the call to __nsan_init from
2042 // within the module constructor.
2043 if (F.getName() == kNsanModuleCtorName)
2044 return false;
2045 SmallVector<Instruction *, 8> AllLoadsAndStores;
2046 SmallVector<Instruction *, 8> LocalLoadsAndStores;
2048 // The instrumentation maintains:
2049 // - for each IR value `v` of floating-point (or vector floating-point) type
2050 // FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
2051 // double for float and f128 for double).
2052 // - A shadow memory, which stores `s(v)` for any `v` that has been stored,
2053 // along with a shadow memory tag, which stores whether the value in the
2054 // corresponding shadow memory is valid. Note that this might be
2055 // incorrect if a non-instrumented function stores to memory, or if
2056 // memory is stored to through a char pointer.
2057 // - A shadow stack, which holds `s(v)` for any floating-point argument `v`
2058 // of a call to an instrumented function. This allows
2059 // instrumented functions to retrieve the shadow values for their
2060 // arguments.
2061 // Because instrumented functions can be called from non-instrumented
2062 // functions, the stack needs to include a tag so that the instrumented
2063 // function knows whether shadow values are available for its
2064 // parameters (i.e. whether it was called by an instrumented function).
2065 // When shadow arguments are not available, they have to be recreated
2066 // by extending the non-shadow arguments to the shadow precision.
2067 // Non-instrumented functions do not modify (or even know about) the
2068 // shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2069 // stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2070 // for the function (we use the address of the function). Both variables
2071 // are thread local.
2072 // Example:
2073 // calls shadow stack tag shadow stack
2074 // =======================================================================
2075 // non_instrumented_1() 0 0
2076 // |
2077 // v
2078 // instrumented_2(float a) 0 0
2079 // |
2080 // v
2081 // instrumented_3(float b, double c) &instrumented_3 s(b),s(c)
2082 // |
2083 // v
2084 // instrumented_4(float d) &instrumented_4 s(d)
2085 // |
2086 // v
2087 // non_instrumented_5(float e) &non_instrumented_5 s(e)
2088 // |
2089 // v
2090 // instrumented_6(float f) &non_instrumented_5 s(e)
2092 // On entry, instrumented_2 checks whether the tag corresponds to its
2093 // function ptr.
2094 // Note that functions reset the tag to 0 after reading shadow parameters.
2095 // This ensures that the function does not erroneously read invalid data if
2096 // called twice in the same stack, once from an instrumented function and
2097 // once from an uninstrumented one. For example, in the following example,
2098 // resetting the tag in (A) ensures that (B) does not reuse the same
2099 // shadow arguments (which would be incorrect).
2100 // instrumented_1(float a)
2101 // |
2102 // v
2103 // instrumented_2(float b) (A)
2104 // |
2105 // v
2106 // non_instrumented_3()
2107 // |
2108 // v
2109 // instrumented_2(float b) (B)
2111 // - A shadow return slot. Any function that returns a floating-point value
2112 // places a shadow return value in __nsan_shadow_ret_val. Again, because
2113 // we might be calling non-instrumented functions, this value is guarded
2114 // by the __nsan_shadow_ret_tag marker indicating which instrumented function
2115 // placed the value in __nsan_shadow_ret_val, so that the caller can check
2116 // that this corresponds to the callee. Both variables are thread local.
2118 // For example, in the following example, the instrumentation in
2119 // `instrumented_1` rejects the shadow return value from `instrumented_3`
2120 // because it is not tagged as expected (`&instrumented_3` instead of
2121 // `&non_instrumented_2`):
2123 // instrumented_1()
2124 // |
2125 // v
2126 // float non_instrumented_2()
2127 // |
2128 // v
2129 // float instrumented_3()
2131 // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2132 // their overload on the shadow type.
2134 // Collect all instructions before processing, as creating shadow values
2135 // creates new instructions inside the function.
2136 std::vector<Instruction *> OriginalInstructions;
2137 for (BasicBlock &BB : F)
2138 for (Instruction &Inst : BB)
2139 OriginalInstructions.emplace_back(&Inst);
2141 moveFastMathFlags(F, OriginalInstructions);
2142 ValueToShadowMap ValueToShadow(Config);
2144 // In the first pass, we create shadow values for all FT function arguments
2145 // and all phis. This ensures that the DFS of the next pass does not have
2146 // any loops.
2147 std::vector<PHINode *> OriginalPhis;
2148 createShadowArguments(F, TLI, ValueToShadow);
2149 for (Instruction *I : OriginalInstructions) {
2150 if (PHINode *Phi = dyn_cast<PHINode>(I)) {
2151 if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
2152 OriginalPhis.push_back(Phi);
2153 ValueToShadow.setShadow(*Phi, *Shadow);
2154 }
2155 }
2156 }
2158 // Create shadow values for all instructions creating FT values.
2159 for (Instruction *I : OriginalInstructions)
2160 maybeCreateShadowValue(*I, TLI, ValueToShadow);
2162 // Propagate shadow values across stores, calls and rets.
2163 for (Instruction *I : OriginalInstructions)
2164 propagateShadowValues(*I, TLI, ValueToShadow);
2166 // The last pass populates shadow phis with shadow values.
2167 for (PHINode *Phi : OriginalPhis) {
2168 PHINode *ShadowPhi = cast<PHINode>(ValueToShadow.getShadow(Phi));
2169 for (unsigned I : seq(Phi->getNumOperands())) {
2170 Value *V = Phi->getOperand(I);
2171 Value *Shadow = ValueToShadow.getShadow(V);
2172 BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
2173 // For some instructions (e.g. invoke), we create the shadow in a separate
2174 // block, different from the block where the original value is created.
2175 // In that case, the shadow phi might need to refer to this block instead
2176 // of the original block.
2177 // Note that this can only happen for instructions as constant shadows are
2178 // always created in the same block.
2179 ShadowPhi->addIncoming(Shadow, IncomingBB);
2180 }
2181 }
2183 return !ValueToShadow.empty();
2184 }
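// Note the pass ordering above: arguments and phis receive shadows first, so
// the later DFS can assume every FT operand either already has a shadow or is
// an instruction it may recurse into; phi inputs are only wired up at the
// very end, once every incoming value has a shadow.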
2186 static uint64_t GetMemOpSize(Value *V) {
2187 uint64_t OpSize = 0;
2188 if (Constant *C = dyn_cast<Constant>(V)) {
2189 auto *CInt = dyn_cast<ConstantInt>(C);
2190 if (CInt && CInt->getValue().getBitWidth() <= 64)
2191 OpSize = CInt->getValue().getZExtValue();
2192 }
2194 return OpSize;
2195 }
2197 // Instrument the memory intrinsics so that they properly modify the shadow
2198 // memory.
2199 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2200 IRBuilder<> Builder(MI);
2201 if (auto *M = dyn_cast<MemSetInst>(MI)) {
2202 FunctionCallee SetUnknownFn =
2203 NsanSetUnknownFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2204 if (SetUnknownFn.getFunctionType()->getNumParams() == 1)
2205 Builder.CreateCall(SetUnknownFn, {/*Address=*/M->getArgOperand(0)});
2206 else
2207 Builder.CreateCall(SetUnknownFn,
2208 {/*Address=*/M->getArgOperand(0),
2209 /*Size=*/Builder.CreateIntCast(M->getArgOperand(2),
2210 IntptrTy, false)});
2212 } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
2213 FunctionCallee CopyFn =
2214 NsanCopyFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2216 if (CopyFn.getFunctionType()->getNumParams() == 2)
2217 Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2218 /*Source=*/M->getArgOperand(1)});
2219 else
2220 Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2221 /*Source=*/M->getArgOperand(1),
2222 /*Size=*/
2223 Builder.CreateIntCast(M->getArgOperand(2),
2224 IntptrTy, false)});
2225 }
2226 return false;
2227 }
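// Sketch of the effect: an `llvm.memcpy.p0.p0.i64(%dst, %src, 16, ...)` is
// preceded by a call to the size-specialized nsan copy helper so the shadow
// bytes travel with the data, while an `llvm.memset` invalidates the
// destination's shadow, since the stored byte pattern has no useful wide FP
// interpretation (helper names are resolved by NsanCopyFns/NsanSetUnknownFns).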
2229 void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2230 Function *Fn = CI->getCalledFunction();
2231 if (Fn == nullptr)
2232 return;
2234 if (!Fn->getName().starts_with("__nsan_"))
2235 return;
2237 if (Fn->getName() == "__nsan_dump_shadow_mem") {
2238 assert(CI->arg_size() == 4 &&
2239 "invalid prototype for __nsan_dump_shadow_mem");
2240 // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2241 // configuration:
2242 // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2243 // | shadow_type_id_for_float
2244 const uint64_t shadow_value_type_ids =
2245 (static_cast<size_t>(Config.byValueType(kLongDouble).getNsanTypeId())
2246 << 16) |
2247 (static_cast<size_t>(Config.byValueType(kDouble).getNsanTypeId())
2248 << 8) |
2249 static_cast<size_t>(Config.byValueType(kFloat).getNsanTypeId());
2250 CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));