//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the instrumentation pass for the numerical sanitizer.
// Conceptually the pass injects shadow computations using higher precision
// types and inserts consistency checks. For details see the paper
// https://arxiv.org/abs/2102.12782.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cstdint>
using namespace llvm;

#define DEBUG_TYPE "nsan"

STATISTIC(NumInstrumentedFTLoads,
          "Number of instrumented floating-point loads");
STATISTIC(NumInstrumentedFTCalls,
          "Number of instrumented floating-point calls");
STATISTIC(NumInstrumentedFTRets,
          "Number of instrumented floating-point returns");
STATISTIC(NumInstrumentedFTStores,
          "Number of instrumented floating-point stores");
STATISTIC(NumInstrumentedNonFTStores,
          "Number of instrumented non floating-point stores");
STATISTIC(
    NumInstrumentedNonFTMemcpyStores,
    "Number of instrumented non floating-point stores with memcpy semantics");
STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
// Using smaller shadow types can help improve speed. For example, `dlq` is 3x
// slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
// `dqq`.
static cl::opt<std::string> ClShadowMapping(
    "nsan-shadow-type-mapping", cl::init("dqq"),
    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
             "ppc_fp128 (extended double) respectively. The default is to "
             "shadow `float` as `double`, and `double` and `x86_fp80` as "
             "`fp128`"),
    cl::Hidden);
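// For example, the mapping `dlq` shadows `float` as `double`, `double` as
// `x86_fp80`, and `long double` as `fp128`.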
static cl::opt<bool>
    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
                     cl::desc("Instrument floating-point comparisons"),
                     cl::Hidden);

static cl::opt<std::string> ClCheckFunctionsFilter(
    "check-functions-filter",
    cl::desc("Only emit checks for arguments of functions "
             "whose names match the given regular expression"),
    cl::value_desc("regex"));
static cl::opt<bool> ClTruncateFCmpEq(
    "nsan-truncate-fcmp-eq", cl::init(true),
    cl::desc(
        "This flag controls the behaviour of fcmp equality comparisons. "
        "For equality comparisons such as `x == 0.0f`, we can perform the "
        "shadow check in the shadow (`(x_shadow == 0.0) == (x == 0.0f)`) or "
        "app domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
        "catch the case when `x_shadow` is accurate enough (and therefore "
        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
        "both `x` and `x_shadow` are not"),
    cl::Hidden);
// When there is external, uninstrumented code writing to memory, the shadow
// memory can get out of sync with the application memory. Enabling this flag
// emits consistency checks for loads to catch this situation.
// When everything is instrumented, this is not strictly necessary because any
// load should have a corresponding store, but it can help debug cases when the
// framework did a bad job at tracking shadow memory modifications by failing on
// load rather than store.
// TODO: provide a way to resume computations from the FT value when the load
// is inconsistent. This ensures that further computations are not polluted.
static cl::opt<bool> ClCheckLoads("nsan-check-loads",
                                  cl::desc("Check floating-point loads"),
                                  cl::Hidden);

static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
                                   cl::desc("Check floating-point stores"),
                                   cl::Hidden);

static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
                                cl::desc("Check floating-point return values"),
                                cl::Hidden);
// LLVM may store constant floats as bitcasted ints.
// It's not really necessary to shadow such stores:
// if the shadow value is unknown, the framework will re-extend it on load
// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
// impossible to determine the floating-point type based on the size.
// However, for debugging purposes it can be useful to model such stores.
static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
    "nsan-propagate-non-ft-const-stores-as-ft",
    cl::desc(
        "Propagate non floating-point const stores as floating point values. "
        "For debugging purposes only"),
    cl::Hidden);

constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
constexpr StringLiteral kNsanInitName("__nsan_init");

// The following values must be kept in sync with the runtime.
constexpr int kShadowScale = 2;
constexpr int kMaxVectorWidth = 8;
constexpr int kMaxNumArgs = 128;
constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
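// Note: with `kShadowScale == 2`, each application FP byte is backed by two
// shadow bytes; MappingConfig below rejects any mapping whose shadow type
// would not fit in that budget (e.g. fp128 exactly fills the slot of an
// 8-byte double).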
namespace {

// Defines the characteristics (type id, type, and floating-point semantics)
// attached for all possible shadow types.
class ShadowTypeConfig {
public:
  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);

  // The LLVM Type corresponding to the shadow type.
  virtual Type *getType(LLVMContext &Context) const = 0;

  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
  virtual char getNsanTypeId() const = 0;

  virtual ~ShadowTypeConfig() = default;
};
template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig {
public:
  char getNsanTypeId() const override { return NsanTypeId; }
  static constexpr const char kNsanTypeId = NsanTypeId;
};

// `double` (`d`) shadow type.
class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getDoubleTy(Context);
  }
};

// `x86_fp80` (`l`) shadow type: X86 long double.
class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getX86_FP80Ty(Context);
  }
};

// `fp128` (`q`) shadow type.
class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getFP128Ty(Context);
  }
};

// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
  Type *getType(LLVMContext &Context) const override {
    return Type::getPPC_FP128Ty(Context);
  }
};
// Creates a ShadowTypeConfig given its type id.
std::unique_ptr<ShadowTypeConfig>
ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
  switch (TypeId) {
  case F64ShadowConfig::kNsanTypeId:
    return std::make_unique<F64ShadowConfig>();
  case F80ShadowConfig::kNsanTypeId:
    return std::make_unique<F80ShadowConfig>();
  case F128ShadowConfig::kNsanTypeId:
    return std::make_unique<F128ShadowConfig>();
  case PPC128ShadowConfig::kNsanTypeId:
    return std::make_unique<PPC128ShadowConfig>();
  }
  report_fatal_error("nsan: invalid shadow type id '" + Twine(TypeId) + "'");
}
// An enum corresponding to shadow value types. Used as indices in arrays, so
// not an `enum class`.
enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };

// If `FT` corresponds to a primitive FTValueType, return it.
static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
  if (FT->isFloatTy())
    return kFloat;
  if (FT->isDoubleTy())
    return kDouble;
  if (FT->isX86_FP80Ty())
    return kLongDouble;
  return {};
}
// Returns the LLVM type for an FTValueType.
static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
  switch (VT) {
  case kFloat:
    return Type::getFloatTy(Context);
  case kDouble:
    return Type::getDoubleTy(Context);
  case kLongDouble:
    return Type::getX86_FP80Ty(Context);
  case kNumValueTypes:
    return nullptr;
  }
  llvm_unreachable("Unhandled FTValueType enum");
}

// Returns the type name for an FTValueType.
static const char *typeNameFromFTValueType(FTValueType VT) {
  switch (VT) {
  case kFloat:
    return "float";
  case kDouble:
    return "double";
  case kLongDouble:
    return "longdouble";
  case kNumValueTypes:
    return nullptr;
  }
  llvm_unreachable("Unhandled FTValueType enum");
}
// A specific mapping configuration of application type to shadow type for nsan
// (see -nsan-shadow-type-mapping flag).
class MappingConfig {
public:
  explicit MappingConfig(LLVMContext &C) : Context(C) {
    if (ClShadowMapping.size() != 3)
      report_fatal_error("Invalid nsan mapping: " + Twine(ClShadowMapping));
    unsigned ShadowTypeSizeBits[kNumValueTypes];
    for (int VT = 0; VT < kNumValueTypes; ++VT) {
      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
      if (!Config)
        report_fatal_error("Failed to get ShadowTypeConfig for " +
                           Twine(ClShadowMapping[VT]));
      const unsigned AppTypeSize =
          typeFromFTValueType(static_cast<FTValueType>(VT), Context)
              ->getScalarSizeInBits();
      const unsigned ShadowTypeSize =
          Config->getType(Context)->getScalarSizeInBits();
      // Check that the shadow type size is at most kShadowScale times the
      // application type size, so that shadow memory computations are valid.
      if (ShadowTypeSize > kShadowScale * AppTypeSize)
        report_fatal_error("Invalid nsan mapping f" + Twine(AppTypeSize) +
                           "->f" + Twine(ShadowTypeSize) +
                           ": The shadow type size should be at most " +
                           Twine(kShadowScale) +
                           " times the application type size");
      ShadowTypeSizeBits[VT] = ShadowTypeSize;
      Configs[VT] = std::move(Config);
    }

    // Check that the mapping is monotonous. This is required because if one
    // does an fpextend of `float->long double` in application code, nsan is
    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
    // shadow code. This will fail in `qql` mode, since nsan would be
    // fpextending `f128->long double`, which is invalid.
    // TODO: Relax this.
    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
      report_fatal_error("Invalid nsan mapping: { float->f" +
                         Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
                         Twine(ShadowTypeSizeBits[kDouble]) +
                         "; long double->f" +
                         Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
  }
  const ShadowTypeConfig &byValueType(FTValueType VT) const {
    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
    return *Configs[VT];
  }

  // Returns the extended shadow type for a given application type.
  Type *getExtendedFPType(Type *FT) const {
    if (const auto VT = ftValueTypeFromType(FT))
      return Configs[*VT]->getType(Context);
    if (FT->isVectorTy()) {
      auto *VecTy = cast<VectorType>(FT);
      // TODO: add support for scalable vector types.
      if (VecTy->isScalableTy())
        return nullptr;
      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
      return ExtendedScalar
                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
                 : nullptr;
    }
    return nullptr;
  }
private:
  LLVMContext &Context;
  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
};

// The memory extents of a type specify how many elements of a given
// FTValueType need to be stored when storing this type.
struct MemoryExtents {
  FTValueType ValueType;
  uint64_t NumElts;
};

static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
  if (const auto VT = ftValueTypeFromType(FT))
    return {*VT, 1};
  if (auto *VecTy = dyn_cast<VectorType>(FT)) {
    const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
    return {ScalarExtents.ValueType,
            ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
  }
  llvm_unreachable("invalid value type");
}
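// For example, a `<4 x float>` store has extents {kFloat, 4}: the runtime is
// asked for shadow memory covering four shadowed floats.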
// The location of a check. Passed as parameters to runtime checking functions.
class CheckLoc {
public:
  // Creates a location that references an application memory location.
  static CheckLoc makeStore(Value *Address) {
    CheckLoc Result(kStore);
    Result.Address = Address;
    return Result;
  }
  static CheckLoc makeLoad(Value *Address) {
    CheckLoc Result(kLoad);
    Result.Address = Address;
    return Result;
  }

  // Creates a location that references an argument, given by id.
  static CheckLoc makeArg(int ArgId) {
    CheckLoc Result(kArg);
    Result.ArgId = ArgId;
    return Result;
  }

  // Creates a location that references the return value of a function.
  static CheckLoc makeRet() { return CheckLoc(kRet); }

  // Creates a location that references a vector insert.
  static CheckLoc makeInsert() { return CheckLoc(kInsert); }

  // Returns the CheckType of location this refers to, as an integer-typed LLVM
  // IR value.
  Value *getType(LLVMContext &C) const {
    return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
  }

  // Returns a CheckType-specific value representing details of the location
  // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
  // IR value.
  Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
    switch (CheckTy) {
    case kUnknown:
      llvm_unreachable("unknown type");
    case kRet:
    case kInsert:
      return ConstantInt::get(IntptrTy, 0);
    case kArg:
      return ConstantInt::get(IntptrTy, ArgId);
    case kLoad:
    case kStore:
      return Builder.CreatePtrToInt(Address, IntptrTy);
    }
    llvm_unreachable("Unhandled CheckType enum");
  }
private:
  // Must be kept in sync with the runtime,
  // see compiler-rt/lib/nsan/nsan_stats.h
  enum CheckType {
    kUnknown = 0,
    kRet,
    kArg,
    kLoad,
    kStore,
    kInsert,
  };
  explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}

  Value *Address = nullptr;
  const CheckType CheckTy;
  int ArgId = -1;
};
// A map of LLVM IR values to shadow LLVM IR values.
class ValueToShadowMap {
public:
  explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}

  ValueToShadowMap(const ValueToShadowMap &) = delete;
  ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;

  // Sets the shadow value for a value. Asserts that the value does not already
  // have a shadow.
  void setShadow(Value &V, Value &Shadow) {
    [[maybe_unused]] const bool Inserted = Map.try_emplace(&V, &Shadow).second;
    LLVM_DEBUG({
      if (!Inserted) {
        if (auto *I = dyn_cast<Instruction>(&V))
          errs() << I->getFunction()->getName() << ": ";
        errs() << "duplicate shadow (" << &V << "): ";
        V.dump();
      }
    });
    assert(Inserted && "duplicate shadow");
  }

  // Returns true if the value already has a shadow (including if the value is
  // a constant). If true, calling getShadow() is valid.
  bool hasShadow(Value *V) const {
    return isa<Constant>(V) || (Map.find(V) != Map.end());
  }

  // Returns the shadow value for a given value. Asserts that the value has
  // a shadow value. Lazily creates shadows for constant values.
  Value *getShadow(Value *V) const {
    if (Constant *C = dyn_cast<Constant>(V))
      return getShadowConstant(C);
    return Map.find(V)->second;
  }

  bool empty() const { return Map.empty(); }
private:
  // Extends a constant application value to its shadow counterpart.
  APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
    bool LosesInfo = false;
    CV.convert(To, APFloatBase::rmTowardZero, &LosesInfo);
    return CV;
  }

  // Returns the shadow constant for the given application constant.
  Constant *getShadowConstant(Constant *C) const {
    if (UndefValue *U = dyn_cast<UndefValue>(C)) {
      return UndefValue::get(Config.getExtendedFPType(U->getType()));
    }
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
      // Floating-point constants.
      Type *Ty = Config.getExtendedFPType(CFP->getType());
      return ConstantFP::get(
          Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
    }
    // Vector, array, or aggregate constants.
    if (C->getType()->isVectorTy()) {
      SmallVector<Constant *, 8> Elements;
      for (int I = 0, E = cast<VectorType>(C->getType())
                              ->getElementCount()
                              .getFixedValue();
           I < E; ++I)
        Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
      return ConstantVector::get(Elements);
    }
    llvm_unreachable("unimplemented");
  }

  const MappingConfig &Config;
  DenseMap<Value *, Value *> Map;
};
class NsanMemOpFn {
public:
  NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized, StringRef Fallback,
              size_t NumArgs);
  FunctionCallee getFunctionFor(uint64_t MemOpSize) const;
  FunctionCallee getFallback() const;

private:
  SmallVector<FunctionCallee> Funcs;
  size_t NumSizedFuncs;
};
NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
                         StringRef Fallback, size_t NumArgs) {
  LLVMContext &Ctx = M.getContext();
  AttributeList Attr;
  Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
  Type *PtrTy = PointerType::getUnqual(Ctx);
  Type *VoidTy = Type::getVoidTy(Ctx);
  IntegerType *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
  FunctionType *SizedFnTy = nullptr;

  NumSizedFuncs = Sized.size();

  // The first entry is the fallback function.
  if (NumArgs == 3) {
    Funcs.push_back(
        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, PtrTy, IntptrTy));
    SizedFnTy = FunctionType::get(VoidTy, {PtrTy, PtrTy}, false);
  } else if (NumArgs == 2) {
    Funcs.push_back(
        M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, IntptrTy));
    SizedFnTy = FunctionType::get(VoidTy, {PtrTy}, false);
  } else {
    llvm_unreachable("Unexpected number of arguments for sized functions");
  }

  for (size_t i = 0; i < NumSizedFuncs; ++i)
    Funcs.push_back(M.getOrInsertFunction(Sized[i], SizedFnTy, Attr));
}

FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const {
  // `Funcs` holds the fallback at index 0 followed by the sized variants, so
  // the indexing below assumes at least four entries.
  assert(NumSizedFuncs >= 3 && "Unexpected number of sized functions");

  size_t Idx =
      MemOpSize == 4 ? 1 : (MemOpSize == 8 ? 2 : (MemOpSize == 16 ? 3 : 0));

  return Funcs[Idx];
}

FunctionCallee NsanMemOpFn::getFallback() const { return Funcs[0]; }
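// For example, NsanCopyFns below registers "__nsan_copy_4/8/16" as the sized
// variants and "__nsan_copy_values" as the fallback: getFunctionFor(16)
// resolves to __nsan_copy_16, while any other size falls back to
// __nsan_copy_values, which takes the size as a trailing argument.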
/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
/// API function declarations into the module if they don't exist already.
/// Instantiating ensures the __nsan_init function is in the list of global
/// constructors for the module.
class NumericalStabilitySanitizer {
public:
  NumericalStabilitySanitizer(Module &M);
  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);

private:
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  void maybeAddSuffixForNsanInterface(CallBase *CI);
  bool addrPointsToConstantData(Value *Addr);
  void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
                              ValueToShadowMap &Map);
  Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
                                                const TargetLibraryInfo &TLI,
                                                const ValueToShadowMap &Map);
  PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
  void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
                             ValueToShadowMap &Map);

  void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
                           const ValueToShadowMap &Map);

  void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
                             const ValueToShadowMap &Map);
  Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                   CheckLoc Loc);
  Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
                           CheckLoc Loc);
  void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);

  // Value creation handlers.
  Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
  Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                        const TargetLibraryInfo &TLI,
                        const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
                                  const TargetLibraryInfo &TLI,
                                  const ValueToShadowMap &Map,
                                  IRBuilder<> &Builder);
  Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
                     const ValueToShadowMap &Map, IRBuilder<> &Builder);
  Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
                   const ValueToShadowMap &Map, IRBuilder<> &Builder);

  // Value propagation handlers.
  void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
                        const ValueToShadowMap &Map);
  void propagateNonFTStore(StoreInst &Store, Type *VT,
                           const ValueToShadowMap &Map);

  const DataLayout &DL;
  LLVMContext &Context;
  MappingConfig Config;
  IntegerType *IntptrTy = nullptr;

  // TODO: Use std::array instead?
  FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
  FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};

  NsanMemOpFn NsanCopyFns;
  NsanMemOpFn NsanSetUnknownFns;

  FunctionCallee NsanGetRawShadowTypePtr;
  FunctionCallee NsanGetRawShadowPtr;
  GlobalValue *NsanShadowRetTag = nullptr;

  Type *NsanShadowRetType = nullptr;
  GlobalValue *NsanShadowRetPtr = nullptr;

  GlobalValue *NsanShadowArgsTag = nullptr;

  Type *NsanShadowArgsType = nullptr;
  GlobalValue *NsanShadowArgsPtr = nullptr;

  std::optional<Regex> CheckFunctionsFilter;
};
} // end anonymous namespace
PreservedAnalyses
NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
  getOrCreateSanitizerCtorAndInitFunctions(
      M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
      /*InitArgs=*/{},
      // This callback is invoked when the functions are created the first
      // time. Hook them into the global ctors list in that case:
      [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });

  NumericalStabilitySanitizer Nsan(M);
  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M)
    Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));

  return PreservedAnalyses::none();
}
static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
  return dyn_cast<GlobalValue>(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
                              nullptr, Name, nullptr,
                              GlobalVariable::InitialExecTLSModel);
  }));
}
NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
    : DL(M.getDataLayout()), Context(M.getContext()), Config(Context),
      NsanCopyFns(M, {"__nsan_copy_4", "__nsan_copy_8", "__nsan_copy_16"},
                  "__nsan_copy_values", /*NumArgs=*/3),
      NsanSetUnknownFns(M,
                        {"__nsan_set_value_unknown_4",
                         "__nsan_set_value_unknown_8",
                         "__nsan_set_value_unknown_16"},
                        "__nsan_set_value_unknown", /*NumArgs=*/2) {
  IntptrTy = DL.getIntPtrType(Context);
  Type *PtrTy = PointerType::getUnqual(Context);
  Type *Int32Ty = Type::getInt32Ty(Context);
  Type *Int1Ty = Type::getInt1Ty(Context);
  Type *VoidTy = Type::getVoidTy(Context);

  AttributeList Attr;
  Attr = Attr.addFnAttribute(Context, Attribute::NoUnwind);
  // Initialize the runtime values (functions and global variables).
  for (int I = 0; I < kNumValueTypes; ++I) {
    const FTValueType VT = static_cast<FTValueType>(I);
    const char *VTName = typeNameFromFTValueType(VT);
    Type *VTTy = typeFromFTValueType(VT, Context);

    // Load/store.
    const std::string GetterPrefix =
        std::string("__nsan_get_shadow_ptr_for_") + VTName;
    NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
        GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
    NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
        GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);

    // Check.
    const auto &ShadowConfig = Config.byValueType(VT);
    Type *ShadowTy = ShadowConfig.getType(Context);
    NsanCheckValue[VT] =
        M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
                                  "_" + ShadowConfig.getNsanTypeId(),
                              Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
    NsanFCmpFail[VT] = M.getOrInsertFunction(
        std::string("__nsan_fcmp_fail_") + VTName + "_" +
            ShadowConfig.getNsanTypeId(),
        Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
  }

  // TODO: Add attributes nofree, nosync, readnone, readonly.
  NsanGetRawShadowTypePtr = M.getOrInsertFunction(
      "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
  NsanGetRawShadowPtr = M.getOrInsertFunction(
      "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);

  NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);

  NsanShadowRetType = ArrayType::get(Type::getInt8Ty(Context),
                                     kMaxVectorWidth * kMaxShadowTypeSizeBytes);
  NsanShadowRetPtr =
      createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);

  NsanShadowArgsTag =
      createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);

  NsanShadowArgsType =
      ArrayType::get(Type::getInt8Ty(Context),
                     kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);

  NsanShadowArgsPtr =
      createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);

  if (!ClCheckFunctionsFilter.empty()) {
    Regex R = Regex(ClCheckFunctionsFilter);
    std::string RegexError;
    assert(R.isValid(RegexError));
    CheckFunctionsFilter = std::move(R);
  }
}
// Returns true if the given LLVM Value points to constant data (typically, a
// global variable reference).
bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
  // If this is a GEP, just analyze its pointer operand.
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
    Addr = GEP->getPointerOperand();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr))
    return GV->isConstant();
  return false;
}
// This instruments the function entry to create shadow arguments.
// Pseudocode:
//   if (this_fn_ptr == __nsan_shadow_args_tag) {
//     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
//     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
//     ...
//     __nsan_shadow_args_tag = 0;
//   } else {
//     s(arg0) = fext(arg0);
//     s(arg1) = fext(arg1);
//     ...
//   }
void NumericalStabilitySanitizer::createShadowArguments(
    Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
  assert(!F.getIntrinsicID() && "found a definition of an intrinsic");

  // Do not bother if there are no FP args.
  if (all_of(F.args(), [this](const Argument &Arg) {
        return Config.getExtendedFPType(Arg.getType()) == nullptr;
      }))
    return;

  IRBuilder<> Builder(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt());
  // The function has shadow args if the shadow args tag matches the function
  // address.
  Value *HasShadowArgs = Builder.CreateICmpEQ(
      Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
      Builder.CreatePtrToInt(&F, IntptrTy));

  unsigned ShadowArgsOffsetBytes = 0;
  for (Argument &Arg : F.args()) {
    Type *VT = Arg.getType();
    Type *ExtendedVT = Config.getExtendedFPType(VT);
    if (ExtendedVT == nullptr)
      continue; // Not an FT value.
    Value *L = Builder.CreateAlignedLoad(
        ExtendedVT,
        Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
                                   ShadowArgsOffsetBytes),
        Align(1), /*isVolatile=*/false);
    Value *Shadow = Builder.CreateSelect(HasShadowArgs, L,
                                         Builder.CreateFPExt(&Arg, ExtendedVT));
    Map.setShadow(Arg, *Shadow);
    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
    assert(!SlotSize.isScalable() && "unsupported");
    ShadowArgsOffsetBytes += SlotSize;
  }
  Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
}
// Returns true if the instrumentation should emit code to check arguments
// before a function call.
static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
                            const std::optional<Regex> &CheckFunctionsFilter) {
  Function *Fn = CI.getCalledFunction();

  if (CheckFunctionsFilter) {
    // Skip checking args of indirect calls.
    if (Fn == nullptr)
      return false;
    if (CheckFunctionsFilter->match(Fn->getName()))
      return true;
    return false;
  }

  if (Fn == nullptr)
    return true; // Always check args of indirect calls.

  // Never check nsan functions, the user called them for a reason.
  if (Fn->getName().starts_with("__nsan_"))
    return false;

  const auto ID = Fn->getIntrinsicID();
  LibFunc LFunc = LibFunc::NumLibFuncs;
  // Always check args of unknown functions.
  if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
    return true;

  // Do not check args of an `fabs` call that is used for a comparison.
  // This is typically used for `fabs(a-b) < tolerance`, where what matters is
  // the result of the comparison, which is already caught by the fcmp checks.
  if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
      LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
    for (const auto &U : CI.users())
      if (isa<CmpInst>(U))
        return false;

  return true; // Default is check.
}
// Populates the shadow call stack (which contains shadow values for every
// floating-point parameter to the function).
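// The protocol mirrors createShadowArguments above: the caller stores the
// shadow values into __nsan_shadow_args and tags them with the callee address,
// so the callee can tell whether the slots were populated for this call.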
void NumericalStabilitySanitizer::populateShadowStack(
    CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
  // Do not create a shadow stack for inline asm.
  if (CI.isInlineAsm())
    return;

  // Do not bother if there are no FP args.
  if (all_of(CI.operands(), [this](const Value *Arg) {
        return Config.getExtendedFPType(Arg->getType()) == nullptr;
      }))
    return;

  IRBuilder<> Builder(&CI);
  SmallVector<Value *, 8> ArgShadows;
  const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
  for (auto [ArgIdx, Arg] : enumerate(CI.operands())) {
    if (Config.getExtendedFPType(Arg->getType()) == nullptr)
      continue; // Not an FT value.
    Value *ArgShadow = Map.getShadow(Arg);
    ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder,
                                                     CheckLoc::makeArg(ArgIdx))
                                         : ArgShadow);
  }

  // Do not create shadow stacks for intrinsics/known lib funcs.
  if (Function *Fn = CI.getCalledFunction()) {
    LibFunc LFunc;
    if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc))
      return;
  }

  // Set the shadow stack tag.
  Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag);
  TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0);

  unsigned ShadowArgId = 0;
  for (const Value *Arg : CI.operands()) {
    Type *VT = Arg->getType();
    Type *ExtendedVT = Config.getExtendedFPType(VT);
    if (ExtendedVT == nullptr)
      continue; // Not an FT value.
    Builder.CreateAlignedStore(
        ArgShadows[ShadowArgId++],
        Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
                                   ShadowArgsOffsetBytes),
        Align(1), /*isVolatile=*/false);
    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
    assert(!SlotSize.isScalable() && "unsupported");
    ShadowArgsOffsetBytes += SlotSize;
  }
}
// Internal part of emitCheck(). Returns a value that indicates whether
// computation should continue with the shadow or resume by re-fextending the
// value.
enum class ContinuationType { // Keep in sync with runtime.
  ContinueWithShadow = 0,
  ResumeFromValue = 1,
};
Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
                                                      IRBuilder<> &Builder,
                                                      CheckLoc Loc) {
  // Do not emit checks for constant values, this is redundant.
  if (isa<Constant>(V))
    return ConstantInt::get(
        Builder.getInt32Ty(),
        static_cast<int>(ContinuationType::ContinueWithShadow));

  Type *Ty = V->getType();
  if (const auto VT = ftValueTypeFromType(Ty))
    return Builder.CreateCall(
        NsanCheckValue[*VT],
        {V, ShadowV, Loc.getType(Context), Loc.getValue(IntptrTy, Builder)});

  if (Ty->isVectorTy()) {
    auto *VecTy = cast<VectorType>(Ty);
    // We currently skip scalable vector types in MappingConfig,
    // thus we should not encounter any such types here.
    assert(!VecTy->isScalableTy() &&
           "Scalable vector types are not supported yet");
    Value *CheckResult = nullptr;
    for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
      // We resume if any element resumes. Another option would be to create a
      // vector shuffle with the array of ContinueWithShadow, but that is too
      // complex.
      Value *ExtractV = Builder.CreateExtractElement(V, I);
      Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    return CheckResult;
  }
  if (Ty->isArrayTy()) {
    Value *CheckResult = nullptr;
    for (auto I : seq(Ty->getArrayNumElements())) {
      Value *ExtractV = Builder.CreateExtractValue(V, I);
      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    return CheckResult;
  }
  if (Ty->isStructTy()) {
    Value *CheckResult = nullptr;
    for (auto I : seq(Ty->getStructNumElements())) {
      if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
        continue; // Only check FT values.
      Value *ExtractV = Builder.CreateExtractValue(V, I);
      Value *ExtractShadowV = Builder.CreateExtractValue(ShadowV, I);
      Value *ComponentCheckResult =
          emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
      CheckResult = CheckResult
                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
                        : ComponentCheckResult;
    }
    if (!CheckResult)
      return ConstantInt::get(
          Builder.getInt32Ty(),
          static_cast<int>(ContinuationType::ContinueWithShadow));
    return CheckResult;
  }

  llvm_unreachable("not implemented");
}
// Inserts a runtime check of V against its shadow value ShadowV.
// We check values whenever they escape: on return, call, stores, and
// insertvalue.
// Returns the shadow value that should be used to continue the computations,
// depending on the answer from the runtime.
// TODO: Should we check on select? phi?
Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
                                              IRBuilder<> &Builder,
                                              CheckLoc Loc) {
  // Do not emit checks for constant values, this is redundant.
  if (isa<Constant>(V))
    return ShadowV;

  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
    Function *F = Inst->getFunction();
    if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) {
      return ShadowV;
    }
  }

  Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
  Value *ICmpEQ = Builder.CreateICmpEQ(
      CheckResult,
      ConstantInt::get(Builder.getInt32Ty(),
                       static_cast<int>(ContinuationType::ResumeFromValue)));
  return Builder.CreateSelect(
      ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())),
      ShadowV);
}
// Inserts a check that fcmps on shadow values are consistent with those on
// base values.
void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
                                                const ValueToShadowMap &Map) {
  if (!ClInstrumentFCmp)
    return;

  Function *F = FCmp.getFunction();
  if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
    return;

  Value *LHS = FCmp.getOperand(0);
  if (Config.getExtendedFPType(LHS->getType()) == nullptr)
    return;
  Value *RHS = FCmp.getOperand(1);

  // Split the basic block. On mismatch, we'll jump to the new basic block with
  // a call to the runtime for error reporting.
  BasicBlock *FCmpBB = FCmp.getParent();
  BasicBlock *NextBB = FCmpBB->splitBasicBlock(FCmp.getNextNode());
  // Remove the newly created terminator unconditional branch.
  FCmpBB->back().eraseFromParent();
  BasicBlock *FailBB =
      BasicBlock::Create(Context, "", FCmpBB->getParent(), NextBB);

  // Create the shadow fcmp and comparison between the fcmps.
  IRBuilder<> FCmpBuilder(FCmpBB);
  FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
  Value *ShadowLHS = Map.getShadow(LHS);
  Value *ShadowRHS = Map.getShadow(RHS);
  // See comment on ClTruncateFCmpEq.
  if (FCmp.isEquality() && ClTruncateFCmpEq) {
    Type *Ty = ShadowLHS->getType();
    ShadowLHS = FCmpBuilder.CreateFPExt(
        FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
    ShadowRHS = FCmpBuilder.CreateFPExt(
        FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
  }
  Value *ShadowFCmp =
      FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
  Value *OriginalAndShadowFcmpMatch =
      FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);

  if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
    // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
    // where an element is true if the corresponding elements in original and
    // shadow are the same. We want all elements to be 1.
    OriginalAndShadowFcmpMatch =
        FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
  }

  // Use MDBuilder(Context).createLikelyBranchWeights() because "match" is the
  // common case.
  FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB,
                           MDBuilder(Context).createLikelyBranchWeights());

  // Fill in FailBB.
  IRBuilder<> FailBuilder(FailBB);
  FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());

  const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
                             &FailBuilder](Value *L, Value *R, Value *ShadowL,
                                           Value *ShadowR, Value *Result,
                                           Value *ShadowResult) {
    Type *FT = L->getType();
    FunctionCallee *Callee = nullptr;
    if (FT->isFloatTy()) {
      Callee = &(NsanFCmpFail[kFloat]);
    } else if (FT->isDoubleTy()) {
      Callee = &(NsanFCmpFail[kDouble]);
    } else if (FT->isX86_FP80Ty()) {
      // TODO: make NsanFCmpFailLongDouble work.
      Callee = &(NsanFCmpFail[kDouble]);
      L = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
      R = FailBuilder.CreateFPTrunc(R, Type::getDoubleTy(Context));
    } else {
      llvm_unreachable("not implemented");
    }
    FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
                                     ConstantInt::get(FCmpBuilder.getInt32Ty(),
                                                      FCmp.getPredicate()),
                                     Result, ShadowResult});
  };
  if (LHS->getType()->isVectorTy()) {
    for (int I = 0, E = cast<VectorType>(LHS->getType())
                            ->getElementCount()
                            .getFixedValue();
         I < E; ++I) {
      Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
      Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
      Value *ExtractShadowLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
      Value *ExtractShadowRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
      Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
      Value *ExtractShadowFCmp =
          FailBuilder.CreateExtractElement(ShadowFCmp, I);
      EmitFailCall(ExtractLHS, ExtractRHS, ExtractShadowLHS, ExtractShadowRHS,
                   ExtractFCmp, ExtractShadowFCmp);
    }
  } else {
    EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
  }
  FailBuilder.CreateBr(NextBB);

  ++NumInstrumentedFCmp;
}
// Creates a shadow phi value for any phi that defines a value of FT type.
PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
    PHINode &Phi, const TargetLibraryInfo &TLI) {
  Type *VT = Phi.getType();
  Type *ExtendedVT = Config.getExtendedFPType(VT);
  if (ExtendedVT == nullptr)
    return nullptr; // Not an FT value.
  // The phi operands are shadow values and are not available when the phi is
  // created. They will be populated in a final phase, once all shadow values
  // have been created.
  PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
  Shadow->insertAfter(Phi.getIterator());
  return Shadow;
}
Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
                                               Type *ExtendedVT) {
  IRBuilder<> Builder(Load.getNextNode());
  Builder.SetCurrentDebugLocation(Load.getDebugLoc());
  if (addrPointsToConstantData(Load.getPointerOperand())) {
    // No need to look into the shadow memory, the value is a constant. Just
    // convert from FT to 2FT.
    return Builder.CreateFPExt(&Load, ExtendedVT);
  }

  // if (%shadowptr == null)
  //   %shadow = fpext %v
  // else
  //   %shadow = load (ptrcast %shadowptr)
  // Considered options here:
  //  - Have `NsanGetShadowPtrForLoad` return a fixed address
  //    &__nsan_unknown_value_shadow_address that is valid to load from, and
  //    use a select. This has the advantage that the generated IR is simpler.
  //  - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
  //    not short-circuit, dereferencing the returned pointer is no longer an
  //    option, we have to split and create a separate basic block. This has
  //    the advantage of being easier to debug because it crashes if we ever
  //    mess up.

  const auto Extents = getMemoryExtentsOrDie(VT);
  Value *ShadowPtr = Builder.CreateCall(
      NsanGetShadowPtrForLoad[Extents.ValueType],
      {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
  ++NumInstrumentedFTLoads;

  // Split the basic block.
  BasicBlock *LoadBB = Load.getParent();
  BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
  // Create the two options for creating the shadow value.
  BasicBlock *ShadowLoadBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
  BasicBlock *FExtBB =
      BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);

  // Replace the newly created terminator unconditional branch by a conditional
  // branch to one of the options.
  LoadBB->back().eraseFromParent();
  IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
  LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
                             ShadowLoadBB);

  // Fill in ShadowLoadBB.
  IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
  ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
      ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
  if (ClCheckLoads) {
    ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
                           CheckLoc::makeLoad(Load.getPointerOperand()));
  }
  ShadowLoadBBBuilder.CreateBr(NextBB);

  // Fill in FExtBB.
  IRBuilder<> FExtBBBuilder(FExtBB);
  FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  Value *FExt = FExtBBBuilder.CreateFPExt(&Load, ExtendedVT);
  FExtBBBuilder.CreateBr(NextBB);

  // The shadow value comes from either of the options.
  IRBuilder<> NextBBBuilder(&*NextBB->begin());
  NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
  PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
  ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
  ShadowPhi->addIncoming(FExt, FExtBB);
  return ShadowPhi;
}
Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
                                                Type *VT, Type *ExtendedVT,
                                                const ValueToShadowMap &Map,
                                                IRBuilder<> &Builder) {
  Value *OrigSource = Trunc.getOperand(0);
  Type *OrigSourceTy = OrigSource->getType();
  Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);

  // When truncating:
  //  - (A) If the source has a shadow, we truncate from the shadow, else we
  //    truncate from the original source.
  //  - (B) If the shadow of the source is larger than the shadow of the dest,
  //    we still need a truncate. Else, the shadow of the source is the same
  //    type as the shadow of the dest (because mappings are non-decreasing),
  //    so we don't need to emit a truncate.
  // Examples,
  //   with a mapping of {f32->f64;f64->f80;f80->f128}
  //     fptrunc double %1 to float -> fptrunc x86_fp80 s(%1) to double
  //     fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
  //     fptrunc x86_fp80 %1 to double -> x86_fp80 s(%1)
  //     fptrunc fp128 %1 to double -> fptrunc fp128 %1 to x86_fp80
  //     fptrunc fp128 %1 to x86_fp80 -> fp128 %1
  //   with a mapping of {f32->f64;f64->f128;f80->f128}
  //     fptrunc double %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc x86_fp80 %1 to float -> fptrunc fp128 s(%1) to double
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to double
  //     fptrunc x86_fp80 %1 to double -> fp128 s(%1)
  //     fptrunc fp128 %1 to double -> fp128 %1
  //     fptrunc fp128 %1 to x86_fp80 -> fp128 %1
  //   with a mapping of {f32->f32;f64->f32;f80->f64}
  //     fptrunc double %1 to float -> float s(%1)
  //     fptrunc x86_fp80 %1 to float -> fptrunc double s(%1) to float
  //     fptrunc fp128 %1 to float -> fptrunc fp128 %1 to float
  //     fptrunc x86_fp80 %1 to double -> fptrunc double s(%1) to float
  //     fptrunc fp128 %1 to double -> fptrunc fp128 %1 to float
  //     fptrunc fp128 %1 to x86_fp80 -> fptrunc fp128 %1 to double

  // See (A) above.
  Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
  Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
  // See (B) above.
  if (SourceTy == ExtendedVT)
    return Source;

  return Builder.CreateFPTrunc(Source, ExtendedVT);
}
Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
                                              Type *ExtendedVT,
                                              const ValueToShadowMap &Map,
                                              IRBuilder<> &Builder) {
  Value *OrigSource = Ext.getOperand(0);
  Type *OrigSourceTy = OrigSource->getType();
  Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
  // When extending:
  //  - (A) If the source has a shadow, we extend from the shadow, else we
  //    extend from the original source.
  //  - (B) If the shadow of the dest is larger than the shadow of the source,
  //    we still need an extend. Else, the shadow of the source is the same
  //    type as the shadow of the dest (because mappings are non-decreasing),
  //    so we don't need to emit an extend.
  // Examples,
  //   with a mapping of {f32->f64;f64->f80;f80->f128}
  //     fpext half %1 to float -> fpext half %1 to double
  //     fpext half %1 to double -> fpext half %1 to x86_fp80
  //     fpext half %1 to x86_fp80 -> fpext half %1 to fp128
  //     fpext float %1 to double -> double s(%1)
  //     fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
  //     fpext double %1 to x86_fp80 -> fpext x86_fp80 s(%1) to fp128
  //   with a mapping of {f32->f64;f64->f128;f80->f128}
  //     fpext half %1 to float -> fpext half %1 to double
  //     fpext half %1 to double -> fpext half %1 to fp128
  //     fpext half %1 to x86_fp80 -> fpext half %1 to fp128
  //     fpext float %1 to double -> fpext double s(%1) to fp128
  //     fpext float %1 to x86_fp80 -> fpext double s(%1) to fp128
  //     fpext double %1 to x86_fp80 -> fp128 s(%1)
  //   with a mapping of {f32->f32;f64->f32;f80->f64}
  //     fpext half %1 to float -> fpext half %1 to float
  //     fpext half %1 to double -> fpext half %1 to float
  //     fpext half %1 to x86_fp80 -> fpext half %1 to double
  //     fpext float %1 to double -> s(%1)
  //     fpext float %1 to x86_fp80 -> fpext float s(%1) to double
  //     fpext double %1 to x86_fp80 -> fpext float s(%1) to double

  // See (A) above.
  Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
  Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
  // See (B) above.
  if (SourceTy == ExtendedVT)
    return Source;

  return Builder.CreateFPExt(Source, ExtendedVT);
}
namespace {
// TODO: This should be tablegen-ed.
struct KnownIntrinsic {
  struct WidenedIntrinsic {
    const char *NarrowName;
    Intrinsic::ID ID; // wide id.
    using FnTypeFactory = FunctionType *(*)(LLVMContext &);
    FnTypeFactory MakeFnTy;
  };

  static const char *get(LibFunc LFunc);

  // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
  // that applies the same operation on the shadow argument.
  // Options are:
  //  - pass in the ID and full function type,
  //  - pass in the name, which includes the function type through mangling.
  static const WidenedIntrinsic *widen(StringRef Name);

private:
  struct LFEntry {
    LibFunc LFunc;
    const char *IntrinsicName;
  };
  static const LFEntry kLibfuncIntrinsics[];

  static const WidenedIntrinsic kWidenedIntrinsics[];
};
} // namespace
static FunctionType *makeDoubleDouble(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false);
}

static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C),
                           {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false);
}

static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C),
                           {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
  return FunctionType::get(Type::getDoubleTy(C),
                           {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false);
}

static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(Type::getX86_FP80Ty(C),
                           {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
                           false);
}

static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
  return FunctionType::get(
      Type::getDoubleTy(C),
      {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
      false);
}

static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
  return FunctionType::get(
      Type::getX86_FP80Ty(C),
      {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
      false);
}
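// Naming convention for the factories above: the first type is the return
// type and the remaining ones are parameter types, e.g. makeDoubleDoubleI32
// builds `double (double, i32)`.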
1362 const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1363 // TODO: Right now we ignore vector intrinsics.
1364 // This is hard because we have to model the semantics of the intrinsics,
1365 // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1366 // Intrinsics that take any non-vector FT types:
1367 // NOTE: Right now because of
1368 // https://github.com/llvm/llvm-project/issues/44744
1369 // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1370 // come back).
1371 {"llvm.sqrt.f32", Intrinsic::sqrt, makeDoubleDouble},
1372 {"llvm.sqrt.f64", Intrinsic::sqrt, makeX86FP80X86FP80},
1373 {"llvm.sqrt.f80", Intrinsic::sqrt, makeX86FP80X86FP80},
1374 {"llvm.powi.f32", Intrinsic::powi, makeDoubleDoubleI32},
1375 {"llvm.powi.f64", Intrinsic::powi, makeX86FP80X86FP80I32},
1376 {"llvm.powi.f80", Intrinsic::powi, makeX86FP80X86FP80I32},
1377 {"llvm.sin.f32", Intrinsic::sin, makeDoubleDouble},
1378 {"llvm.sin.f64", Intrinsic::sin, makeX86FP80X86FP80},
1379 {"llvm.sin.f80", Intrinsic::sin, makeX86FP80X86FP80},
1380 {"llvm.cos.f32", Intrinsic::cos, makeDoubleDouble},
1381 {"llvm.cos.f64", Intrinsic::cos, makeX86FP80X86FP80},
1382 {"llvm.cos.f80", Intrinsic::cos, makeX86FP80X86FP80},
1383 {"llvm.pow.f32", Intrinsic::pow, makeDoubleDoubleDouble},
1384 {"llvm.pow.f64", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1385 {"llvm.pow.f80", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1386 {"llvm.exp.f32", Intrinsic::exp, makeDoubleDouble},
1387 {"llvm.exp.f64", Intrinsic::exp, makeX86FP80X86FP80},
1388 {"llvm.exp.f80", Intrinsic::exp, makeX86FP80X86FP80},
1389 {"llvm.exp2.f32", Intrinsic::exp2, makeDoubleDouble},
1390 {"llvm.exp2.f64", Intrinsic::exp2, makeX86FP80X86FP80},
1391 {"llvm.exp2.f80", Intrinsic::exp2, makeX86FP80X86FP80},
1392 {"llvm.log.f32", Intrinsic::log, makeDoubleDouble},
1393 {"llvm.log.f64", Intrinsic::log, makeX86FP80X86FP80},
1394 {"llvm.log.f80", Intrinsic::log, makeX86FP80X86FP80},
1395 {"llvm.log10.f32", Intrinsic::log10, makeDoubleDouble},
1396 {"llvm.log10.f64", Intrinsic::log10, makeX86FP80X86FP80},
1397 {"llvm.log10.f80", Intrinsic::log10, makeX86FP80X86FP80},
1398 {"llvm.log2.f32", Intrinsic::log2, makeDoubleDouble},
1399 {"llvm.log2.f64", Intrinsic::log2, makeX86FP80X86FP80},
1400 {"llvm.log2.f80", Intrinsic::log2, makeX86FP80X86FP80},
1401 {"llvm.fma.f32", Intrinsic::fma, makeDoubleDoubleDoubleDouble},
1403 {"llvm.fmuladd.f32", Intrinsic::fmuladd, makeDoubleDoubleDoubleDouble},
1405 {"llvm.fma.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1407 {"llvm.fmuladd.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1409 {"llvm.fma.f80", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1410 {"llvm.fabs.f32", Intrinsic::fabs, makeDoubleDouble},
1411 {"llvm.fabs.f64", Intrinsic::fabs, makeX86FP80X86FP80},
1412 {"llvm.fabs.f80", Intrinsic::fabs, makeX86FP80X86FP80},
1413 {"llvm.minnum.f32", Intrinsic::minnum, makeDoubleDoubleDouble},
1414 {"llvm.minnum.f64", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1415 {"llvm.minnum.f80", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1416 {"llvm.maxnum.f32", Intrinsic::maxnum, makeDoubleDoubleDouble},
1417 {"llvm.maxnum.f64", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1418 {"llvm.maxnum.f80", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1419 {"llvm.minimum.f32", Intrinsic::minimum, makeDoubleDoubleDouble},
1420 {"llvm.minimum.f64", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1421 {"llvm.minimum.f80", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1422 {"llvm.maximum.f32", Intrinsic::maximum, makeDoubleDoubleDouble},
1423 {"llvm.maximum.f64", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1424 {"llvm.maximum.f80", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1425 {"llvm.copysign.f32", Intrinsic::copysign, makeDoubleDoubleDouble},
1426 {"llvm.copysign.f64", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1427 {"llvm.copysign.f80", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1428 {"llvm.floor.f32", Intrinsic::floor, makeDoubleDouble},
1429 {"llvm.floor.f64", Intrinsic::floor, makeX86FP80X86FP80},
1430 {"llvm.floor.f80", Intrinsic::floor, makeX86FP80X86FP80},
1431 {"llvm.ceil.f32", Intrinsic::ceil, makeDoubleDouble},
1432 {"llvm.ceil.f64", Intrinsic::ceil, makeX86FP80X86FP80},
1433 {"llvm.ceil.f80", Intrinsic::ceil, makeX86FP80X86FP80},
1434 {"llvm.trunc.f32", Intrinsic::trunc, makeDoubleDouble},
1435 {"llvm.trunc.f64", Intrinsic::trunc, makeX86FP80X86FP80},
1436 {"llvm.trunc.f80", Intrinsic::trunc, makeX86FP80X86FP80},
1437 {"llvm.rint.f32", Intrinsic::rint, makeDoubleDouble},
1438 {"llvm.rint.f64", Intrinsic::rint, makeX86FP80X86FP80},
1439 {"llvm.rint.f80", Intrinsic::rint, makeX86FP80X86FP80},
1440 {"llvm.nearbyint.f32", Intrinsic::nearbyint, makeDoubleDouble},
1441 {"llvm.nearbyint.f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1442 {"llvm.nearbyin80f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1443 {"llvm.round.f32", Intrinsic::round, makeDoubleDouble},
1444 {"llvm.round.f64", Intrinsic::round, makeX86FP80X86FP80},
1445 {"llvm.round.f80", Intrinsic::round, makeX86FP80X86FP80},
1446 {"llvm.lround.f32", Intrinsic::lround, makeDoubleDouble},
1447 {"llvm.lround.f64", Intrinsic::lround, makeX86FP80X86FP80},
1448 {"llvm.lround.f80", Intrinsic::lround, makeX86FP80X86FP80},
1449 {"llvm.llround.f32", Intrinsic::llround, makeDoubleDouble},
1450 {"llvm.llround.f64", Intrinsic::llround, makeX86FP80X86FP80},
1451 {"llvm.llround.f80", Intrinsic::llround, makeX86FP80X86FP80},
1452 {"llvm.lrint.f32", Intrinsic::lrint, makeDoubleDouble},
1453 {"llvm.lrint.f64", Intrinsic::lrint, makeX86FP80X86FP80},
1454 {"llvm.lrint.f80", Intrinsic::lrint, makeX86FP80X86FP80},
1455 {"llvm.llrint.f32", Intrinsic::llrint, makeDoubleDouble},
1456 {"llvm.llrint.f64", Intrinsic::llrint, makeX86FP80X86FP80},
1457 {"llvm.llrint.f80", Intrinsic::llrint, makeX86FP80X86FP80},
1460 const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1461 {LibFunc_sqrtf, "llvm.sqrt.f32"},
1462 {LibFunc_sqrt, "llvm.sqrt.f64"},
1463 {LibFunc_sqrtl, "llvm.sqrt.f80"},
1464 {LibFunc_sinf, "llvm.sin.f32"},
1465 {LibFunc_sin, "llvm.sin.f64"},
1466 {LibFunc_sinl, "llvm.sin.f80"},
1467 {LibFunc_cosf, "llvm.cos.f32"},
1468 {LibFunc_cos, "llvm.cos.f64"},
1469 {LibFunc_cosl, "llvm.cos.f80"},
1470 {LibFunc_powf, "llvm.pow.f32"},
1471 {LibFunc_pow, "llvm.pow.f64"},
1472 {LibFunc_powl, "llvm.pow.f80"},
1473 {LibFunc_expf, "llvm.exp.f32"},
1474 {LibFunc_exp, "llvm.exp.f64"},
1475 {LibFunc_expl, "llvm.exp.f80"},
1476 {LibFunc_exp2f, "llvm.exp2.f32"},
1477 {LibFunc_exp2, "llvm.exp2.f64"},
1478 {LibFunc_exp2l, "llvm.exp2.f80"},
1479 {LibFunc_logf, "llvm.log.f32"},
1480 {LibFunc_log, "llvm.log.f64"},
1481 {LibFunc_logl, "llvm.log.f80"},
1482 {LibFunc_log10f, "llvm.log10.f32"},
1483 {LibFunc_log10, "llvm.log10.f64"},
1484 {LibFunc_log10l, "llvm.log10.f80"},
1485 {LibFunc_log2f, "llvm.log2.f32"},
1486 {LibFunc_log2, "llvm.log2.f64"},
1487 {LibFunc_log2l, "llvm.log2.f80"},
1488 {LibFunc_fabsf, "llvm.fabs.f32"},
1489 {LibFunc_fabs, "llvm.fabs.f64"},
1490 {LibFunc_fabsl, "llvm.fabs.f80"},
1491 {LibFunc_copysignf, "llvm.copysign.f32"},
1492 {LibFunc_copysign, "llvm.copysign.f64"},
1493 {LibFunc_copysignl, "llvm.copysign.f80"},
1494 {LibFunc_floorf, "llvm.floor.f32"},
1495 {LibFunc_floor, "llvm.floor.f64"},
1496 {LibFunc_floorl, "llvm.floor.f80"},
1497 {LibFunc_fmaxf, "llvm.maxnum.f32"},
1498 {LibFunc_fmax, "llvm.maxnum.f64"},
1499 {LibFunc_fmaxl, "llvm.maxnum.f80"},
1500 {LibFunc_fminf, "llvm.minnum.f32"},
1501 {LibFunc_fmin, "llvm.minnum.f64"},
1502 {LibFunc_fminl, "llvm.minnum.f80"},
1503 {LibFunc_ceilf, "llvm.ceil.f32"},
1504 {LibFunc_ceil, "llvm.ceil.f64"},
1505 {LibFunc_ceill, "llvm.ceil.f80"},
1506 {LibFunc_truncf, "llvm.trunc.f32"},
1507 {LibFunc_trunc, "llvm.trunc.f64"},
1508 {LibFunc_truncl, "llvm.trunc.f80"},
1509 {LibFunc_rintf, "llvm.rint.f32"},
1510 {LibFunc_rint, "llvm.rint.f64"},
1511 {LibFunc_rintl, "llvm.rint.f80"},
1512 {LibFunc_nearbyintf, "llvm.nearbyint.f32"},
1513 {LibFunc_nearbyint, "llvm.nearbyint.f64"},
1514 {LibFunc_nearbyintl, "llvm.nearbyint.f80"},
1515 {LibFunc_roundf, "llvm.round.f32"},
1516 {LibFunc_round, "llvm.round.f64"},
1517 {LibFunc_roundl, "llvm.round.f80"},
1518 };
1520 const char *KnownIntrinsic::get(LibFunc LFunc) {
1521 for (const auto &E : kLibfuncIntrinsics) {
1522 if (E.LFunc == LFunc)
1523 return E.IntrinsicName;
1524 }
1525 return nullptr;
1526 }
1528 const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1529 for (const auto &E : kWidenedIntrinsics) {
1530 if (E.NarrowName == Name)
1531 return &E;
1533 return nullptr;
1536 // Returns the name of the LLVM intrinsic corresponding to the given function.
1537 static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1538 const TargetLibraryInfo &TLI) {
1539 LibFunc LFunc;
1540 if (!TLI.getLibFunc(Fn, LFunc))
1541 return nullptr;
1543 if (const char *Name = KnownIntrinsic::get(LFunc))
1544 return Name;
1546 LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1547 return nullptr;
1548 }
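// For example, a direct call to the libm function `sinf` resolves to
// LibFunc_sinf and maps to "llvm.sin.f32" via the table above, after which it
// can be widened like any other known intrinsic.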
1550 // Try to handle a known function call.
1551 Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1552 CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1553 const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1554 Function *Fn = Call.getCalledFunction();
1555 if (Fn == nullptr)
1556 return nullptr;
1558 Intrinsic::ID WidenedId = Intrinsic::ID();
1559 FunctionType *WidenedFnTy = nullptr;
1560 if (const auto ID = Fn->getIntrinsicID()) {
1561 const auto *Widened = KnownIntrinsic::widen(Fn->getName());
1562 if (Widened) {
1563 WidenedId = Widened->ID;
1564 WidenedFnTy = Widened->MakeFnTy(Context);
1565 } else {
1566 // If we don't know how to widen the intrinsic, we have no choice but to
1567 // call the non-wide version on a truncated shadow and extend again
1568 // afterwards.
1569 WidenedId = ID;
1570 WidenedFnTy = Fn->getFunctionType();
1571 }
1572 } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
1573 // We might have a call to a library function that we can replace with a
1574 // wider Intrinsic.
1575 const auto *Widened = KnownIntrinsic::widen(Name);
1576 assert(Widened && "make sure KnownIntrinsic entries are consistent");
1577 WidenedId = Widened->ID;
1578 WidenedFnTy = Widened->MakeFnTy(Context);
1579 } else {
1580 // This is not a known library function or intrinsic.
1581 return nullptr;
1582 }
1584 // Check that the widened intrinsic is valid.
1585 SmallVector<Intrinsic::IITDescriptor, 8> Table;
1586 getIntrinsicInfoTableEntries(WidenedId, Table);
1587 SmallVector<Type *, 4> ArgTys;
1588 ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1589 [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1590 Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
1591 assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1592 "invalid widened intrinsic");
1593 // For known intrinsic functions, we create a second call to the same
1594 // intrinsic with a different type.
1595 SmallVector<Value *, 4> Args;
1596 // The last operand is the intrinsic itself, skip it.
1597 for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1598 Value *Arg = Call.getOperand(I);
1599 Type *OrigArgTy = Arg->getType();
1600 Type *IntrinsicArgTy = WidenedFnTy->getParamType(I);
1601 if (OrigArgTy == IntrinsicArgTy) {
1602 Args.push_back(Arg); // The arg is passed as is.
1603 continue;
1604 }
1605 Type *ShadowArgTy = Config.getExtendedFPType(Arg->getType());
1606 assert(ShadowArgTy &&
1607 "don't know how to get the shadow value for a non-FT");
1608 Value *Shadow = Map.getShadow(Arg);
1609 if (ShadowArgTy == IntrinsicArgTy) {
1610 // The shadow is the right type for the intrinsic.
1611 assert(Shadow->getType() == ShadowArgTy);
1612 Args.push_back(Shadow);
1613 continue;
1614 }
1615 // There is no intrinsic with this level of precision, truncate the shadow.
1616 Args.push_back(Builder.CreateFPTrunc(Shadow, IntrinsicArgTy));
1617 }
1618 Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
1619 return WidenedFnTy->getReturnType() == ExtendedVT
1620 ? IntrinsicCall
1621 : Builder.CreateFPExt(IntrinsicCall, ExtendedVT);
1622 }
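// Illustrative IR for the machinery above (default `dqq` mapping, so the
// shadow of a double lives in fp128): for
//   %r = call double @llvm.exp.f64(double %x)
// the widest known overload is x86_fp80, so the pass emits roughly
//   %t = fptrunc fp128 %sx to x86_fp80
//   %c = call x86_fp80 @llvm.exp.f80(x86_fp80 %t)
//   %s = fpext x86_fp80 %c to fp128
// where %sx is the shadow of %x.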
1624 // Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1625 // invoke.
1626 Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1627 Type *ExtendedVT,
1628 const TargetLibraryInfo &TLI,
1629 const ValueToShadowMap &Map,
1630 IRBuilder<> &Builder) {
1631 // We cannot look inside inline asm, just extend the result again.
1632 if (Call.isInlineAsm())
1633 return Builder.CreateFPExt(&Call, ExtendedVT);
1635 // Intrinsics and library functions (e.g. sin, exp) are handled
1636 // specially, because we know their semantics and can do better than
1637 // blindly calling them (e.g. compute the sine in the actual shadow domain).
1638 if (Value *V =
1639 maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1640 return V;
1642 // If the return tag matches that of the called function, read the extended
1643 // return value from the shadow ret ptr. Else, just extend the return value.
1644 Value *L =
1645 Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
1646 Value *HasShadowRet = Builder.CreateICmpEQ(
1647 L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
1649 Value *ShadowRetVal = Builder.CreateLoad(
1650 ExtendedVT,
1651 Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
1652 /*isVolatile=*/false);
1653 Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
1654 Builder.CreateFPExt(&Call, ExtendedVT));
1655 ++NumInstrumentedFTCalls;
1656 return Shadow;
1657 }
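// For example (illustrative): for `%r = call float @f()` where @f is
// instrumented, @f leaves &f in __nsan_shadow_ret_tag and its wide result in
// __nsan_shadow_ret_val; the select above picks that value when the tag
// matches the called operand, and falls back to fpext(%r) otherwise (e.g.
// when @f was not instrumented).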
1659 // Creates a shadow value for the given FT value. At that point all operands are
1660 // guaranteed to be available.
1661 Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1662 Instruction &Inst, const TargetLibraryInfo &TLI,
1663 const ValueToShadowMap &Map) {
1664 Type *VT = Inst.getType();
1665 Type *ExtendedVT = Config.getExtendedFPType(VT);
1666 assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1668 if (auto *Load = dyn_cast<LoadInst>(&Inst))
1669 return handleLoad(*Load, VT, ExtendedVT);
1671 if (auto *Call = dyn_cast<CallInst>(&Inst)) {
1672 // Insert after the call.
1673 BasicBlock::iterator It(Inst);
1674 IRBuilder<> Builder(Call->getParent(), ++It);
1675 Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1676 return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
1677 }
1679 if (auto *Invoke = dyn_cast<InvokeInst>(&Inst)) {
1680 // The Invoke terminates the basic block; create a new basic block in
1681 // between the successful invoke and the next block.
1682 BasicBlock *InvokeBB = Invoke->getParent();
1683 BasicBlock *NextBB = Invoke->getNormalDest();
1684 BasicBlock *NewBB =
1685 BasicBlock::Create(Context, "", NextBB->getParent(), NextBB);
1686 Inst.replaceSuccessorWith(NextBB, NewBB);
1688 IRBuilder<> Builder(NewBB);
1689 Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1690 Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
1691 Builder.CreateBr(NextBB);
1692 NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
1693 return Shadow;
1694 }
1696 IRBuilder<> Builder(Inst.getNextNode());
1697 Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1699 if (auto *Trunc = dyn_cast<FPTruncInst>(&Inst))
1700 return handleTrunc(*Trunc, VT, ExtendedVT, Map, Builder);
1701 if (auto *Ext = dyn_cast<FPExtInst>(&Inst))
1702 return handleExt(*Ext, VT, ExtendedVT, Map, Builder);
1704 if (auto *UnaryOp = dyn_cast<UnaryOperator>(&Inst))
1705 return Builder.CreateUnOp(UnaryOp->getOpcode(),
1706 Map.getShadow(UnaryOp->getOperand(0)));
1708 if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
1709 return Builder.CreateBinOp(BinOp->getOpcode(),
1710 Map.getShadow(BinOp->getOperand(0)),
1711 Map.getShadow(BinOp->getOperand(1)));
1713 if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
1714 auto *Cast = cast<CastInst>(&Inst);
1715 return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
1716 ExtendedVT);
1717 }
1719 if (auto *S = dyn_cast<SelectInst>(&Inst))
1720 return Builder.CreateSelect(S->getCondition(),
1721 Map.getShadow(S->getTrueValue()),
1722 Map.getShadow(S->getFalseValue()));
1724 if (auto *Freeze = dyn_cast<FreezeInst>(&Inst))
1725 return Builder.CreateFreeze(Map.getShadow(Freeze->getOperand(0)));
1727 if (auto *Extract = dyn_cast<ExtractElementInst>(&Inst))
1728 return Builder.CreateExtractElement(
1729 Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
1731 if (auto *Insert = dyn_cast<InsertElementInst>(&Inst))
1732 return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
1733 Map.getShadow(Insert->getOperand(1)),
1734 Insert->getOperand(2));
1736 if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst))
1737 return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
1738 Map.getShadow(Shuffle->getOperand(1)),
1739 Shuffle->getShuffleMask());
1740 // TODO: We could make aggregate objects first-class citizens. For now we
1741 // just extend the extracted value.
1742 if (auto *Extract = dyn_cast<ExtractValueInst>(&Inst))
1743 return Builder.CreateFPExt(Extract, ExtendedVT);
1745 if (auto *BC = dyn_cast<BitCastInst>(&Inst))
1746 return Builder.CreateFPExt(BC, ExtendedVT);
1748 report_fatal_error("Unimplemented support for " +
1749 Twine(Inst.getOpcodeName()));
1750 }
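// As a sketch of the simple cases above (illustrative IR, default `dqq`
// mapping): for
//   %sum = fadd float %a, %b
// the pass inserts
//   %s.sum = fadd double %s.a, %s.b
// right after the original instruction, reusing the opcode on the already
// available shadow operands.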
1752 // Creates a shadow value for an instruction that defines a value of FT type.
1753 // FT operands that do not already have shadow values are created recursively.
1754 // The DFS is guaranteed to not loop as phis and arguments already have
1755 // shadows.
1756 void NumericalStabilitySanitizer::maybeCreateShadowValue(
1757 Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1758 Type *VT = Root.getType();
1759 Type *ExtendedVT = Config.getExtendedFPType(VT);
1760 if (ExtendedVT == nullptr)
1761 return; // Not an FT value.
1763 if (Map.hasShadow(&Root))
1764 return; // Shadow already exists.
1766 assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1768 std::vector<Instruction *> DfsStack(1, &Root);
1769 while (!DfsStack.empty()) {
1770 // Ensure that all operands to the instruction have shadows before
1771 // proceeding.
1772 Instruction *I = DfsStack.back();
1773 // The shadow for the instruction might have been created deeper in the DFS,
1774 // see `forward_use_with_two_uses` test.
1775 if (Map.hasShadow(I)) {
1776 DfsStack.pop_back();
1777 continue;
1778 }
1780 bool MissingShadow = false;
1781 for (Value *Op : I->operands()) {
1782 Type *VT = Op->getType();
1783 if (!Config.getExtendedFPType(VT))
1784 continue; // Not an FT value.
1785 if (Map.hasShadow(Op))
1786 continue; // Shadow is already available.
1787 MissingShadow = true;
1788 DfsStack.push_back(cast<Instruction>(Op));
1789 }
1790 if (MissingShadow)
1791 continue; // Process operands and come back to this instruction later.
1793 // All operands have shadows. Create a shadow for the current value.
1794 Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
1795 Map.setShadow(*I, *Shadow);
1796 DfsStack.pop_back();
1797 }
1798 }
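// Sketch of the traversal (hypothetical IR): given
//   %x = fmul float %a, %b
//   %y = fadd float %x, %a
// visiting %y first pushes %x (its shadow is missing), creates s(%x), then
// revisits %y and creates s(%y) = fadd double s(%x), s(%a). Arguments like %a
// already have shadows, so the walk terminates.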
1800 // A floating-point store needs its value and type written to shadow memory.
1801 void NumericalStabilitySanitizer::propagateFTStore(
1802 StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1803 Value *StoredValue = Store.getValueOperand();
1804 IRBuilder<> Builder(&Store);
1805 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1806 const auto Extents = getMemoryExtentsOrDie(VT);
1807 Value *ShadowPtr = Builder.CreateCall(
1808 NsanGetShadowPtrForStore[Extents.ValueType],
1809 {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1811 Value *StoredShadow = Map.getShadow(StoredValue);
1812 if (!Store.getParent()->getParent()->hasOptNone()) {
1813 // Only check stores when optimizing, because non-optimized code generates
1814 // too many stores to the stack, creating false positives.
1815 if (ClCheckStores) {
1816 StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
1817 CheckLoc::makeStore(Store.getPointerOperand()));
1818 ++NumInstrumentedFTStores;
1819 }
1820 }
1822 Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
1823 Store.isVolatile());
1824 }
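// E.g. for `store float %v, ptr %p`, the double shadow of %v is written to
// the shadow memory obtained for %p (after an emitCheck comparison of %v
// against its shadow, when store checking is enabled and the function is
// optimized).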
1826 // A non-FT store needs to invalidate shadow memory. Exceptions are:
1827 // - memory transfers of floating-point data through other pointer types (LLVM
1828 // optimization passes transform `*(float*)a = *(float*)b` into
1829 // `*(i32*)a = *(i32*)b`). These have the same semantics as memcpy.
1830 // - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1831 // ints. Note that this is not really necessary because if the value is
1832 // unknown the framework will re-extend it on load anyway. It just felt
1833 // easier to debug tests with vectors of FTs.
1834 void NumericalStabilitySanitizer::propagateNonFTStore(
1835 StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1836 Value *PtrOp = Store.getPointerOperand();
1837 IRBuilder<> Builder(Store.getNextNode());
1838 Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1839 Value *Dst = PtrOp;
1840 TypeSize SlotSize = DL.getTypeStoreSize(VT);
1841 assert(!SlotSize.isScalable() && "unsupported");
1842 const auto LoadSizeBytes = SlotSize.getFixedValue();
1843 Value *ValueSize = Constant::getIntegerValue(
1844 IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1846 ++NumInstrumentedNonFTStores;
1847 Value *StoredValue = Store.getValueOperand();
1848 if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
1849 // TODO: Handle the case when the value is from a phi.
1850 // This is a memory transfer with memcpy semantics. Copy the type and
1851 // value from the source. Note that we cannot use __nsan_copy_values()
1852 // here, because that will not work when there is a write to memory in
1853 // between the load and the store, e.g. in the case of a swap.
1854 Type *ShadowTypeIntTy = Type::getIntNTy(Context, 8 * LoadSizeBytes);
1855 Type *ShadowValueIntTy =
1856 Type::getIntNTy(Context, 8 * kShadowScale * LoadSizeBytes);
1857 IRBuilder<> LoadBuilder(Load->getNextNode());
1858 LoadBuilder.SetCurrentDebugLocation(Store.getDebugLoc());
1859 Value *LoadSrc = Load->getPointerOperand();
1860 // Read the shadow type and value at load time. The type has the same size
1861 // as the FT value; the value has twice its size.
1862 // TODO: cache them to avoid re-creating them when a load is used by
1863 // several stores. Maybe create them like the FT shadows when a load is
1864 // encountered.
1865 Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1866 ShadowTypeIntTy,
1867 LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
1868 /*isVolatile=*/false);
1869 Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1870 ShadowValueIntTy,
1871 LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
1872 /*isVolatile=*/false);
1874 // Write back the shadow type and value at store time.
1875 Builder.CreateAlignedStore(
1876 RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
1877 Align(1),
1878 /*isVolatile=*/false);
1879 Builder.CreateAlignedStore(RawShadowValue,
1880 Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
1881 Align(1),
1882 /*isVolatile=*/false);
1884 ++NumInstrumentedNonFTMemcpyStores;
1885 return;
1886 }
1887 // ClPropagateNonFTConstStoresAsFT is false by default.
1888 if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1889 (C = dyn_cast<Constant>(StoredValue))) {
1890 // This might be an FP constant stored as an int. Bitcast and store if it
1891 // has the appropriate size.
1892 Type *BitcastTy = nullptr; // The FT type to bitcast to.
1893 if (auto *CInt = dyn_cast<ConstantInt>(C)) {
1894 switch (CInt->getType()->getScalarSizeInBits()) {
1895 case 32:
1896 BitcastTy = Type::getFloatTy(Context);
1897 break;
1898 case 64:
1899 BitcastTy = Type::getDoubleTy(Context);
1900 break;
1901 case 80:
1902 BitcastTy = Type::getX86_FP80Ty(Context);
1903 break;
1904 default:
1905 break;
1906 }
1907 } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
1908 const int NumElements =
1909 cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
1910 switch (CDV->getType()->getScalarSizeInBits()) {
1911 case 32:
1912 BitcastTy =
1913 VectorType::get(Type::getFloatTy(Context), NumElements, false);
1914 break;
1915 case 64:
1916 BitcastTy =
1917 VectorType::get(Type::getDoubleTy(Context), NumElements, false);
1918 break;
1919 case 80:
1920 BitcastTy =
1921 VectorType::get(Type::getX86_FP80Ty(Context), NumElements, false);
1922 break;
1923 default:
1924 break;
1925 }
1926 }
1927 if (BitcastTy) {
1928 const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
1929 Value *ShadowPtr = Builder.CreateCall(
1930 NsanGetShadowPtrForStore[Extents.ValueType],
1931 {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
1932 // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1933 Type *ExtVT = Config.getExtendedFPType(BitcastTy);
1934 Value *Shadow =
1935 Builder.CreateFPExt(Builder.CreateBitCast(C, BitcastTy), ExtVT);
1936 Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
1937 Store.isVolatile());
1938 return;
1939 }
1940 }
1941 // All other stores just reset the shadow value to unknown.
1942 Builder.CreateCall(NsanSetUnknownFns.getFallback(), {Dst, ValueSize});
1943 }
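// For example (hypothetical IR): if `store i32 %v, ptr %p` stores a value
// coming from `load i32, ptr %q`, the shadow type and shadow value bytes are
// copied from %q's shadow to %p's shadow; any other i32 store simply marks
// the 4 bytes at %p as unknown via the fallback above.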
1945 void NumericalStabilitySanitizer::propagateShadowValues(
1946 Instruction &Inst, const TargetLibraryInfo &TLI,
1947 const ValueToShadowMap &Map) {
1948 if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
1949 Value *StoredValue = Store->getValueOperand();
1950 Type *VT = StoredValue->getType();
1951 Type *ExtendedVT = Config.getExtendedFPType(VT);
1952 if (ExtendedVT == nullptr)
1953 return propagateNonFTStore(*Store, VT, Map);
1954 return propagateFTStore(*Store, VT, ExtendedVT, Map);
1955 }
1957 if (auto *FCmp = dyn_cast<FCmpInst>(&Inst)) {
1958 emitFCmpCheck(*FCmp, Map);
1959 return;
1960 }
1962 if (auto *CB = dyn_cast<CallBase>(&Inst)) {
1963 maybeAddSuffixForNsanInterface(CB);
1964 if (CallInst *CI = dyn_cast<CallInst>(&Inst))
1965 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
1966 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
1967 instrumentMemIntrinsic(MI);
1968 return;
1969 }
1970 populateShadowStack(*CB, TLI, Map);
1971 return;
1972 }
1974 if (auto *RetInst = dyn_cast<ReturnInst>(&Inst)) {
1975 if (!ClCheckRet)
1976 return;
1978 Value *RV = RetInst->getReturnValue();
1979 if (RV == nullptr)
1980 return; // This is a `ret void`.
1981 Type *VT = RV->getType();
1982 Type *ExtendedVT = Config.getExtendedFPType(VT);
1983 if (ExtendedVT == nullptr)
1984 return; // Not an FT ret.
1985 Value *RVShadow = Map.getShadow(RV);
1986 IRBuilder<> Builder(RetInst);
1988 RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
1989 ++NumInstrumentedFTRets;
1990 // Store tag.
1991 Value *FnAddr =
1992 Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
1993 Builder.CreateStore(FnAddr, NsanShadowRetTag);
1994 // Store value.
1995 Value *ShadowRetValPtr =
1996 Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
1997 Builder.CreateStore(RVShadow, ShadowRetValPtr);
1998 return;
1999 }
2001 if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
2002 Value *V = Insert->getOperand(1);
2003 Type *VT = V->getType();
2004 Type *ExtendedVT = Config.getExtendedFPType(VT);
2005 if (ExtendedVT == nullptr)
2006 return;
2007 IRBuilder<> Builder(Insert);
2008 emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
2009 return;
2010 }
2011 }
2013 // Moves fast math flags from the function to individual instructions, and
2014 // removes the attribute from the function.
2015 // TODO: Make this controllable with a flag.
2016 static void moveFastMathFlags(Function &F,
2017 std::vector<Instruction *> &Instructions) {
2018 FastMathFlags FMF;
2019 #define MOVE_FLAG(attr, setter) \
2020 if (F.getFnAttribute(attr).getValueAsString() == "true") { \
2021 F.removeFnAttr(attr); \
2022 FMF.set##setter(); \
2023 }
2024 MOVE_FLAG("unsafe-fp-math", Fast)
2025 MOVE_FLAG("no-infs-fp-math", NoInfs)
2026 MOVE_FLAG("no-nans-fp-math", NoNaNs)
2027 MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
2028 #undef MOVE_FLAG
2030 for (Instruction *I : Instructions)
2031 if (isa<FPMathOperator>(I))
2032 I->setFastMathFlags(FMF);
2033 }
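// For instance, a function carrying "unsafe-fp-math"="true" loses that
// attribute, and every FPMathOperator in it gets the `fast` flag set on the
// instruction itself, preserving the information at instruction granularity.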
2035 bool NumericalStabilitySanitizer::sanitizeFunction(
2036 Function &F, const TargetLibraryInfo &TLI) {
2037 if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability) ||
2038 F.isDeclaration())
2039 return false;
2041 // This is required to prevent instrumenting the call to __nsan_init from
2042 // within the module constructor.
2043 if (F.getName() == kNsanModuleCtorName)
2044 return false;
2045 SmallVector<Instruction *, 8> AllLoadsAndStores;
2046 SmallVector<Instruction *, 8> LocalLoadsAndStores;
2048 // The instrumentation maintains:
2049 // - for each IR value `v` of floating-point (or vector floating-point) type
2050 // FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
2051 // double for float and f128 for double).
2052 // - A shadow memory, which stores `s(v)` for any `v` that has been stored,
2053 // along with a shadow memory tag, which stores whether the value in the
2054 // corresponding shadow memory is valid. Note that this might be
2055 // incorrect if a non-instrumented function stores to memory, or if
2056 // memory is stored to through a char pointer.
2057 // - A shadow stack, which holds `s(v)` for any floating-point argument `v`
2058 // of a call to an instrumented function. This allows
2059 // instrumented functions to retrieve the shadow values for their
2060 // arguments.
2061 // Because instrumented functions can be called from non-instrumented
2062 // functions, the stack needs to include a tag so that the instrumented
2063 // function knows whether shadow values are available for its
2064 // parameters (i.e. whether it was called by an instrumented function).
2065 // When shadow arguments are not available, they have to be recreated
2066 // by extending the non-shadow arguments to the shadow precision.
2067 // Non-instrumented functions do not modify (or even know about) the
2068 // shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2069 // stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2070 // for the function (we use the address of the function). Both variables
2071 // are thread local.
2072 // Example:
2073 // calls shadow stack tag shadow stack
2074 // =======================================================================
2075 // non_instrumented_1() 0 0
2076 // |
2077 // v
2078 // instrumented_2(float a) 0 0
2079 // |
2080 // v
2081 // instrumented_3(float b, double c) &instrumented_3 s(b),s(c)
2082 // |
2083 // v
2084 // instrumented_4(float d) &instrumented_4 s(d)
2085 // |
2086 // v
2087 // non_instrumented_5(float e) &non_instrumented_5 s(e)
2088 // |
2089 // v
2090 // instrumented_6(float f) &non_instrumented_5 s(e)
2092 // On entry, instrumented_2 checks whether the tag corresponds to its
2093 // function ptr.
2094 // Note that functions reset the tag to 0 after reading shadow parameters.
2095 // This ensures that the function does not erroneously read invalid data if
2096 // called twice in the same stack, once from an instrumented function and
2097 // once from an uninstrumented one. For example, in the following example,
2098 // resetting the tag in (A) ensures that (B) does not reuse the same
2099 // shadow arguments (which would be incorrect).
2100 // instrumented_1(float a)
2101 // |
2102 // v
2103 // instrumented_2(float b) (A)
2104 // |
2105 // v
2106 // non_instrumented_3()
2107 // |
2108 // v
2109 // instrumented_2(float b) (B)
2111 // - A shadow return slot. Any function that returns a floating-point value
2112 // places a shadow return value in __nsan_shadow_ret_val. Again, because
2113 // we might be calling non-instrumented functions, this value is guarded
2114 // by the __nsan_shadow_ret_tag marker indicating which instrumented function
2115 // placed the value in __nsan_shadow_ret_val, so that the caller can check
2116 // that this corresponds to the callee. Both variables are thread local.
2118 // For example, in the following example, the instrumentation in
2119 // `instrumented_1` rejects the shadow return value from `instrumented_3`
2120 // because it is not tagged as expected (`&instrumented_3` instead of
2121 // `&non_instrumented_2`):
2123 // instrumented_1()
2124 // |
2125 // v
2126 // float non_instrumented_2()
2127 // |
2128 // v
2129 // float instrumented_3()
2131 // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2132 // their overload on the shadow type.
2134 // Collect all instructions before processing, as creating shadow values
2135 // creates new instructions inside the function.
2136 std::vector<Instruction *> OriginalInstructions;
2137 for (BasicBlock &BB : F)
2138 for (Instruction &Inst : BB)
2139 OriginalInstructions.emplace_back(&Inst);
2141 moveFastMathFlags(F, OriginalInstructions);
2142 ValueToShadowMap ValueToShadow(Config);
2144 // In the first pass, we create shadow values for all FT function arguments
2145 // and all phis. This ensures that the DFS of the next pass does not have
2146 // any loops.
2147 std::vector<PHINode *> OriginalPhis;
2148 createShadowArguments(F, TLI, ValueToShadow);
2149 for (Instruction *I : OriginalInstructions) {
2150 if (PHINode *Phi = dyn_cast<PHINode>(I)) {
2151 if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
2152 OriginalPhis.push_back(Phi);
2153 ValueToShadow.setShadow(*Phi, *Shadow);
2154 }
2155 }
2156 }
2158 // Create shadow values for all instructions creating FT values.
2159 for (Instruction *I : OriginalInstructions)
2160 maybeCreateShadowValue(*I, TLI, ValueToShadow);
2162 // Propagate shadow values across stores, calls and rets.
2163 for (Instruction *I : OriginalInstructions)
2164 propagateShadowValues(*I, TLI, ValueToShadow);
2166 // The last pass populates shadow phis with shadow values.
2167 for (PHINode *Phi : OriginalPhis) {
2168 PHINode *ShadowPhi = cast<PHINode>(ValueToShadow.getShadow(Phi));
2169 for (unsigned I : seq(Phi->getNumOperands())) {
2170 Value *V = Phi->getOperand(I);
2171 Value *Shadow = ValueToShadow.getShadow(V);
2172 BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
2173 // For some instructions (e.g. invoke), we create the shadow in a separate
2174 // block, different from the block where the original value is created.
2175 // In that case, the shadow phi might need to refer to this block instead
2176 // of the original block.
2177 // Note that this can only happen for instructions as constant shadows are
2178 // always created in the same block.
2179 ShadowPhi->addIncoming(Shadow, IncomingBB);
2180 }
2181 }
2183 return !ValueToShadow.empty();
2184 }
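// Note the pass ordering above: arguments and phis receive shadows first, so
// the later DFS can assume every FT operand either already has a shadow or is
// an instruction it may recurse into; phi inputs are only wired up at the
// very end, once every incoming value has a shadow.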
2186 static uint64_t GetMemOpSize(Value *V) {
2187 uint64_t OpSize = 0;
2188 if (Constant *C = dyn_cast<Constant>(V)) {
2189 auto *CInt = dyn_cast<ConstantInt>(C);
2190 if (CInt && CInt->getValue().getBitWidth() <= 64)
2191 OpSize = CInt->getValue().getZExtValue();
2192 }
2194 return OpSize;
2195 }
2197 // Instrument the memory intrinsics so that they properly modify the shadow
2198 // memory.
2199 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2200 IRBuilder<> Builder(MI);
2201 if (auto *M = dyn_cast<MemSetInst>(MI)) {
2202 FunctionCallee SetUnknownFn =
2203 NsanSetUnknownFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2204 if (SetUnknownFn.getFunctionType()->getNumParams() == 1)
2205 Builder.CreateCall(SetUnknownFn, {/*Address=*/M->getArgOperand(0)});
2206 else
2207 Builder.CreateCall(SetUnknownFn,
2208 {/*Address=*/M->getArgOperand(0),
2209 /*Size=*/Builder.CreateIntCast(M->getArgOperand(2),
2210 IntptrTy, false)});
2212 } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
2213 FunctionCallee CopyFn =
2214 NsanCopyFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2216 if (CopyFn.getFunctionType()->getNumParams() == 2)
2217 Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2218 /*Source=*/M->getArgOperand(1)});
2219 else
2220 Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2221 /*Source=*/M->getArgOperand(1),
2222 /*Size=*/
2223 Builder.CreateIntCast(M->getArgOperand(2),
2224 IntptrTy, false)});
2225 }
2226 return false;
2227 }
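// Sketch of the effect: an `llvm.memcpy.p0.p0.i64(%dst, %src, 16, ...)` is
// preceded by a call to the size-specialized nsan copy helper so the shadow
// bytes travel with the data, while an `llvm.memset` invalidates the
// destination's shadow, since the stored byte pattern has no useful wide FP
// interpretation (helper names are resolved by NsanCopyFns/NsanSetUnknownFns).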
2229 void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2230 Function *Fn = CI->getCalledFunction();
2231 if (Fn == nullptr)
2232 return;
2234 if (!Fn->getName().starts_with("__nsan_"))
2235 return;
2237 if (Fn->getName() == "__nsan_dump_shadow_mem") {
2238 assert(CI->arg_size() == 4 &&
2239 "invalid prototype for __nsan_dump_shadow_mem");
2240 // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2241 // configuration:
2242 // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2243 // | shadow_type_id_for_float
2244 const uint64_t shadow_value_type_ids =
2245 (static_cast<size_t>(Config.byValueType(kLongDouble).getNsanTypeId())
2246 << 16) |
2247 (static_cast<size_t>(Config.byValueType(kDouble).getNsanTypeId())
2248 << 8) |
2249 static_cast<size_t>(Config.byValueType(kFloat).getNsanTypeId());
2250 CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));