lib/Target/AArch64/AArch64SpeculationHardening.cpp

   1 //===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation  ---===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file contains a pass to insert code to mitigate against side channel
  10 // vulnerabilities that may happen under control flow miss-speculation.
  11 //
  12 // The pass implements tracking of control flow miss-speculation into a "taint"
  13 // register. That taint register can then be used to mask off registers with
  14 // sensitive data when executing under miss-speculation, a.k.a. "transient
  15 // execution".
  16 // This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
  17 //
  18 // It also implements speculative load hardening, i.e. using the taint register
  19 // to automatically mask off loaded data.
  20 //
  21 // As a possible follow-on improvement, also an intrinsics-based approach as
  22 // explained at https://lwn.net/Articles/759423/ could be implemented on top of
  23 // the current design.
  24 //
  25 // For AArch64, the following implementation choices are made to implement the
  26 // tracking of control flow miss-speculation into a taint register:
  27 // Some of these are different than the implementation choices made in
  28 // the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
  29 // the instruction set characteristics result in different trade-offs.
  30 // - The speculation hardening is done after register allocation. With a
  31 //   relative abundance of registers, one register is reserved (X16) to be
  32 //   the taint register. X16 is expected to not clash with other register
  33 //   reservation mechanisms with very high probability because:
  34 //   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
  35 //   . The only way to request X16 to be used as a programmer is through
  36 //     inline assembly. In the rare case a function explicitly demands to
  37 //     use X16/W16, this pass falls back to hardening against speculation
  38 //     by inserting a DSB SYS/ISB barrier pair which will prevent control
  39 //     flow speculation.
  40 // - It is easy to insert mask operations at this late stage as we have
  41 //   mask operations available that don't set flags.
  42 // - The taint variable contains all-ones when no miss-speculation is detected,
  43 //   and contains all-zeros when miss-speculation is detected. Therefore, when
  44 //   masking, an AND instruction (which only changes the register to be masked,
  45 //   no other side effects) can easily be inserted anywhere that's needed.
  46 // - The tracking of miss-speculation is done by using a data-flow conditional
  47 //   select instruction (CSEL) to evaluate the flags that were also used to
  48 //   make conditional branch direction decisions. Speculation of the CSEL
  49 //   instruction can be limited with a CSDB instruction - so the combination of
  50 //   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
  51 //   aren't speculated. When conditional branch direction gets miss-speculated,
  52 //   the semantics of the inserted CSEL instruction is such that the taint
  53 //   register will contain all zero bits.
  54 //   One key requirement for this to work is that the conditional branch is
  55 //   followed by an execution of the CSEL instruction, where the CSEL
  56 //   instruction needs to use the same flags status as the conditional branch.
  57 //   This means that the conditional branches must not be implemented as one
  58 //   of the AArch64 conditional branches that do not use the flags as input
  59 //   (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
  60 //   selectors to not produce these instructions when speculation hardening
  61 //   is enabled. This pass will assert if it does encounter such an instruction.
  62 // - On function call boundaries, the miss-speculation state is transferred from
  63 //   the taint register X16 to be encoded in the SP register as value 0.
  64 //
  65 // For the aspect of automatically hardening loads, using the taint register,
  66 // (a.k.a. speculative load hardening, see
  67 //  https://llvm.org/docs/SpeculativeLoadHardening.html), the following
  68 // implementation choices are made for AArch64:
  69 //   - Many of the optimizations described at
  70 //     https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
  71 //     loads haven't been implemented yet - but for some of them there are
  72 //     FIXMEs in the code.
  73 //   - loads that load into general purpose (X or W) registers get hardened by
  74 //     masking the loaded data. For loads that load into other registers, the
  75 //     address loaded from gets hardened. It is expected that hardening the
  76 //     loaded data may be more efficient; but masking data in registers other
  77 //     than X or W is not easy and may result in being slower than just
  78 //     hardening the X address register loaded from.
  79 //   - On AArch64, CSDB instructions are inserted between the masking of the
  80 //     register and its first use, to ensure there's no non-control-flow
  81 //     speculation that might undermine the hardening mechanism.
  82 //
  83 // Future extensions/improvements could be:
  84 // - Implement this functionality using full speculation barriers, akin to the
  85 //   x86-slh-lfence option. This may be more useful for the intrinsics-based
  86 //   approach than for the SLH approach to masking.
  87 //   Note that this pass already inserts the full speculation barriers if the
  88 //   function for some niche reason makes use of X16/W16.
  89 // - no indirect branch misprediction gets protected/instrumented; but this
  90 //   could be done for some indirect branches, such as switch jump tables.
  91 //===----------------------------------------------------------------------===//
  92
  93 #include "AArch64InstrInfo.h"
  94 #include "AArch64Subtarget.h"
  95 #include "Utils/AArch64BaseInfo.h"
  96 #include "llvm/ADT/BitVector.h"
  97 #include "llvm/ADT/SmallVector.h"
  98 #include "llvm/CodeGen/MachineBasicBlock.h"
  99 #include "llvm/CodeGen/MachineFunction.h"
 100 #include "llvm/CodeGen/MachineFunctionPass.h"
 101 #include "llvm/CodeGen/MachineInstr.h"
 102 #include "llvm/CodeGen/MachineInstrBuilder.h"
 103 #include "llvm/CodeGen/MachineOperand.h"
 104 #include "llvm/CodeGen/MachineRegisterInfo.h"
 105 #include "llvm/CodeGen/RegisterScavenging.h"
 106 #include "llvm/IR/DebugLoc.h"
 107 #include "llvm/Pass.h"
 108 #include "llvm/Support/CodeGen.h"
 109 #include "llvm/Support/Debug.h"
 110 #include "llvm/Target/TargetMachine.h"
 111 #include <cassert>
 112
 113 using namespace llvm;
 114
 115 #define DEBUG_TYPE "aarch64-speculation-hardening"
 116
 117 #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
 118
 119 static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
 120                                  cl::desc("Sanitize loads from memory."),
 121                                  cl::init(true));
 122
 123 namespace {
 124
 125 class AArch64SpeculationHardening : public MachineFunctionPass {
 126 public:
 127   const TargetInstrInfo *TII;
 128   const TargetRegisterInfo *TRI;
 129
 130   static char ID;
 131
 132   AArch64SpeculationHardening() : MachineFunctionPass(ID) {
 133     initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
 134   }
 135
 136   bool runOnMachineFunction(MachineFunction &Fn) override;
 137
 138   StringRef getPassName() const override {
 139     return AARCH64_SPECULATION_HARDENING_NAME;
 140   }
 141
 142 private:
 143   unsigned MisspeculatingTaintReg;
 144   unsigned MisspeculatingTaintReg32Bit;
 145   bool UseControlFlowSpeculationBarrier;
 146   BitVector RegsNeedingCSDBBeforeUse;
 147   BitVector RegsAlreadyMasked;
 148
 149   bool functionUsesHardeningRegister(MachineFunction &MF) const;
 150   bool instrumentControlFlow(MachineBasicBlock &MBB,
 151                              bool &UsesFullSpeculationBarrier);
 152   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
 153                                MachineBasicBlock *&FBB,
 154                                AArch64CC::CondCode &CondCode) const;
 155   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
 156                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
 157   void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
 158                                      MachineBasicBlock::iterator MBBI) const;
 159   void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
 160                                      MachineBasicBlock::iterator MBBI,
 161                                      unsigned TmpReg) const;
 162   void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
 163                                     MachineBasicBlock::iterator MBBI,
 164                                     DebugLoc DL) const;
 165
 166   bool slhLoads(MachineBasicBlock &MBB);
 167   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
 168                               MachineBasicBlock::iterator MBBI,
 169                               MachineInstr &MI, unsigned Reg);
 170   bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
 171                                         bool UsesFullSpeculationBarrier);
 172   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
 173                                   MachineBasicBlock::iterator MBBI,
 174                                   bool UsesFullSpeculationBarrier);
 175   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 176                   DebugLoc DL);
 177 };
 178
 179 } // end anonymous namespace
 180
 181 char AArch64SpeculationHardening::ID = 0;
 182
 183 INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
 184                 AARCH64_SPECULATION_HARDENING_NAME, false, false)
 185
 186 bool AArch64SpeculationHardening::endsWithCondControlFlow(
 187     MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
 188     AArch64CC::CondCode &CondCode) const {
 189   SmallVector<MachineOperand, 1> analyzeBranchCondCode;
 190   if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
 191     return false;
 192
 193   // Ignore if the BB ends in an unconditional branch/fall-through.
 194   if (analyzeBranchCondCode.empty())
 195     return false;
 196
 197   // If the BB ends with a single conditional branch, FBB will be set to
 198   // nullptr (see API docs for TII->analyzeBranch). For the rest of the
 199   // analysis we want the FBB block to be set always.
 200   assert(TBB != nullptr);
 201   if (FBB == nullptr)
 202     FBB = MBB.getFallThrough();
 203
 204   // If both the true and the false condition jump to the same basic block,
 205   // there isn't need for any protection - whether the branch is speculated
 206   // correctly or not, we end up executing the architecturally correct code.
 207   if (TBB == FBB)
 208     return false;
 209
 210   assert(MBB.succ_size() == 2);
 211   // translate analyzeBranchCondCode to CondCode.
 212   assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
 213   CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
 214   return true;
 215 }
 216
 217 void AArch64SpeculationHardening::insertFullSpeculationBarrier(
 218     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 219     DebugLoc DL) const {
 220   // A full control flow speculation barrier consists of (DSB SYS + ISB)
 221   BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
 222   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
 223 }
 224
 225 void AArch64SpeculationHardening::insertTrackingCode(
 226     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
 227     DebugLoc DL) const {
 228   if (UseControlFlowSpeculationBarrier) {
 229     insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
 230   } else {
 231     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
 232         .addDef(MisspeculatingTaintReg)
 233         .addUse(MisspeculatingTaintReg)
 234         .addUse(AArch64::XZR)
 235         .addImm(CondCode);
 236     SplitEdgeBB.addLiveIn(AArch64::NZCV);
 237   }
 238 }
 239
 240 bool AArch64SpeculationHardening::instrumentControlFlow(
 241     MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
 242   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
 243
 244   bool Modified = false;
 245   MachineBasicBlock *TBB = nullptr;
 246   MachineBasicBlock *FBB = nullptr;
 247   AArch64CC::CondCode CondCode;
 248
 249   if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
 250     LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
 251   } else {
 252     // Now insert:
 253     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
 254     // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
 255     // edge.
 256     AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
 257
 258     MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
 259     MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
 260
 261     assert(SplitEdgeTBB != nullptr);
 262     assert(SplitEdgeFBB != nullptr);
 263
 264     DebugLoc DL;
 265     if (MBB.instr_end() != MBB.instr_begin())
 266       DL = (--MBB.instr_end())->getDebugLoc();
 267
 268     insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
 269     insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
 270
 271     LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
 272     LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
 273     Modified = true;
 274   }
 275
 276   // Perform correct code generation around function calls and before returns.
 277   // The below variables record the return/terminator instructions and the call
 278   // instructions respectively; including which register is available as a
 279   // temporary register just before the recorded instructions.
 280   SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
 281   SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
 282   // if a temporary register is not available for at least one of the
 283   // instructions for which we need to transfer taint to the stack pointer, we
 284   // need to insert a full speculation barrier.
 285   // TmpRegisterNotAvailableEverywhere tracks that condition.
 286   bool TmpRegisterNotAvailableEverywhere = false;
 287
 288   RegScavenger RS;
 289   RS.enterBasicBlock(MBB);
 290
 291   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
 292     MachineInstr &MI = *I;
 293     if (!MI.isReturn() && !MI.isCall())
 294       continue;
 295
 296     // The RegScavenger represents registers available *after* the MI
 297     // instruction pointed to by RS.getCurrentPosition().
 298     // We need to have a register that is available *before* the MI is executed.
 299     if (I != MBB.begin())
 300       RS.forward(std::prev(I));
 301     // FIXME: The below just finds *a* unused register. Maybe code could be
 302     // optimized more if this looks for the register that isn't used for the
 303     // longest time around this place, to enable more scheduling freedom. Not
 304     // sure if that would actually result in a big performance difference
 305     // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
 306     // already to do this - but it's unclear if that could easily be used here.
 307     unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
 308     LLVM_DEBUG(dbgs() << "RS finds "
 309                       << ((TmpReg == 0) ? "no register " : "register ");
 310                if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
 311                dbgs() << "to be available at MI " << MI);
 312     if (TmpReg == 0)
 313       TmpRegisterNotAvailableEverywhere = true;
 314     if (MI.isReturn())
 315       ReturnInstructions.push_back({&MI, TmpReg});
 316     else if (MI.isCall())
 317       CallInstructions.push_back({&MI, TmpReg});
 318   }
 319
 320   if (TmpRegisterNotAvailableEverywhere) {
 321     // When a temporary register is not available everywhere in this basic
 322     // basic block where a propagate-taint-to-sp operation is needed, just
 323     // emit a full speculation barrier at the start of this basic block, which
 324     // renders the taint/speculation tracking in this basic block unnecessary.
 325     insertFullSpeculationBarrier(MBB, MBB.begin(),
 326                                  (MBB.begin())->getDebugLoc());
 327     UsesFullSpeculationBarrier = true;
 328     Modified = true;
 329   } else {
 330     for (auto MI_Reg : ReturnInstructions) {
 331       assert(MI_Reg.second != 0);
 332       LLVM_DEBUG(
 333           dbgs()
 334           << " About to insert Reg to SP taint propagation with temp register "
 335           << printReg(MI_Reg.second, TRI)
 336           << " on instruction: " << *MI_Reg.first);
 337       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
 338       Modified = true;
 339     }
 340
 341     for (auto MI_Reg : CallInstructions) {
 342       assert(MI_Reg.second != 0);
 343       LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
 344                            "propagation with temp register "
 345                         << printReg(MI_Reg.second, TRI)
 346                         << " around instruction: " << *MI_Reg.first);
 347       // Just after the call:
 348       insertSPToRegTaintPropagation(
 349           MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
 350       // Just before the call:
 351       insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
 352       Modified = true;
 353     }
 354   }
 355   return Modified;
 356 }
 357
 358 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
 359     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
 360   // If full control flow speculation barriers are used, emit a control flow
 361   // barrier to block potential miss-speculation in flight coming in to this
 362   // function.
 363   if (UseControlFlowSpeculationBarrier) {
 364     insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
 365     return;
 366   }
 367
 368   // CMP   SP, #0   === SUBS   xzr, SP, #0
 369   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
 370       .addDef(AArch64::XZR)
 371       .addUse(AArch64::SP)
 372       .addImm(0)
 373       .addImm(0); // no shift
 374   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
 375   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
 376       .addDef(MisspeculatingTaintReg)
 377       .addUse(AArch64::XZR)
 378       .addUse(AArch64::XZR)
 379       .addImm(AArch64CC::EQ);
 380 }
 381
 382 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
 383     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 384     unsigned TmpReg) const {
 385   // If full control flow speculation barriers are used, there will not be
 386   // miss-speculation when returning from this function, and therefore, also
 387   // no need to encode potential miss-speculation into the stack pointer.
 388   if (UseControlFlowSpeculationBarrier)
 389     return;
 390
 391   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
 392   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
 393       .addDef(TmpReg)
 394       .addUse(AArch64::SP)
 395       .addImm(0)
 396       .addImm(0); // no shift
 397   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
 398   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
 399       .addDef(TmpReg, RegState::Renamable)
 400       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
 401       .addUse(MisspeculatingTaintReg, RegState::Kill)
 402       .addImm(0);
 403   // mov   SP, Xtmp === ADD SP, Xtmp, #0
 404   BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
 405       .addDef(AArch64::SP)
 406       .addUse(TmpReg, RegState::Kill)
 407       .addImm(0)
 408       .addImm(0); // no shift
 409 }
 410
 411 bool AArch64SpeculationHardening::functionUsesHardeningRegister(
 412     MachineFunction &MF) const {
 413   for (MachineBasicBlock &MBB : MF) {
 414     for (MachineInstr &MI : MBB) {
 415       // treat function calls specially, as the hardening register does not
 416       // need to remain live across function calls.
 417       if (MI.isCall())
 418         continue;
 419       if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
 420           MI.modifiesRegister(MisspeculatingTaintReg, TRI))
 421         return true;
 422     }
 423   }
 424   return false;
 425 }
 426
 427 // Make GPR register Reg speculation-safe by putting it through the
 428 // SpeculationSafeValue pseudo instruction, if we can't prove that
 429 // the value in the register has already been hardened.
 430 bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
 431     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
 432     unsigned Reg) {
 433   assert(AArch64::GPR32allRegClass.contains(Reg) ||
 434          AArch64::GPR64allRegClass.contains(Reg));
 435
 436   // Loads cannot directly load a value into the SP (nor WSP).
 437   // Therefore, if Reg is SP or WSP, it is because the instruction loads from
 438   // the stack through the stack pointer.
 439   //
 440   // Since the stack pointer is never dynamically controllable, don't harden it.
 441   if (Reg == AArch64::SP || Reg == AArch64::WSP)
 442     return false;
 443
 444   // Do not harden the register again if already hardened before.
 445   if (RegsAlreadyMasked[Reg])
 446     return false;
 447
 448   const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
 449   LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
 450   BuildMI(MBB, MBBI, MI.getDebugLoc(),
 451           TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
 452                            : AArch64::SpeculationSafeValueW))
 453       .addDef(Reg)
 454       .addUse(Reg);
 455   RegsAlreadyMasked.set(Reg);
 456   return true;
 457 }
 458
 459 bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
 460   bool Modified = false;
 461
 462   LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
 463
 464   RegsAlreadyMasked.reset();
 465
 466   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 467   MachineBasicBlock::iterator NextMBBI;
 468   for (; MBBI != E; MBBI = NextMBBI) {
 469     MachineInstr &MI = *MBBI;
 470     NextMBBI = std::next(MBBI);
 471     // Only harden loaded values or addresses used in loads.
 472     if (!MI.mayLoad())
 473       continue;
 474
 475     LLVM_DEBUG(dbgs() << "About to harden: " << MI);
 476
 477     // For general purpose register loads, harden the registers loaded into.
 478     // For other loads, harden the address loaded from.
 479     // Masking the loaded value is expected to result in less performance
 480     // overhead, as the load can still execute speculatively in comparison to
 481     // when the address loaded from gets masked. However, masking is only
 482     // easy to do efficiently on GPR registers, so for loads into non-GPR
 483     // registers (e.g. floating point loads), mask the address loaded from.
 484     bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
 485       return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
 486                             AArch64::GPR64allRegClass.contains(Op.getReg()));
 487     });
 488     // FIXME: it might be a worthwhile optimization to not mask loaded
 489     // values if all the registers involved in address calculation are already
 490     // hardened, leading to this load not able to execute on a miss-speculated
 491     // path.
 492     bool HardenLoadedData = AllDefsAreGPR;
 493     bool HardenAddressLoadedFrom = !HardenLoadedData;
 494
 495     // First remove registers from AlreadyMaskedRegisters if their value is
 496     // updated by this instruction - it makes them contain a new value that is
 497     // not guaranteed to already have been masked.
 498     for (MachineOperand Op : MI.defs())
 499       for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
 500         RegsAlreadyMasked.reset(*AI);
 501
 502     // FIXME: loads from the stack with an immediate offset from the stack
 503     // pointer probably shouldn't be hardened, which could result in a
 504     // significant optimization. See section "Don’t check loads from
 505     // compile-time constant stack offsets", in
 506     // https://llvm.org/docs/SpeculativeLoadHardening.html
 507
 508     if (HardenLoadedData)
 509       for (auto Def : MI.defs()) {
 510         if (Def.isDead())
 511           // Do not mask a register that is not used further.
 512           continue;
 513         // FIXME: For pre/post-increment addressing modes, the base register
 514         // used in address calculation is also defined by this instruction.
 515         // It might be a worthwhile optimization to not harden that
 516         // base register increment/decrement when the increment/decrement is
 517         // an immediate.
 518         Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
 519       }
 520
 521     if (HardenAddressLoadedFrom)
 522       for (auto Use : MI.uses()) {
 523         if (!Use.isReg())
 524           continue;
 525         Register Reg = Use.getReg();
 526         // Some loads of floating point data have implicit defs/uses on a
 527         // super register of that floating point data. Some examples:
 528         // $s0 = LDRSui $sp, 22, implicit-def $q0
 529         // $q0 = LD1i64 $q0, 1, renamable $x0
 530         // We need to filter out these uses for non-GPR register which occur
 531         // because the load partially fills a non-GPR register with the loaded
 532         // data. Just skipping all non-GPR registers is safe (for now) as all
 533         // AArch64 load instructions only use GPR registers to perform the
 534         // address calculation. FIXME: However that might change once we can
 535         // produce SVE gather instructions.
 536         if (!(AArch64::GPR32allRegClass.contains(Reg) ||
 537               AArch64::GPR64allRegClass.contains(Reg)))
 538           continue;
 539         Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
 540       }
 541   }
 542   return Modified;
 543 }
 544
 545 /// \brief If MBBI references a pseudo instruction that should be expanded
 546 /// here, do the expansion and return true. Otherwise return false.
 547 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
 548     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 549     bool UsesFullSpeculationBarrier) {
 550   MachineInstr &MI = *MBBI;
 551   unsigned Opcode = MI.getOpcode();
 552   bool Is64Bit = true;
 553
 554   switch (Opcode) {
 555   default:
 556     break;
 557   case AArch64::SpeculationSafeValueW:
 558     Is64Bit = false;
 559     LLVM_FALLTHROUGH;
 560   case AArch64::SpeculationSafeValueX:
 561     // Just remove the SpeculationSafe pseudo's if control flow
 562     // miss-speculation isn't happening because we're already inserting barriers
 563     // to guarantee that.
 564     if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
 565       Register DstReg = MI.getOperand(0).getReg();
 566       Register SrcReg = MI.getOperand(1).getReg();
 567       // Mark this register and all its aliasing registers as needing to be
 568       // value speculation hardened before its next use, by using a CSDB
 569       // barrier instruction.
 570       for (MachineOperand Op : MI.defs())
 571         for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
 572           RegsNeedingCSDBBeforeUse.set(*AI);
 573
 574       // Mask off with taint state.
 575       BuildMI(MBB, MBBI, MI.getDebugLoc(),
 576               Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
 577           .addDef(DstReg)
 578           .addUse(SrcReg, RegState::Kill)
 579           .addUse(Is64Bit ? MisspeculatingTaintReg
 580                           : MisspeculatingTaintReg32Bit)
 581           .addImm(0);
 582     }
 583     MI.eraseFromParent();
 584     return true;
 585   }
 586   return false;
 587 }
 588
 589 bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
 590                                              MachineBasicBlock::iterator MBBI,
 591                                              DebugLoc DL) {
 592   assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
 593                                               "control flow miss-speculation "
 594                                               "is already blocked");
 595   // insert data value speculation barrier (CSDB)
 596   BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
 597   RegsNeedingCSDBBeforeUse.reset();
 598   return true;
 599 }
 600
 601 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
 602     MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
 603   bool Modified = false;
 604
 605   RegsNeedingCSDBBeforeUse.reset();
 606
 607   // The following loop iterates over all instructions in the basic block,
 608   // and performs 2 operations:
 609   // 1. Insert a CSDB at this location if needed.
 610   // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
 611   // one.
 612   //
 613   // The insertion of the CSDB is done as late as possible (i.e. just before
 614   // the use of a masked register), in the hope that that will reduce the
 615   // total number of CSDBs in a block when there are multiple masked registers
 616   // in the block.
 617   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
 618   DebugLoc DL;
 619   while (MBBI != E) {
 620     MachineInstr &MI = *MBBI;
 621     DL = MI.getDebugLoc();
 622     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
 623
 624     // First check if a CSDB needs to be inserted due to earlier registers
 625     // that were masked and that are used by the next instruction.
 626     // Also emit the barrier on any potential control flow changes.
 627     bool NeedToEmitBarrier = false;
 628     if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
 629       NeedToEmitBarrier = true;
 630     if (!NeedToEmitBarrier)
 631       for (MachineOperand Op : MI.uses())
 632         if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
 633           NeedToEmitBarrier = true;
 634           break;
 635         }
 636
 637     if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
 638       Modified |= insertCSDB(MBB, MBBI, DL);
 639
 640     Modified |=
 641         expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
 642
 643     MBBI = NMBBI;
 644   }
 645
 646   if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
 647     Modified |= insertCSDB(MBB, MBBI, DL);
 648
 649   return Modified;
 650 }
 651
 652 bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
 653   if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
 654     return false;
 655
 656   MisspeculatingTaintReg = AArch64::X16;
 657   MisspeculatingTaintReg32Bit = AArch64::W16;
 658   TII = MF.getSubtarget().getInstrInfo();
 659   TRI = MF.getSubtarget().getRegisterInfo();
 660   RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
 661   RegsAlreadyMasked.resize(TRI->getNumRegs());
 662   UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
 663
 664   bool Modified = false;
 665
 666   // Step 1: Enable automatic insertion of SpeculationSafeValue.
 667   if (HardenLoads) {
 668     LLVM_DEBUG(
 669         dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
 670                   "SpeculationSafeValue intrinsics *****\n");
 671     for (auto &MBB : MF)
 672       Modified |= slhLoads(MBB);
 673   }
 674
 675   // 2. Add instrumentation code to function entry and exits.
 676   LLVM_DEBUG(
 677       dbgs()
 678       << "***** AArch64SpeculationHardening - track control flow *****\n");
 679
 680   SmallVector<MachineBasicBlock *, 2> EntryBlocks;
 681   EntryBlocks.push_back(&MF.front());
 682   for (const LandingPadInfo &LPI : MF.getLandingPads())
 683     EntryBlocks.push_back(LPI.LandingPadBlock);
 684   for (auto Entry : EntryBlocks)
 685     insertSPToRegTaintPropagation(
 686         *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
 687
 688   // 3. Add instrumentation code to every basic block.
 689   for (auto &MBB : MF) {
 690     bool UsesFullSpeculationBarrier = false;
 691     Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
 692     Modified |=
 693         lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
 694   }
 695
 696   return Modified;
 697 }
 698
 699 /// \brief Returns an instance of the pseudo instruction expansion pass.
 700 FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
 701   return new AArch64SpeculationHardening();
 702 }