lib/Target/X86/X86FlagsCopyLowering.cpp

   1 //====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 ///
  10 /// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
  11 /// flag bits.
  12 ///
  13 /// We have to do this by carefully analyzing and rewriting the usage of the
  14 /// copied EFLAGS register because there is no general way to rematerialize the
  15 /// entire EFLAGS register safely and efficiently. Using `popf` both forces
  16 /// dynamic stack adjustment and can create correctness issues due to IF, TF,
  17 /// and other non-status flags being overwritten. Sequences involving
  18 /// SAHF don't work on all x86 processors and are often quite slow compared to
  19 /// directly testing a single status preserved in its own GPR.
  20 ///
  21 //===----------------------------------------------------------------------===//
  22
  23 #include "X86.h"
  24 #include "X86InstrBuilder.h"
  25 #include "X86InstrInfo.h"
  26 #include "X86Subtarget.h"
  27 #include "llvm/ADT/ArrayRef.h"
  28 #include "llvm/ADT/DenseMap.h"
  29 #include "llvm/ADT/PostOrderIterator.h"
  30 #include "llvm/ADT/STLExtras.h"
  31 #include "llvm/ADT/ScopeExit.h"
  32 #include "llvm/ADT/SmallPtrSet.h"
  33 #include "llvm/ADT/SmallSet.h"
  34 #include "llvm/ADT/SmallVector.h"
  35 #include "llvm/ADT/SparseBitVector.h"
  36 #include "llvm/ADT/Statistic.h"
  37 #include "llvm/CodeGen/MachineBasicBlock.h"
  38 #include "llvm/CodeGen/MachineConstantPool.h"
  39 #include "llvm/CodeGen/MachineDominators.h"
  40 #include "llvm/CodeGen/MachineFunction.h"
  41 #include "llvm/CodeGen/MachineFunctionPass.h"
  42 #include "llvm/CodeGen/MachineInstr.h"
  43 #include "llvm/CodeGen/MachineInstrBuilder.h"
  44 #include "llvm/CodeGen/MachineModuleInfo.h"
  45 #include "llvm/CodeGen/MachineOperand.h"
  46 #include "llvm/CodeGen/MachineRegisterInfo.h"
  47 #include "llvm/CodeGen/MachineSSAUpdater.h"
  48 #include "llvm/CodeGen/TargetInstrInfo.h"
  49 #include "llvm/CodeGen/TargetRegisterInfo.h"
  50 #include "llvm/CodeGen/TargetSchedule.h"
  51 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  52 #include "llvm/IR/DebugLoc.h"
  53 #include "llvm/MC/MCSchedule.h"
  54 #include "llvm/Pass.h"
  55 #include "llvm/Support/CommandLine.h"
  56 #include "llvm/Support/Debug.h"
  57 #include "llvm/Support/raw_ostream.h"
  58 #include <algorithm>
  59 #include <cassert>
  60 #include <iterator>
  61 #include <utility>
  62
  63 using namespace llvm;
  64
  65 #define PASS_KEY "x86-flags-copy-lowering"
  66 #define DEBUG_TYPE PASS_KEY
  67
  68 STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
  69 STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
  70 STATISTIC(NumTestsInserted, "Number of test instructions inserted");
  71 STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
  72
  73 namespace {
  74
  75 // Convenient array type for storing registers associated with each condition.
  76 using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
  77
  78 class X86FlagsCopyLoweringPass : public MachineFunctionPass {
  79 public:
  80   X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { }
  81
  82   StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  83   bool runOnMachineFunction(MachineFunction &MF) override;
  84   void getAnalysisUsage(AnalysisUsage &AU) const override;
  85
  86   /// Pass identification, replacement for typeid.
  87   static char ID;
  88
  89 private:
  90   MachineRegisterInfo *MRI;
  91   const X86Subtarget *Subtarget;
  92   const X86InstrInfo *TII;
  93   const TargetRegisterInfo *TRI;
  94   const TargetRegisterClass *PromoteRC;
  95   MachineDominatorTree *MDT;
  96
  97   CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
  98                                   MachineBasicBlock::iterator CopyDefI);
  99
 100   unsigned promoteCondToReg(MachineBasicBlock &MBB,
 101                             MachineBasicBlock::iterator TestPos,
 102                             DebugLoc TestLoc, X86::CondCode Cond);
 103   std::pair<unsigned, bool>
 104   getCondOrInverseInReg(MachineBasicBlock &TestMBB,
 105                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
 106                         X86::CondCode Cond, CondRegArray &CondRegs);
 107   void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
 108                   DebugLoc Loc, unsigned Reg);
 109
 110   void rewriteArithmetic(MachineBasicBlock &TestMBB,
 111                          MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
 112                          MachineInstr &MI, MachineOperand &FlagUse,
 113                          CondRegArray &CondRegs);
 114   void rewriteCMov(MachineBasicBlock &TestMBB,
 115                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
 116                    MachineInstr &CMovI, MachineOperand &FlagUse,
 117                    CondRegArray &CondRegs);
 118   void rewriteCondJmp(MachineBasicBlock &TestMBB,
 119                       MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
 120                       MachineInstr &JmpI, CondRegArray &CondRegs);
 121   void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
 122                    MachineInstr &CopyDefI);
 123   void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
 124                                MachineBasicBlock::iterator TestPos,
 125                                DebugLoc TestLoc, MachineInstr &SetBI,
 126                                MachineOperand &FlagUse, CondRegArray &CondRegs);
 127   void rewriteSetCC(MachineBasicBlock &TestMBB,
 128                     MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
 129                     MachineInstr &SetCCI, MachineOperand &FlagUse,
 130                     CondRegArray &CondRegs);
 131 };
 132
 133 } // end anonymous namespace
 134
 135 INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
 136                       "X86 EFLAGS copy lowering", false, false)
     // NOTE(review): no INITIALIZE_PASS_DEPENDENCY entries are listed between
     // BEGIN and END, although getAnalysisUsage() requires MachineDominatorTree
     // -- confirm that analysis is registered elsewhere before this pass runs.
 137 INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
 138                     "X86 EFLAGS copy lowering", false, false)
 139
 140 FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
       // Factory entry point: hands back a freshly heap-allocated pass object as
       // a raw pointer; nothing in this function releases it.
 141   return new X86FlagsCopyLoweringPass();
 142 }
 143
 144 char X86FlagsCopyLoweringPass::ID = 0;
     // Out-of-class definition of the static `ID` member that the constructor
     // forwards to MachineFunctionPass as the pass identification.
 145
 146 void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
       // runOnMachineFunction() fetches the dominator tree via
       // getAnalysis<MachineDominatorTree>(), so it must be declared as required.
 147   AU.addRequired<MachineDominatorTree>();
       // Defer to the base class for the remainder of the analysis usage.
 148   MachineFunctionPass::getAnalysisUsage(AU);
 149 }
 150
 151 namespace {
 152 /// An enumeration of the arithmetic instruction mnemonics which have
 153 /// interesting flag semantics.
 154 ///
 155 /// We can map instruction opcodes into these mnemonics to make it easy to
 156 /// dispatch with specific functionality.
     ///
     /// getMnemonicFromOpcode() is the function that performs this mapping; the
     /// per-enumerator notes below list the opcode families it assigns to each.
 157 enum class FlagArithMnemonic {
 158   ADC,  // X86::ADC{8,16,32,64}* register/memory/immediate forms.
 159   ADCX, // X86::ADCX{32,64}{rr,rm}.
 160   ADOX, // X86::ADOX{32,64}{rr,rm}.
 161   RCL,  // X86::RCL{8,16,32,64}{rCL,r1,ri}.
 162   RCR,  // X86::RCR{8,16,32,64}{rCL,r1,ri}.
 163   SBB,  // X86::SBB{8,16,32,64}* register/memory/immediate forms.
 164 };
 165 } // namespace
 166
 167 static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
       // Classifies an X86 opcode into the FlagArithMnemonic family it belongs
       // to. Every listed case returns directly from inside the switch; any
       // opcode that is not listed reaches `default` and is reported as a fatal
       // error.
 168   switch (Opcode) {
 169   default:
 170     report_fatal_error("No support for lowering a copy into EFLAGS when used "
 171                        "by this instruction!");
 172
     // Helper macro: expands to the four `case` labels for the 8-, 16-, 32- and
     // 64-bit variants of MNEMONIC carrying the given opcode SUFFIX.
 173 #define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \
 174   case X86::MNEMONIC##8##SUFFIX:                                               \
 175   case X86::MNEMONIC##16##SUFFIX:                                              \
 176   case X86::MNEMONIC##32##SUFFIX:                                              \
 177   case X86::MNEMONIC##64##SUFFIX:
 178
     // Helper macro: expands to every `case` label shared by the ADC and SBB
     // families -- the rr, rr_REV, rm and mr forms in all four sizes, plus the
     // explicitly listed immediate (ri/mi) and accumulator-immediate (i8/i16/
     // i32) forms.
 179 #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \
 180   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \
 181   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \
 182   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \
 183   LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \
 184   case X86::MNEMONIC##8ri:                                                     \
 185   case X86::MNEMONIC##16ri8:                                                   \
 186   case X86::MNEMONIC##32ri8:                                                   \
 187   case X86::MNEMONIC##64ri8:                                                   \
 188   case X86::MNEMONIC##16ri:                                                    \
 189   case X86::MNEMONIC##32ri:                                                    \
 190   case X86::MNEMONIC##64ri32:                                                  \
 191   case X86::MNEMONIC##8mi:                                                     \
 192   case X86::MNEMONIC##16mi8:                                                   \
 193   case X86::MNEMONIC##32mi8:                                                   \
 194   case X86::MNEMONIC##64mi8:                                                   \
 195   case X86::MNEMONIC##16mi:                                                    \
 196   case X86::MNEMONIC##32mi:                                                    \
 197   case X86::MNEMONIC##64mi32:                                                  \
 198   case X86::MNEMONIC##8i8:                                                     \
 199   case X86::MNEMONIC##16i16:                                                   \
 200   case X86::MNEMONIC##32i32:                                                   \
 201   case X86::MNEMONIC##64i32:
 202
         // All ADC opcode forms.
 203     LLVM_EXPAND_ADC_SBB_INSTR(ADC)
 204     return FlagArithMnemonic::ADC;
 205
         // All SBB opcode forms.
 206     LLVM_EXPAND_ADC_SBB_INSTR(SBB)
 207     return FlagArithMnemonic::SBB;
 208
     // The ADC/SBB helper is not needed past this point.
 209 #undef LLVM_EXPAND_ADC_SBB_INSTR
 210
         // Rotate-through-carry left: by CL, by one, and by immediate.
 211     LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
 212     LLVM_EXPAND_INSTR_SIZES(RCL, r1)
 213     LLVM_EXPAND_INSTR_SIZES(RCL, ri)
 214     return FlagArithMnemonic::RCL;
 215
         // Rotate-through-carry right: by CL, by one, and by immediate.
 216     LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
 217     LLVM_EXPAND_INSTR_SIZES(RCR, r1)
 218     LLVM_EXPAND_INSTR_SIZES(RCR, ri)
 219     return FlagArithMnemonic::RCR;
 220
     // Keep the size-expansion helper local to this function.
 221 #undef LLVM_EXPAND_INSTR_SIZES
 222
       // ADCX and ADOX are only listed in 32- and 64-bit rr/rm forms, so they are
       // spelled out by hand.
 223   case X86::ADCX32rr:
 224   case X86::ADCX64rr:
 225   case X86::ADCX32rm:
 226   case X86::ADCX64rm:
 227     return FlagArithMnemonic::ADCX;
 228
 229   case X86::ADOX32rr:
 230   case X86::ADOX64rr:
 231   case X86::ADOX32rm:
 232   case X86::ADOX64rm:
 233     return FlagArithMnemonic::ADOX;
 234   }
 235 }
 236
 237 static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
 238                                      MachineInstr &SplitI,
 239                                      const X86InstrInfo &TII) {
 240   MachineFunction &MF = *MBB.getParent();
 241
 242   assert(SplitI.getParent() == &MBB &&
 243          "Split instruction must be in the split block!");
 244   assert(SplitI.isBranch() &&
 245          "Only designed to split a tail of branch instructions!");
 246   assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
 247          "Must split on an actual jCC instruction!");
 248
 249   // Dig out the previous instruction to the split point.
 250   MachineInstr &PrevI = *std::prev(SplitI.getIterator());
 251   assert(PrevI.isBranch() && "Must split after a branch!");
 252   assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
 253          "Must split after an actual jCC instruction!");
 254   assert(!std::prev(PrevI.getIterator())->isTerminator() &&
 255          "Must only have this one terminator prior to the split!");
 256
 257   // Grab the one successor edge that will stay in `MBB`.
 258   MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
 259
 260   // Analyze the original block to see if we are actually splitting an edge
 261   // into two edges. This can happen when we have multiple conditional jumps to
 262   // the same successor.
 263   bool IsEdgeSplit =
 264       std::any_of(SplitI.getIterator(), MBB.instr_end(),
 265                   [&](MachineInstr &MI) {
 266                     assert(MI.isTerminator() &&
 267                            "Should only have spliced terminators!");
 268                     return llvm::any_of(
 269                         MI.operands(), [&](MachineOperand &MOp) {
 270                           return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
 271                         });
 272                   }) ||
 273       MBB.getFallThrough() == &UnsplitSucc;
 274
 275   MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
 276
 277   // Insert the new block immediately after the current one. Any existing
 278   // fallthrough will be sunk into this new block anyways.
 279   MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
 280
 281   // Splice the tail of instructions into the new block.
 282   NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
 283
 284   // Copy the necessary succesors (and their probability info) into the new
 285   // block.
 286   for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
 287     if (IsEdgeSplit || *SI != &UnsplitSucc)
 288       NewMBB.copySuccessor(&MBB, SI);
 289   // Normalize the probabilities if we didn't end up splitting the edge.
 290   if (!IsEdgeSplit)
 291     NewMBB.normalizeSuccProbs();
 292
 293   // Now replace all of the moved successors in the original block with the new
 294   // block. This will merge their probabilities.
 295   for (MachineBasicBlock *Succ : NewMBB.successors())
 296     if (Succ != &UnsplitSucc)
 297       MBB.replaceSuccessor(Succ, &NewMBB);
 298
 299   // We should always end up replacing at least one successor.
 300   assert(MBB.isSuccessor(&NewMBB) &&
 301          "Failed to make the new block a successor!");
 302
 303   // Now update all the PHIs.
 304   for (MachineBasicBlock *Succ : NewMBB.successors()) {
 305     for (MachineInstr &MI : *Succ) {
 306       if (!MI.isPHI())
 307         break;
 308
 309       for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
 310            OpIdx += 2) {
 311         MachineOperand &OpV = MI.getOperand(OpIdx);
 312         MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
 313         assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
 314         if (OpMBB.getMBB() != &MBB)
 315           continue;
 316
 317         // Replace the operand for unsplit successors
 318         if (!IsEdgeSplit || Succ != &UnsplitSucc) {
 319           OpMBB.setMBB(&NewMBB);
 320
 321           // We have to continue scanning as there may be multiple entries in
 322           // the PHI.
 323           continue;
 324         }
 325
 326         // When we have split the edge append a new successor.
 327         MI.addOperand(MF, OpV);
 328         MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
 329         break;
 330       }
 331     }
 332   }
 333
 334   return NewMBB;
 335 }
 336
 337 bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
 338   LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
 339                     << " **********\n");
 340
 341   Subtarget = &MF.getSubtarget<X86Subtarget>();
 342   MRI = &MF.getRegInfo();
 343   TII = Subtarget->getInstrInfo();
 344   TRI = Subtarget->getRegisterInfo();
 345   MDT = &getAnalysis<MachineDominatorTree>();
 346   PromoteRC = &X86::GR8RegClass;
 347
 348   if (MF.begin() == MF.end())
 349     // Nothing to do for a degenerate empty function...
 350     return false;
 351
 352   // Collect the copies in RPO so that when there are chains where a copy is in
 353   // turn copied again we visit the first one first. This ensures we can find
 354   // viable locations for testing the original EFLAGS that dominate all the
 355   // uses across complex CFGs.
 356   SmallVector<MachineInstr *, 4> Copies;
 357   ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
 358   for (MachineBasicBlock *MBB : RPOT)
 359     for (MachineInstr &MI : *MBB)
 360       if (MI.getOpcode() == TargetOpcode::COPY &&
 361           MI.getOperand(0).getReg() == X86::EFLAGS)
 362         Copies.push_back(&MI);
 363
 364   for (MachineInstr *CopyI : Copies) {
 365     MachineBasicBlock &MBB = *CopyI->getParent();
 366
 367     MachineOperand &VOp = CopyI->getOperand(1);
 368     assert(VOp.isReg() &&
 369            "The input to the copy for EFLAGS should always be a register!");
 370     MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
 371     if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
 372       // FIXME: The big likely candidate here are PHI nodes. We could in theory
 373       // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
 374       // enough that it is probably better to change every other part of LLVM
 375       // to avoid creating them. The issue is that once we have PHIs we won't
 376       // know which original EFLAGS value we need to capture with our setCCs
 377       // below. The end result will be computing a complete set of setCCs that
 378       // we *might* want, computing them in every place where we copy *out* of
 379       // EFLAGS and then doing SSA formation on all of them to insert necessary
 380       // PHI nodes and consume those here. Then hoping that somehow we DCE the
 381       // unnecessary ones. This DCE seems very unlikely to be successful and so
 382       // we will almost certainly end up with a glut of dead setCC
 383       // instructions. Until we have a motivating test case and fail to avoid
 384       // it by changing other parts of LLVM's lowering, we refuse to handle
 385       // this complex case here.
 386       LLVM_DEBUG(
 387           dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
 388           CopyDefI.dump());
 389       report_fatal_error(
 390           "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
 391     }
 392
 393     auto Cleanup = make_scope_exit([&] {
 394       // All uses of the EFLAGS copy are now rewritten, kill the copy into
 395       // eflags and if dead the copy from.
 396       CopyI->eraseFromParent();
 397       if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
 398         CopyDefI.eraseFromParent();
 399       ++NumCopiesEliminated;
 400     });
 401
 402     MachineOperand &DOp = CopyI->getOperand(0);
 403     assert(DOp.isDef() && "Expected register def!");
 404     assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
 405     if (DOp.isDead())
 406       continue;
 407
 408     MachineBasicBlock *TestMBB = CopyDefI.getParent();
 409     auto TestPos = CopyDefI.getIterator();
 410     DebugLoc TestLoc = CopyDefI.getDebugLoc();
 411
 412     LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
 413
 414     // Walk up across live-in EFLAGS to find where they were actually def'ed.
 415     //
 416     // This copy's def may just be part of a region of blocks covered by
 417     // a single def of EFLAGS and we want to find the top of that region where
 418     // possible.
 419     //
 420     // This is essentially a search for a *candidate* reaching definition
 421     // location. We don't need to ever find the actual reaching definition here,
 422     // but we want to walk up the dominator tree to find the highest point which
 423     // would be viable for such a definition.
 424     auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
 425                                 MachineBasicBlock::iterator End) {
 426       // Scan backwards as we expect these to be relatively short and often find
 427       // a clobber near the end.
 428       return llvm::any_of(
 429           llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
 430             // Flag any instruction (other than the copy we are
 431             // currently rewriting) that defs EFLAGS.
 432             return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
 433           });
 434     };
 435     auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
 436                                     MachineBasicBlock *EndMBB) {
 437       assert(MDT->dominates(BeginMBB, EndMBB) &&
 438              "Only support paths down the dominator tree!");
 439       SmallPtrSet<MachineBasicBlock *, 4> Visited;
 440       SmallVector<MachineBasicBlock *, 4> Worklist;
 441       // We terminate at the beginning. No need to scan it.
 442       Visited.insert(BeginMBB);
 443       Worklist.push_back(EndMBB);
 444       do {
 445         auto *MBB = Worklist.pop_back_val();
 446         for (auto *PredMBB : MBB->predecessors()) {
 447           if (!Visited.insert(PredMBB).second)
 448             continue;
 449           if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
 450             return true;
 451           // Enqueue this block to walk its predecessors.
 452           Worklist.push_back(PredMBB);
 453         }
 454       } while (!Worklist.empty());
 455       // No clobber found along a path from the begin to end.
 456       return false;
 457     };
 458     while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
 459            !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
 460       // Find the nearest common dominator of the predecessors, as
 461       // that will be the best candidate to hoist into.
 462       MachineBasicBlock *HoistMBB =
 463           std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
 464                           *TestMBB->pred_begin(),
 465                           [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
 466                             return MDT->findNearestCommonDominator(LHS, RHS);
 467                           });
 468
 469       // Now we need to scan all predecessors that may be reached along paths to
 470       // the hoist block. A clobber anywhere in any of these blocks the hoist.
 471       // Note that this even handles loops because we require *no* clobbers.
 472       if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
 473         break;
 474
 475       // We also need the terminators to not sneakily clobber flags.
 476       if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
 477                            HoistMBB->instr_end()))
 478         break;
 479
 480       // We found a viable location, hoist our test position to it.
 481       TestMBB = HoistMBB;
 482       TestPos = TestMBB->getFirstTerminator()->getIterator();
 483       // Clear the debug location as it would just be confusing after hoisting.
 484       TestLoc = DebugLoc();
 485     }
 486     LLVM_DEBUG({
 487       auto DefIt = llvm::find_if(
 488           llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
 489           [&](MachineInstr &MI) {
 490             return MI.findRegisterDefOperand(X86::EFLAGS);
 491           });
 492       if (DefIt.base() != TestMBB->instr_begin()) {
 493         dbgs() << "  Using EFLAGS defined by: ";
 494         DefIt->dump();
 495       } else {
 496         dbgs() << "  Using live-in flags for BB:\n";
 497         TestMBB->dump();
 498       }
 499     });
 500
 501     // While rewriting uses, we buffer jumps and rewrite them in a second pass
 502     // because doing so will perturb the CFG that we are walking to find the
 503     // uses in the first place.
 504     SmallVector<MachineInstr *, 4> JmpIs;
 505
 506     // Gather the condition flags that have already been preserved in
 507     // registers. We do this from scratch each time as we expect there to be
 508     // very few of them and we expect to not revisit the same copy definition
 509     // many times. If either of those change sufficiently we could build a map
 510     // of these up front instead.
 511     CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);
 512
 513     // Collect the basic blocks we need to scan. Typically this will just be
 514     // a single basic block but we may have to scan multiple blocks if the
 515     // EFLAGS copy lives into successors.
 516     SmallVector<MachineBasicBlock *, 2> Blocks;
 517     SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
 518     Blocks.push_back(&MBB);
 519
 520     do {
 521       MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
 522
 523       // Track when if/when we find a kill of the flags in this block.
 524       bool FlagsKilled = false;
 525
 526       // In most cases, we walk from the beginning to the end of the block. But
 527       // when the block is the same block as the copy is from, we will visit it
 528       // twice. The first time we start from the copy and go to the end. The
 529       // second time we start from the beginning and go to the copy. This lets
 530       // us handle copies inside of cycles.
 531       // FIXME: This loop is *super* confusing. This is at least in part
 532       // a symptom of all of this routine needing to be refactored into
 533       // documentable components. Once done, there may be a better way to write
 534       // this loop.
 535       for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
 536                           ? std::next(CopyI->getIterator())
 537                           : UseMBB.instr_begin(),
 538                 MIE = UseMBB.instr_end();
 539            MII != MIE;) {
 540         MachineInstr &MI = *MII++;
 541         // If we are in the original copy block and encounter either the copy
 542         // def or the copy itself, break so that we don't re-process any part of
 543         // the block or process the instructions in the range that was copied
 544         // over.
 545         if (&MI == CopyI || &MI == &CopyDefI) {
 546           assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
 547                  "Should only encounter these on the second pass over the "
 548                  "original block.");
 549           break;
 550         }
 551
 552         MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
 553         if (!FlagUse) {
 554           if (MI.findRegisterDefOperand(X86::EFLAGS)) {
 555             // If EFLAGS are defined, it's as-if they were killed. We can stop
 556             // scanning here.
 557             //
 558             // NB!!! Many instructions only modify some flags. LLVM currently
 559             // models this as clobbering all flags, but if that ever changes
 560             // this will need to be carefully updated to handle that more
 561             // complex logic.
 562             FlagsKilled = true;
 563             break;
 564           }
 565           continue;
 566         }
 567
 568         LLVM_DEBUG(dbgs() << "  Rewriting use: "; MI.dump());
 569
 570         // Check the kill flag before we rewrite as that may change it.
 571         if (FlagUse->isKill())
 572           FlagsKilled = true;
 573
 574         // Once we encounter a branch, the rest of the instructions must also be
 575         // branches. We can't rewrite in place here, so we handle them below.
 576         //
 577         // Note that we don't have to handle tail calls here, even conditional
 578         // tail calls, as those are not introduced into the X86 MI until post-RA
 579         // branch folding or black placement. As a consequence, we get to deal
 580         // with the simpler formulation of conditional branches followed by tail
 581         // calls.
 582         if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
 583           auto JmpIt = MI.getIterator();
 584           do {
 585             JmpIs.push_back(&*JmpIt);
 586             ++JmpIt;
 587           } while (JmpIt != UseMBB.instr_end() &&
 588                    X86::getCondFromBranch(*JmpIt) !=
 589                        X86::COND_INVALID);
 590           break;
 591         }
 592
 593         // Otherwise we can just rewrite in-place.
 594         if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
 595           rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
 596         } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
 597           rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
 598         } else if (MI.getOpcode() == TargetOpcode::COPY) {
 599           rewriteCopy(MI, *FlagUse, CopyDefI);
 600         } else {
 601           // We assume all other instructions that use flags also def them.
 602           assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
 603                  "Expected a def of EFLAGS for this instruction!");
 604
 605           // NB!!! Several arithmetic instructions only *partially* update
 606           // flags. Theoretically, we could generate MI code sequences that
 607           // would rely on this fact and observe different flags independently.
 608           // But currently LLVM models all of these instructions as clobbering
 609           // all the flags in an undef way. We rely on that to simplify the
 610           // logic.
 611           FlagsKilled = true;
 612
 613           switch (MI.getOpcode()) {
 614           case X86::SETB_C8r:
 615           case X86::SETB_C16r:
 616           case X86::SETB_C32r:
 617           case X86::SETB_C64r:
 618             // Use custom lowering for arithmetic that is merely extending the
 619             // carry flag. We model this as the SETB_C* pseudo instructions.
 620             rewriteSetCarryExtended(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
 621                                     CondRegs);
 622             break;
 623
 624           default:
 625             // Generically handle remaining uses as arithmetic instructions.
 626             rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse,
 627                               CondRegs);
 628             break;
 629           }
 630           break;
 631         }
 632
 633         // If this was the last use of the flags, we're done.
 634         if (FlagsKilled)
 635           break;
 636       }
 637
 638       // If the flags were killed, we're done with this block.
 639       if (FlagsKilled)
 640         continue;
 641
 642       // Otherwise we need to scan successors for ones where the flags live-in
 643       // and queue those up for processing.
 644       for (MachineBasicBlock *SuccMBB : UseMBB.successors())
 645         if (SuccMBB->isLiveIn(X86::EFLAGS) &&
 646             VisitedBlocks.insert(SuccMBB).second) {
 647           // We currently don't do any PHI insertion and so we require that the
 648           // test basic block dominates all of the use basic blocks. Further, we
 649           // can't have a cycle from the test block back to itself as that would
 650           // create a cycle requiring a PHI to break it.
 651           //
 652           // We could in theory do PHI insertion here if it becomes useful by
 653           // just taking undef values in along every edge that we don't trace
 654           // this EFLAGS copy along. This isn't as bad as fully general PHI
 655           // insertion, but still seems like a great deal of complexity.
 656           //
 657           // Because it is theoretically possible that some earlier MI pass or
 658           // other lowering transformation could induce this to happen, we do
 659           // a hard check even in non-debug builds here.
 660           if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
 661             LLVM_DEBUG({
 662               dbgs()
 663                   << "ERROR: Encountered use that is not dominated by our test "
 664                      "basic block! Rewriting this would require inserting PHI "
 665                      "nodes to track the flag state across the CFG.\n\nTest "
 666                      "block:\n";
 667               TestMBB->dump();
 668               dbgs() << "Use block:\n";
 669               SuccMBB->dump();
 670             });
 671             report_fatal_error(
 672                 "Cannot lower EFLAGS copy when original copy def "
 673                 "does not dominate all uses.");
 674           }
 675
 676           Blocks.push_back(SuccMBB);
 677         }
 678     } while (!Blocks.empty());
 679
 680     // Now rewrite the jumps that use the flags. These we handle specially
 681     // because if there are multiple jumps in a single basic block we'll have
 682     // to do surgery on the CFG.
 683     MachineBasicBlock *LastJmpMBB = nullptr;
 684     for (MachineInstr *JmpI : JmpIs) {
 685       // Past the first jump within a basic block we need to split the blocks
 686       // apart.
 687       if (JmpI->getParent() == LastJmpMBB)
 688         splitBlock(*JmpI->getParent(), *JmpI, *TII);
 689       else
 690         LastJmpMBB = JmpI->getParent();
 691
 692       rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
 693     }
 694
 695     // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
 696     // the copy's def operand is itself a kill.
 697   }
 698
 699 #ifndef NDEBUG
 700   for (MachineBasicBlock &MBB : MF)
 701     for (MachineInstr &MI : MBB)
 702       if (MI.getOpcode() == TargetOpcode::COPY &&
 703           (MI.getOperand(0).getReg() == X86::EFLAGS ||
 704            MI.getOperand(1).getReg() == X86::EFLAGS)) {
 705         LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
 706                    MI.dump());
 707         llvm_unreachable("Unlowered EFLAGS copy!");
 708       }
 709 #endif
 710
 711   return true;
 712 }
 713
 714 /// Collect any conditions that have already been set in registers so that we
 715 /// can re-use them rather than adding duplicates.
 716 CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
 717     MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
 718   CondRegArray CondRegs = {};
 719
 720   // Scan backwards across the range of instructions with live EFLAGS.
 721   for (MachineInstr &MI :
 722        llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
 723     X86::CondCode Cond = X86::getCondFromSETCC(MI);
 724     if (Cond != X86::COND_INVALID && !MI.mayStore() &&
 725         MI.getOperand(0).isReg() &&
 726         Register::isVirtualRegister(MI.getOperand(0).getReg())) {
 727       assert(MI.getOperand(0).isDef() &&
 728              "A non-storing SETcc should always define a register!");
 729       CondRegs[Cond] = MI.getOperand(0).getReg();
 730     }
 731
 732     // Stop scanning when we see the first definition of the EFLAGS as prior to
 733     // this we would potentially capture the wrong flag state.
 734     if (MI.findRegisterDefOperand(X86::EFLAGS))
 735       break;
 736   }
 737   return CondRegs;
 738 }
 739
 740 unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
 741     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
 742     DebugLoc TestLoc, X86::CondCode Cond) {
 743   Register Reg = MRI->createVirtualRegister(PromoteRC);
 744   auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
 745                       TII->get(X86::SETCCr), Reg).addImm(Cond);
 746   (void)SetI;
 747   LLVM_DEBUG(dbgs() << "    save cond: "; SetI->dump());
 748   ++NumSetCCsInserted;
 749   return Reg;
 750 }
 751
 752 std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
 753     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
 754     DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
 755   unsigned &CondReg = CondRegs[Cond];
 756   unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
 757   if (!CondReg && !InvCondReg)
 758     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
 759
 760   if (CondReg)
 761     return {CondReg, false};
 762   else
 763     return {InvCondReg, true};
 764 }
 765
 766 void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
 767                                           MachineBasicBlock::iterator Pos,
 768                                           DebugLoc Loc, unsigned Reg) {
 769   auto TestI =
 770       BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
 771   (void)TestI;
 772   LLVM_DEBUG(dbgs() << "    test cond: "; TestI->dump());
 773   ++NumTestsInserted;
 774 }
 775
 776 void X86FlagsCopyLoweringPass::rewriteArithmetic(
 777     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
 778     DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
 779     CondRegArray &CondRegs) {
 780   // Arithmetic is either reading CF or OF. Figure out which condition we need
 781   // to preserve in a register.
 782   X86::CondCode Cond;
 783
 784   // The addend to use to reset CF or OF when added to the flag value.
 785   int Addend;
 786
 787   switch (getMnemonicFromOpcode(MI.getOpcode())) {
 788   case FlagArithMnemonic::ADC:
 789   case FlagArithMnemonic::ADCX:
 790   case FlagArithMnemonic::RCL:
 791   case FlagArithMnemonic::RCR:
 792   case FlagArithMnemonic::SBB:
 793     Cond = X86::COND_B; // CF == 1
 794     // Set up an addend that when one is added will need a carry due to not
 795     // having a higher bit available.
 796     Addend = 255;
 797     break;
 798
 799   case FlagArithMnemonic::ADOX:
 800     Cond = X86::COND_O; // OF == 1
 801     // Set up an addend that when one is added will turn from positive to
 802     // negative and thus overflow in the signed domain.
 803     Addend = 127;
 804     break;
 805   }
 806
 807   // Now get a register that contains the value of the flag input to the
 808   // arithmetic. We require exactly this flag to simplify the arithmetic
 809   // required to materialize it back into the flag.
 810   unsigned &CondReg = CondRegs[Cond];
 811   if (!CondReg)
 812     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
 813
 814   MachineBasicBlock &MBB = *MI.getParent();
 815
 816   // Insert an instruction that will set the flag back to the desired value.
 817   Register TmpReg = MRI->createVirtualRegister(PromoteRC);
 818   auto AddI =
 819       BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
 820           .addDef(TmpReg, RegState::Dead)
 821           .addReg(CondReg)
 822           .addImm(Addend);
 823   (void)AddI;
 824   LLVM_DEBUG(dbgs() << "    add cond: "; AddI->dump());
 825   ++NumAddsInserted;
 826   FlagUse.setIsKill(true);
 827 }
 828
 829 void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
 830                                            MachineBasicBlock::iterator TestPos,
 831                                            DebugLoc TestLoc,
 832                                            MachineInstr &CMovI,
 833                                            MachineOperand &FlagUse,
 834                                            CondRegArray &CondRegs) {
 835   // First get the register containing this specific condition.
 836   X86::CondCode Cond = X86::getCondFromCMov(CMovI);
 837   unsigned CondReg;
 838   bool Inverted;
 839   std::tie(CondReg, Inverted) =
 840       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
 841
 842   MachineBasicBlock &MBB = *CMovI.getParent();
 843
 844   // Insert a direct test of the saved register.
 845   insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
 846
 847   // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
 848   // of the flags afterward.
 849   CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
 850       .setImm(Inverted ? X86::COND_E : X86::COND_NE);
 851   FlagUse.setIsKill(true);
 852   LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
 853 }
 854
 855 void X86FlagsCopyLoweringPass::rewriteCondJmp(
 856     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
 857     DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
 858   // First get the register containing this specific condition.
 859   X86::CondCode Cond = X86::getCondFromBranch(JmpI);
 860   unsigned CondReg;
 861   bool Inverted;
 862   std::tie(CondReg, Inverted) =
 863       getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);
 864
 865   MachineBasicBlock &JmpMBB = *JmpI.getParent();
 866
 867   // Insert a direct test of the saved register.
 868   insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);
 869
 870   // Rewrite the jump to use the !ZF flag from the test, and kill its use of
 871   // flags afterward.
 872   JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
 873   JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
 874   LLVM_DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump());
 875 }
 876
 877 void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
 878                                            MachineOperand &FlagUse,
 879                                            MachineInstr &CopyDefI) {
 880   // Just replace this copy with the original copy def.
 881   MRI->replaceRegWith(MI.getOperand(0).getReg(),
 882                       CopyDefI.getOperand(0).getReg());
 883   MI.eraseFromParent();
 884 }
 885
 886 void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
 887     MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
 888     DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
 889     CondRegArray &CondRegs) {
 890   // This routine is only used to handle pseudos for setting a register to zero
 891   // or all ones based on CF. This is essentially the sign extended from 1-bit
 892   // form of SETB and modeled with the SETB_C* pseudos. They require special
 893   // handling as they aren't normal SETcc instructions and are lowered to an
 894   // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
 895   // they are only provided in reg-defining forms. A complicating factor is that
 896   // they can define many different register widths.
 897   assert(SetBI.getOperand(0).isReg() &&
 898          "Cannot have a non-register defined operand to this variant of SETB!");
 899
 900   // Little helper to do the common final step of replacing the register def'ed
 901   // by this SETB instruction with a new register and removing the SETB
 902   // instruction.
 903   auto RewriteToReg = [&](unsigned Reg) {
 904     MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
 905     SetBI.eraseFromParent();
 906   };
 907
 908   // Grab the register class used for this particular instruction.
 909   auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
 910
 911   MachineBasicBlock &MBB = *SetBI.getParent();
 912   auto SetPos = SetBI.getIterator();
 913   auto SetLoc = SetBI.getDebugLoc();
 914
 915   auto AdjustReg = [&](unsigned Reg) {
 916     auto &OrigRC = *MRI->getRegClass(Reg);
 917     if (&OrigRC == &SetBRC)
 918       return Reg;
 919
 920     unsigned NewReg;
 921
 922     int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
 923     int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
 924     assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
 925     assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
 926     int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
 927                        X86::NoSubRegister, X86::sub_32bit};
 928
 929     // If the original size is smaller than the target *and* is smaller than 4
 930     // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
 931     // to maximize the chance of being able to CSE that operation and to avoid
 932     // partial dependency stalls extending to 2-bytes.
 933     if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
 934       NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
 935       BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
 936           .addReg(Reg);
 937       if (&SetBRC == &X86::GR32RegClass)
 938         return NewReg;
 939       Reg = NewReg;
 940       OrigRegSize = 4;
 941     }
 942
 943     NewReg = MRI->createVirtualRegister(&SetBRC);
 944     if (OrigRegSize < TargetRegSize) {
 945       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
 946               NewReg)
 947           .addImm(0)
 948           .addReg(Reg)
 949           .addImm(SubRegIdx[OrigRegSize]);
 950     } else if (OrigRegSize > TargetRegSize) {
 951       if (TargetRegSize == 1 && !Subtarget->is64Bit()) {
 952         // Need to constrain the register class.
 953         MRI->constrainRegClass(Reg, &X86::GR32_ABCDRegClass);
 954       }
 955
 956       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY),
 957               NewReg)
 958           .addReg(Reg, 0, SubRegIdx[TargetRegSize]);
 959     } else {
 960       BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
 961           .addReg(Reg);
 962     }
 963     return NewReg;
 964   };
 965
 966   unsigned &CondReg = CondRegs[X86::COND_B];
 967   if (!CondReg)
 968     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
 969
 970   // Adjust the condition to have the desired register width by zero-extending
 971   // as needed.
 972   // FIXME: We should use a better API to avoid the local reference and using a
 973   // different variable here.
 974   unsigned ExtCondReg = AdjustReg(CondReg);
 975
 976   // Now we need to turn this into a bitmask. We do this by subtracting it from
 977   // zero.
 978   Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
 979   BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
 980   ZeroReg = AdjustReg(ZeroReg);
 981
 982   unsigned Sub;
 983   switch (SetBI.getOpcode()) {
 984   case X86::SETB_C8r:
 985     Sub = X86::SUB8rr;
 986     break;
 987
 988   case X86::SETB_C16r:
 989     Sub = X86::SUB16rr;
 990     break;
 991
 992   case X86::SETB_C32r:
 993     Sub = X86::SUB32rr;
 994     break;
 995
 996   case X86::SETB_C64r:
 997     Sub = X86::SUB64rr;
 998     break;
 999
1000   default:
1001     llvm_unreachable("Invalid SETB_C* opcode!");
1002   }
1003   Register ResultReg = MRI->createVirtualRegister(&SetBRC);
1004   BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
1005       .addReg(ZeroReg)
1006       .addReg(ExtCondReg);
1007   return RewriteToReg(ResultReg);
1008 }
1009
1010 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
1011                                             MachineBasicBlock::iterator TestPos,
1012                                             DebugLoc TestLoc,
1013                                             MachineInstr &SetCCI,
1014                                             MachineOperand &FlagUse,
1015                                             CondRegArray &CondRegs) {
1016   X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
1017   // Note that we can't usefully rewrite this to the inverse without complex
1018   // analysis of the users of the setCC. Largely we rely on duplicates which
1019   // could have been avoided already being avoided here.
1020   unsigned &CondReg = CondRegs[Cond];
1021   if (!CondReg)
1022     CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
1023
1024   // Rewriting a register def is trivial: we just replace the register and
1025   // remove the setcc.
1026   if (!SetCCI.mayStore()) {
1027     assert(SetCCI.getOperand(0).isReg() &&
1028            "Cannot have a non-register defined operand to SETcc!");
1029     MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
1030     SetCCI.eraseFromParent();
1031     return;
1032   }
1033
1034   // Otherwise, we need to emit a store.
1035   auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
1036                      SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
1037   // Copy the address operands.
1038   for (int i = 0; i < X86::AddrNumOperands; ++i)
1039     MIB.add(SetCCI.getOperand(i));
1040
1041   MIB.addReg(CondReg);
1042
1043   MIB.setMemRefs(SetCCI.memoperands());
1044
1045   SetCCI.eraseFromParent();
1046   return;
1047 }