llvm/lib/Target/X86/X86MCInstLower.cpp

   1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file contains code to lower X86 MachineInstrs to their corresponding
  10 // MCInst records.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "MCTargetDesc/X86ATTInstPrinter.h"
  15 #include "MCTargetDesc/X86BaseInfo.h"
  16 #include "MCTargetDesc/X86InstComments.h"
  17 #include "MCTargetDesc/X86ShuffleDecode.h"
  18 #include "MCTargetDesc/X86TargetStreamer.h"
  19 #include "X86AsmPrinter.h"
  20 #include "X86RegisterInfo.h"
  21 #include "X86ShuffleDecodeConstantPool.h"
  22 #include "X86Subtarget.h"
  23 #include "llvm/ADT/Optional.h"
  24 #include "llvm/ADT/SmallString.h"
  25 #include "llvm/ADT/iterator_range.h"
  26 #include "llvm/CodeGen/MachineConstantPool.h"
  27 #include "llvm/CodeGen/MachineFunction.h"
  28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
  29 #include "llvm/CodeGen/MachineOperand.h"
  30 #include "llvm/CodeGen/StackMaps.h"
  31 #include "llvm/IR/DataLayout.h"
  32 #include "llvm/IR/GlobalValue.h"
  33 #include "llvm/IR/Mangler.h"
  34 #include "llvm/MC/MCAsmInfo.h"
  35 #include "llvm/MC/MCCodeEmitter.h"
  36 #include "llvm/MC/MCContext.h"
  37 #include "llvm/MC/MCExpr.h"
  38 #include "llvm/MC/MCFixup.h"
  39 #include "llvm/MC/MCInst.h"
  40 #include "llvm/MC/MCInstBuilder.h"
  41 #include "llvm/MC/MCSection.h"
  42 #include "llvm/MC/MCSectionELF.h"
  43 #include "llvm/MC/MCStreamer.h"
  44 #include "llvm/MC/MCSymbol.h"
  45 #include "llvm/MC/MCSymbolELF.h"
  46 #include "llvm/Target/TargetLoweringObjectFile.h"
  47 #include "llvm/Target/TargetMachine.h"
  48
  49 using namespace llvm;
  50
  51 namespace {
  52
  53 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
  54 class X86MCInstLower {
  55   MCContext &Ctx;
  56   const MachineFunction &MF;
  57   const TargetMachine &TM;
  58   const MCAsmInfo &MAI;
  59   X86AsmPrinter &AsmPrinter;
  60
  61 public:
  62   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
  63
  64   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
  65                                           const MachineOperand &MO) const;
  66   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
  67
  68   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  69   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
  70
  71 private:
  72   MachineModuleInfoMachO &getMachOMMI() const;
  73 };
  74
  75 } // end anonymous namespace
  76
  77 /// A RAII helper which defines a region of instructions which can't have
  78 /// padding added between them for correctness.
  79 struct NoAutoPaddingScope {
  80   MCStreamer &OS;
  81   const bool OldAllowAutoPadding;
  82   NoAutoPaddingScope(MCStreamer &OS)
  83       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
  84     changeAndComment(false);
  85   }
  86   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  87   void changeAndComment(bool b) {
  88     if (b == OS.getAllowAutoPadding())
  89       return;
  90     OS.setAllowAutoPadding(b);
  91     if (b)
  92       OS.emitRawComment("autopadding");
  93     else
  94       OS.emitRawComment("noautopadding");
  95   }
  96 };
  97
  98 // Emit a minimal sequence of nops spanning NumBytes bytes.
  99 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
 100                         const X86Subtarget *Subtarget);
 101
 102 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
 103                                                  const MCSubtargetInfo &STI,
 104                                                  MCCodeEmitter *CodeEmitter) {
 105   if (InShadow) {
 106     SmallString<256> Code;
 107     SmallVector<MCFixup, 4> Fixups;
 108     raw_svector_ostream VecOS(Code);
 109     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
 110     CurrentShadowSize += Code.size();
 111     if (CurrentShadowSize >= RequiredShadowSize)
 112       InShadow = false; // The shadow is big enough. Stop counting.
 113   }
 114 }
 115
 116 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
 117     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
 118   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
 119     InShadow = false;
 120     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
 121                 &MF->getSubtarget<X86Subtarget>());
 122   }
 123 }
 124
 125 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
 126   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
 127   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
 128 }
 129
 130 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
 131                                X86AsmPrinter &asmprinter)
 132     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
 133       AsmPrinter(asmprinter) {}
 134
 135 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
 136   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
 137 }
 138
 139 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
 140 /// operand to an MCSymbol.
 141 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
 142   const Triple &TT = TM.getTargetTriple();
 143   if (MO.isGlobal() && TT.isOSBinFormatELF())
 144     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
 145
 146   const DataLayout &DL = MF.getDataLayout();
 147   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
 148          "Isn't a symbol reference");
 149
 150   MCSymbol *Sym = nullptr;
 151   SmallString<128> Name;
 152   StringRef Suffix;
 153
 154   switch (MO.getTargetFlags()) {
 155   case X86II::MO_DLLIMPORT:
 156     // Handle dllimport linkage.
 157     Name += "__imp_";
 158     break;
 159   case X86II::MO_COFFSTUB:
 160     Name += ".refptr.";
 161     break;
 162   case X86II::MO_DARWIN_NONLAZY:
 163   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
 164     Suffix = "$non_lazy_ptr";
 165     break;
 166   }
 167
 168   if (!Suffix.empty())
 169     Name += DL.getPrivateGlobalPrefix();
 170
 171   if (MO.isGlobal()) {
 172     const GlobalValue *GV = MO.getGlobal();
 173     AsmPrinter.getNameWithPrefix(Name, GV);
 174   } else if (MO.isSymbol()) {
 175     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
 176   } else if (MO.isMBB()) {
 177     assert(Suffix.empty());
 178     Sym = MO.getMBB()->getSymbol();
 179   }
 180
 181   Name += Suffix;
 182   if (!Sym)
 183     Sym = Ctx.getOrCreateSymbol(Name);
 184
 185   // If the target flags on the operand changes the name of the symbol, do that
 186   // before we return the symbol.
 187   switch (MO.getTargetFlags()) {
 188   default:
 189     break;
 190   case X86II::MO_COFFSTUB: {
 191     MachineModuleInfoCOFF &MMICOFF =
 192         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
 193     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
 194     if (!StubSym.getPointer()) {
 195       assert(MO.isGlobal() && "Extern symbol not handled yet");
 196       StubSym = MachineModuleInfoImpl::StubValueTy(
 197           AsmPrinter.getSymbol(MO.getGlobal()), true);
 198     }
 199     break;
 200   }
 201   case X86II::MO_DARWIN_NONLAZY:
 202   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
 203     MachineModuleInfoImpl::StubValueTy &StubSym =
 204         getMachOMMI().getGVStubEntry(Sym);
 205     if (!StubSym.getPointer()) {
 206       assert(MO.isGlobal() && "Extern symbol not handled yet");
 207       StubSym = MachineModuleInfoImpl::StubValueTy(
 208           AsmPrinter.getSymbol(MO.getGlobal()),
 209           !MO.getGlobal()->hasInternalLinkage());
 210     }
 211     break;
 212   }
 213   }
 214
 215   return Sym;
 216 }
 217
 218 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
 219                                              MCSymbol *Sym) const {
 220   // FIXME: We would like an efficient form for this, so we don't have to do a
 221   // lot of extra uniquing.
 222   const MCExpr *Expr = nullptr;
 223   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
 224
 225   switch (MO.getTargetFlags()) {
 226   default:
 227     llvm_unreachable("Unknown target flag on GV operand");
 228   case X86II::MO_NO_FLAG: // No flag.
 229   // These affect the name of the symbol, not any suffix.
 230   case X86II::MO_DARWIN_NONLAZY:
 231   case X86II::MO_DLLIMPORT:
 232   case X86II::MO_COFFSTUB:
 233     break;
 234
 235   case X86II::MO_TLVP:
 236     RefKind = MCSymbolRefExpr::VK_TLVP;
 237     break;
 238   case X86II::MO_TLVP_PIC_BASE:
 239     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
 240     // Subtract the pic base.
 241     Expr = MCBinaryExpr::createSub(
 242         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
 243     break;
 244   case X86II::MO_SECREL:
 245     RefKind = MCSymbolRefExpr::VK_SECREL;
 246     break;
 247   case X86II::MO_TLSGD:
 248     RefKind = MCSymbolRefExpr::VK_TLSGD;
 249     break;
 250   case X86II::MO_TLSLD:
 251     RefKind = MCSymbolRefExpr::VK_TLSLD;
 252     break;
 253   case X86II::MO_TLSLDM:
 254     RefKind = MCSymbolRefExpr::VK_TLSLDM;
 255     break;
 256   case X86II::MO_GOTTPOFF:
 257     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
 258     break;
 259   case X86II::MO_INDNTPOFF:
 260     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
 261     break;
 262   case X86II::MO_TPOFF:
 263     RefKind = MCSymbolRefExpr::VK_TPOFF;
 264     break;
 265   case X86II::MO_DTPOFF:
 266     RefKind = MCSymbolRefExpr::VK_DTPOFF;
 267     break;
 268   case X86II::MO_NTPOFF:
 269     RefKind = MCSymbolRefExpr::VK_NTPOFF;
 270     break;
 271   case X86II::MO_GOTNTPOFF:
 272     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
 273     break;
 274   case X86II::MO_GOTPCREL:
 275     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
 276     break;
 277   case X86II::MO_GOT:
 278     RefKind = MCSymbolRefExpr::VK_GOT;
 279     break;
 280   case X86II::MO_GOTOFF:
 281     RefKind = MCSymbolRefExpr::VK_GOTOFF;
 282     break;
 283   case X86II::MO_PLT:
 284     RefKind = MCSymbolRefExpr::VK_PLT;
 285     break;
 286   case X86II::MO_ABS8:
 287     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
 288     break;
 289   case X86II::MO_PIC_BASE_OFFSET:
 290   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
 291     Expr = MCSymbolRefExpr::create(Sym, Ctx);
 292     // Subtract the pic base.
 293     Expr = MCBinaryExpr::createSub(
 294         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
 295     if (MO.isJTI()) {
 296       assert(MAI.doesSetDirectiveSuppressReloc());
 297       // If .set directive is supported, use it to reduce the number of
 298       // relocations the assembler will generate for differences between
 299       // local labels. This is only safe when the symbols are in the same
 300       // section so we are restricting it to jumptable references.
 301       MCSymbol *Label = Ctx.createTempSymbol();
 302       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
 303       Expr = MCSymbolRefExpr::create(Label, Ctx);
 304     }
 305     break;
 306   }
 307
 308   if (!Expr)
 309     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
 310
 311   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
 312     Expr = MCBinaryExpr::createAdd(
 313         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
 314   return MCOperand::createExpr(Expr);
 315 }
 316
 317 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
 318 /// a short fixed-register form.
 319 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
 320   unsigned ImmOp = Inst.getNumOperands() - 1;
 321   assert(Inst.getOperand(0).isReg() &&
 322          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
 323          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
 324            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
 325           Inst.getNumOperands() == 2) &&
 326          "Unexpected instruction!");
 327
 328   // Check whether the destination register can be fixed.
 329   unsigned Reg = Inst.getOperand(0).getReg();
 330   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
 331     return;
 332
 333   // If so, rewrite the instruction.
 334   MCOperand Saved = Inst.getOperand(ImmOp);
 335   Inst = MCInst();
 336   Inst.setOpcode(Opcode);
 337   Inst.addOperand(Saved);
 338 }
 339
 340 /// If a movsx instruction has a shorter encoding for the used register
 341 /// simplify the instruction to use it instead.
 342 static void SimplifyMOVSX(MCInst &Inst) {
 343   unsigned NewOpcode = 0;
 344   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
 345   switch (Inst.getOpcode()) {
 346   default:
 347     llvm_unreachable("Unexpected instruction!");
 348   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
 349     if (Op0 == X86::AX && Op1 == X86::AL)
 350       NewOpcode = X86::CBW;
 351     break;
 352   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
 353     if (Op0 == X86::EAX && Op1 == X86::AX)
 354       NewOpcode = X86::CWDE;
 355     break;
 356   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
 357     if (Op0 == X86::RAX && Op1 == X86::EAX)
 358       NewOpcode = X86::CDQE;
 359     break;
 360   }
 361
 362   if (NewOpcode != 0) {
 363     Inst = MCInst();
 364     Inst.setOpcode(NewOpcode);
 365   }
 366 }
 367
 368 /// Simplify things like MOV32rm to MOV32o32a.
 369 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
 370                                   unsigned Opcode) {
 371   // Don't make these simplifications in 64-bit mode; other assemblers don't
 372   // perform them because they make the code larger.
 373   if (Printer.getSubtarget().is64Bit())
 374     return;
 375
 376   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
 377   unsigned AddrBase = IsStore;
 378   unsigned RegOp = IsStore ? 0 : 5;
 379   unsigned AddrOp = AddrBase + 3;
 380   assert(
 381       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
 382       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
 383       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
 384       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
 385       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
 386       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
 387       "Unexpected instruction!");
 388
 389   // Check whether the destination register can be fixed.
 390   unsigned Reg = Inst.getOperand(RegOp).getReg();
 391   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
 392     return;
 393
 394   // Check whether this is an absolute address.
 395   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
 396   // to do this here.
 397   bool Absolute = true;
 398   if (Inst.getOperand(AddrOp).isExpr()) {
 399     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
 400     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
 401       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
 402         Absolute = false;
 403   }
 404
 405   if (Absolute &&
 406       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
 407        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
 408        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
 409     return;
 410
 411   // If so, rewrite the instruction.
 412   MCOperand Saved = Inst.getOperand(AddrOp);
 413   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
 414   Inst = MCInst();
 415   Inst.setOpcode(Opcode);
 416   Inst.addOperand(Saved);
 417   Inst.addOperand(Seg);
 418 }
 419
 420 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
 421   return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
 422 }
 423
 424 Optional<MCOperand>
 425 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
 426                                     const MachineOperand &MO) const {
 427   switch (MO.getType()) {
 428   default:
 429     MI->print(errs());
 430     llvm_unreachable("unknown operand type");
 431   case MachineOperand::MO_Register:
 432     // Ignore all implicit register operands.
 433     if (MO.isImplicit())
 434       return None;
 435     return MCOperand::createReg(MO.getReg());
 436   case MachineOperand::MO_Immediate:
 437     return MCOperand::createImm(MO.getImm());
 438   case MachineOperand::MO_MachineBasicBlock:
 439   case MachineOperand::MO_GlobalAddress:
 440   case MachineOperand::MO_ExternalSymbol:
 441     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
 442   case MachineOperand::MO_MCSymbol:
 443     return LowerSymbolOperand(MO, MO.getMCSymbol());
 444   case MachineOperand::MO_JumpTableIndex:
 445     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
 446   case MachineOperand::MO_ConstantPoolIndex:
 447     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
 448   case MachineOperand::MO_BlockAddress:
 449     return LowerSymbolOperand(
 450         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
 451   case MachineOperand::MO_RegisterMask:
 452     // Ignore call clobbers.
 453     return None;
 454   }
 455 }
 456
 457 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
 458 // information.
 459 static unsigned convertTailJumpOpcode(unsigned Opcode) {
 460   switch (Opcode) {
 461   case X86::TAILJMPr:
 462     Opcode = X86::JMP32r;
 463     break;
 464   case X86::TAILJMPm:
 465     Opcode = X86::JMP32m;
 466     break;
 467   case X86::TAILJMPr64:
 468     Opcode = X86::JMP64r;
 469     break;
 470   case X86::TAILJMPm64:
 471     Opcode = X86::JMP64m;
 472     break;
 473   case X86::TAILJMPr64_REX:
 474     Opcode = X86::JMP64r_REX;
 475     break;
 476   case X86::TAILJMPm64_REX:
 477     Opcode = X86::JMP64m_REX;
 478     break;
 479   case X86::TAILJMPd:
 480   case X86::TAILJMPd64:
 481     Opcode = X86::JMP_1;
 482     break;
 483   case X86::TAILJMPd_CC:
 484   case X86::TAILJMPd64_CC:
 485     Opcode = X86::JCC_1;
 486     break;
 487   }
 488
 489   return Opcode;
 490 }
 491
 492 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
 493   OutMI.setOpcode(MI->getOpcode());
 494
 495   for (const MachineOperand &MO : MI->operands())
 496     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
 497       OutMI.addOperand(MaybeMCOp.getValue());
 498
 499   // Handle a few special cases to eliminate operand modifiers.
 500   switch (OutMI.getOpcode()) {
 501   case X86::LEA64_32r:
 502   case X86::LEA64r:
 503   case X86::LEA16r:
 504   case X86::LEA32r:
 505     // LEA should have a segment register, but it must be empty.
 506     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
 507            "Unexpected # of LEA operands");
 508     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
 509            "LEA has segment specified!");
 510     break;
 511
 512   case X86::MULX32Hrr:
 513   case X86::MULX32Hrm:
 514   case X86::MULX64Hrr:
 515   case X86::MULX64Hrm: {
 516     // Turn into regular MULX by duplicating the destination.
 517     unsigned NewOpc;
 518     switch (OutMI.getOpcode()) {
 519     default: llvm_unreachable("Invalid opcode");
 520     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
 521     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
 522     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
 523     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
 524     }
 525     OutMI.setOpcode(NewOpc);
 526     // Duplicate the destination.
 527     unsigned DestReg = OutMI.getOperand(0).getReg();
 528     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
 529     break;
 530   }
 531
 532   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
 533   // if one of the registers is extended, but other isn't.
 534   case X86::VMOVZPQILo2PQIrr:
 535   case X86::VMOVAPDrr:
 536   case X86::VMOVAPDYrr:
 537   case X86::VMOVAPSrr:
 538   case X86::VMOVAPSYrr:
 539   case X86::VMOVDQArr:
 540   case X86::VMOVDQAYrr:
 541   case X86::VMOVDQUrr:
 542   case X86::VMOVDQUYrr:
 543   case X86::VMOVUPDrr:
 544   case X86::VMOVUPDYrr:
 545   case X86::VMOVUPSrr:
 546   case X86::VMOVUPSYrr: {
 547     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
 548         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
 549       unsigned NewOpc;
 550       switch (OutMI.getOpcode()) {
 551       default: llvm_unreachable("Invalid opcode");
 552       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
 553       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
 554       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
 555       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
 556       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
 557       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
 558       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
 559       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
 560       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
 561       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
 562       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
 563       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
 564       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
 565       }
 566       OutMI.setOpcode(NewOpc);
 567     }
 568     break;
 569   }
 570   case X86::VMOVSDrr:
 571   case X86::VMOVSSrr: {
 572     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
 573         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
 574       unsigned NewOpc;
 575       switch (OutMI.getOpcode()) {
 576       default: llvm_unreachable("Invalid opcode");
 577       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
 578       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
 579       }
 580       OutMI.setOpcode(NewOpc);
 581     }
 582     break;
 583   }
 584
 585   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
 586   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
 587   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
 588   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
 589   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
 590   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
 591   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
 592   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
 593   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
 594   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
 595   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
 596   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
 597   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
 598   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
 599   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
 600   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
 601   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
 602   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
 603   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
 604   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
 605   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
 606   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
 607   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
 608   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
 609   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
 610   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
 611   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
 612   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
 613   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
 614   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
 615     // Turn immediate 0 into the VPCMPEQ instruction.
 616     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
 617       unsigned NewOpc;
 618       switch (OutMI.getOpcode()) {
 619       default: llvm_unreachable("Invalid opcode");
 620       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
 621       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
 622       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
 623       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
 624       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
 625       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
 626       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
 627       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
 628       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
 629       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
 630       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
 631       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
 632       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
 633       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
 634       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
 635       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
 636       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
 637       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
 638       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
 639       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
 640       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
 641       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
 642       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
 643       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
 644       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
 645       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
 646       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
 647       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
 648       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
 649       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
 650       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
 651       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
 652       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
 653       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
 654       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
 655       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
 656       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
 657       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
 658       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
 659       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
 660       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
 661       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
 662       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
 663       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
 664       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
 665       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
 666       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
 667       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
 668       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
 669       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
 670       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
 671       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
 672       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
 673       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
 674       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
 675       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
 676       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
 677       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
 678       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
 679       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
 680       }
 681
 682       OutMI.setOpcode(NewOpc);
 683       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
 684       break;
 685     }
 686
 687     // Turn immediate 6 into the VPCMPGT instruction.
 688     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
 689       unsigned NewOpc;
 690       switch (OutMI.getOpcode()) {
 691       default: llvm_unreachable("Invalid opcode");
 692       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
 693       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
 694       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
 695       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
 696       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
 697       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
 698       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
 699       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
 700       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
 701       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
 702       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
 703       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
 704       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
 705       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
 706       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
 707       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
 708       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
 709       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
 710       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
 711       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
 712       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
 713       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
 714       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
 715       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
 716       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
 717       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
 718       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
 719       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
 720       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
 721       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
 722       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
 723       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
 724       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
 725       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
 726       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
 727       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
 728       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
 729       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
 730       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
 731       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
 732       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
 733       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
 734       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
 735       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
 736       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
 737       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
 738       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
 739       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
 740       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
 741       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
 742       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
 743       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
 744       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
 745       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
 746       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
 747       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
 748       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
 749       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
 750       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
 751       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
 752       }
 753
 754       OutMI.setOpcode(NewOpc);
 755       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
 756       break;
 757     }
 758
 759     break;
 760   }
 761
 762   // CALL64r, CALL64pcrel32 - These instructions used to have
 763   // register inputs modeled as normal uses instead of implicit uses.  As such,
 764   // they we used to truncate off all but the first operand (the callee). This
 765   // issue seems to have been fixed at some point. This assert verifies that.
 766   case X86::CALL64r:
 767   case X86::CALL64pcrel32:
 768     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
 769     break;
 770
 771   case X86::EH_RETURN:
 772   case X86::EH_RETURN64: {
 773     OutMI = MCInst();
 774     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
 775     break;
 776   }
 777
 778   case X86::CLEANUPRET: {
 779     // Replace CLEANUPRET with the appropriate RET.
 780     OutMI = MCInst();
 781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
 782     break;
 783   }
 784
 785   case X86::CATCHRET: {
 786     // Replace CATCHRET with the appropriate RET.
 787     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
 788     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
 789     OutMI = MCInst();
 790     OutMI.setOpcode(getRetOpcode(Subtarget));
 791     OutMI.addOperand(MCOperand::createReg(ReturnReg));
 792     break;
 793   }
 794
 795   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
 796   // instruction.
 797   case X86::TAILJMPr:
 798   case X86::TAILJMPr64:
 799   case X86::TAILJMPr64_REX:
 800   case X86::TAILJMPd:
 801   case X86::TAILJMPd64:
 802     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
 803     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
 804     break;
 805
 806   case X86::TAILJMPd_CC:
 807   case X86::TAILJMPd64_CC:
 808     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
 809     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
 810     break;
 811
 812   case X86::TAILJMPm:
 813   case X86::TAILJMPm64:
 814   case X86::TAILJMPm64_REX:
 815     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
 816            "Unexpected number of operands!");
 817     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
 818     break;
 819
 820   case X86::DEC16r:
 821   case X86::DEC32r:
 822   case X86::INC16r:
 823   case X86::INC32r:
 824     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
 825     if (!AsmPrinter.getSubtarget().is64Bit()) {
 826       unsigned Opcode;
 827       switch (OutMI.getOpcode()) {
 828       default: llvm_unreachable("Invalid opcode");
 829       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
 830       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
 831       case X86::INC16r: Opcode = X86::INC16r_alt; break;
 832       case X86::INC32r: Opcode = X86::INC32r_alt; break;
 833       }
 834       OutMI.setOpcode(Opcode);
 835     }
 836     break;
 837
 838   // We don't currently select the correct instruction form for instructions
 839   // which have a short %eax, etc. form. Handle this by custom lowering, for
 840   // now.
 841   //
 842   // Note, we are currently not handling the following instructions:
 843   // MOV64ao8, MOV64o8a
 844   // XCHG16ar, XCHG32ar, XCHG64ar
 845   case X86::MOV8mr_NOREX:
 846   case X86::MOV8mr:
 847   case X86::MOV8rm_NOREX:
 848   case X86::MOV8rm:
 849   case X86::MOV16mr:
 850   case X86::MOV16rm:
 851   case X86::MOV32mr:
 852   case X86::MOV32rm: {
 853     unsigned NewOpc;
 854     switch (OutMI.getOpcode()) {
 855     default: llvm_unreachable("Invalid opcode");
 856     case X86::MOV8mr_NOREX:
 857     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
 858     case X86::MOV8rm_NOREX:
 859     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
 860     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
 861     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
 862     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
 863     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
 864     }
 865     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
 866     break;
 867   }
 868
 869   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
 870   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
 871   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
 872   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
 873   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
 874   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
 875   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
 876   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
 877   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
 878     unsigned NewOpc;
 879     switch (OutMI.getOpcode()) {
 880     default: llvm_unreachable("Invalid opcode");
 881     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
 882     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
 883     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
 884     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
 885     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
 886     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
 887     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
 888     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
 889     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
 890     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
 891     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
 892     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
 893     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
 894     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
 895     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
 896     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
 897     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
 898     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
 899     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
 900     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
 901     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
 902     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
 903     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
 904     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
 905     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
 906     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
 907     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
 908     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
 909     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
 910     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
 911     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
 912     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
 913     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
 914     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
 915     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
 916     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
 917     }
 918     SimplifyShortImmForm(OutMI, NewOpc);
 919     break;
 920   }
 921
 922   // Try to shrink some forms of movsx.
 923   case X86::MOVSX16rr8:
 924   case X86::MOVSX32rr16:
 925   case X86::MOVSX64rr32:
 926     SimplifyMOVSX(OutMI);
 927     break;
 928
 929   case X86::VCMPPDrri:
 930   case X86::VCMPPDYrri:
 931   case X86::VCMPPSrri:
 932   case X86::VCMPPSYrri:
 933   case X86::VCMPSDrr:
 934   case X86::VCMPSSrr: {
 935     // Swap the operands if it will enable a 2 byte VEX encoding.
 936     // FIXME: Change the immediate to improve opportunities?
 937     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
 938         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
 939       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
 940       switch (Imm) {
 941       default: break;
 942       case 0x00: // EQUAL
 943       case 0x03: // UNORDERED
 944       case 0x04: // NOT EQUAL
 945       case 0x07: // ORDERED
 946         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
 947         break;
 948       }
 949     }
 950     break;
 951   }
 952
 953   case X86::VMOVHLPSrr:
 954   case X86::VUNPCKHPDrr:
 955     // These are not truly commutable so hide them from the default case.
 956     break;
 957
 958   default: {
 959     // If the instruction is a commutable arithmetic instruction we might be
 960     // able to commute the operands to get a 2 byte VEX prefix.
 961     uint64_t TSFlags = MI->getDesc().TSFlags;
 962     if (MI->getDesc().isCommutable() &&
 963         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
 964         (TSFlags & X86II::OpMapMask) == X86II::TB &&
 965         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
 966         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
 967         OutMI.getNumOperands() == 3) {
 968       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
 969           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
 970         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
 971     }
 972     break;
 973   }
 974   }
 975 }
 976
 977 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
 978                                  const MachineInstr &MI) {
 979   NoAutoPaddingScope NoPadScope(*OutStreamer);
 980   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
 981                   MI.getOpcode() != X86::TLS_base_addr32;
 982   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
 983                       MI.getOpcode() == X86::TLS_base_addr64;
 984   MCContext &Ctx = OutStreamer->getContext();
 985
 986   MCSymbolRefExpr::VariantKind SRVK;
 987   switch (MI.getOpcode()) {
 988   case X86::TLS_addr32:
 989   case X86::TLS_addr64:
 990   case X86::TLS_addrX32:
 991     SRVK = MCSymbolRefExpr::VK_TLSGD;
 992     break;
 993   case X86::TLS_base_addr32:
 994     SRVK = MCSymbolRefExpr::VK_TLSLDM;
 995     break;
 996   case X86::TLS_base_addr64:
 997   case X86::TLS_base_addrX32:
 998     SRVK = MCSymbolRefExpr::VK_TLSLD;
 999     break;
1000   default:
1001     llvm_unreachable("unexpected opcode");
1002   }
1003
1004   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1005       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1006
1007   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1008   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1009   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1010   // only using GOT when GOTPCRELX is enabled.
1011   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1012   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1013                 Ctx.getAsmInfo()->canRelaxRelocations();
1014
1015   if (Is64Bits) {
1016     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1017     if (NeedsPadding && Is64BitsLP64)
1018       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1019     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1020                                 .addReg(X86::RDI)
1021                                 .addReg(X86::RIP)
1022                                 .addImm(1)
1023                                 .addReg(0)
1024                                 .addExpr(Sym)
1025                                 .addReg(0));
1026     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1027     if (NeedsPadding) {
1028       if (!UseGot)
1029         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1030       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1031       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1032     }
1033     if (UseGot) {
1034       const MCExpr *Expr = MCSymbolRefExpr::create(
1035           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1036       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1037                                   .addReg(X86::RIP)
1038                                   .addImm(1)
1039                                   .addReg(0)
1040                                   .addExpr(Expr)
1041                                   .addReg(0));
1042     } else {
1043       EmitAndCountInstruction(
1044           MCInstBuilder(X86::CALL64pcrel32)
1045               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1046                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1047     }
1048   } else {
1049     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1050       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1051                                   .addReg(X86::EAX)
1052                                   .addReg(0)
1053                                   .addImm(1)
1054                                   .addReg(X86::EBX)
1055                                   .addExpr(Sym)
1056                                   .addReg(0));
1057     } else {
1058       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1059                                   .addReg(X86::EAX)
1060                                   .addReg(X86::EBX)
1061                                   .addImm(1)
1062                                   .addReg(0)
1063                                   .addExpr(Sym)
1064                                   .addReg(0));
1065     }
1066
1067     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1068     if (UseGot) {
1069       const MCExpr *Expr =
1070           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1071       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1072                                   .addReg(X86::EBX)
1073                                   .addImm(1)
1074                                   .addReg(0)
1075                                   .addExpr(Expr)
1076                                   .addReg(0));
1077     } else {
1078       EmitAndCountInstruction(
1079           MCInstBuilder(X86::CALLpcrel32)
1080               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1081                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1082     }
1083   }
1084 }
1085
1086 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1087 /// bytes.  Return the size of nop emitted.
1088 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1089                         const X86Subtarget *Subtarget) {
1090   // Determine the longest nop which can be efficiently decoded for the given
1091   // target cpu.  15-bytes is the longest single NOP instruction, but some
1092   // platforms can't decode the longest forms efficiently.
1093   unsigned MaxNopLength = 1;
1094   if (Subtarget->is64Bit()) {
1095     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1096     // IndexReg/BaseReg below need to be updated.
1097     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
1098       MaxNopLength = 7;
1099     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
1100       MaxNopLength = 15;
1101     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
1102       MaxNopLength = 11;
1103     else
1104       MaxNopLength = 10;
1105   } if (Subtarget->is32Bit())
1106     MaxNopLength = 2;
1107
1108   // Cap a single nop emission at the profitable value for the target
1109   NumBytes = std::min(NumBytes, MaxNopLength);
1110
1111   unsigned NopSize;
1112   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1113   IndexReg = Displacement = SegmentReg = 0;
1114   BaseReg = X86::RAX;
1115   ScaleVal = 1;
1116   switch (NumBytes) {
1117   case 0:
1118     llvm_unreachable("Zero nops?");
1119     break;
1120   case 1:
1121     NopSize = 1;
1122     Opc = X86::NOOP;
1123     break;
1124   case 2:
1125     NopSize = 2;
1126     Opc = X86::XCHG16ar;
1127     break;
1128   case 3:
1129     NopSize = 3;
1130     Opc = X86::NOOPL;
1131     break;
1132   case 4:
1133     NopSize = 4;
1134     Opc = X86::NOOPL;
1135     Displacement = 8;
1136     break;
1137   case 5:
1138     NopSize = 5;
1139     Opc = X86::NOOPL;
1140     Displacement = 8;
1141     IndexReg = X86::RAX;
1142     break;
1143   case 6:
1144     NopSize = 6;
1145     Opc = X86::NOOPW;
1146     Displacement = 8;
1147     IndexReg = X86::RAX;
1148     break;
1149   case 7:
1150     NopSize = 7;
1151     Opc = X86::NOOPL;
1152     Displacement = 512;
1153     break;
1154   case 8:
1155     NopSize = 8;
1156     Opc = X86::NOOPL;
1157     Displacement = 512;
1158     IndexReg = X86::RAX;
1159     break;
1160   case 9:
1161     NopSize = 9;
1162     Opc = X86::NOOPW;
1163     Displacement = 512;
1164     IndexReg = X86::RAX;
1165     break;
1166   default:
1167     NopSize = 10;
1168     Opc = X86::NOOPW;
1169     Displacement = 512;
1170     IndexReg = X86::RAX;
1171     SegmentReg = X86::CS;
1172     break;
1173   }
1174
1175   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1176   NopSize += NumPrefixes;
1177   for (unsigned i = 0; i != NumPrefixes; ++i)
1178     OS.emitBytes("\x66");
1179
1180   switch (Opc) {
1181   default: llvm_unreachable("Unexpected opcode");
1182   case X86::NOOP:
1183     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1184     break;
1185   case X86::XCHG16ar:
1186     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1187                        *Subtarget);
1188     break;
1189   case X86::NOOPL:
1190   case X86::NOOPW:
1191     OS.emitInstruction(MCInstBuilder(Opc)
1192                            .addReg(BaseReg)
1193                            .addImm(ScaleVal)
1194                            .addReg(IndexReg)
1195                            .addImm(Displacement)
1196                            .addReg(SegmentReg),
1197                        *Subtarget);
1198     break;
1199   }
1200   assert(NopSize <= NumBytes && "We overemitted?");
1201   return NopSize;
1202 }
1203
1204 /// Emit the optimal amount of multi-byte nops on X86.
1205 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1206                         const X86Subtarget *Subtarget) {
1207   unsigned NopsToEmit = NumBytes;
1208   (void)NopsToEmit;
1209   while (NumBytes) {
1210     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1211     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1212   }
1213 }
1214
1215 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1216                                     X86MCInstLower &MCIL) {
1217   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1218
1219   NoAutoPaddingScope NoPadScope(*OutStreamer);
1220
1221   StatepointOpers SOpers(&MI);
1222   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1223     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1224   } else {
1225     // Lower call target and choose correct opcode
1226     const MachineOperand &CallTarget = SOpers.getCallTarget();
1227     MCOperand CallTargetMCOp;
1228     unsigned CallOpcode;
1229     switch (CallTarget.getType()) {
1230     case MachineOperand::MO_GlobalAddress:
1231     case MachineOperand::MO_ExternalSymbol:
1232       CallTargetMCOp = MCIL.LowerSymbolOperand(
1233           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1234       CallOpcode = X86::CALL64pcrel32;
1235       // Currently, we only support relative addressing with statepoints.
1236       // Otherwise, we'll need a scratch register to hold the target
1237       // address.  You'll fail asserts during load & relocation if this
1238       // symbol is to far away. (TODO: support non-relative addressing)
1239       break;
1240     case MachineOperand::MO_Immediate:
1241       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1242       CallOpcode = X86::CALL64pcrel32;
1243       // Currently, we only support relative addressing with statepoints.
1244       // Otherwise, we'll need a scratch register to hold the target
1245       // immediate.  You'll fail asserts during load & relocation if this
1246       // address is to far away. (TODO: support non-relative addressing)
1247       break;
1248     case MachineOperand::MO_Register:
1249       // FIXME: Add retpoline support and remove this.
1250       if (Subtarget->useIndirectThunkCalls())
1251         report_fatal_error("Lowering register statepoints with thunks not "
1252                            "yet implemented.");
1253       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1254       CallOpcode = X86::CALL64r;
1255       break;
1256     default:
1257       llvm_unreachable("Unsupported operand type in statepoint call target");
1258       break;
1259     }
1260
1261     // Emit call
1262     MCInst CallInst;
1263     CallInst.setOpcode(CallOpcode);
1264     CallInst.addOperand(CallTargetMCOp);
1265     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1266   }
1267
1268   // Record our statepoint node in the same section used by STACKMAP
1269   // and PATCHPOINT
1270   auto &Ctx = OutStreamer->getContext();
1271   MCSymbol *MILabel = Ctx.createTempSymbol();
1272   OutStreamer->emitLabel(MILabel);
1273   SM.recordStatepoint(*MILabel, MI);
1274 }
1275
1276 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1277                                      X86MCInstLower &MCIL) {
1278   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1279   //                  <opcode>, <operands>
1280
1281   NoAutoPaddingScope NoPadScope(*OutStreamer);
1282
1283   Register DefRegister = FaultingMI.getOperand(0).getReg();
1284   FaultMaps::FaultKind FK =
1285       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1286   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1287   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1288   unsigned OperandsBeginIdx = 4;
1289
1290   auto &Ctx = OutStreamer->getContext();
1291   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1292   OutStreamer->emitLabel(FaultingLabel);
1293
1294   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1295   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1296
1297   MCInst MI;
1298   MI.setOpcode(Opcode);
1299
1300   if (DefRegister != X86::NoRegister)
1301     MI.addOperand(MCOperand::createReg(DefRegister));
1302
1303   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1304             E = FaultingMI.operands_end();
1305        I != E; ++I)
1306     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1307       MI.addOperand(MaybeOperand.getValue());
1308
1309   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1310   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1311 }
1312
1313 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1314                                      X86MCInstLower &MCIL) {
1315   bool Is64Bits = Subtarget->is64Bit();
1316   MCContext &Ctx = OutStreamer->getContext();
1317   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1318   const MCSymbolRefExpr *Op =
1319       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1320
1321   EmitAndCountInstruction(
1322       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1323           .addExpr(Op));
1324 }
1325
1326 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1327                                       X86MCInstLower &MCIL) {
1328   // PATCHABLE_OP minsize, opcode, operands
1329
1330   NoAutoPaddingScope NoPadScope(*OutStreamer);
1331
1332   unsigned MinSize = MI.getOperand(0).getImm();
1333   unsigned Opcode = MI.getOperand(1).getImm();
1334
1335   MCInst MCI;
1336   MCI.setOpcode(Opcode);
1337   for (auto &MO : drop_begin(MI.operands(), 2))
1338     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1339       MCI.addOperand(MaybeOperand.getValue());
1340
1341   SmallString<256> Code;
1342   SmallVector<MCFixup, 4> Fixups;
1343   raw_svector_ostream VecOS(Code);
1344   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1345
1346   if (Code.size() < MinSize) {
1347     if (MinSize == 2 && Subtarget->is32Bit() &&
1348         Subtarget->isTargetWindowsMSVC() &&
1349         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1350       // For compatibilty reasons, when targetting MSVC, is is important to
1351       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1352       // rely specifically on this pattern to be able to patch a function.
1353       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1354       OutStreamer->emitInstruction(
1355           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1356           *Subtarget);
1357     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1358       // This is an optimization that lets us get away without emitting a nop in
1359       // many cases.
1360       //
1361       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1362       // bytes too, so the check on MinSize is important.
1363       MCI.setOpcode(X86::PUSH64rmr);
1364     } else {
1365       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1366       assert(NopSize == MinSize && "Could not implement MinSize!");
1367       (void)NopSize;
1368     }
1369   }
1370
1371   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1372 }
1373
1374 // Lower a stackmap of the form:
1375 // <id>, <shadowBytes>, ...
1376 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1377   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1378
1379   auto &Ctx = OutStreamer->getContext();
1380   MCSymbol *MILabel = Ctx.createTempSymbol();
1381   OutStreamer->emitLabel(MILabel);
1382
1383   SM.recordStackMap(*MILabel, MI);
1384   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1385   SMShadowTracker.reset(NumShadowBytes);
1386 }
1387
1388 // Lower a patchpoint of the form:
1389 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1390 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1391                                     X86MCInstLower &MCIL) {
1392   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1393
1394   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1395
1396   NoAutoPaddingScope NoPadScope(*OutStreamer);
1397
1398   auto &Ctx = OutStreamer->getContext();
1399   MCSymbol *MILabel = Ctx.createTempSymbol();
1400   OutStreamer->emitLabel(MILabel);
1401   SM.recordPatchPoint(*MILabel, MI);
1402
1403   PatchPointOpers opers(&MI);
1404   unsigned ScratchIdx = opers.getNextScratchIdx();
1405   unsigned EncodedBytes = 0;
1406   const MachineOperand &CalleeMO = opers.getCallTarget();
1407
1408   // Check for null target. If target is non-null (i.e. is non-zero or is
1409   // symbolic) then emit a call.
1410   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1411     MCOperand CalleeMCOp;
1412     switch (CalleeMO.getType()) {
1413     default:
1414       /// FIXME: Add a verifier check for bad callee types.
1415       llvm_unreachable("Unrecognized callee operand type.");
1416     case MachineOperand::MO_Immediate:
1417       if (CalleeMO.getImm())
1418         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1419       break;
1420     case MachineOperand::MO_ExternalSymbol:
1421     case MachineOperand::MO_GlobalAddress:
1422       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1423                                            MCIL.GetSymbolFromOperand(CalleeMO));
1424       break;
1425     }
1426
1427     // Emit MOV to materialize the target address and the CALL to target.
1428     // This is encoded with 12-13 bytes, depending on which register is used.
1429     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1430     if (X86II::isX86_64ExtendedReg(ScratchReg))
1431       EncodedBytes = 13;
1432     else
1433       EncodedBytes = 12;
1434
1435     EmitAndCountInstruction(
1436         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1437     // FIXME: Add retpoline support and remove this.
1438     if (Subtarget->useIndirectThunkCalls())
1439       report_fatal_error(
1440           "Lowering patchpoint with thunks not yet implemented.");
1441     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1442   }
1443
1444   // Emit padding.
1445   unsigned NumBytes = opers.getNumPatchBytes();
1446   assert(NumBytes >= EncodedBytes &&
1447          "Patchpoint can't request size less than the length of a call.");
1448
1449   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1450 }
1451
1452 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1453                                               X86MCInstLower &MCIL) {
1454   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1455
1456   NoAutoPaddingScope NoPadScope(*OutStreamer);
1457
1458   // We want to emit the following pattern, which follows the x86 calling
1459   // convention to prepare for the trampoline call to be patched in.
1460   //
1461   //   .p2align 1, ...
1462   // .Lxray_event_sled_N:
1463   //   jmp +N                        // jump across the instrumentation sled
1464   //   ...                           // set up arguments in register
1465   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1466   //   ...
1467   //   <jump here>
1468   //
1469   // After patching, it would look something like:
1470   //
1471   //   nopw (2-byte nop)
1472   //   ...
1473   //   callq __xrayCustomEvent  // already lowered
1474   //   ...
1475   //
1476   // ---
1477   // First we emit the label and the jump.
1478   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1479   OutStreamer->AddComment("# XRay Custom Event Log");
1480   OutStreamer->emitCodeAlignment(2);
1481   OutStreamer->emitLabel(CurSled);
1482
1483   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1484   // an operand (computed as an offset from the jmp instruction).
1485   // FIXME: Find another less hacky way do force the relative jump.
1486   OutStreamer->emitBinaryData("\xeb\x0f");
1487
1488   // The default C calling convention will place two arguments into %rcx and
1489   // %rdx -- so we only work with those.
1490   const Register DestRegs[] = {X86::RDI, X86::RSI};
1491   bool UsedMask[] = {false, false};
1492   // Filled out in loop.
1493   Register SrcRegs[] = {0, 0};
1494
1495   // Then we put the operands in the %rdi and %rsi registers. We spill the
1496   // values in the register before we clobber them, and mark them as used in
1497   // UsedMask. In case the arguments are already in the correct register, we use
1498   // emit nops appropriately sized to keep the sled the same size in every
1499   // situation.
1500   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1501     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1502       assert(Op->isReg() && "Only support arguments in registers");
1503       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1504       if (SrcRegs[I] != DestRegs[I]) {
1505         UsedMask[I] = true;
1506         EmitAndCountInstruction(
1507             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1508       } else {
1509         emitX86Nops(*OutStreamer, 4, Subtarget);
1510       }
1511     }
1512
1513   // Now that the register values are stashed, mov arguments into place.
1514   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1515   // earlier DestReg. We will have already overwritten over the register before
1516   // we can copy from it.
1517   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1518     if (SrcRegs[I] != DestRegs[I])
1519       EmitAndCountInstruction(
1520           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1521
1522   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1523   // name of the trampoline to be implemented by the XRay runtime.
1524   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1525   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1526   if (isPositionIndependent())
1527     TOp.setTargetFlags(X86II::MO_PLT);
1528
1529   // Emit the call instruction.
1530   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1531                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1532
1533   // Restore caller-saved and used registers.
1534   for (unsigned I = sizeof UsedMask; I-- > 0;)
1535     if (UsedMask[I])
1536       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1537     else
1538       emitX86Nops(*OutStreamer, 1, Subtarget);
1539
1540   OutStreamer->AddComment("xray custom event end.");
1541
1542   // Record the sled version. Version 0 of this sled was spelled differently, so
1543   // we let the runtime handle the different offsets we're using. Version 2
1544   // changed the absolute address to a PC-relative address.
1545   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1546 }
1547
1548 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1549                                                     X86MCInstLower &MCIL) {
1550   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1551
1552   NoAutoPaddingScope NoPadScope(*OutStreamer);
1553
1554   // We want to emit the following pattern, which follows the x86 calling
1555   // convention to prepare for the trampoline call to be patched in.
1556   //
1557   //   .p2align 1, ...
1558   // .Lxray_event_sled_N:
1559   //   jmp +N                        // jump across the instrumentation sled
1560   //   ...                           // set up arguments in register
1561   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1562   //   ...
1563   //   <jump here>
1564   //
1565   // After patching, it would look something like:
1566   //
1567   //   nopw (2-byte nop)
1568   //   ...
1569   //   callq __xrayTypedEvent  // already lowered
1570   //   ...
1571   //
1572   // ---
1573   // First we emit the label and the jump.
1574   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1575   OutStreamer->AddComment("# XRay Typed Event Log");
1576   OutStreamer->emitCodeAlignment(2);
1577   OutStreamer->emitLabel(CurSled);
1578
1579   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1580   // an operand (computed as an offset from the jmp instruction).
1581   // FIXME: Find another less hacky way do force the relative jump.
1582   OutStreamer->emitBinaryData("\xeb\x14");
1583
1584   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1585   // so we'll work with those. Or we may be called via SystemV, in which case
1586   // we don't have to do any translation.
1587   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1588   bool UsedMask[] = {false, false, false};
1589
1590   // Will fill out src regs in the loop.
1591   Register SrcRegs[] = {0, 0, 0};
1592
1593   // Then we put the operands in the SystemV registers. We spill the values in
1594   // the registers before we clobber them, and mark them as used in UsedMask.
1595   // In case the arguments are already in the correct register, we emit nops
1596   // appropriately sized to keep the sled the same size in every situation.
1597   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1598     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1599       // TODO: Is register only support adequate?
1600       assert(Op->isReg() && "Only supports arguments in registers");
1601       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1602       if (SrcRegs[I] != DestRegs[I]) {
1603         UsedMask[I] = true;
1604         EmitAndCountInstruction(
1605             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1606       } else {
1607         emitX86Nops(*OutStreamer, 4, Subtarget);
1608       }
1609     }
1610
1611   // In the above loop we only stash all of the destination registers or emit
1612   // nops if the arguments are already in the right place. Doing the actually
1613   // moving is postponed until after all the registers are stashed so nothing
1614   // is clobbers. We've already added nops to account for the size of mov and
1615   // push if the register is in the right place, so we only have to worry about
1616   // emitting movs.
1617   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1618   // earlier DestReg. We will have already overwritten over the register before
1619   // we can copy from it.
1620   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1621     if (UsedMask[I])
1622       EmitAndCountInstruction(
1623           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1624
1625   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1626   // name of the trampoline to be implemented by the XRay runtime.
1627   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1628   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1629   if (isPositionIndependent())
1630     TOp.setTargetFlags(X86II::MO_PLT);
1631
1632   // Emit the call instruction.
1633   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1634                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1635
1636   // Restore caller-saved and used registers.
1637   for (unsigned I = sizeof UsedMask; I-- > 0;)
1638     if (UsedMask[I])
1639       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1640     else
1641       emitX86Nops(*OutStreamer, 1, Subtarget);
1642
1643   OutStreamer->AddComment("xray typed event end.");
1644
1645   // Record the sled version.
1646   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1647 }
1648
1649 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1650                                                   X86MCInstLower &MCIL) {
1651
1652   NoAutoPaddingScope NoPadScope(*OutStreamer);
1653
1654   const Function &F = MF->getFunction();
1655   if (F.hasFnAttribute("patchable-function-entry")) {
1656     unsigned Num;
1657     if (F.getFnAttribute("patchable-function-entry")
1658             .getValueAsString()
1659             .getAsInteger(10, Num))
1660       return;
1661     emitX86Nops(*OutStreamer, Num, Subtarget);
1662     return;
1663   }
1664   // We want to emit the following pattern:
1665   //
1666   //   .p2align 1, ...
1667   // .Lxray_sled_N:
1668   //   jmp .tmpN
1669   //   # 9 bytes worth of noops
1670   //
1671   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1672   // bytes with the following pattern:
1673   //
1674   //   mov %r10, <function id, 32-bit>   // 6 bytes
1675   //   call <relative offset, 32-bits>   // 5 bytes
1676   //
1677   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1678   OutStreamer->emitCodeAlignment(2);
1679   OutStreamer->emitLabel(CurSled);
1680
1681   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1682   // an operand (computed as an offset from the jmp instruction).
1683   // FIXME: Find another less hacky way do force the relative jump.
1684   OutStreamer->emitBytes("\xeb\x09");
1685   emitX86Nops(*OutStreamer, 9, Subtarget);
1686   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1687 }
1688
1689 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1690                                        X86MCInstLower &MCIL) {
1691   NoAutoPaddingScope NoPadScope(*OutStreamer);
1692
1693   // Since PATCHABLE_RET takes the opcode of the return statement as an
1694   // argument, we use that to emit the correct form of the RET that we want.
1695   // i.e. when we see this:
1696   //
1697   //   PATCHABLE_RET X86::RET ...
1698   //
1699   // We should emit the RET followed by sleds.
1700   //
1701   //   .p2align 1, ...
1702   // .Lxray_sled_N:
1703   //   ret  # or equivalent instruction
1704   //   # 10 bytes worth of noops
1705   //
1706   // This just makes sure that the alignment for the next instruction is 2.
1707   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1708   OutStreamer->emitCodeAlignment(2);
1709   OutStreamer->emitLabel(CurSled);
1710   unsigned OpCode = MI.getOperand(0).getImm();
1711   MCInst Ret;
1712   Ret.setOpcode(OpCode);
1713   for (auto &MO : drop_begin(MI.operands()))
1714     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1715       Ret.addOperand(MaybeOperand.getValue());
1716   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1717   emitX86Nops(*OutStreamer, 10, Subtarget);
1718   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1719 }
1720
1721 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1722                                              X86MCInstLower &MCIL) {
1723   NoAutoPaddingScope NoPadScope(*OutStreamer);
1724
1725   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1726   // instruction so we lower that particular instruction and its operands.
1727   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1728   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1729   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1730   // tail call much like how we have it in PATCHABLE_RET.
1731   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1732   OutStreamer->emitCodeAlignment(2);
1733   OutStreamer->emitLabel(CurSled);
1734   auto Target = OutContext.createTempSymbol();
1735
1736   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1737   // an operand (computed as an offset from the jmp instruction).
1738   // FIXME: Find another less hacky way do force the relative jump.
1739   OutStreamer->emitBytes("\xeb\x09");
1740   emitX86Nops(*OutStreamer, 9, Subtarget);
1741   OutStreamer->emitLabel(Target);
1742   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1743
1744   unsigned OpCode = MI.getOperand(0).getImm();
1745   OpCode = convertTailJumpOpcode(OpCode);
1746   MCInst TC;
1747   TC.setOpcode(OpCode);
1748
1749   // Before emitting the instruction, add a comment to indicate that this is
1750   // indeed a tail call.
1751   OutStreamer->AddComment("TAILCALL");
1752   for (auto &MO : drop_begin(MI.operands()))
1753     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1754       TC.addOperand(MaybeOperand.getValue());
1755   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1756 }
1757
1758 // Returns instruction preceding MBBI in MachineFunction.
1759 // If MBBI is the first instruction of the first basic block, returns null.
1760 static MachineBasicBlock::const_iterator
1761 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1762   const MachineBasicBlock *MBB = MBBI->getParent();
1763   while (MBBI == MBB->begin()) {
1764     if (MBB == &MBB->getParent()->front())
1765       return MachineBasicBlock::const_iterator();
1766     MBB = MBB->getPrevNode();
1767     MBBI = MBB->end();
1768   }
1769   --MBBI;
1770   return MBBI;
1771 }
1772
1773 static const Constant *getConstantFromPool(const MachineInstr &MI,
1774                                            const MachineOperand &Op) {
1775   if (!Op.isCPI() || Op.getOffset() != 0)
1776     return nullptr;
1777
1778   ArrayRef<MachineConstantPoolEntry> Constants =
1779       MI.getParent()->getParent()->getConstantPool()->getConstants();
1780   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1781
1782   // Bail if this is a machine constant pool entry, we won't be able to dig out
1783   // anything useful.
1784   if (ConstantEntry.isMachineConstantPoolEntry())
1785     return nullptr;
1786
1787   return ConstantEntry.Val.ConstVal;
1788 }
1789
1790 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1791                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1792   std::string Comment;
1793
1794   // Compute the name for a register. This is really goofy because we have
1795   // multiple instruction printers that could (in theory) use different
1796   // names. Fortunately most people use the ATT style (outside of Windows)
1797   // and they actually agree on register naming here. Ultimately, this is
1798   // a comment, and so its OK if it isn't perfect.
1799   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1800     return X86ATTInstPrinter::getRegisterName(RegNum);
1801   };
1802
1803   const MachineOperand &DstOp = MI->getOperand(0);
1804   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1805   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1806
1807   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1808   StringRef Src1Name =
1809       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1810   StringRef Src2Name =
1811       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1812
1813   // One source operand, fix the mask to print all elements in one span.
1814   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1815   if (Src1Name == Src2Name)
1816     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1817       if (ShuffleMask[i] >= e)
1818         ShuffleMask[i] -= e;
1819
1820   raw_string_ostream CS(Comment);
1821   CS << DstName;
1822
1823   // Handle AVX512 MASK/MASXZ write mask comments.
1824   // MASK: zmmX {%kY}
1825   // MASKZ: zmmX {%kY} {z}
1826   if (SrcOp1Idx > 1) {
1827     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1828
1829     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1830     if (WriteMaskOp.isReg()) {
1831       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1832
1833       if (SrcOp1Idx == 2) {
1834         CS << " {z}";
1835       }
1836     }
1837   }
1838
1839   CS << " = ";
1840
1841   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1842     if (i != 0)
1843       CS << ",";
1844     if (ShuffleMask[i] == SM_SentinelZero) {
1845       CS << "zero";
1846       continue;
1847     }
1848
1849     // Otherwise, it must come from src1 or src2.  Print the span of elements
1850     // that comes from this src.
1851     bool isSrc1 = ShuffleMask[i] < (int)e;
1852     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1853
1854     bool IsFirst = true;
1855     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1856            (ShuffleMask[i] < (int)e) == isSrc1) {
1857       if (!IsFirst)
1858         CS << ',';
1859       else
1860         IsFirst = false;
1861       if (ShuffleMask[i] == SM_SentinelUndef)
1862         CS << "u";
1863       else
1864         CS << ShuffleMask[i] % (int)e;
1865       ++i;
1866     }
1867     CS << ']';
1868     --i; // For loop increments element #.
1869   }
1870   CS.flush();
1871
1872   return Comment;
1873 }
1874
1875 static void printConstant(const APInt &Val, raw_ostream &CS) {
1876   if (Val.getBitWidth() <= 64) {
1877     CS << Val.getZExtValue();
1878   } else {
1879     // print multi-word constant as (w0,w1)
1880     CS << "(";
1881     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1882       if (i > 0)
1883         CS << ",";
1884       CS << Val.getRawData()[i];
1885     }
1886     CS << ")";
1887   }
1888 }
1889
1890 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1891   SmallString<32> Str;
1892   // Force scientific notation to distinquish from integers.
1893   Flt.toString(Str, 0, 0);
1894   CS << Str;
1895 }
1896
1897 static void printConstant(const Constant *COp, raw_ostream &CS) {
1898   if (isa<UndefValue>(COp)) {
1899     CS << "u";
1900   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1901     printConstant(CI->getValue(), CS);
1902   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1903     printConstant(CF->getValueAPF(), CS);
1904   } else {
1905     CS << "?";
1906   }
1907 }
1908
1909 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1910   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1911   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1912
1913   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1914   if (EmitFPOData) {
1915     X86TargetStreamer *XTS =
1916         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1917     switch (MI->getOpcode()) {
1918     case X86::SEH_PushReg:
1919       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1920       break;
1921     case X86::SEH_StackAlloc:
1922       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1923       break;
1924     case X86::SEH_StackAlign:
1925       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1926       break;
1927     case X86::SEH_SetFrame:
1928       assert(MI->getOperand(1).getImm() == 0 &&
1929              ".cv_fpo_setframe takes no offset");
1930       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1931       break;
1932     case X86::SEH_EndPrologue:
1933       XTS->emitFPOEndPrologue();
1934       break;
1935     case X86::SEH_SaveReg:
1936     case X86::SEH_SaveXMM:
1937     case X86::SEH_PushFrame:
1938       llvm_unreachable("SEH_ directive incompatible with FPO");
1939       break;
1940     default:
1941       llvm_unreachable("expected SEH_ instruction");
1942     }
1943     return;
1944   }
1945
1946   // Otherwise, use the .seh_ directives for all other Windows platforms.
1947   switch (MI->getOpcode()) {
1948   case X86::SEH_PushReg:
1949     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1950     break;
1951
1952   case X86::SEH_SaveReg:
1953     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1954                                    MI->getOperand(1).getImm());
1955     break;
1956
1957   case X86::SEH_SaveXMM:
1958     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1959                                    MI->getOperand(1).getImm());
1960     break;
1961
1962   case X86::SEH_StackAlloc:
1963     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1964     break;
1965
1966   case X86::SEH_SetFrame:
1967     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
1968                                     MI->getOperand(1).getImm());
1969     break;
1970
1971   case X86::SEH_PushFrame:
1972     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1973     break;
1974
1975   case X86::SEH_EndPrologue:
1976     OutStreamer->EmitWinCFIEndProlog();
1977     break;
1978
1979   default:
1980     llvm_unreachable("expected SEH_ instruction");
1981   }
1982 }
1983
1984 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1985   if (Info.RegClass == X86::VR128RegClassID ||
1986       Info.RegClass == X86::VR128XRegClassID)
1987     return 128;
1988   if (Info.RegClass == X86::VR256RegClassID ||
1989       Info.RegClass == X86::VR256XRegClassID)
1990     return 256;
1991   if (Info.RegClass == X86::VR512RegClassID)
1992     return 512;
1993   llvm_unreachable("Unknown register class!");
1994 }
1995
1996 static void addConstantComments(const MachineInstr *MI,
1997                                 MCStreamer &OutStreamer) {
1998   switch (MI->getOpcode()) {
1999   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2000   // a constant shuffle mask. We won't be able to do this at the MC layer
2001   // because the mask isn't an immediate.
2002   case X86::PSHUFBrm:
2003   case X86::VPSHUFBrm:
2004   case X86::VPSHUFBYrm:
2005   case X86::VPSHUFBZ128rm:
2006   case X86::VPSHUFBZ128rmk:
2007   case X86::VPSHUFBZ128rmkz:
2008   case X86::VPSHUFBZ256rm:
2009   case X86::VPSHUFBZ256rmk:
2010   case X86::VPSHUFBZ256rmkz:
2011   case X86::VPSHUFBZrm:
2012   case X86::VPSHUFBZrmk:
2013   case X86::VPSHUFBZrmkz: {
2014     unsigned SrcIdx = 1;
2015     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2016       // Skip mask operand.
2017       ++SrcIdx;
2018       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2019         // Skip passthru operand.
2020         ++SrcIdx;
2021       }
2022     }
2023     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2024
2025     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2026            "Unexpected number of operands!");
2027
2028     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2029     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2030       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2031       SmallVector<int, 64> Mask;
2032       DecodePSHUFBMask(C, Width, Mask);
2033       if (!Mask.empty())
2034         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2035     }
2036     break;
2037   }
2038
2039   case X86::VPERMILPSrm:
2040   case X86::VPERMILPSYrm:
2041   case X86::VPERMILPSZ128rm:
2042   case X86::VPERMILPSZ128rmk:
2043   case X86::VPERMILPSZ128rmkz:
2044   case X86::VPERMILPSZ256rm:
2045   case X86::VPERMILPSZ256rmk:
2046   case X86::VPERMILPSZ256rmkz:
2047   case X86::VPERMILPSZrm:
2048   case X86::VPERMILPSZrmk:
2049   case X86::VPERMILPSZrmkz:
2050   case X86::VPERMILPDrm:
2051   case X86::VPERMILPDYrm:
2052   case X86::VPERMILPDZ128rm:
2053   case X86::VPERMILPDZ128rmk:
2054   case X86::VPERMILPDZ128rmkz:
2055   case X86::VPERMILPDZ256rm:
2056   case X86::VPERMILPDZ256rmk:
2057   case X86::VPERMILPDZ256rmkz:
2058   case X86::VPERMILPDZrm:
2059   case X86::VPERMILPDZrmk:
2060   case X86::VPERMILPDZrmkz: {
2061     unsigned ElSize;
2062     switch (MI->getOpcode()) {
2063     default: llvm_unreachable("Invalid opcode");
2064     case X86::VPERMILPSrm:
2065     case X86::VPERMILPSYrm:
2066     case X86::VPERMILPSZ128rm:
2067     case X86::VPERMILPSZ256rm:
2068     case X86::VPERMILPSZrm:
2069     case X86::VPERMILPSZ128rmkz:
2070     case X86::VPERMILPSZ256rmkz:
2071     case X86::VPERMILPSZrmkz:
2072     case X86::VPERMILPSZ128rmk:
2073     case X86::VPERMILPSZ256rmk:
2074     case X86::VPERMILPSZrmk:
2075       ElSize = 32;
2076       break;
2077     case X86::VPERMILPDrm:
2078     case X86::VPERMILPDYrm:
2079     case X86::VPERMILPDZ128rm:
2080     case X86::VPERMILPDZ256rm:
2081     case X86::VPERMILPDZrm:
2082     case X86::VPERMILPDZ128rmkz:
2083     case X86::VPERMILPDZ256rmkz:
2084     case X86::VPERMILPDZrmkz:
2085     case X86::VPERMILPDZ128rmk:
2086     case X86::VPERMILPDZ256rmk:
2087     case X86::VPERMILPDZrmk:
2088       ElSize = 64;
2089       break;
2090     }
2091
2092     unsigned SrcIdx = 1;
2093     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2094       // Skip mask operand.
2095       ++SrcIdx;
2096       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2097         // Skip passthru operand.
2098         ++SrcIdx;
2099       }
2100     }
2101     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2102
2103     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2104            "Unexpected number of operands!");
2105
2106     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2107     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2108       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2109       SmallVector<int, 16> Mask;
2110       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2111       if (!Mask.empty())
2112         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2113     }
2114     break;
2115   }
2116
2117   case X86::VPERMIL2PDrm:
2118   case X86::VPERMIL2PSrm:
2119   case X86::VPERMIL2PDYrm:
2120   case X86::VPERMIL2PSYrm: {
2121     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2122            "Unexpected number of operands!");
2123
2124     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2125     if (!CtrlOp.isImm())
2126       break;
2127
2128     unsigned ElSize;
2129     switch (MI->getOpcode()) {
2130     default: llvm_unreachable("Invalid opcode");
2131     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2132     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2133     }
2134
2135     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2136     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2137       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2138       SmallVector<int, 16> Mask;
2139       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2140       if (!Mask.empty())
2141         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2142     }
2143     break;
2144   }
2145
2146   case X86::VPPERMrrm: {
2147     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2148            "Unexpected number of operands!");
2149
2150     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2151     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2152       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2153       SmallVector<int, 16> Mask;
2154       DecodeVPPERMMask(C, Width, Mask);
2155       if (!Mask.empty())
2156         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2157     }
2158     break;
2159   }
2160
2161   case X86::MMX_MOVQ64rm: {
2162     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2163            "Unexpected number of operands!");
2164     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2165       std::string Comment;
2166       raw_string_ostream CS(Comment);
2167       const MachineOperand &DstOp = MI->getOperand(0);
2168       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2169       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2170         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2171         OutStreamer.AddComment(CS.str());
2172       }
2173     }
2174     break;
2175   }
2176
2177 #define MOV_CASE(Prefix, Suffix)                                               \
2178   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2179   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2180   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2181   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2182   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2183   case X86::Prefix##MOVDQU##Suffix##rm:
2184
2185 #define MOV_AVX512_CASE(Suffix)                                                \
2186   case X86::VMOVDQA64##Suffix##rm:                                             \
2187   case X86::VMOVDQA32##Suffix##rm:                                             \
2188   case X86::VMOVDQU64##Suffix##rm:                                             \
2189   case X86::VMOVDQU32##Suffix##rm:                                             \
2190   case X86::VMOVDQU16##Suffix##rm:                                             \
2191   case X86::VMOVDQU8##Suffix##rm:                                              \
2192   case X86::VMOVAPS##Suffix##rm:                                               \
2193   case X86::VMOVAPD##Suffix##rm:                                               \
2194   case X86::VMOVUPS##Suffix##rm:                                               \
2195   case X86::VMOVUPD##Suffix##rm:
2196
2197 #define CASE_ALL_MOV_RM()                                                      \
2198   MOV_CASE(, )   /* SSE */                                                     \
2199   MOV_CASE(V, )  /* AVX-128 */                                                 \
2200   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2201   MOV_AVX512_CASE(Z)                                                           \
2202   MOV_AVX512_CASE(Z256)                                                        \
2203   MOV_AVX512_CASE(Z128)
2204
2205     // For loads from a constant pool to a vector register, print the constant
2206     // loaded.
2207     CASE_ALL_MOV_RM()
2208   case X86::VBROADCASTF128:
2209   case X86::VBROADCASTI128:
2210   case X86::VBROADCASTF32X4Z256rm:
2211   case X86::VBROADCASTF32X4rm:
2212   case X86::VBROADCASTF32X8rm:
2213   case X86::VBROADCASTF64X2Z128rm:
2214   case X86::VBROADCASTF64X2rm:
2215   case X86::VBROADCASTF64X4rm:
2216   case X86::VBROADCASTI32X4Z256rm:
2217   case X86::VBROADCASTI32X4rm:
2218   case X86::VBROADCASTI32X8rm:
2219   case X86::VBROADCASTI64X2Z128rm:
2220   case X86::VBROADCASTI64X2rm:
2221   case X86::VBROADCASTI64X4rm:
2222     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2223            "Unexpected number of operands!");
2224     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2225       int NumLanes = 1;
2226       // Override NumLanes for the broadcast instructions.
2227       switch (MI->getOpcode()) {
2228       case X86::VBROADCASTF128:        NumLanes = 2; break;
2229       case X86::VBROADCASTI128:        NumLanes = 2; break;
2230       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2231       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2232       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2233       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2234       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2235       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2236       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2237       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2238       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2239       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2240       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2241       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2242       }
2243
2244       std::string Comment;
2245       raw_string_ostream CS(Comment);
2246       const MachineOperand &DstOp = MI->getOperand(0);
2247       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2248       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2249         CS << "[";
2250         for (int l = 0; l != NumLanes; ++l) {
2251           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2252                ++i) {
2253             if (i != 0 || l != 0)
2254               CS << ",";
2255             if (CDS->getElementType()->isIntegerTy())
2256               printConstant(CDS->getElementAsAPInt(i), CS);
2257             else if (CDS->getElementType()->isHalfTy() ||
2258                      CDS->getElementType()->isFloatTy() ||
2259                      CDS->getElementType()->isDoubleTy())
2260               printConstant(CDS->getElementAsAPFloat(i), CS);
2261             else
2262               CS << "?";
2263           }
2264         }
2265         CS << "]";
2266         OutStreamer.AddComment(CS.str());
2267       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2268         CS << "<";
2269         for (int l = 0; l != NumLanes; ++l) {
2270           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2271                ++i) {
2272             if (i != 0 || l != 0)
2273               CS << ",";
2274             printConstant(CV->getOperand(i), CS);
2275           }
2276         }
2277         CS << ">";
2278         OutStreamer.AddComment(CS.str());
2279       }
2280     }
2281     break;
2282
2283   case X86::MOVDDUPrm:
2284   case X86::VMOVDDUPrm:
2285   case X86::VMOVDDUPZ128rm:
2286   case X86::VBROADCASTSSrm:
2287   case X86::VBROADCASTSSYrm:
2288   case X86::VBROADCASTSSZ128rm:
2289   case X86::VBROADCASTSSZ256rm:
2290   case X86::VBROADCASTSSZrm:
2291   case X86::VBROADCASTSDYrm:
2292   case X86::VBROADCASTSDZ256rm:
2293   case X86::VBROADCASTSDZrm:
2294   case X86::VPBROADCASTBrm:
2295   case X86::VPBROADCASTBYrm:
2296   case X86::VPBROADCASTBZ128rm:
2297   case X86::VPBROADCASTBZ256rm:
2298   case X86::VPBROADCASTBZrm:
2299   case X86::VPBROADCASTDrm:
2300   case X86::VPBROADCASTDYrm:
2301   case X86::VPBROADCASTDZ128rm:
2302   case X86::VPBROADCASTDZ256rm:
2303   case X86::VPBROADCASTDZrm:
2304   case X86::VPBROADCASTQrm:
2305   case X86::VPBROADCASTQYrm:
2306   case X86::VPBROADCASTQZ128rm:
2307   case X86::VPBROADCASTQZ256rm:
2308   case X86::VPBROADCASTQZrm:
2309   case X86::VPBROADCASTWrm:
2310   case X86::VPBROADCASTWYrm:
2311   case X86::VPBROADCASTWZ128rm:
2312   case X86::VPBROADCASTWZ256rm:
2313   case X86::VPBROADCASTWZrm:
2314     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2315            "Unexpected number of operands!");
2316     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2317       int NumElts;
2318       switch (MI->getOpcode()) {
2319       default: llvm_unreachable("Invalid opcode");
2320       case X86::MOVDDUPrm:          NumElts = 2;  break;
2321       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2322       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2323       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2324       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2325       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2326       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2327       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2328       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2329       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2330       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2331       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2332       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2333       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2334       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2335       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2336       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2337       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2338       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2339       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2340       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2341       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2342       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2343       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2344       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2345       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2346       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2347       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2348       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2349       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2350       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2351       }
2352
2353       std::string Comment;
2354       raw_string_ostream CS(Comment);
2355       const MachineOperand &DstOp = MI->getOperand(0);
2356       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2357       CS << "[";
2358       for (int i = 0; i != NumElts; ++i) {
2359         if (i != 0)
2360           CS << ",";
2361         printConstant(C, CS);
2362       }
2363       CS << "]";
2364       OutStreamer.AddComment(CS.str());
2365     }
2366   }
2367 }
2368
2369 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2370   X86MCInstLower MCInstLowering(*MF, *this);
2371   const X86RegisterInfo *RI =
2372       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2373
2374   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2375   // are compressed from EVEX encoding to VEX encoding.
2376   if (TM.Options.MCOptions.ShowMCEncoding) {
2377     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2378       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2379   }
2380
2381   // Add comments for values loaded from constant pool.
2382   if (OutStreamer->isVerboseAsm())
2383     addConstantComments(MI, *OutStreamer);
2384
2385   switch (MI->getOpcode()) {
2386   case TargetOpcode::DBG_VALUE:
2387     llvm_unreachable("Should be handled target independently");
2388
2389   // Emit nothing here but a comment if we can.
2390   case X86::Int_MemBarrier:
2391     OutStreamer->emitRawComment("MEMBARRIER");
2392     return;
2393
2394   case X86::EH_RETURN:
2395   case X86::EH_RETURN64: {
2396     // Lower these as normal, but add some comments.
2397     Register Reg = MI->getOperand(0).getReg();
2398     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2399                             X86ATTInstPrinter::getRegisterName(Reg));
2400     break;
2401   }
2402   case X86::CLEANUPRET: {
2403     // Lower these as normal, but add some comments.
2404     OutStreamer->AddComment("CLEANUPRET");
2405     break;
2406   }
2407
2408   case X86::CATCHRET: {
2409     // Lower these as normal, but add some comments.
2410     OutStreamer->AddComment("CATCHRET");
2411     break;
2412   }
2413
2414   case X86::ENDBR32:
2415   case X86::ENDBR64: {
2416     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2417     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2418     // non-empty. If MI is the initial ENDBR, place the
2419     // __patchable_function_entries label after ENDBR.
2420     if (CurrentPatchableFunctionEntrySym &&
2421         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2422         MI == &MF->front().front()) {
2423       MCInst Inst;
2424       MCInstLowering.Lower(MI, Inst);
2425       EmitAndCountInstruction(Inst);
2426       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2427       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2428       return;
2429     }
2430     break;
2431   }
2432
2433   case X86::TAILJMPr:
2434   case X86::TAILJMPm:
2435   case X86::TAILJMPd:
2436   case X86::TAILJMPd_CC:
2437   case X86::TAILJMPr64:
2438   case X86::TAILJMPm64:
2439   case X86::TAILJMPd64:
2440   case X86::TAILJMPd64_CC:
2441   case X86::TAILJMPr64_REX:
2442   case X86::TAILJMPm64_REX:
2443     // Lower these as normal, but add some comments.
2444     OutStreamer->AddComment("TAILCALL");
2445     break;
2446
2447   case X86::TLS_addr32:
2448   case X86::TLS_addr64:
2449   case X86::TLS_addrX32:
2450   case X86::TLS_base_addr32:
2451   case X86::TLS_base_addr64:
2452   case X86::TLS_base_addrX32:
2453     return LowerTlsAddr(MCInstLowering, *MI);
2454
2455   case X86::MOVPC32r: {
2456     // This is a pseudo op for a two instruction sequence with a label, which
2457     // looks like:
2458     //     call "L1$pb"
2459     // "L1$pb":
2460     //     popl %esi
2461
2462     // Emit the call.
2463     MCSymbol *PICBase = MF->getPICBaseSymbol();
2464     // FIXME: We would like an efficient form for this, so we don't have to do a
2465     // lot of extra uniquing.
2466     EmitAndCountInstruction(
2467         MCInstBuilder(X86::CALLpcrel32)
2468             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2469
2470     const X86FrameLowering *FrameLowering =
2471         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2472     bool hasFP = FrameLowering->hasFP(*MF);
2473
2474     // TODO: This is needed only if we require precise CFA.
2475     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2476                                !OutStreamer->getDwarfFrameInfos().back().End;
2477
2478     int stackGrowth = -RI->getSlotSize();
2479
2480     if (HasActiveDwarfFrame && !hasFP) {
2481       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2482     }
2483
2484     // Emit the label.
2485     OutStreamer->emitLabel(PICBase);
2486
2487     // popl $reg
2488     EmitAndCountInstruction(
2489         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2490
2491     if (HasActiveDwarfFrame && !hasFP) {
2492       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2493     }
2494     return;
2495   }
2496
2497   case X86::ADD32ri: {
2498     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2499     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2500       break;
2501
2502     // Okay, we have something like:
2503     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2504
2505     // For this, we want to print something like:
2506     //   MYGLOBAL + (. - PICBASE)
2507     // However, we can't generate a ".", so just emit a new label here and refer
2508     // to it.
2509     MCSymbol *DotSym = OutContext.createTempSymbol();
2510     OutStreamer->emitLabel(DotSym);
2511
2512     // Now that we have emitted the label, lower the complex operand expression.
2513     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2514
2515     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2516     const MCExpr *PICBase =
2517         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2518     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2519
2520     DotExpr = MCBinaryExpr::createAdd(
2521         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2522
2523     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2524                                 .addReg(MI->getOperand(0).getReg())
2525                                 .addReg(MI->getOperand(1).getReg())
2526                                 .addExpr(DotExpr));
2527     return;
2528   }
2529   case TargetOpcode::STATEPOINT:
2530     return LowerSTATEPOINT(*MI, MCInstLowering);
2531
2532   case TargetOpcode::FAULTING_OP:
2533     return LowerFAULTING_OP(*MI, MCInstLowering);
2534
2535   case TargetOpcode::FENTRY_CALL:
2536     return LowerFENTRY_CALL(*MI, MCInstLowering);
2537
2538   case TargetOpcode::PATCHABLE_OP:
2539     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2540
2541   case TargetOpcode::STACKMAP:
2542     return LowerSTACKMAP(*MI);
2543
2544   case TargetOpcode::PATCHPOINT:
2545     return LowerPATCHPOINT(*MI, MCInstLowering);
2546
2547   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2548     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2549
2550   case TargetOpcode::PATCHABLE_RET:
2551     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2552
2553   case TargetOpcode::PATCHABLE_TAIL_CALL:
2554     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2555
2556   case TargetOpcode::PATCHABLE_EVENT_CALL:
2557     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2558
2559   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2560     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2561
2562   case X86::MORESTACK_RET:
2563     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2564     return;
2565
2566   case X86::MORESTACK_RET_RESTORE_R10:
2567     // Return, then restore R10.
2568     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2569     EmitAndCountInstruction(
2570         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2571     return;
2572
2573   case X86::SEH_PushReg:
2574   case X86::SEH_SaveReg:
2575   case X86::SEH_SaveXMM:
2576   case X86::SEH_StackAlloc:
2577   case X86::SEH_StackAlign:
2578   case X86::SEH_SetFrame:
2579   case X86::SEH_PushFrame:
2580   case X86::SEH_EndPrologue:
2581     EmitSEHInstruction(MI);
2582     return;
2583
2584   case X86::SEH_Epilogue: {
2585     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2586     MachineBasicBlock::const_iterator MBBI(MI);
2587     // Check if preceded by a call and emit nop if so.
2588     for (MBBI = PrevCrossBBInst(MBBI);
2589          MBBI != MachineBasicBlock::const_iterator();
2590          MBBI = PrevCrossBBInst(MBBI)) {
2591       // Conservatively assume that pseudo instructions don't emit code and keep
2592       // looking for a call. We may emit an unnecessary nop in some cases.
2593       if (!MBBI->isPseudo()) {
2594         if (MBBI->isCall())
2595           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2596         break;
2597       }
2598     }
2599     return;
2600   }
2601   case X86::UBSAN_UD1:
2602     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2603                                 .addReg(X86::EAX)
2604                                 .addReg(X86::EAX)
2605                                 .addImm(1)
2606                                 .addReg(X86::NoRegister)
2607                                 .addImm(MI->getOperand(0).getImm())
2608                                 .addReg(X86::NoRegister));
2609     return;
2610   }
2611
2612   MCInst TmpInst;
2613   MCInstLowering.Lower(MI, TmpInst);
2614
2615   // Stackmap shadows cannot include branch targets, so we can count the bytes
2616   // in a call towards the shadow, but must ensure that the no thread returns
2617   // in to the stackmap shadow.  The only way to achieve this is if the call
2618   // is at the end of the shadow.
2619   if (MI->isCall()) {
2620     // Count then size of the call towards the shadow
2621     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2622     // Then flush the shadow so that we fill with nops before the call, not
2623     // after it.
2624     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2625     // Then emit the call
2626     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2627     return;
2628   }
2629
2630   EmitAndCountInstruction(TmpInst);
2631 }