1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
10 #include "AMDGPU.h"
11 #include "AMDKernelCodeT.h"
12 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
14 #include "SIDefines.h"
15 #include "SIInstrInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetRegistry.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <algorithm>
55 #include <cassert>
56 #include <cstdint>
57 #include <cstring>
58 #include <iterator>
59 #include <map>
60 #include <memory>
61 #include <string>
63 using namespace llvm;
64 using namespace llvm::AMDGPU;
65 using namespace llvm::amdhsa;
67 namespace {
69 class AMDGPUAsmParser;
71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
73 //===----------------------------------------------------------------------===//
74 // Operand
75 //===----------------------------------------------------------------------===//
77 class AMDGPUOperand : public MCParsedAsmOperand {
78 enum KindTy {
79 Token,
80 Immediate,
81 Register,
82 Expression
83 } Kind;
85 SMLoc StartLoc, EndLoc;
86 const AMDGPUAsmParser *AsmParser;
88 public:
89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 using Ptr = std::unique_ptr<AMDGPUOperand>;
94 struct Modifiers {
95 bool Abs = false;
96 bool Neg = false;
97 bool Sext = false;
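// Illustrative mapping from assembler syntax to these flags (hedged summary):
// "-v0" sets Neg, "|v0|" or "abs(v0)" sets Abs, and "sext(v0)" sets Sext.
// FP and integer modifiers are mutually exclusive (see the assert below).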
99 bool hasFPModifiers() const { return Abs || Neg; }
100 bool hasIntModifiers() const { return Sext; }
101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 int64_t getFPModifiersOperand() const {
104 int64_t Operand = 0;
105 Operand |= Abs ? SISrcMods::ABS : 0;
106 Operand |= Neg ? SISrcMods::NEG : 0;
107 return Operand;
110 int64_t getIntModifiersOperand() const {
111 int64_t Operand = 0;
112 Operand |= Sext ? SISrcMods::SEXT : 0;
113 return Operand;
116 int64_t getModifiersOperand() const {
117 assert(!(hasFPModifiers() && hasIntModifiers())
118 && "fp and int modifiers should not be used simultaneously");
119 if (hasFPModifiers()) {
120 return getFPModifiersOperand();
121 } else if (hasIntModifiers()) {
122 return getIntModifiersOperand();
123 } else {
124 return 0;
128 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
131 enum ImmTy {
132 ImmTyNone,
133 ImmTyGDS,
134 ImmTyLDS,
135 ImmTyOffen,
136 ImmTyIdxen,
137 ImmTyAddr64,
138 ImmTyOffset,
139 ImmTyInstOffset,
140 ImmTyOffset0,
141 ImmTyOffset1,
142 ImmTyGLC,
143 ImmTySLC,
144 ImmTyTFE,
145 ImmTyD16,
146 ImmTyClampSI,
147 ImmTyOModSI,
148 ImmTyDppCtrl,
149 ImmTyDppRowMask,
150 ImmTyDppBankMask,
151 ImmTyDppBoundCtrl,
152 ImmTySdwaDstSel,
153 ImmTySdwaSrc0Sel,
154 ImmTySdwaSrc1Sel,
155 ImmTySdwaDstUnused,
156 ImmTyDMask,
157 ImmTyUNorm,
158 ImmTyDA,
159 ImmTyR128,
160 ImmTyLWE,
161 ImmTyExpTgt,
162 ImmTyExpCompr,
163 ImmTyExpVM,
164 ImmTyDFMT,
165 ImmTyNFMT,
166 ImmTyHwreg,
167 ImmTyOff,
168 ImmTySendMsg,
169 ImmTyInterpSlot,
170 ImmTyInterpAttr,
171 ImmTyAttrChan,
172 ImmTyOpSel,
173 ImmTyOpSelHi,
174 ImmTyNegLo,
175 ImmTyNegHi,
176 ImmTySwizzle,
177 ImmTyHigh
180 struct TokOp {
181 const char *Data;
182 unsigned Length;
185 struct ImmOp {
186 int64_t Val;
187 ImmTy Type;
188 bool IsFPImm;
189 Modifiers Mods;
192 struct RegOp {
193 unsigned RegNo;
194 bool IsForcedVOP3;
195 Modifiers Mods;
198 union {
199 TokOp Tok;
200 ImmOp Imm;
201 RegOp Reg;
202 const MCExpr *Expr;
205 bool isToken() const override {
206 if (Kind == Token)
207 return true;
209 if (Kind != Expression || !Expr)
210 return false;
212 // When parsing operands, we can't always tell if something was meant to be
213 // a token, like 'gds', or an expression that references a global variable.
214 // In this case, we assume the string is an expression, and if we need to
215 // interpret it as a token, then we treat the symbol name as the token.
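// Illustrative case: in "ds_add_u32 v0, v1 gds", the trailing "gds" is lexed
// as an identifier and reaches us as an MCSymbolRefExpr; this predicate lets
// it be re-interpreted as the token "gds".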
216 return isa<MCSymbolRefExpr>(Expr);
219 bool isImm() const override {
220 return Kind == Immediate;
223 bool isInlinableImm(MVT type) const;
224 bool isLiteralImm(MVT type) const;
226 bool isRegKind() const {
227 return Kind == Register;
230 bool isReg() const override {
231 return isRegKind() && !hasModifiers();
234 bool isRegOrImmWithInputMods(MVT type) const {
235 return isRegKind() || isInlinableImm(type);
238 bool isRegOrImmWithInt16InputMods() const {
239 return isRegOrImmWithInputMods(MVT::i16);
242 bool isRegOrImmWithInt32InputMods() const {
243 return isRegOrImmWithInputMods(MVT::i32);
246 bool isRegOrImmWithInt64InputMods() const {
247 return isRegOrImmWithInputMods(MVT::i64);
250 bool isRegOrImmWithFP16InputMods() const {
251 return isRegOrImmWithInputMods(MVT::f16);
254 bool isRegOrImmWithFP32InputMods() const {
255 return isRegOrImmWithInputMods(MVT::f32);
258 bool isRegOrImmWithFP64InputMods() const {
259 return isRegOrImmWithInputMods(MVT::f64);
262 bool isVReg() const {
263 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
264 isRegClass(AMDGPU::VReg_64RegClassID) ||
265 isRegClass(AMDGPU::VReg_96RegClassID) ||
266 isRegClass(AMDGPU::VReg_128RegClassID) ||
267 isRegClass(AMDGPU::VReg_256RegClassID) ||
268 isRegClass(AMDGPU::VReg_512RegClassID);
271 bool isVReg32OrOff() const {
272 return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
275 bool isSDWAOperand(MVT type) const;
276 bool isSDWAFP16Operand() const;
277 bool isSDWAFP32Operand() const;
278 bool isSDWAInt16Operand() const;
279 bool isSDWAInt32Operand() const;
281 bool isImmTy(ImmTy ImmT) const {
282 return isImm() && Imm.Type == ImmT;
285 bool isImmModifier() const {
286 return isImm() && Imm.Type != ImmTyNone;
289 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
290 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
291 bool isDMask() const { return isImmTy(ImmTyDMask); }
292 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
293 bool isDA() const { return isImmTy(ImmTyDA); }
294 bool isR128() const { return isImmTy(ImmTyR128); }
295 bool isLWE() const { return isImmTy(ImmTyLWE); }
296 bool isOff() const { return isImmTy(ImmTyOff); }
297 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
298 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
299 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
300 bool isOffen() const { return isImmTy(ImmTyOffen); }
301 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
302 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
303 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
304 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
305 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
307 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
308 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
309 bool isGDS() const { return isImmTy(ImmTyGDS); }
310 bool isLDS() const { return isImmTy(ImmTyLDS); }
311 bool isGLC() const { return isImmTy(ImmTyGLC); }
312 bool isSLC() const { return isImmTy(ImmTySLC); }
313 bool isTFE() const { return isImmTy(ImmTyTFE); }
314 bool isD16() const { return isImmTy(ImmTyD16); }
315 bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
316 bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
317 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
318 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
319 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
320 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
321 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
322 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
323 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
324 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
325 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
326 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
327 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
328 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
329 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
330 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
331 bool isHigh() const { return isImmTy(ImmTyHigh); }
333 bool isMod() const {
334 return isClampSI() || isOModSI();
337 bool isRegOrImm() const {
338 return isReg() || isImm();
341 bool isRegClass(unsigned RCID) const;
343 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
344 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
347 bool isSCSrcB16() const {
348 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
351 bool isSCSrcV2B16() const {
352 return isSCSrcB16();
355 bool isSCSrcB32() const {
356 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
359 bool isSCSrcB64() const {
360 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
363 bool isSCSrcF16() const {
364 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
367 bool isSCSrcV2F16() const {
368 return isSCSrcF16();
371 bool isSCSrcF32() const {
372 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
375 bool isSCSrcF64() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
379 bool isSSrcB32() const {
380 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
383 bool isSSrcB16() const {
384 return isSCSrcB16() || isLiteralImm(MVT::i16);
387 bool isSSrcV2B16() const {
388 llvm_unreachable("cannot happen");
389 return isSSrcB16();
392 bool isSSrcB64() const {
393 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
394 // See isVSrc64().
395 return isSCSrcB64() || isLiteralImm(MVT::i64);
398 bool isSSrcF32() const {
399 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
402 bool isSSrcF64() const {
403 return isSCSrcB64() || isLiteralImm(MVT::f64);
406 bool isSSrcF16() const {
407 return isSCSrcB16() || isLiteralImm(MVT::f16);
410 bool isSSrcV2F16() const {
411 llvm_unreachable("cannot happen");
412 return isSSrcF16();
415 bool isVCSrcB32() const {
416 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
419 bool isVCSrcB64() const {
420 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
423 bool isVCSrcB16() const {
424 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
427 bool isVCSrcV2B16() const {
428 return isVCSrcB16();
431 bool isVCSrcF32() const {
432 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
435 bool isVCSrcF64() const {
436 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
439 bool isVCSrcF16() const {
440 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
443 bool isVCSrcV2F16() const {
444 return isVCSrcF16();
447 bool isVSrcB32() const {
448 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
451 bool isVSrcB64() const {
452 return isVCSrcF64() || isLiteralImm(MVT::i64);
455 bool isVSrcB16() const {
456 return isVCSrcF16() || isLiteralImm(MVT::i16);
459 bool isVSrcV2B16() const {
460 llvm_unreachable("cannot happen");
461 return isVSrcB16();
464 bool isVSrcF32() const {
465 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
468 bool isVSrcF64() const {
469 return isVCSrcF64() || isLiteralImm(MVT::f64);
472 bool isVSrcF16() const {
473 return isVCSrcF16() || isLiteralImm(MVT::f16);
476 bool isVSrcV2F16() const {
477 llvm_unreachable("cannot happen");
478 return isVSrcF16();
481 bool isKImmFP32() const {
482 return isLiteralImm(MVT::f32);
485 bool isKImmFP16() const {
486 return isLiteralImm(MVT::f16);
489 bool isMem() const override {
490 return false;
493 bool isExpr() const {
494 return Kind == Expression;
497 bool isSoppBrTarget() const {
498 return isExpr() || isImm();
501 bool isSWaitCnt() const;
502 bool isHwreg() const;
503 bool isSendMsg() const;
504 bool isSwizzle() const;
505 bool isSMRDOffset8() const;
506 bool isSMRDOffset20() const;
507 bool isSMRDLiteralOffset() const;
508 bool isDPPCtrl() const;
509 bool isGPRIdxMode() const;
510 bool isS16Imm() const;
511 bool isU16Imm() const;
513 StringRef getExpressionAsToken() const {
514 assert(isExpr());
515 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
516 return S->getSymbol().getName();
519 StringRef getToken() const {
520 assert(isToken());
522 if (Kind == Expression)
523 return getExpressionAsToken();
525 return StringRef(Tok.Data, Tok.Length);
528 int64_t getImm() const {
529 assert(isImm());
530 return Imm.Val;
533 ImmTy getImmTy() const {
534 assert(isImm());
535 return Imm.Type;
538 unsigned getReg() const override {
539 return Reg.RegNo;
542 SMLoc getStartLoc() const override {
543 return StartLoc;
546 SMLoc getEndLoc() const override {
547 return EndLoc;
550 SMRange getLocRange() const {
551 return SMRange(StartLoc, EndLoc);
554 Modifiers getModifiers() const {
555 assert(isRegKind() || isImmTy(ImmTyNone));
556 return isRegKind() ? Reg.Mods : Imm.Mods;
559 void setModifiers(Modifiers Mods) {
560 assert(isRegKind() || isImmTy(ImmTyNone));
561 if (isRegKind())
562 Reg.Mods = Mods;
563 else
564 Imm.Mods = Mods;
567 bool hasModifiers() const {
568 return getModifiers().hasModifiers();
571 bool hasFPModifiers() const {
572 return getModifiers().hasFPModifiers();
575 bool hasIntModifiers() const {
576 return getModifiers().hasIntModifiers();
579 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
581 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
583 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
585 template <unsigned Bitwidth>
586 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
588 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
589 addKImmFPOperands<16>(Inst, N);
592 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
593 addKImmFPOperands<32>(Inst, N);
596 void addRegOperands(MCInst &Inst, unsigned N) const;
598 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
599 if (isRegKind())
600 addRegOperands(Inst, N);
601 else if (isExpr())
602 Inst.addOperand(MCOperand::createExpr(Expr));
603 else
604 addImmOperands(Inst, N);
607 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
608 Modifiers Mods = getModifiers();
609 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
610 if (isRegKind()) {
611 addRegOperands(Inst, N);
612 } else {
613 addImmOperands(Inst, N, false);
617 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
618 assert(!hasIntModifiers());
619 addRegOrImmWithInputModsOperands(Inst, N);
622 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
623 assert(!hasFPModifiers());
624 addRegOrImmWithInputModsOperands(Inst, N);
627 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
628 Modifiers Mods = getModifiers();
629 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
630 assert(isRegKind());
631 addRegOperands(Inst, N);
634 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
635 assert(!hasIntModifiers());
636 addRegWithInputModsOperands(Inst, N);
639 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
640 assert(!hasFPModifiers());
641 addRegWithInputModsOperands(Inst, N);
644 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
645 if (isImm())
646 addImmOperands(Inst, N);
647 else {
648 assert(isExpr());
649 Inst.addOperand(MCOperand::createExpr(Expr));
653 static void printImmTy(raw_ostream& OS, ImmTy Type) {
654 switch (Type) {
655 case ImmTyNone: OS << "None"; break;
656 case ImmTyGDS: OS << "GDS"; break;
657 case ImmTyLDS: OS << "LDS"; break;
658 case ImmTyOffen: OS << "Offen"; break;
659 case ImmTyIdxen: OS << "Idxen"; break;
660 case ImmTyAddr64: OS << "Addr64"; break;
661 case ImmTyOffset: OS << "Offset"; break;
662 case ImmTyInstOffset: OS << "InstOffset"; break;
663 case ImmTyOffset0: OS << "Offset0"; break;
664 case ImmTyOffset1: OS << "Offset1"; break;
665 case ImmTyGLC: OS << "GLC"; break;
666 case ImmTySLC: OS << "SLC"; break;
667 case ImmTyTFE: OS << "TFE"; break;
668 case ImmTyD16: OS << "D16"; break;
669 case ImmTyDFMT: OS << "DFMT"; break;
670 case ImmTyNFMT: OS << "NFMT"; break;
671 case ImmTyClampSI: OS << "ClampSI"; break;
672 case ImmTyOModSI: OS << "OModSI"; break;
673 case ImmTyDppCtrl: OS << "DppCtrl"; break;
674 case ImmTyDppRowMask: OS << "DppRowMask"; break;
675 case ImmTyDppBankMask: OS << "DppBankMask"; break;
676 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
677 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
678 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
679 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
680 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
681 case ImmTyDMask: OS << "DMask"; break;
682 case ImmTyUNorm: OS << "UNorm"; break;
683 case ImmTyDA: OS << "DA"; break;
684 case ImmTyR128: OS << "R128"; break;
685 case ImmTyLWE: OS << "LWE"; break;
686 case ImmTyOff: OS << "Off"; break;
687 case ImmTyExpTgt: OS << "ExpTgt"; break;
688 case ImmTyExpCompr: OS << "ExpCompr"; break;
689 case ImmTyExpVM: OS << "ExpVM"; break;
690 case ImmTyHwreg: OS << "Hwreg"; break;
691 case ImmTySendMsg: OS << "SendMsg"; break;
692 case ImmTyInterpSlot: OS << "InterpSlot"; break;
693 case ImmTyInterpAttr: OS << "InterpAttr"; break;
694 case ImmTyAttrChan: OS << "AttrChan"; break;
695 case ImmTyOpSel: OS << "OpSel"; break;
696 case ImmTyOpSelHi: OS << "OpSelHi"; break;
697 case ImmTyNegLo: OS << "NegLo"; break;
698 case ImmTyNegHi: OS << "NegHi"; break;
699 case ImmTySwizzle: OS << "Swizzle"; break;
700 case ImmTyHigh: OS << "High"; break;
704 void print(raw_ostream &OS) const override {
705 switch (Kind) {
706 case Register:
707 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
708 break;
709 case Immediate:
710 OS << '<' << getImm();
711 if (getImmTy() != ImmTyNone) {
712 OS << " type: "; printImmTy(OS, getImmTy());
714 OS << " mods: " << Imm.Mods << '>';
715 break;
716 case Token:
717 OS << '\'' << getToken() << '\'';
718 break;
719 case Expression:
720 OS << "<expr " << *Expr << '>';
721 break;
725 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
726 int64_t Val, SMLoc Loc,
727 ImmTy Type = ImmTyNone,
728 bool IsFPImm = false) {
729 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
730 Op->Imm.Val = Val;
731 Op->Imm.IsFPImm = IsFPImm;
732 Op->Imm.Type = Type;
733 Op->Imm.Mods = Modifiers();
734 Op->StartLoc = Loc;
735 Op->EndLoc = Loc;
736 return Op;
739 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
740 StringRef Str, SMLoc Loc,
741 bool HasExplicitEncodingSize = true) {
742 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
743 Res->Tok.Data = Str.data();
744 Res->Tok.Length = Str.size();
745 Res->StartLoc = Loc;
746 Res->EndLoc = Loc;
747 return Res;
750 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
751 unsigned RegNo, SMLoc S,
752 SMLoc E,
753 bool ForceVOP3) {
754 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
755 Op->Reg.RegNo = RegNo;
756 Op->Reg.Mods = Modifiers();
757 Op->Reg.IsForcedVOP3 = ForceVOP3;
758 Op->StartLoc = S;
759 Op->EndLoc = E;
760 return Op;
763 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
764 const class MCExpr *Expr, SMLoc S) {
765 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
766 Op->Expr = Expr;
767 Op->StartLoc = S;
768 Op->EndLoc = S;
769 return Op;
773 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
774 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
775 return OS;
778 //===----------------------------------------------------------------------===//
779 // AsmParser
780 //===----------------------------------------------------------------------===//
782 // Holds info related to the current kernel, e.g. the count of SGPRs used.
783 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
784 // next .amdgpu_hsa_kernel directive or at EOF.
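// Illustrative example: after
//   .amdgpu_hsa_kernel foo
//   v_mov_b32 v3, s5
// the predefined symbols .kernel.vgpr_count and .kernel.sgpr_count evaluate
// to 4 and 6 respectively (highest register index used, plus one).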
785 class KernelScopeInfo {
786 int SgprIndexUnusedMin = -1;
787 int VgprIndexUnusedMin = -1;
788 MCContext *Ctx = nullptr;
790 void usesSgprAt(int i) {
791 if (i >= SgprIndexUnusedMin) {
792 SgprIndexUnusedMin = ++i;
793 if (Ctx) {
794 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
795 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
800 void usesVgprAt(int i) {
801 if (i >= VgprIndexUnusedMin) {
802 VgprIndexUnusedMin = ++i;
803 if (Ctx) {
804 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
805 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
810 public:
811 KernelScopeInfo() = default;
813 void initialize(MCContext &Context) {
814 Ctx = &Context;
815 usesSgprAt(SgprIndexUnusedMin = -1);
816 usesVgprAt(VgprIndexUnusedMin = -1);
819 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
820 switch (RegKind) {
821 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
822 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
823 default: break;
828 class AMDGPUAsmParser : public MCTargetAsmParser {
829 MCAsmParser &Parser;
831 // Number of extra operands parsed after the first optional operand.
832 // This may be necessary to skip hardcoded mandatory operands.
833 static const unsigned MAX_OPR_LOOKAHEAD = 8;
835 unsigned ForcedEncodingSize = 0;
836 bool ForcedDPP = false;
837 bool ForcedSDWA = false;
838 KernelScopeInfo KernelScope;
840 /// @name Auto-generated Match Functions
841 /// {
843 #define GET_ASSEMBLER_HEADER
844 #include "AMDGPUGenAsmMatcher.inc"
846 /// }
848 private:
849 bool ParseAsAbsoluteExpression(uint32_t &Ret);
850 bool OutOfRangeError(SMRange Range);
851 /// Calculate VGPR/SGPR blocks required for given target, reserved
852 /// registers, and user-specified NextFreeXGPR values.
854 /// \param Features [in] Target features, used for bug corrections.
855 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
856 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
857 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
858 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
859 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
860 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
861 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
862 /// \param VGPRBlocks [out] Result VGPR block count.
863 /// \param SGPRBlocks [out] Result SGPR block count.
864 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
865 bool FlatScrUsed, bool XNACKUsed,
866 unsigned NextFreeVGPR, SMRange VGPRRange,
867 unsigned NextFreeSGPR, SMRange SGPRRange,
868 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
869 bool ParseDirectiveAMDGCNTarget();
870 bool ParseDirectiveAMDHSAKernel();
871 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
872 bool ParseDirectiveHSACodeObjectVersion();
873 bool ParseDirectiveHSACodeObjectISA();
874 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
875 bool ParseDirectiveAMDKernelCodeT();
876 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
877 bool ParseDirectiveAMDGPUHsaKernel();
879 bool ParseDirectiveISAVersion();
880 bool ParseDirectiveHSAMetadata();
881 bool ParseDirectivePALMetadata();
883 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
884 RegisterKind RegKind, unsigned Reg1,
885 unsigned RegNum);
886 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
887 unsigned& RegNum, unsigned& RegWidth,
888 unsigned *DwordRegIndex);
889 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
890 void initializeGprCountSymbol(RegisterKind RegKind);
891 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
892 unsigned RegWidth);
893 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
894 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
895 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
896 bool IsGdsHardcoded);
898 public:
899 enum AMDGPUMatchResultTy {
900 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
903 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
905 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
906 const MCInstrInfo &MII,
907 const MCTargetOptions &Options)
908 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
909 MCAsmParserExtension::Initialize(Parser);
911 if (getFeatureBits().none()) {
912 // Set default features.
913 copySTI().ToggleFeature("SOUTHERN_ISLANDS");
916 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
919 // TODO: make those pre-defined variables read-only.
920 // Currently there is no suitable machinery in the core llvm-mc for this.
921 // MCSymbol::isRedefinable is intended for another purpose, and
922 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
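// These symbols let assembly sources test the target, e.g. (illustrative):
//   .if .option.machine_version_major >= 8
//   ...
//   .endif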
923 AMDGPU::IsaInfo::IsaVersion ISA =
924 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
925 MCContext &Ctx = getContext();
926 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
927 MCSymbol *Sym =
928 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
929 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
930 } else {
931 MCSymbol *Sym =
932 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
933 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
934 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
935 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
936 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
937 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
939 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
940 initializeGprCountSymbol(IS_VGPR);
941 initializeGprCountSymbol(IS_SGPR);
942 } else
943 KernelScope.initialize(getContext());
947 bool hasXNACK() const {
948 return AMDGPU::hasXNACK(getSTI());
951 bool hasMIMG_R128() const {
952 return AMDGPU::hasMIMG_R128(getSTI());
955 bool hasPackedD16() const {
956 return AMDGPU::hasPackedD16(getSTI());
959 bool isSI() const {
960 return AMDGPU::isSI(getSTI());
963 bool isCI() const {
964 return AMDGPU::isCI(getSTI());
967 bool isVI() const {
968 return AMDGPU::isVI(getSTI());
971 bool isGFX9() const {
972 return AMDGPU::isGFX9(getSTI());
975 bool hasInv2PiInlineImm() const {
976 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
979 bool hasFlatOffsets() const {
980 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
983 bool hasSGPR102_SGPR103() const {
984 return !isVI();
987 bool hasIntClamp() const {
988 return getFeatureBits()[AMDGPU::FeatureIntClamp];
991 AMDGPUTargetStreamer &getTargetStreamer() {
992 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
993 return static_cast<AMDGPUTargetStreamer &>(TS);
996 const MCRegisterInfo *getMRI() const {
997 // We need this const_cast because for some reason getContext() is not const
998 // in MCAsmParser.
999 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1002 const MCInstrInfo *getMII() const {
1003 return &MII;
1006 const FeatureBitset &getFeatureBits() const {
1007 return getSTI().getFeatureBits();
1010 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1011 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1012 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1014 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1015 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1016 bool isForcedDPP() const { return ForcedDPP; }
1017 bool isForcedSDWA() const { return ForcedSDWA; }
1018 ArrayRef<unsigned> getMatchedVariants() const;
1020 std::unique_ptr<AMDGPUOperand> parseRegister();
1021 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1022 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1023 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1024 unsigned Kind) override;
1025 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1026 OperandVector &Operands, MCStreamer &Out,
1027 uint64_t &ErrorInfo,
1028 bool MatchingInlineAsm) override;
1029 bool ParseDirective(AsmToken DirectiveID) override;
1030 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
1031 StringRef parseMnemonicSuffix(StringRef Name);
1032 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1033 SMLoc NameLoc, OperandVector &Operands) override;
1034 //bool ProcessInstruction(MCInst &Inst);
1036 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1038 OperandMatchResultTy
1039 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1040 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1041 bool (*ConvertResult)(int64_t &) = nullptr);
1043 OperandMatchResultTy parseOperandArrayWithPrefix(
1044 const char *Prefix,
1045 OperandVector &Operands,
1046 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1047 bool (*ConvertResult)(int64_t&) = nullptr);
1049 OperandMatchResultTy
1050 parseNamedBit(const char *Name, OperandVector &Operands,
1051 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1052 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1053 StringRef &Value);
1055 bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
1056 OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
1057 OperandMatchResultTy parseReg(OperandVector &Operands);
1058 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
1059 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1060 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1061 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1062 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1063 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1065 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1066 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1067 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1068 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1070 bool parseCnt(int64_t &IntVal);
1071 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1072 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1074 private:
1075 struct OperandInfoTy {
1076 int64_t Id;
1077 bool IsSymbolic = false;
1079 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1082 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
1083 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1085 void errorExpTgt();
1086 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1088 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
1089 bool validateConstantBusLimitations(const MCInst &Inst);
1090 bool validateEarlyClobberLimitations(const MCInst &Inst);
1091 bool validateIntClampSupported(const MCInst &Inst);
1092 bool validateMIMGAtomicDMask(const MCInst &Inst);
1093 bool validateMIMGGatherDMask(const MCInst &Inst);
1094 bool validateMIMGDataSize(const MCInst &Inst);
1095 bool validateMIMGR128(const MCInst &Inst);
1096 bool validateMIMGD16(const MCInst &Inst);
1097 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1098 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1099 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1101 bool trySkipId(const StringRef Id);
1102 bool trySkipToken(const AsmToken::TokenKind Kind);
1103 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1104 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1105 bool parseExpr(int64_t &Imm);
1107 public:
1108 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1109 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1111 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1112 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1113 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1114 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1115 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1117 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1118 const unsigned MinVal,
1119 const unsigned MaxVal,
1120 const StringRef ErrMsg);
1121 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1122 bool parseSwizzleOffset(int64_t &Imm);
1123 bool parseSwizzleMacro(int64_t &Imm);
1124 bool parseSwizzleQuadPerm(int64_t &Imm);
1125 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1126 bool parseSwizzleBroadcast(int64_t &Imm);
1127 bool parseSwizzleSwap(int64_t &Imm);
1128 bool parseSwizzleReverse(int64_t &Imm);
1130 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1131 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1132 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1133 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1134 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1136 AMDGPUOperand::Ptr defaultGLC() const;
1137 AMDGPUOperand::Ptr defaultSLC() const;
1139 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1140 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1141 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1142 AMDGPUOperand::Ptr defaultOffsetU12() const;
1143 AMDGPUOperand::Ptr defaultOffsetS13() const;
1145 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1147 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1148 OptionalImmIndexMap &OptionalIdx);
1149 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1150 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1151 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1153 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1155 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1156 bool IsAtomic = false);
1157 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1159 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1160 AMDGPUOperand::Ptr defaultRowMask() const;
1161 AMDGPUOperand::Ptr defaultBankMask() const;
1162 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1163 void cvtDPP(MCInst &Inst, const OperandVector &Operands);
1165 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1166 AMDGPUOperand::ImmTy Type);
1167 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1168 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1169 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1170 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1171 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1172 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1173 uint64_t BasicInstType, bool skipVcc = false);
1176 struct OptionalOperand {
1177 const char *Name;
1178 AMDGPUOperand::ImmTy Type;
1179 bool IsBit;
1180 bool (*ConvertResult)(int64_t&);
1183 } // end anonymous namespace
1185 // May be called with integer type with equivalent bitwidth.
1186 static const fltSemantics *getFltSemantics(unsigned Size) {
1187 switch (Size) {
1188 case 4:
1189 return &APFloat::IEEEsingle();
1190 case 8:
1191 return &APFloat::IEEEdouble();
1192 case 2:
1193 return &APFloat::IEEEhalf();
1194 default:
1195 llvm_unreachable("unsupported fp type");
1199 static const fltSemantics *getFltSemantics(MVT VT) {
1200 return getFltSemantics(VT.getSizeInBits() / 8);
1203 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1204 switch (OperandType) {
1205 case AMDGPU::OPERAND_REG_IMM_INT32:
1206 case AMDGPU::OPERAND_REG_IMM_FP32:
1207 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1208 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1209 return &APFloat::IEEEsingle();
1210 case AMDGPU::OPERAND_REG_IMM_INT64:
1211 case AMDGPU::OPERAND_REG_IMM_FP64:
1212 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1213 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1214 return &APFloat::IEEEdouble();
1215 case AMDGPU::OPERAND_REG_IMM_INT16:
1216 case AMDGPU::OPERAND_REG_IMM_FP16:
1217 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1218 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1219 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1220 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1221 return &APFloat::IEEEhalf();
1222 default:
1223 llvm_unreachable("unsupported fp type");
1227 //===----------------------------------------------------------------------===//
1228 // Operand
1229 //===----------------------------------------------------------------------===//
1231 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1232 bool Lost;
1234 // Convert the literal to the target type's floating-point semantics.
1235 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1236 APFloat::rmNearestTiesToEven,
1237 &Lost);
1238 // We allow precision loss but not overflow or underflow.
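// Illustrative: 1.0 converts to f16 exactly, 3.14159 merely loses precision
// and is still accepted, while 1.0e10 overflows f16 and is rejected.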
1239 if (Status != APFloat::opOK &&
1240 Lost &&
1241 ((Status & APFloat::opOverflow) != 0 ||
1242 (Status & APFloat::opUnderflow) != 0)) {
1243 return false;
1246 return true;
1249 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1250 if (!isImmTy(ImmTyNone)) {
1251 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1252 return false;
1254 // TODO: We should avoid using host floating point here. It would be better
1255 // to check the float bit values, which is what a few other places do.
1256 // We've had bot failures before due to weird NaN support on MIPS hosts.
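// For reference (hedged summary of the usual GFX inline-constant encoding):
// integers -16..64, 0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) when
// FeatureInv2PiInlineImm is available; everything else requires a literal.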
1258 APInt Literal(64, Imm.Val);
1260 if (Imm.IsFPImm) { // We got fp literal token
1261 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1262 return AMDGPU::isInlinableLiteral64(Imm.Val,
1263 AsmParser->hasInv2PiInlineImm());
1266 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1267 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1268 return false;
1270 if (type.getScalarSizeInBits() == 16) {
1271 return AMDGPU::isInlinableLiteral16(
1272 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1273 AsmParser->hasInv2PiInlineImm());
1276 // Check if single precision literal is inlinable
1277 return AMDGPU::isInlinableLiteral32(
1278 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1279 AsmParser->hasInv2PiInlineImm());
1282 // We got int literal token.
1283 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1284 return AMDGPU::isInlinableLiteral64(Imm.Val,
1285 AsmParser->hasInv2PiInlineImm());
1288 if (type.getScalarSizeInBits() == 16) {
1289 return AMDGPU::isInlinableLiteral16(
1290 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1291 AsmParser->hasInv2PiInlineImm());
1294 return AMDGPU::isInlinableLiteral32(
1295 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1296 AsmParser->hasInv2PiInlineImm());
1299 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1300 // Check that this immediate can be added as literal
1301 if (!isImmTy(ImmTyNone)) {
1302 return false;
1305 if (!Imm.IsFPImm) {
1306 // We got int literal token.
1308 if (type == MVT::f64 && hasFPModifiers()) {
1309 // FP modifiers cannot be applied to integer literals while preserving the
1310 // same semantics for VOP1/2/C and VOP3, because of integer truncation.
1311 // To avoid ambiguity, reject these cases.
1312 return false;
1315 unsigned Size = type.getSizeInBits();
1316 if (Size == 64)
1317 Size = 32;
1319 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1320 // types.
1321 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
1324 // We got fp literal token
1325 if (type == MVT::f64) { // Expected 64-bit fp operand
1326 // The low 32 bits of such a literal are set to zero, but we accept these literals.
1327 return true;
1330 if (type == MVT::i64) { // Expected 64-bit int operand
1331 // We don't allow fp literals in 64-bit integer instructions. It is
1332 // unclear how we should encode them.
1333 return false;
1336 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1337 return canLosslesslyConvertToFPType(FPLiteral, type);
1340 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1341 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1344 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1345 if (AsmParser->isVI())
1346 return isVReg();
1347 else if (AsmParser->isGFX9())
1348 return isRegKind() || isInlinableImm(type);
1349 else
1350 return false;
1353 bool AMDGPUOperand::isSDWAFP16Operand() const {
1354 return isSDWAOperand(MVT::f16);
1357 bool AMDGPUOperand::isSDWAFP32Operand() const {
1358 return isSDWAOperand(MVT::f32);
1361 bool AMDGPUOperand::isSDWAInt16Operand() const {
1362 return isSDWAOperand(MVT::i16);
1365 bool AMDGPUOperand::isSDWAInt32Operand() const {
1366 return isSDWAOperand(MVT::i32);
1369 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1371 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1372 assert(Size == 2 || Size == 4 || Size == 8);
1374 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1376 if (Imm.Mods.Abs) {
1377 Val &= ~FpSignMask;
1379 if (Imm.Mods.Neg) {
1380 Val ^= FpSignMask;
1383 return Val;
1386 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1387 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1388 Inst.getNumOperands())) {
1389 addLiteralImmOperand(Inst, Imm.Val,
1390 ApplyModifiers &
1391 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1392 } else {
1393 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1394 Inst.addOperand(MCOperand::createImm(Imm.Val));
1398 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1399 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1400 auto OpNum = Inst.getNumOperands();
1401 // Check that this operand accepts literals
1402 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1404 if (ApplyModifiers) {
1405 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1406 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1407 Val = applyInputFPModifiers(Val, Size);
1410 APInt Literal(64, Val);
1411 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1413 if (Imm.IsFPImm) { // We got fp literal token
1414 switch (OpTy) {
1415 case AMDGPU::OPERAND_REG_IMM_INT64:
1416 case AMDGPU::OPERAND_REG_IMM_FP64:
1417 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1418 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1419 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1420 AsmParser->hasInv2PiInlineImm())) {
1421 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1422 return;
1425 // Non-inlineable
1426 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1427 // For fp operands we check if low 32 bits are zeros
1428 if (Literal.getLoBits(32) != 0) {
1429 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1430 "Can't encode literal as exact 64-bit floating-point operand. "
1431 "Low 32-bits will be set to zero");
1434 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1435 return;
1438 // We don't allow fp literals in 64-bit integer instructions. It is
1439 // unclear how we should encode them. This case should be checked earlier
1440 // in predicate methods (isLiteralImm())
1441 llvm_unreachable("fp literal in 64-bit integer instruction.");
1443 case AMDGPU::OPERAND_REG_IMM_INT32:
1444 case AMDGPU::OPERAND_REG_IMM_FP32:
1445 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1446 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1447 case AMDGPU::OPERAND_REG_IMM_INT16:
1448 case AMDGPU::OPERAND_REG_IMM_FP16:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1451 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1452 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1453 bool lost;
1454 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1455 // Convert literal to single precision
1456 FPLiteral.convert(*getOpFltSemantics(OpTy),
1457 APFloat::rmNearestTiesToEven, &lost);
1458 // We allow precision lost but not overflow or underflow. This should be
1459 // checked earlier in isLiteralImm()
1461 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1462 if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
1463 OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
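// Packed 16-bit operands replicate the converted value into both halves of
// the 32-bit encoding, e.g. (illustrative) 1.0 -> 0x3C00 -> 0x3C003C00.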
1464 ImmVal |= (ImmVal << 16);
1467 Inst.addOperand(MCOperand::createImm(ImmVal));
1468 return;
1470 default:
1471 llvm_unreachable("invalid operand size");
1474 return;
1477 // We got int literal token.
1478 // Only sign extend inline immediates.
1479 // FIXME: No errors on truncation
1480 switch (OpTy) {
1481 case AMDGPU::OPERAND_REG_IMM_INT32:
1482 case AMDGPU::OPERAND_REG_IMM_FP32:
1483 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1484 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1485 if (isInt<32>(Val) &&
1486 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1487 AsmParser->hasInv2PiInlineImm())) {
1488 Inst.addOperand(MCOperand::createImm(Val));
1489 return;
1492 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1493 return;
1495 case AMDGPU::OPERAND_REG_IMM_INT64:
1496 case AMDGPU::OPERAND_REG_IMM_FP64:
1497 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1498 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1499 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1500 Inst.addOperand(MCOperand::createImm(Val));
1501 return;
1504 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1505 return;
1507 case AMDGPU::OPERAND_REG_IMM_INT16:
1508 case AMDGPU::OPERAND_REG_IMM_FP16:
1509 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1510 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1511 if (isInt<16>(Val) &&
1512 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1513 AsmParser->hasInv2PiInlineImm())) {
1514 Inst.addOperand(MCOperand::createImm(Val));
1515 return;
1518 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1519 return;
1521 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1522 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
1523 auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
1524 assert(AMDGPU::isInlinableLiteral16(LiteralVal,
1525 AsmParser->hasInv2PiInlineImm()));
1527 uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
1528 static_cast<uint32_t>(LiteralVal);
1529 Inst.addOperand(MCOperand::createImm(ImmVal));
1530 return;
1532 default:
1533 llvm_unreachable("invalid operand size");
1537 template <unsigned Bitwidth>
1538 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1539 APInt Literal(64, Imm.Val);
1541 if (!Imm.IsFPImm) {
1542 // We got int literal token.
1543 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1544 return;
1547 bool Lost;
1548 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1549 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1550 APFloat::rmNearestTiesToEven, &Lost);
1551 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1554 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1555 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1558 //===----------------------------------------------------------------------===//
1559 // AsmParser
1560 //===----------------------------------------------------------------------===//
1562 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1563 if (Is == IS_VGPR) {
1564 switch (RegWidth) {
1565 default: return -1;
1566 case 1: return AMDGPU::VGPR_32RegClassID;
1567 case 2: return AMDGPU::VReg_64RegClassID;
1568 case 3: return AMDGPU::VReg_96RegClassID;
1569 case 4: return AMDGPU::VReg_128RegClassID;
1570 case 8: return AMDGPU::VReg_256RegClassID;
1571 case 16: return AMDGPU::VReg_512RegClassID;
1573 } else if (Is == IS_TTMP) {
1574 switch (RegWidth) {
1575 default: return -1;
1576 case 1: return AMDGPU::TTMP_32RegClassID;
1577 case 2: return AMDGPU::TTMP_64RegClassID;
1578 case 4: return AMDGPU::TTMP_128RegClassID;
1579 case 8: return AMDGPU::TTMP_256RegClassID;
1580 case 16: return AMDGPU::TTMP_512RegClassID;
1582 } else if (Is == IS_SGPR) {
1583 switch (RegWidth) {
1584 default: return -1;
1585 case 1: return AMDGPU::SGPR_32RegClassID;
1586 case 2: return AMDGPU::SGPR_64RegClassID;
1587 case 4: return AMDGPU::SGPR_128RegClassID;
1588 case 8: return AMDGPU::SGPR_256RegClassID;
1589 case 16: return AMDGPU::SGPR_512RegClassID;
1592 return -1;
1595 static unsigned getSpecialRegForName(StringRef RegName) {
1596 return StringSwitch<unsigned>(RegName)
1597 .Case("exec", AMDGPU::EXEC)
1598 .Case("vcc", AMDGPU::VCC)
1599 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1600 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1601 .Case("m0", AMDGPU::M0)
1602 .Case("scc", AMDGPU::SCC)
1603 .Case("tba", AMDGPU::TBA)
1604 .Case("tma", AMDGPU::TMA)
1605 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1606 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1607 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1608 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1609 .Case("vcc_lo", AMDGPU::VCC_LO)
1610 .Case("vcc_hi", AMDGPU::VCC_HI)
1611 .Case("exec_lo", AMDGPU::EXEC_LO)
1612 .Case("exec_hi", AMDGPU::EXEC_HI)
1613 .Case("tma_lo", AMDGPU::TMA_LO)
1614 .Case("tma_hi", AMDGPU::TMA_HI)
1615 .Case("tba_lo", AMDGPU::TBA_LO)
1616 .Case("tba_hi", AMDGPU::TBA_HI)
1617 .Default(0);
1620 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1621 SMLoc &EndLoc) {
1622 auto R = parseRegister();
1623 if (!R) return true;
1624 assert(R->isReg());
1625 RegNo = R->getReg();
1626 StartLoc = R->getStartLoc();
1627 EndLoc = R->getEndLoc();
1628 return false;
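// AddNextRegisterToList grows a register list such as "[s0, s1, s2, s3]" one
// element at a time: special register halves like exec_lo/exec_hi merge into
// their 64-bit alias (EXEC), while VGPR/SGPR/TTMP entries must be consecutive.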
1631 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1632 RegisterKind RegKind, unsigned Reg1,
1633 unsigned RegNum) {
1634 switch (RegKind) {
1635 case IS_SPECIAL:
1636 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1637 Reg = AMDGPU::EXEC;
1638 RegWidth = 2;
1639 return true;
1641 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1642 Reg = AMDGPU::FLAT_SCR;
1643 RegWidth = 2;
1644 return true;
1646 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1647 Reg = AMDGPU::XNACK_MASK;
1648 RegWidth = 2;
1649 return true;
1651 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1652 Reg = AMDGPU::VCC;
1653 RegWidth = 2;
1654 return true;
1656 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1657 Reg = AMDGPU::TBA;
1658 RegWidth = 2;
1659 return true;
1661 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1662 Reg = AMDGPU::TMA;
1663 RegWidth = 2;
1664 return true;
1666 return false;
1667 case IS_VGPR:
1668 case IS_SGPR:
1669 case IS_TTMP:
1670 if (Reg1 != Reg + RegWidth) {
1671 return false;
1673 RegWidth++;
1674 return true;
1675 default:
1676 llvm_unreachable("unexpected register kind");
1680 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1681 unsigned &RegNum, unsigned &RegWidth,
1682 unsigned *DwordRegIndex) {
1683 if (DwordRegIndex) { *DwordRegIndex = 0; }
1684 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
1685 if (getLexer().is(AsmToken::Identifier)) {
1686 StringRef RegName = Parser.getTok().getString();
1687 if ((Reg = getSpecialRegForName(RegName))) {
1688 Parser.Lex();
1689 RegKind = IS_SPECIAL;
1690 } else {
1691 unsigned RegNumIndex = 0;
1692 if (RegName[0] == 'v') {
1693 RegNumIndex = 1;
1694 RegKind = IS_VGPR;
1695 } else if (RegName[0] == 's') {
1696 RegNumIndex = 1;
1697 RegKind = IS_SGPR;
1698 } else if (RegName.startswith("ttmp")) {
1699 RegNumIndex = strlen("ttmp");
1700 RegKind = IS_TTMP;
1701 } else {
1702 return false;
1704 if (RegName.size() > RegNumIndex) {
1705 // Single 32-bit register: vXX.
1706 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
1707 return false;
1708 Parser.Lex();
1709 RegWidth = 1;
1710 } else {
1711 // Range of registers: v[XX:YY]. ":YY" is optional.
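// e.g. "s[2:5]" yields RegNum = 2 and RegWidth = 4, while "v[7]" is
// equivalent to plain "v7".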
1712 Parser.Lex();
1713 int64_t RegLo, RegHi;
1714 if (getLexer().isNot(AsmToken::LBrac))
1715 return false;
1716 Parser.Lex();
1718 if (getParser().parseAbsoluteExpression(RegLo))
1719 return false;
1721 const bool isRBrace = getLexer().is(AsmToken::RBrac);
1722 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
1723 return false;
1724 Parser.Lex();
1726 if (isRBrace) {
1727 RegHi = RegLo;
1728 } else {
1729 if (getParser().parseAbsoluteExpression(RegHi))
1730 return false;
1732 if (getLexer().isNot(AsmToken::RBrac))
1733 return false;
1734 Parser.Lex();
1736 RegNum = (unsigned) RegLo;
1737 RegWidth = (RegHi - RegLo) + 1;
1740 } else if (getLexer().is(AsmToken::LBrac)) {
1741 // List of consecutive registers: [s0,s1,s2,s3]
1742 Parser.Lex();
1743 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
1744 return false;
1745 if (RegWidth != 1)
1746 return false;
1747 RegisterKind RegKind1;
1748 unsigned Reg1, RegNum1, RegWidth1;
1749 do {
1750 if (getLexer().is(AsmToken::Comma)) {
1751 Parser.Lex();
1752 } else if (getLexer().is(AsmToken::RBrac)) {
1753 Parser.Lex();
1754 break;
1755 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
1756 if (RegWidth1 != 1) {
1757 return false;
1759 if (RegKind1 != RegKind) {
1760 return false;
1762 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
1763 return false;
1765 } else {
1766 return false;
1768 } while (true);
1769 } else {
1770 return false;
1772 switch (RegKind) {
1773 case IS_SPECIAL:
1774 RegNum = 0;
1775 RegWidth = 1;
1776 break;
1777 case IS_VGPR:
1778 case IS_SGPR:
1779 case IS_TTMP:
1781 unsigned Size = 1;
1782 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
1783 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
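// e.g. "s[1:2]" is rejected because a 64-bit SGPR range must start at an
// even register index, whereas "s[2:3]" is accepted.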
1784 Size = std::min(RegWidth, 4u);
1786 if (RegNum % Size != 0)
1787 return false;
1788 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
1789 RegNum = RegNum / Size;
1790 int RCID = getRegClass(RegKind, RegWidth);
1791 if (RCID == -1)
1792 return false;
1793 const MCRegisterClass RC = TRI->getRegClass(RCID);
1794 if (RegNum >= RC.getNumRegs())
1795 return false;
1796 Reg = RC.getRegister(RegNum);
1797 break;
1800 default:
1801 llvm_unreachable("unexpected register kind");
1804 if (!subtargetHasRegister(*TRI, Reg))
1805 return false;
1806 return true;
1809 Optional<StringRef>
1810 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
1811 switch (RegKind) {
1812 case IS_VGPR:
1813 return StringRef(".amdgcn.next_free_vgpr");
1814 case IS_SGPR:
1815 return StringRef(".amdgcn.next_free_sgpr");
1816 default:
1817 return None;
1821 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
1822 auto SymbolName = getGprCountSymbolName(RegKind);
1823 assert(SymbolName && "initializing invalid register kind");
1824 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1825 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
1828 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
1829 unsigned DwordRegIndex,
1830 unsigned RegWidth) {
1831 // Symbols are only defined for GCN targets
1832 if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
1833 return true;
1835 auto SymbolName = getGprCountSymbolName(RegKind);
1836 if (!SymbolName)
1837 return true;
1838 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
1840 int64_t NewMax = DwordRegIndex + RegWidth - 1;
1841 int64_t OldCount;
1843 if (!Sym->isVariable())
1844 return !Error(getParser().getTok().getLoc(),
1845 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
1846 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
1847 return !Error(
1848 getParser().getTok().getLoc(),
1849 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
1851 if (OldCount <= NewMax)
1852 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
1854 return true;
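// Illustrative note (not part of the original source): for example, parsing
// v[6:7] yields DwordRegIndex = 6 and RegWidth = 2, so NewMax = 7 and
// .amdgcn.next_free_vgpr is raised to at least 8.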
1857 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
1858 const auto &Tok = Parser.getTok();
1859 SMLoc StartLoc = Tok.getLoc();
1860 SMLoc EndLoc = Tok.getEndLoc();
1861 RegisterKind RegKind;
1862 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
1864 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
1865 return nullptr;
1867 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1868 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
1869 return nullptr;
1870 } else
1871 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
1872 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
1875 bool
1876 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
1877 if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
1878 (getLexer().getKind() == AsmToken::Integer ||
1879 getLexer().getKind() == AsmToken::Real)) {
1880 // This is a workaround for handling operands like these:
1881 // |1.0|
1882 // |-1|
1883 // This syntax is not compatible with the syntax of standard
1884 // MC expressions (due to the trailing '|').
1886 SMLoc EndLoc;
1887 const MCExpr *Expr;
1889 if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
1890 return true;
1893 return !Expr->evaluateAsAbsolute(Val);
1896 return getParser().parseAbsoluteExpression(Val);
1899 OperandMatchResultTy
1900 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
1901 // TODO: add syntactic sugar for 1/(2*PI)
1902 bool Minus = false;
1903 if (getLexer().getKind() == AsmToken::Minus) {
1904 const AsmToken NextToken = getLexer().peekTok();
1905 if (!NextToken.is(AsmToken::Integer) &&
1906 !NextToken.is(AsmToken::Real)) {
1907 return MatchOperand_NoMatch;
1909 Minus = true;
1910 Parser.Lex();
1913 SMLoc S = Parser.getTok().getLoc();
1914 switch(getLexer().getKind()) {
1915 case AsmToken::Integer: {
1916 int64_t IntVal;
1917 if (parseAbsoluteExpr(IntVal, AbsMod))
1918 return MatchOperand_ParseFail;
1919 if (Minus)
1920 IntVal *= -1;
1921 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
1922 return MatchOperand_Success;
1924 case AsmToken::Real: {
1925 int64_t IntVal;
1926 if (parseAbsoluteExpr(IntVal, AbsMod))
1927 return MatchOperand_ParseFail;
1929 APFloat F(BitsToDouble(IntVal));
1930 if (Minus)
1931 F.changeSign();
1932 Operands.push_back(
1933 AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
1934 AMDGPUOperand::ImmTyNone, true));
1935 return MatchOperand_Success;
1937 default:
1938 return MatchOperand_NoMatch;
1942 OperandMatchResultTy
1943 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
1944 if (auto R = parseRegister()) {
1945 assert(R->isReg());
1946 R->Reg.IsForcedVOP3 = isForcedVOP3();
1947 Operands.push_back(std::move(R));
1948 return MatchOperand_Success;
1950 return MatchOperand_NoMatch;
1953 OperandMatchResultTy
1954 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
1955 auto res = parseImm(Operands, AbsMod);
1956 if (res != MatchOperand_NoMatch) {
1957 return res;
1960 return parseReg(Operands);
1963 OperandMatchResultTy
1964 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
1965 bool AllowImm) {
1966 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
1968 if (getLexer().getKind()== AsmToken::Minus) {
1969 const AsmToken NextToken = getLexer().peekTok();
1971 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
1972 if (NextToken.is(AsmToken::Minus)) {
1973 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
1974 return MatchOperand_ParseFail;
1977 // '-' followed by an integer literal N should be interpreted as integer
1978 // negation rather than a floating-point NEG modifier applied to N.
1979 // Besides being counter-intuitive, such use of a floating-point NEG modifier
1980 // results in different meanings for integer literals used with VOP1/2/C
1981 // and VOP3, for example:
1982 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
1983 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
1984 // Negative fp literals should be handled likewise for uniformity.
1985 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
1986 Parser.Lex();
1987 Negate = true;
1991 if (getLexer().getKind() == AsmToken::Identifier &&
1992 Parser.getTok().getString() == "neg") {
1993 if (Negate) {
1994 Error(Parser.getTok().getLoc(), "expected register or immediate");
1995 return MatchOperand_ParseFail;
1997 Parser.Lex();
1998 Negate2 = true;
1999 if (getLexer().isNot(AsmToken::LParen)) {
2000 Error(Parser.getTok().getLoc(), "expected left paren after neg");
2001 return MatchOperand_ParseFail;
2003 Parser.Lex();
2006 if (getLexer().getKind() == AsmToken::Identifier &&
2007 Parser.getTok().getString() == "abs") {
2008 Parser.Lex();
2009 Abs2 = true;
2010 if (getLexer().isNot(AsmToken::LParen)) {
2011 Error(Parser.getTok().getLoc(), "expected left paren after abs");
2012 return MatchOperand_ParseFail;
2014 Parser.Lex();
2017 if (getLexer().getKind() == AsmToken::Pipe) {
2018 if (Abs2) {
2019 Error(Parser.getTok().getLoc(), "expected register or immediate");
2020 return MatchOperand_ParseFail;
2022 Parser.Lex();
2023 Abs = true;
2026 OperandMatchResultTy Res;
2027 if (AllowImm) {
2028 Res = parseRegOrImm(Operands, Abs);
2029 } else {
2030 Res = parseReg(Operands);
2032 if (Res != MatchOperand_Success) {
2033 return Res;
2036 AMDGPUOperand::Modifiers Mods;
2037 if (Abs) {
2038 if (getLexer().getKind() != AsmToken::Pipe) {
2039 Error(Parser.getTok().getLoc(), "expected vertical bar");
2040 return MatchOperand_ParseFail;
2042 Parser.Lex();
2043 Mods.Abs = true;
2045 if (Abs2) {
2046 if (getLexer().isNot(AsmToken::RParen)) {
2047 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2048 return MatchOperand_ParseFail;
2050 Parser.Lex();
2051 Mods.Abs = true;
2054 if (Negate) {
2055 Mods.Neg = true;
2056 } else if (Negate2) {
2057 if (getLexer().isNot(AsmToken::RParen)) {
2058 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2059 return MatchOperand_ParseFail;
2061 Parser.Lex();
2062 Mods.Neg = true;
2065 if (Mods.hasFPModifiers()) {
2066 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2067 Op.setModifiers(Mods);
2069 return MatchOperand_Success;
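// Illustrative note (not part of the original source): operand forms accepted
// by this parser include, for example:
//   -v0        - NEG modifier (or integer negation for literals, see above)
//   |v1|       - ABS modifier written with vertical bars
//   abs(v2)    - ABS modifier written in the named form
//   neg(|v3|)  - combined NEG and ABS modifiers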
2072 OperandMatchResultTy
2073 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2074 bool AllowImm) {
2075 bool Sext = false;
2077 if (getLexer().getKind() == AsmToken::Identifier &&
2078 Parser.getTok().getString() == "sext") {
2079 Parser.Lex();
2080 Sext = true;
2081 if (getLexer().isNot(AsmToken::LParen)) {
2082 Error(Parser.getTok().getLoc(), "expected left paren after sext");
2083 return MatchOperand_ParseFail;
2085 Parser.Lex();
2088 OperandMatchResultTy Res;
2089 if (AllowImm) {
2090 Res = parseRegOrImm(Operands);
2091 } else {
2092 Res = parseReg(Operands);
2094 if (Res != MatchOperand_Success) {
2095 return Res;
2098 AMDGPUOperand::Modifiers Mods;
2099 if (Sext) {
2100 if (getLexer().isNot(AsmToken::RParen)) {
2101 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2102 return MatchOperand_ParseFail;
2104 Parser.Lex();
2105 Mods.Sext = true;
2108 if (Mods.hasIntModifiers()) {
2109 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2110 Op.setModifiers(Mods);
2113 return MatchOperand_Success;
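// Illustrative note (not part of the original source): the integer input
// modifier has a single named form, e.g. sext(v0).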
2116 OperandMatchResultTy
2117 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2118 return parseRegOrImmWithFPInputMods(Operands, false);
2121 OperandMatchResultTy
2122 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2123 return parseRegOrImmWithIntInputMods(Operands, false);
2126 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2127 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2128 if (Reg) {
2129 Operands.push_back(std::move(Reg));
2130 return MatchOperand_Success;
2133 const AsmToken &Tok = Parser.getTok();
2134 if (Tok.getString() == "off") {
2135 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2136 AMDGPUOperand::ImmTyOff, false));
2137 Parser.Lex();
2138 return MatchOperand_Success;
2141 return MatchOperand_NoMatch;
2144 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2145 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2147 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2148 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2149 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2150 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2151 return Match_InvalidOperand;
2153 if ((TSFlags & SIInstrFlags::VOP3) &&
2154 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2155 getForcedEncodingSize() != 64)
2156 return Match_PreferE32;
2158 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2159 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2160 // v_mac_f32/16 allow only dst_sel == DWORD.
2161 auto OpNum =
2162 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2163 const auto &Op = Inst.getOperand(OpNum);
2164 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2165 return Match_InvalidOperand;
2169 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2170 // FIXME: Produces an error without reporting the correct column.
2171 auto OpNum =
2172 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2173 const auto &Op = Inst.getOperand(OpNum);
2174 if (Op.getImm() != 0)
2175 return Match_InvalidOperand;
2178 return Match_Success;
2181 // What asm variants we should check
2182 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2183 if (getForcedEncodingSize() == 32) {
2184 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2185 return makeArrayRef(Variants);
2188 if (isForcedVOP3()) {
2189 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2190 return makeArrayRef(Variants);
2193 if (isForcedSDWA()) {
2194 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2195 AMDGPUAsmVariants::SDWA9};
2196 return makeArrayRef(Variants);
2199 if (isForcedDPP()) {
2200 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2201 return makeArrayRef(Variants);
2204 static const unsigned Variants[] = {
2205 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2206 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2209 return makeArrayRef(Variants);
2212 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2213 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2214 const unsigned Num = Desc.getNumImplicitUses();
2215 for (unsigned i = 0; i < Num; ++i) {
2216 unsigned Reg = Desc.ImplicitUses[i];
2217 switch (Reg) {
2218 case AMDGPU::FLAT_SCR:
2219 case AMDGPU::VCC:
2220 case AMDGPU::M0:
2221 return Reg;
2222 default:
2223 break;
2226 return AMDGPU::NoRegister;
2229 // NB: This code is correct only when used to check constant
2230 // bus limitations because GFX7 has no f16 inline constants.
2231 // Note that there are no cases when a GFX7 opcode violates
2232 // constant bus limitations due to the use of an f16 constant.
2233 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2234 unsigned OpIdx) const {
2235 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2237 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2238 return false;
2241 const MCOperand &MO = Inst.getOperand(OpIdx);
2243 int64_t Val = MO.getImm();
2244 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2246 switch (OpSize) { // expected operand size
2247 case 8:
2248 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2249 case 4:
2250 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2251 case 2: {
2252 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2253 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2254 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2255 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2256 } else {
2257 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2260 default:
2261 llvm_unreachable("invalid operand size");
2265 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2266 const MCOperand &MO = Inst.getOperand(OpIdx);
2267 if (MO.isImm()) {
2268 return !isInlineConstant(Inst, OpIdx);
2270 return !MO.isReg() ||
2271 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2274 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2275 const unsigned Opcode = Inst.getOpcode();
2276 const MCInstrDesc &Desc = MII.get(Opcode);
2277 unsigned ConstantBusUseCount = 0;
2279 if (Desc.TSFlags &
2280 (SIInstrFlags::VOPC |
2281 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2282 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2283 SIInstrFlags::SDWA)) {
2284 // Check special imm operands (used by madmk, etc)
2285 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2286 ++ConstantBusUseCount;
2289 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2290 if (SGPRUsed != AMDGPU::NoRegister) {
2291 ++ConstantBusUseCount;
2294 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2295 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2296 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2298 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2300 for (int OpIdx : OpIndices) {
2301 if (OpIdx == -1) break;
2303 const MCOperand &MO = Inst.getOperand(OpIdx);
2304 if (usesConstantBus(Inst, OpIdx)) {
2305 if (MO.isReg()) {
2306 const unsigned Reg = mc2PseudoReg(MO.getReg());
2307 // Pairs of registers with a partial intersection like these
2308 // s0, s[0:1]
2309 // flat_scratch_lo, flat_scratch
2310 // flat_scratch_lo, flat_scratch_hi
2311 // are theoretically valid but they are disabled anyway.
2312 // Note that this code mimics SIInstrInfo::verifyInstruction
2313 if (Reg != SGPRUsed) {
2314 ++ConstantBusUseCount;
2316 SGPRUsed = Reg;
2317 } else { // Expression or a literal
2318 ++ConstantBusUseCount;
2324 return ConstantBusUseCount <= 1;
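// Illustrative note (not part of the original source): under this rule an
// instruction such as v_add_f32_e64 v0, s0, s1 would be rejected (two
// distinct SGPRs on the constant bus), while v_add_f32_e64 v0, s0, s0 or
// v_add_f32_e64 v0, s0, v1 would pass (at most one constant bus read).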
2327 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2328 const unsigned Opcode = Inst.getOpcode();
2329 const MCInstrDesc &Desc = MII.get(Opcode);
2331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2332 if (DstIdx == -1 ||
2333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2334 return true;
2337 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2343 assert(DstIdx != -1);
2344 const MCOperand &Dst = Inst.getOperand(DstIdx);
2345 assert(Dst.isReg());
2346 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2350 for (int SrcIdx : SrcIndices) {
2351 if (SrcIdx == -1) break;
2352 const MCOperand &Src = Inst.getOperand(SrcIdx);
2353 if (Src.isReg()) {
2354 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2355 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2356 return false;
2361 return true;
2364 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2366 const unsigned Opc = Inst.getOpcode();
2367 const MCInstrDesc &Desc = MII.get(Opc);
2369 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2370 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2371 assert(ClampIdx != -1);
2372 return Inst.getOperand(ClampIdx).getImm() == 0;
2375 return true;
2378 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2380 const unsigned Opc = Inst.getOpcode();
2381 const MCInstrDesc &Desc = MII.get(Opc);
2383 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2384 return true;
2386 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2387 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2388 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2390 assert(VDataIdx != -1);
2391 assert(DMaskIdx != -1);
2392 assert(TFEIdx != -1);
2394 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2395 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2396 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2397 if (DMask == 0)
2398 DMask = 1;
2400 unsigned DataSize =
2401 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2402 if (hasPackedD16()) {
2403 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2404 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2405 DataSize = (DataSize + 1) / 2;
2408 return (VDataSize / 4) == DataSize + TFESize;
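// Illustrative note (not part of the original source): for a non-gather load
// with dmask = 0b0111 the expected data size is 3 dwords, 4 with tfe set,
// and (3 + 1) / 2 = 2 dwords when a packed d16 form is used, so vdata must
// be a 2-, 3- or 4-register operand accordingly.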
2411 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2413 const unsigned Opc = Inst.getOpcode();
2414 const MCInstrDesc &Desc = MII.get(Opc);
2416 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2417 return true;
2418 if (!Desc.mayLoad() || !Desc.mayStore())
2419 return true; // Not atomic
2421 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2424 // This is an incomplete check because image_atomic_cmpswap
2425 // may only use 0x3 and 0xf while other atomic operations
2426 // may use 0x1 and 0x3. However, these limitations are
2427 // verified when we check that dmask matches dst size.
2428 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2431 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2433 const unsigned Opc = Inst.getOpcode();
2434 const MCInstrDesc &Desc = MII.get(Opc);
2436 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2437 return true;
2439 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2440 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2442 // GATHER4 instructions use dmask in a different fashion compared to
2443 // other MIMG instructions. The only useful DMASK values are
2444 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2445 // (red,red,red,red) etc.) The ISA document doesn't mention
2446 // this.
2447 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2450 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
2452 const unsigned Opc = Inst.getOpcode();
2453 const MCInstrDesc &Desc = MII.get(Opc);
2455 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2456 return true;
2458 int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
2459 assert(Idx != -1);
2461 bool R128 = (Inst.getOperand(Idx).getImm() != 0);
2463 return !R128 || hasMIMG_R128();
2466 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2468 const unsigned Opc = Inst.getOpcode();
2469 const MCInstrDesc &Desc = MII.get(Opc);
2471 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2472 return true;
2474 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2475 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2476 if (isCI() || isSI())
2477 return false;
2480 return true;
2483 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2484 const SMLoc &IDLoc) {
2485 if (!validateConstantBusLimitations(Inst)) {
2486 Error(IDLoc,
2487 "invalid operand (violates constant bus restrictions)");
2488 return false;
2490 if (!validateEarlyClobberLimitations(Inst)) {
2491 Error(IDLoc,
2492 "destination must be different than all sources");
2493 return false;
2495 if (!validateIntClampSupported(Inst)) {
2496 Error(IDLoc,
2497 "integer clamping is not supported on this GPU");
2498 return false;
2500 if (!validateMIMGR128(Inst)) {
2501 Error(IDLoc,
2502 "r128 modifier is not supported on this GPU");
2503 return false;
2505 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
2506 if (!validateMIMGD16(Inst)) {
2507 Error(IDLoc,
2508 "d16 modifier is not supported on this GPU");
2509 return false;
2511 if (!validateMIMGDataSize(Inst)) {
2512 Error(IDLoc,
2513 "image data size does not match dmask and tfe");
2514 return false;
2516 if (!validateMIMGAtomicDMask(Inst)) {
2517 Error(IDLoc,
2518 "invalid atomic image dmask");
2519 return false;
2521 if (!validateMIMGGatherDMask(Inst)) {
2522 Error(IDLoc,
2523 "invalid image_gather dmask: only one bit must be set");
2524 return false;
2527 return true;
2530 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2531 unsigned VariantID = 0);
2533 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2534 OperandVector &Operands,
2535 MCStreamer &Out,
2536 uint64_t &ErrorInfo,
2537 bool MatchingInlineAsm) {
2538 MCInst Inst;
2539 unsigned Result = Match_Success;
2540 for (auto Variant : getMatchedVariants()) {
2541 uint64_t EI;
2542 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2543 Variant);
2544 // We order match statuses from least to most specific, and use the most
2545 // specific status as the result:
2546 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2547 if ((R == Match_Success) ||
2548 (R == Match_PreferE32) ||
2549 (R == Match_MissingFeature && Result != Match_PreferE32) ||
2550 (R == Match_InvalidOperand && Result != Match_MissingFeature
2551 && Result != Match_PreferE32) ||
2552 (R == Match_MnemonicFail && Result != Match_InvalidOperand
2553 && Result != Match_MissingFeature
2554 && Result != Match_PreferE32)) {
2555 Result = R;
2556 ErrorInfo = EI;
2558 if (R == Match_Success)
2559 break;
2562 switch (Result) {
2563 default: break;
2564 case Match_Success:
2565 if (!validateInstruction(Inst, IDLoc)) {
2566 return true;
2568 Inst.setLoc(IDLoc);
2569 Out.EmitInstruction(Inst, getSTI());
2570 return false;
2572 case Match_MissingFeature:
2573 return Error(IDLoc, "instruction not supported on this GPU");
2575 case Match_MnemonicFail: {
2576 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2577 std::string Suggestion = AMDGPUMnemonicSpellCheck(
2578 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2579 return Error(IDLoc, "invalid instruction" + Suggestion,
2580 ((AMDGPUOperand &)*Operands[0]).getLocRange());
2583 case Match_InvalidOperand: {
2584 SMLoc ErrorLoc = IDLoc;
2585 if (ErrorInfo != ~0ULL) {
2586 if (ErrorInfo >= Operands.size()) {
2587 return Error(IDLoc, "too few operands for instruction");
2589 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2590 if (ErrorLoc == SMLoc())
2591 ErrorLoc = IDLoc;
2593 return Error(ErrorLoc, "invalid operand for instruction");
2596 case Match_PreferE32:
2597 return Error(IDLoc, "internal error: instruction without _e64 suffix "
2598 "should be encoded as e32");
2600 llvm_unreachable("Implement any new match types added!");
2603 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2604 int64_t Tmp = -1;
2605 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2606 return true;
2608 if (getParser().parseAbsoluteExpression(Tmp)) {
2609 return true;
2611 Ret = static_cast<uint32_t>(Tmp);
2612 return false;
2615 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2616 uint32_t &Minor) {
2617 if (ParseAsAbsoluteExpression(Major))
2618 return TokError("invalid major version");
2620 if (getLexer().isNot(AsmToken::Comma))
2621 return TokError("minor version number required, comma expected");
2622 Lex();
2624 if (ParseAsAbsoluteExpression(Minor))
2625 return TokError("invalid minor version");
2627 return false;
2630 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2631 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2632 return TokError("directive only supported for amdgcn architecture");
2634 std::string Target;
2636 SMLoc TargetStart = getTok().getLoc();
2637 if (getParser().parseEscapedString(Target))
2638 return true;
2639 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2641 std::string ExpectedTarget;
2642 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2643 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2645 if (Target != ExpectedTargetOS.str())
2646 return getParser().Error(TargetRange.Start, "target must match options",
2647 TargetRange);
2649 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2650 return false;
2653 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2654 return getParser().Error(Range.Start, "value out of range", Range);
2657 bool AMDGPUAsmParser::calculateGPRBlocks(
2658 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2659 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2660 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2661 unsigned &SGPRBlocks) {
2662 // TODO(scott.linder): These calculations are duplicated from
2663 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2664 IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
2666 unsigned NumVGPRs = NextFreeVGPR;
2667 unsigned NumSGPRs = NextFreeSGPR;
2668 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
2670 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2671 NumSGPRs > MaxAddressableNumSGPRs)
2672 return OutOfRangeError(SGPRRange);
2674 NumSGPRs +=
2675 IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
2677 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2678 NumSGPRs > MaxAddressableNumSGPRs)
2679 return OutOfRangeError(SGPRRange);
2681 if (Features.test(FeatureSGPRInitBug))
2682 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2684 VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
2685 SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
2687 return false;
2690 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2691 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2692 return TokError("directive only supported for amdgcn architecture");
2694 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2695 return TokError("directive only supported for amdhsa OS");
2697 StringRef KernelName;
2698 if (getParser().parseIdentifier(KernelName))
2699 return true;
2701 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2703 StringSet<> Seen;
2705 IsaInfo::IsaVersion IVersion =
2706 IsaInfo::getIsaVersion(getSTI().getFeatureBits());
2708 SMRange VGPRRange;
2709 uint64_t NextFreeVGPR = 0;
2710 SMRange SGPRRange;
2711 uint64_t NextFreeSGPR = 0;
2712 unsigned UserSGPRCount = 0;
2713 bool ReserveVCC = true;
2714 bool ReserveFlatScr = true;
2715 bool ReserveXNACK = hasXNACK();
2717 while (true) {
2718 while (getLexer().is(AsmToken::EndOfStatement))
2719 Lex();
2721 if (getLexer().isNot(AsmToken::Identifier))
2722 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2724 StringRef ID = getTok().getIdentifier();
2725 SMRange IDRange = getTok().getLocRange();
2726 Lex();
2728 if (ID == ".end_amdhsa_kernel")
2729 break;
2731 if (Seen.find(ID) != Seen.end())
2732 return TokError(".amdhsa_ directives cannot be repeated");
2733 Seen.insert(ID);
2735 SMLoc ValStart = getTok().getLoc();
2736 int64_t IVal;
2737 if (getParser().parseAbsoluteExpression(IVal))
2738 return true;
2739 SMLoc ValEnd = getTok().getLoc();
2740 SMRange ValRange = SMRange(ValStart, ValEnd);
2742 if (IVal < 0)
2743 return OutOfRangeError(ValRange);
2745 uint64_t Val = IVal;
2747 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
2748 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
2749 return OutOfRangeError(RANGE); \
2750 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2752 if (ID == ".amdhsa_group_segment_fixed_size") {
2753 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2754 return OutOfRangeError(ValRange);
2755 KD.group_segment_fixed_size = Val;
2756 } else if (ID == ".amdhsa_private_segment_fixed_size") {
2757 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2758 return OutOfRangeError(ValRange);
2759 KD.private_segment_fixed_size = Val;
2760 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2761 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2762 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2763 Val, ValRange);
2764 UserSGPRCount++;
2765 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2766 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2767 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2768 ValRange);
2769 UserSGPRCount++;
2770 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2771 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2772 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2773 ValRange);
2774 UserSGPRCount++;
2775 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2776 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2777 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2778 Val, ValRange);
2779 UserSGPRCount++;
2780 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2781 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2782 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2783 ValRange);
2784 UserSGPRCount++;
2785 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2786 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2787 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2788 ValRange);
2789 UserSGPRCount++;
2790 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2791 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2792 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2793 Val, ValRange);
2794 UserSGPRCount++;
2795 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2796 PARSE_BITS_ENTRY(
2797 KD.compute_pgm_rsrc2,
2798 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2799 ValRange);
2800 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2801 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2802 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2803 ValRange);
2804 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2805 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2806 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2807 ValRange);
2808 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2809 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2810 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2811 ValRange);
2812 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2814 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2815 ValRange);
2816 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2818 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2819 ValRange);
2820 } else if (ID == ".amdhsa_next_free_vgpr") {
2821 VGPRRange = ValRange;
2822 NextFreeVGPR = Val;
2823 } else if (ID == ".amdhsa_next_free_sgpr") {
2824 SGPRRange = ValRange;
2825 NextFreeSGPR = Val;
2826 } else if (ID == ".amdhsa_reserve_vcc") {
2827 if (!isUInt<1>(Val))
2828 return OutOfRangeError(ValRange);
2829 ReserveVCC = Val;
2830 } else if (ID == ".amdhsa_reserve_flat_scratch") {
2831 if (IVersion.Major < 7)
2832 return getParser().Error(IDRange.Start, "directive requires gfx7+",
2833 IDRange);
2834 if (!isUInt<1>(Val))
2835 return OutOfRangeError(ValRange);
2836 ReserveFlatScr = Val;
2837 } else if (ID == ".amdhsa_reserve_xnack_mask") {
2838 if (IVersion.Major < 8)
2839 return getParser().Error(IDRange.Start, "directive requires gfx8+",
2840 IDRange);
2841 if (!isUInt<1>(Val))
2842 return OutOfRangeError(ValRange);
2843 ReserveXNACK = Val;
2844 } else if (ID == ".amdhsa_float_round_mode_32") {
2845 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2846 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2847 } else if (ID == ".amdhsa_float_round_mode_16_64") {
2848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2849 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2850 } else if (ID == ".amdhsa_float_denorm_mode_32") {
2851 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2852 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2853 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2855 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2856 ValRange);
2857 } else if (ID == ".amdhsa_dx10_clamp") {
2858 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2859 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2860 } else if (ID == ".amdhsa_ieee_mode") {
2861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2862 Val, ValRange);
2863 } else if (ID == ".amdhsa_fp16_overflow") {
2864 if (IVersion.Major < 9)
2865 return getParser().Error(IDRange.Start, "directive requires gfx9+",
2866 IDRange);
2867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2868 ValRange);
2869 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2870 PARSE_BITS_ENTRY(
2871 KD.compute_pgm_rsrc2,
2872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2873 ValRange);
2874 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2876 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2877 Val, ValRange);
2878 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2879 PARSE_BITS_ENTRY(
2880 KD.compute_pgm_rsrc2,
2881 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2882 ValRange);
2883 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2884 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2885 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2886 Val, ValRange);
2887 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2888 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2889 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2890 Val, ValRange);
2891 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2893 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2894 Val, ValRange);
2895 } else if (ID == ".amdhsa_exception_int_div_zero") {
2896 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2897 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
2898 Val, ValRange);
2899 } else {
2900 return getParser().Error(IDRange.Start,
2901 "unknown .amdhsa_kernel directive", IDRange);
2904 #undef PARSE_BITS_ENTRY
2907 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
2908 return TokError(".amdhsa_next_free_vgpr directive is required");
2910 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
2911 return TokError(".amdhsa_next_free_sgpr directive is required");
2913 unsigned VGPRBlocks;
2914 unsigned SGPRBlocks;
2915 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
2916 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
2917 SGPRRange, VGPRBlocks, SGPRBlocks))
2918 return true;
2920 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
2921 VGPRBlocks))
2922 return OutOfRangeError(VGPRRange);
2923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2924 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
2926 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
2927 SGPRBlocks))
2928 return OutOfRangeError(SGPRRange);
2929 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2930 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2931 SGPRBlocks);
2933 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
2934 return TokError("too many user SGPRs enabled");
2935 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
2936 UserSGPRCount);
2938 getTargetStreamer().EmitAmdhsaKernelDescriptor(
2939 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
2940 ReserveFlatScr, ReserveXNACK);
2941 return false;
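// Illustrative note (not part of the original source): a minimal use of this
// directive might look like
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// where the two .amdhsa_next_free_* directives are mandatory (see the checks
// above) and every other .amdhsa_ field keeps the default from
// getDefaultAmdhsaKernelDescriptor().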
2944 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2945 uint32_t Major;
2946 uint32_t Minor;
2948 if (ParseDirectiveMajorMinor(Major, Minor))
2949 return true;
2951 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2952 return false;
2955 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2956 uint32_t Major;
2957 uint32_t Minor;
2958 uint32_t Stepping;
2959 StringRef VendorName;
2960 StringRef ArchName;
2962 // If this directive has no arguments, then use the ISA version for the
2963 // targeted GPU.
2964 if (getLexer().is(AsmToken::EndOfStatement)) {
2965 AMDGPU::IsaInfo::IsaVersion ISA =
2966 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
2967 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2968 ISA.Stepping,
2969 "AMD", "AMDGPU");
2970 return false;
2973 if (ParseDirectiveMajorMinor(Major, Minor))
2974 return true;
2976 if (getLexer().isNot(AsmToken::Comma))
2977 return TokError("stepping version number required, comma expected");
2978 Lex();
2980 if (ParseAsAbsoluteExpression(Stepping))
2981 return TokError("invalid stepping version");
2983 if (getLexer().isNot(AsmToken::Comma))
2984 return TokError("vendor name required, comma expected");
2985 Lex();
2987 if (getLexer().isNot(AsmToken::String))
2988 return TokError("invalid vendor name");
2990 VendorName = getLexer().getTok().getStringContents();
2991 Lex();
2993 if (getLexer().isNot(AsmToken::Comma))
2994 return TokError("arch name required, comma expected");
2995 Lex();
2997 if (getLexer().isNot(AsmToken::String))
2998 return TokError("invalid arch name");
3000 ArchName = getLexer().getTok().getStringContents();
3001 Lex();
3003 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3004 VendorName, ArchName);
3005 return false;
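// Illustrative note (not part of the original source): this directive is
// either written without arguments, in which case the target's own ISA
// version is emitted, or spelled out in full, e.g.
//   .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
// with major, minor, stepping, vendor and arch in that order.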
3008 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3009 amd_kernel_code_t &Header) {
3010 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3011 // assembly for backwards compatibility.
3012 if (ID == "max_scratch_backing_memory_byte_size") {
3013 Parser.eatToEndOfStatement();
3014 return false;
3017 SmallString<40> ErrStr;
3018 raw_svector_ostream Err(ErrStr);
3019 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3020 return TokError(Err.str());
3022 Lex();
3023 return false;
3026 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3027 amd_kernel_code_t Header;
3028 AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
3030 while (true) {
3031 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3032 // will set the current token to EndOfStatement.
3033 while(getLexer().is(AsmToken::EndOfStatement))
3034 Lex();
3036 if (getLexer().isNot(AsmToken::Identifier))
3037 return TokError("expected value identifier or .end_amd_kernel_code_t");
3039 StringRef ID = getLexer().getTok().getIdentifier();
3040 Lex();
3042 if (ID == ".end_amd_kernel_code_t")
3043 break;
3045 if (ParseAMDKernelCodeTValue(ID, Header))
3046 return true;
3049 getTargetStreamer().EmitAMDKernelCodeT(Header);
3051 return false;
3054 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3055 if (getLexer().isNot(AsmToken::Identifier))
3056 return TokError("expected symbol name");
3058 StringRef KernelName = Parser.getTok().getString();
3060 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3061 ELF::STT_AMDGPU_HSA_KERNEL);
3062 Lex();
3063 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3064 KernelScope.initialize(getContext());
3065 return false;
3068 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3069 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3070 return Error(getParser().getTok().getLoc(),
3071 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3072 "architectures");
3075 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3077 std::string ISAVersionStringFromSTI;
3078 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3079 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3081 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3082 return Error(getParser().getTok().getLoc(),
3083 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3084 "arguments specified through the command line");
3087 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3088 Lex();
3090 return false;
3093 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3094 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3095 return Error(getParser().getTok().getLoc(),
3096 (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
3097 "not available on non-amdhsa OSes")).str());
3100 std::string HSAMetadataString;
3101 raw_string_ostream YamlStream(HSAMetadataString);
3103 getLexer().setSkipSpace(false);
3105 bool FoundEnd = false;
3106 while (!getLexer().is(AsmToken::Eof)) {
3107 while (getLexer().is(AsmToken::Space)) {
3108 YamlStream << getLexer().getTok().getString();
3109 Lex();
3112 if (getLexer().is(AsmToken::Identifier)) {
3113 StringRef ID = getLexer().getTok().getIdentifier();
3114 if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
3115 Lex();
3116 FoundEnd = true;
3117 break;
3121 YamlStream << Parser.parseStringToEndOfStatement()
3122 << getContext().getAsmInfo()->getSeparatorString();
3124 Parser.eatToEndOfStatement();
3127 getLexer().setSkipSpace(true);
3129 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3130 return TokError(Twine("expected directive ") +
3131 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
3134 YamlStream.flush();
3136 if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
3137 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3139 return false;
3142 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3143 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3144 return Error(getParser().getTok().getLoc(),
3145 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3146 "not available on non-amdpal OSes")).str());
3149 PALMD::Metadata PALMetadata;
3150 for (;;) {
3151 uint32_t Value;
3152 if (ParseAsAbsoluteExpression(Value)) {
3153 return TokError(Twine("invalid value in ") +
3154 Twine(PALMD::AssemblerDirective));
3156 PALMetadata.push_back(Value);
3157 if (getLexer().isNot(AsmToken::Comma))
3158 break;
3159 Lex();
3161 getTargetStreamer().EmitPALMetadata(PALMetadata);
3162 return false;
3165 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3166 StringRef IDVal = DirectiveID.getString();
3168 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3169 if (IDVal == ".amdgcn_target")
3170 return ParseDirectiveAMDGCNTarget();
3172 if (IDVal == ".amdhsa_kernel")
3173 return ParseDirectiveAMDHSAKernel();
3174 } else {
3175 if (IDVal == ".hsa_code_object_version")
3176 return ParseDirectiveHSACodeObjectVersion();
3178 if (IDVal == ".hsa_code_object_isa")
3179 return ParseDirectiveHSACodeObjectISA();
3181 if (IDVal == ".amd_kernel_code_t")
3182 return ParseDirectiveAMDKernelCodeT();
3184 if (IDVal == ".amdgpu_hsa_kernel")
3185 return ParseDirectiveAMDGPUHsaKernel();
3187 if (IDVal == ".amd_amdgpu_isa")
3188 return ParseDirectiveISAVersion();
3191 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3192 return ParseDirectiveHSAMetadata();
3194 if (IDVal == PALMD::AssemblerDirective)
3195 return ParseDirectivePALMetadata();
3197 return true;
3200 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3201 unsigned RegNo) const {
3203 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3204 R.isValid(); ++R) {
3205 if (*R == RegNo)
3206 return isGFX9();
3209 switch (RegNo) {
3210 case AMDGPU::TBA:
3211 case AMDGPU::TBA_LO:
3212 case AMDGPU::TBA_HI:
3213 case AMDGPU::TMA:
3214 case AMDGPU::TMA_LO:
3215 case AMDGPU::TMA_HI:
3216 return !isGFX9();
3217 case AMDGPU::XNACK_MASK:
3218 case AMDGPU::XNACK_MASK_LO:
3219 case AMDGPU::XNACK_MASK_HI:
3220 return !isCI() && !isSI() && hasXNACK();
3221 default:
3222 break;
3225 if (isCI())
3226 return true;
3228 if (isSI()) {
3229 // No flat_scr
3230 switch (RegNo) {
3231 case AMDGPU::FLAT_SCR:
3232 case AMDGPU::FLAT_SCR_LO:
3233 case AMDGPU::FLAT_SCR_HI:
3234 return false;
3235 default:
3236 return true;
3240 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3241 // SI/CI have.
3242 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3243 R.isValid(); ++R) {
3244 if (*R == RegNo)
3245 return false;
3248 return true;
3251 OperandMatchResultTy
3252 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3253 // Try to parse with a custom parser
3254 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3256 // If we successfully parsed the operand or if there was an error parsing,
3257 // we are done.
3259 // If we are parsing after we reach EndOfStatement then this means we
3260 // are appending default values to the Operands list. This is only done
3261 // by a custom parser, so we shouldn't continue on to the generic parsing.
3262 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3263 getLexer().is(AsmToken::EndOfStatement))
3264 return ResTy;
3266 ResTy = parseRegOrImm(Operands);
3268 if (ResTy == MatchOperand_Success)
3269 return ResTy;
3271 const auto &Tok = Parser.getTok();
3272 SMLoc S = Tok.getLoc();
3274 const MCExpr *Expr = nullptr;
3275 if (!Parser.parseExpression(Expr)) {
3276 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3277 return MatchOperand_Success;
3280 // Possibly this is an instruction flag like 'gds'.
3281 if (Tok.getKind() == AsmToken::Identifier) {
3282 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3283 Parser.Lex();
3284 return MatchOperand_Success;
3287 return MatchOperand_NoMatch;
3290 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3291 // Clear any forced encodings from the previous instruction.
3292 setForcedEncodingSize(0);
3293 setForcedDPP(false);
3294 setForcedSDWA(false);
3296 if (Name.endswith("_e64")) {
3297 setForcedEncodingSize(64);
3298 return Name.substr(0, Name.size() - 4);
3299 } else if (Name.endswith("_e32")) {
3300 setForcedEncodingSize(32);
3301 return Name.substr(0, Name.size() - 4);
3302 } else if (Name.endswith("_dpp")) {
3303 setForcedDPP(true);
3304 return Name.substr(0, Name.size() - 4);
3305 } else if (Name.endswith("_sdwa")) {
3306 setForcedSDWA(true);
3307 return Name.substr(0, Name.size() - 5);
3309 return Name;
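// Illustrative note (not part of the original source): for example,
// "v_add_f32_e64" forces the 64-bit (VOP3) encoding and "v_add_f32_e32"
// forces the 32-bit encoding, while the "_dpp" and "_sdwa" suffixes force
// the DPP and SDWA variants; the suffix is stripped before matching.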
3312 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3313 StringRef Name,
3314 SMLoc NameLoc, OperandVector &Operands) {
3315 // Add the instruction mnemonic
3316 Name = parseMnemonicSuffix(Name);
3317 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3319 while (!getLexer().is(AsmToken::EndOfStatement)) {
3320 OperandMatchResultTy Res = parseOperand(Operands, Name);
3322 // Eat the comma or space if there is one.
3323 if (getLexer().is(AsmToken::Comma))
3324 Parser.Lex();
3326 switch (Res) {
3327 case MatchOperand_Success: break;
3328 case MatchOperand_ParseFail:
3329 Error(getLexer().getLoc(), "failed parsing operand.");
3330 while (!getLexer().is(AsmToken::EndOfStatement)) {
3331 Parser.Lex();
3333 return true;
3334 case MatchOperand_NoMatch:
3335 Error(getLexer().getLoc(), "not a valid operand.");
3336 while (!getLexer().is(AsmToken::EndOfStatement)) {
3337 Parser.Lex();
3339 return true;
3343 return false;
3346 //===----------------------------------------------------------------------===//
3347 // Utility functions
3348 //===----------------------------------------------------------------------===//
3350 OperandMatchResultTy
3351 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3352 switch(getLexer().getKind()) {
3353 default: return MatchOperand_NoMatch;
3354 case AsmToken::Identifier: {
3355 StringRef Name = Parser.getTok().getString();
3356 if (!Name.equals(Prefix)) {
3357 return MatchOperand_NoMatch;
3360 Parser.Lex();
3361 if (getLexer().isNot(AsmToken::Colon))
3362 return MatchOperand_ParseFail;
3364 Parser.Lex();
3366 bool IsMinus = false;
3367 if (getLexer().getKind() == AsmToken::Minus) {
3368 Parser.Lex();
3369 IsMinus = true;
3372 if (getLexer().isNot(AsmToken::Integer))
3373 return MatchOperand_ParseFail;
3375 if (getParser().parseAbsoluteExpression(Int))
3376 return MatchOperand_ParseFail;
3378 if (IsMinus)
3379 Int = -Int;
3380 break;
3383 return MatchOperand_Success;
3386 OperandMatchResultTy
3387 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3388 AMDGPUOperand::ImmTy ImmTy,
3389 bool (*ConvertResult)(int64_t&)) {
3390 SMLoc S = Parser.getTok().getLoc();
3391 int64_t Value = 0;
3393 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3394 if (Res != MatchOperand_Success)
3395 return Res;
3397 if (ConvertResult && !ConvertResult(Value)) {
3398 return MatchOperand_ParseFail;
3401 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3402 return MatchOperand_Success;
3405 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3406 const char *Prefix,
3407 OperandVector &Operands,
3408 AMDGPUOperand::ImmTy ImmTy,
3409 bool (*ConvertResult)(int64_t&)) {
3410 StringRef Name = Parser.getTok().getString();
3411 if (!Name.equals(Prefix))
3412 return MatchOperand_NoMatch;
3414 Parser.Lex();
3415 if (getLexer().isNot(AsmToken::Colon))
3416 return MatchOperand_ParseFail;
3418 Parser.Lex();
3419 if (getLexer().isNot(AsmToken::LBrac))
3420 return MatchOperand_ParseFail;
3421 Parser.Lex();
3423 unsigned Val = 0;
3424 SMLoc S = Parser.getTok().getLoc();
3426 // FIXME: How to verify the number of elements matches the number of src
3427 // operands?
3428 for (int I = 0; I < 4; ++I) {
3429 if (I != 0) {
3430 if (getLexer().is(AsmToken::RBrac))
3431 break;
3433 if (getLexer().isNot(AsmToken::Comma))
3434 return MatchOperand_ParseFail;
3435 Parser.Lex();
3438 if (getLexer().isNot(AsmToken::Integer))
3439 return MatchOperand_ParseFail;
3441 int64_t Op;
3442 if (getParser().parseAbsoluteExpression(Op))
3443 return MatchOperand_ParseFail;
3445 if (Op != 0 && Op != 1)
3446 return MatchOperand_ParseFail;
3447 Val |= (Op << I);
3450 Parser.Lex();
3451 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3452 return MatchOperand_Success;
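// Illustrative note (not part of the original source): this parser accepts
// operands of the form prefix:[b0,b1,...] where each element is 0 or 1 and
// at most four elements are read; e.g. a hypothetical "op_sel:[0,1]" would
// yield Val = 0b10.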
3455 OperandMatchResultTy
3456 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3457 AMDGPUOperand::ImmTy ImmTy) {
3458 int64_t Bit = 0;
3459 SMLoc S = Parser.getTok().getLoc();
3461 // If we are at the end of the statement, this is a default argument, so
3462 // use the default value.
3463 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3464 switch(getLexer().getKind()) {
3465 case AsmToken::Identifier: {
3466 StringRef Tok = Parser.getTok().getString();
3467 if (Tok == Name) {
3468 Bit = 1;
3469 Parser.Lex();
3470 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3471 Bit = 0;
3472 Parser.Lex();
3473 } else {
3474 return MatchOperand_NoMatch;
3476 break;
3478 default:
3479 return MatchOperand_NoMatch;
3483 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3484 return MatchOperand_Success;
3487 static void addOptionalImmOperand(
3488 MCInst& Inst, const OperandVector& Operands,
3489 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3490 AMDGPUOperand::ImmTy ImmT,
3491 int64_t Default = 0) {
3492 auto i = OptionalIdx.find(ImmT);
3493 if (i != OptionalIdx.end()) {
3494 unsigned Idx = i->second;
3495 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3496 } else {
3497 Inst.addOperand(MCOperand::createImm(Default));
3501 OperandMatchResultTy
3502 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3503 if (getLexer().isNot(AsmToken::Identifier)) {
3504 return MatchOperand_NoMatch;
3506 StringRef Tok = Parser.getTok().getString();
3507 if (Tok != Prefix) {
3508 return MatchOperand_NoMatch;
3511 Parser.Lex();
3512 if (getLexer().isNot(AsmToken::Colon)) {
3513 return MatchOperand_ParseFail;
3516 Parser.Lex();
3517 if (getLexer().isNot(AsmToken::Identifier)) {
3518 return MatchOperand_ParseFail;
3521 Value = Parser.getTok().getString();
3522 return MatchOperand_Success;
3525 //===----------------------------------------------------------------------===//
3526 // ds
3527 //===----------------------------------------------------------------------===//
3529 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3530 const OperandVector &Operands) {
3531 OptionalImmIndexMap OptionalIdx;
3533 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3534 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3536 // Add the register arguments
3537 if (Op.isReg()) {
3538 Op.addRegOperands(Inst, 1);
3539 continue;
3542 // Handle optional arguments
3543 OptionalIdx[Op.getImmTy()] = i;
3546 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3550 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3553 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3554 bool IsGdsHardcoded) {
3555 OptionalImmIndexMap OptionalIdx;
3557 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3558 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3560 // Add the register arguments
3561 if (Op.isReg()) {
3562 Op.addRegOperands(Inst, 1);
3563 continue;
3566 if (Op.isToken() && Op.getToken() == "gds") {
3567 IsGdsHardcoded = true;
3568 continue;
3571 // Handle optional arguments
3572 OptionalIdx[Op.getImmTy()] = i;
3575 AMDGPUOperand::ImmTy OffsetType =
3576 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3577 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3578 AMDGPUOperand::ImmTyOffset;
3580 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3582 if (!IsGdsHardcoded) {
3583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3585 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3588 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3589 OptionalImmIndexMap OptionalIdx;
3591 unsigned OperandIdx[4];
3592 unsigned EnMask = 0;
3593 int SrcIdx = 0;
3595 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3596 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3598 // Add the register arguments
3599 if (Op.isReg()) {
3600 assert(SrcIdx < 4);
3601 OperandIdx[SrcIdx] = Inst.size();
3602 Op.addRegOperands(Inst, 1);
3603 ++SrcIdx;
3604 continue;
3607 if (Op.isOff()) {
3608 assert(SrcIdx < 4);
3609 OperandIdx[SrcIdx] = Inst.size();
3610 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3611 ++SrcIdx;
3612 continue;
3615 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3616 Op.addImmOperands(Inst, 1);
3617 continue;
3620 if (Op.isToken() && Op.getToken() == "done")
3621 continue;
3623 // Handle optional arguments
3624 OptionalIdx[Op.getImmTy()] = i;
3627 assert(SrcIdx == 4);
3629 bool Compr = false;
3630 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3631 Compr = true;
3632 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3633 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3634 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3637 for (auto i = 0; i < SrcIdx; ++i) {
3638 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3639 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3646 Inst.addOperand(MCOperand::createImm(EnMask));
3649 //===----------------------------------------------------------------------===//
3650 // s_waitcnt
3651 //===----------------------------------------------------------------------===//
3653 static bool
3654 encodeCnt(
3655 const AMDGPU::IsaInfo::IsaVersion ISA,
3656 int64_t &IntVal,
3657 int64_t CntVal,
3658 bool Saturate,
3659 unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
3660 unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
3662 bool Failed = false;
3664 IntVal = encode(ISA, IntVal, CntVal);
3665 if (CntVal != decode(ISA, IntVal)) {
3666 if (Saturate) {
3667 IntVal = encode(ISA, IntVal, -1);
3668 } else {
3669 Failed = true;
3672 return Failed;
3675 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3676 StringRef CntName = Parser.getTok().getString();
3677 int64_t CntVal;
3679 Parser.Lex();
3680 if (getLexer().isNot(AsmToken::LParen))
3681 return true;
3683 Parser.Lex();
3684 if (getLexer().isNot(AsmToken::Integer))
3685 return true;
3687 SMLoc ValLoc = Parser.getTok().getLoc();
3688 if (getParser().parseAbsoluteExpression(CntVal))
3689 return true;
3691 AMDGPU::IsaInfo::IsaVersion ISA =
3692 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3694 bool Failed = true;
3695 bool Sat = CntName.endswith("_sat");
3697 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3698 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3699 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3700 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3701 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3702 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3705 if (Failed) {
3706     Error(ValLoc, "too large a value for " + CntName);
3707 return true;
3710 if (getLexer().isNot(AsmToken::RParen)) {
3711 return true;
3714 Parser.Lex();
3715 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3716 const AsmToken NextToken = getLexer().peekTok();
3717 if (NextToken.is(AsmToken::Identifier)) {
3718 Parser.Lex();
3722 return false;
3725 OperandMatchResultTy
3726 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3727 AMDGPU::IsaInfo::IsaVersion ISA =
3728 AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
3729 int64_t Waitcnt = getWaitcntBitMask(ISA);
3730 SMLoc S = Parser.getTok().getLoc();
3732 switch(getLexer().getKind()) {
3733 default: return MatchOperand_ParseFail;
3734 case AsmToken::Integer:
3735 // The operand can be an integer value.
3736 if (getParser().parseAbsoluteExpression(Waitcnt))
3737 return MatchOperand_ParseFail;
3738 break;
3740 case AsmToken::Identifier:
3741 do {
3742 if (parseCnt(Waitcnt))
3743 return MatchOperand_ParseFail;
3744 } while(getLexer().isNot(AsmToken::EndOfStatement));
3745 break;
3747 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3748 return MatchOperand_Success;
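// parseHwregConstruct handles the symbolic form of the s_getreg/s_setreg
// operand, roughly:
//   hwreg(<name or id> [, <bit offset>, <bit width>])
// where <name> is one of the IdSymbolic[] entries; the offset defaults to
// OFFSET_DEFAULT_ and the width defaults to WIDTH_M1_DEFAULT_ + 1.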
3751 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3752 int64_t &Width) {
3753 using namespace llvm::AMDGPU::Hwreg;
3755 if (Parser.getTok().getString() != "hwreg")
3756 return true;
3757 Parser.Lex();
3759 if (getLexer().isNot(AsmToken::LParen))
3760 return true;
3761 Parser.Lex();
3763 if (getLexer().is(AsmToken::Identifier)) {
3764 HwReg.IsSymbolic = true;
3765 HwReg.Id = ID_UNKNOWN_;
3766 const StringRef tok = Parser.getTok().getString();
3767 int Last = ID_SYMBOLIC_LAST_;
3768 if (isSI() || isCI() || isVI())
3769 Last = ID_SYMBOLIC_FIRST_GFX9_;
3770 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3771 if (tok == IdSymbolic[i]) {
3772 HwReg.Id = i;
3773 break;
3776 Parser.Lex();
3777 } else {
3778 HwReg.IsSymbolic = false;
3779 if (getLexer().isNot(AsmToken::Integer))
3780 return true;
3781 if (getParser().parseAbsoluteExpression(HwReg.Id))
3782 return true;
3785 if (getLexer().is(AsmToken::RParen)) {
3786 Parser.Lex();
3787 return false;
3790 // optional params
3791 if (getLexer().isNot(AsmToken::Comma))
3792 return true;
3793 Parser.Lex();
3795 if (getLexer().isNot(AsmToken::Integer))
3796 return true;
3797 if (getParser().parseAbsoluteExpression(Offset))
3798 return true;
3800 if (getLexer().isNot(AsmToken::Comma))
3801 return true;
3802 Parser.Lex();
3804 if (getLexer().isNot(AsmToken::Integer))
3805 return true;
3806 if (getParser().parseAbsoluteExpression(Width))
3807 return true;
3809 if (getLexer().isNot(AsmToken::RParen))
3810 return true;
3811 Parser.Lex();
3813 return false;
3816 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3817 using namespace llvm::AMDGPU::Hwreg;
3819 int64_t Imm16Val = 0;
3820 SMLoc S = Parser.getTok().getLoc();
3822 switch(getLexer().getKind()) {
3823 default: return MatchOperand_NoMatch;
3824 case AsmToken::Integer:
3825 // The operand can be an integer value.
3826 if (getParser().parseAbsoluteExpression(Imm16Val))
3827 return MatchOperand_NoMatch;
3828 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3829 Error(S, "invalid immediate: only 16-bit values are legal");
3830       // Do not return an error code; create an imm operand anyway and proceed
3831       // to the next operand, if any. That avoids unnecessary error messages.
3833 break;
3835 case AsmToken::Identifier: {
3836 OperandInfoTy HwReg(ID_UNKNOWN_);
3837 int64_t Offset = OFFSET_DEFAULT_;
3838 int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3839 if (parseHwregConstruct(HwReg, Offset, Width))
3840 return MatchOperand_ParseFail;
3841 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3842 if (HwReg.IsSymbolic)
3843 Error(S, "invalid symbolic name of hardware register");
3844 else
3845 Error(S, "invalid code of hardware register: only 6-bit values are legal");
3847 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3848 Error(S, "invalid bit offset: only 5-bit values are legal");
3849 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3850 Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3851 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3853 break;
3855 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3856 return MatchOperand_Success;
3859 bool AMDGPUOperand::isSWaitCnt() const {
3860 return isImm();
3863 bool AMDGPUOperand::isHwreg() const {
3864 return isImmTy(ImmTyHwreg);
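// parseSendMsgConstruct handles the symbolic s_sendmsg operand, roughly:
//   sendmsg(<msg> [, <operation> [, <stream id>]])
// The operation is only expected for GS, GS_DONE and SYSMSG messages, and the
// stream id only for GS/GS_DONE operations other than GS_OP_NOP.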
3867 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3868 using namespace llvm::AMDGPU::SendMsg;
3870 if (Parser.getTok().getString() != "sendmsg")
3871 return true;
3872 Parser.Lex();
3874 if (getLexer().isNot(AsmToken::LParen))
3875 return true;
3876 Parser.Lex();
3878 if (getLexer().is(AsmToken::Identifier)) {
3879 Msg.IsSymbolic = true;
3880 Msg.Id = ID_UNKNOWN_;
3881 const std::string tok = Parser.getTok().getString();
3882 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3883 switch(i) {
3884 default: continue; // Omit gaps.
3885 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
3887 if (tok == IdSymbolic[i]) {
3888 Msg.Id = i;
3889 break;
3892 Parser.Lex();
3893 } else {
3894 Msg.IsSymbolic = false;
3895 if (getLexer().isNot(AsmToken::Integer))
3896 return true;
3897 if (getParser().parseAbsoluteExpression(Msg.Id))
3898 return true;
3899 if (getLexer().is(AsmToken::Integer))
3900 if (getParser().parseAbsoluteExpression(Msg.Id))
3901 Msg.Id = ID_UNKNOWN_;
3903 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3904 return false;
3906 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3907 if (getLexer().isNot(AsmToken::RParen))
3908 return true;
3909 Parser.Lex();
3910 return false;
3913 if (getLexer().isNot(AsmToken::Comma))
3914 return true;
3915 Parser.Lex();
3917 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3918 Operation.Id = ID_UNKNOWN_;
3919 if (getLexer().is(AsmToken::Identifier)) {
3920 Operation.IsSymbolic = true;
3921 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3922 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3923 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3924 const StringRef Tok = Parser.getTok().getString();
3925 for (int i = F; i < L; ++i) {
3926 if (Tok == S[i]) {
3927 Operation.Id = i;
3928 break;
3931 Parser.Lex();
3932 } else {
3933 Operation.IsSymbolic = false;
3934 if (getLexer().isNot(AsmToken::Integer))
3935 return true;
3936 if (getParser().parseAbsoluteExpression(Operation.Id))
3937 return true;
3940 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3941 // Stream id is optional.
3942 if (getLexer().is(AsmToken::RParen)) {
3943 Parser.Lex();
3944 return false;
3947 if (getLexer().isNot(AsmToken::Comma))
3948 return true;
3949 Parser.Lex();
3951 if (getLexer().isNot(AsmToken::Integer))
3952 return true;
3953 if (getParser().parseAbsoluteExpression(StreamId))
3954 return true;
3957 if (getLexer().isNot(AsmToken::RParen))
3958 return true;
3959 Parser.Lex();
3960 return false;
3963 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
3964 if (getLexer().getKind() != AsmToken::Identifier)
3965 return MatchOperand_NoMatch;
3967 StringRef Str = Parser.getTok().getString();
3968 int Slot = StringSwitch<int>(Str)
3969 .Case("p10", 0)
3970 .Case("p20", 1)
3971 .Case("p0", 2)
3972 .Default(-1);
3974 SMLoc S = Parser.getTok().getLoc();
3975 if (Slot == -1)
3976 return MatchOperand_ParseFail;
3978 Parser.Lex();
3979 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
3980 AMDGPUOperand::ImmTyInterpSlot));
3981 return MatchOperand_Success;
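// Interpolation attributes are written as "attr<N>.<chan>", e.g. attr0.x;
// the channel suffix maps to 0..3 and the attribute number must fit in 6 bits.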
3984 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
3985 if (getLexer().getKind() != AsmToken::Identifier)
3986 return MatchOperand_NoMatch;
3988 StringRef Str = Parser.getTok().getString();
3989 if (!Str.startswith("attr"))
3990 return MatchOperand_NoMatch;
3992 StringRef Chan = Str.take_back(2);
3993 int AttrChan = StringSwitch<int>(Chan)
3994 .Case(".x", 0)
3995 .Case(".y", 1)
3996 .Case(".z", 2)
3997 .Case(".w", 3)
3998 .Default(-1);
3999 if (AttrChan == -1)
4000 return MatchOperand_ParseFail;
4002 Str = Str.drop_back(2).drop_front(4);
4004 uint8_t Attr;
4005 if (Str.getAsInteger(10, Attr))
4006 return MatchOperand_ParseFail;
4008 SMLoc S = Parser.getTok().getLoc();
4009 Parser.Lex();
4010 if (Attr > 63) {
4011 Error(S, "out of bounds attr");
4012 return MatchOperand_Success;
4015 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4017 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4018 AMDGPUOperand::ImmTyInterpAttr));
4019 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4020 AMDGPUOperand::ImmTyAttrChan));
4021 return MatchOperand_Success;
4024 void AMDGPUAsmParser::errorExpTgt() {
4025 Error(Parser.getTok().getLoc(), "invalid exp target");
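// Export targets accepted below and the values they encode to:
//   mrt0..mrt7 -> 0..7, mrtz -> 8, null -> 9, pos0..pos3 -> 12..15,
//   param0..param31 -> 32..63. "invalid_target_<N>" is parsed but flagged
//   with an error.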
4028 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4029 uint8_t &Val) {
4030 if (Str == "null") {
4031 Val = 9;
4032 return MatchOperand_Success;
4035 if (Str.startswith("mrt")) {
4036 Str = Str.drop_front(3);
4037 if (Str == "z") { // == mrtz
4038 Val = 8;
4039 return MatchOperand_Success;
4042 if (Str.getAsInteger(10, Val))
4043 return MatchOperand_ParseFail;
4045 if (Val > 7)
4046 errorExpTgt();
4048 return MatchOperand_Success;
4051 if (Str.startswith("pos")) {
4052 Str = Str.drop_front(3);
4053 if (Str.getAsInteger(10, Val))
4054 return MatchOperand_ParseFail;
4056 if (Val > 3)
4057 errorExpTgt();
4059 Val += 12;
4060 return MatchOperand_Success;
4063 if (Str.startswith("param")) {
4064 Str = Str.drop_front(5);
4065 if (Str.getAsInteger(10, Val))
4066 return MatchOperand_ParseFail;
4068 if (Val >= 32)
4069 errorExpTgt();
4071 Val += 32;
4072 return MatchOperand_Success;
4075 if (Str.startswith("invalid_target_")) {
4076 Str = Str.drop_front(15);
4077 if (Str.getAsInteger(10, Val))
4078 return MatchOperand_ParseFail;
4080 errorExpTgt();
4081 return MatchOperand_Success;
4084 return MatchOperand_NoMatch;
4087 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4088 uint8_t Val;
4089 StringRef Str = Parser.getTok().getString();
4091 auto Res = parseExpTgtImpl(Str, Val);
4092 if (Res != MatchOperand_Success)
4093 return Res;
4095 SMLoc S = Parser.getTok().getLoc();
4096 Parser.Lex();
4098 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4099 AMDGPUOperand::ImmTyExpTgt));
4100 return MatchOperand_Success;
4103 OperandMatchResultTy
4104 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4105 using namespace llvm::AMDGPU::SendMsg;
4107 int64_t Imm16Val = 0;
4108 SMLoc S = Parser.getTok().getLoc();
4110 switch(getLexer().getKind()) {
4111 default:
4112 return MatchOperand_NoMatch;
4113 case AsmToken::Integer:
4114 // The operand can be an integer value.
4115 if (getParser().parseAbsoluteExpression(Imm16Val))
4116 return MatchOperand_NoMatch;
4117 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4118 Error(S, "invalid immediate: only 16-bit values are legal");
4119     // Do not return an error code; create an imm operand anyway and proceed
4120     // to the next operand, if any. That avoids unnecessary error messages.
4122 break;
4123 case AsmToken::Identifier: {
4124 OperandInfoTy Msg(ID_UNKNOWN_);
4125 OperandInfoTy Operation(OP_UNKNOWN_);
4126 int64_t StreamId = STREAM_ID_DEFAULT_;
4127 if (parseSendMsgConstruct(Msg, Operation, StreamId))
4128 return MatchOperand_ParseFail;
4129 do {
4130 // Validate and encode message ID.
4131 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4132 || Msg.Id == ID_SYSMSG)) {
4133 if (Msg.IsSymbolic)
4134 Error(S, "invalid/unsupported symbolic name of message");
4135 else
4136 Error(S, "invalid/unsupported code of message");
4137 break;
4139 Imm16Val = (Msg.Id << ID_SHIFT_);
4140 // Validate and encode operation ID.
4141 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4142 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4143 if (Operation.IsSymbolic)
4144 Error(S, "invalid symbolic name of GS_OP");
4145 else
4146 Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4147 break;
4149 if (Operation.Id == OP_GS_NOP
4150 && Msg.Id != ID_GS_DONE) {
4151 Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4152 break;
4154 Imm16Val |= (Operation.Id << OP_SHIFT_);
4156 if (Msg.Id == ID_SYSMSG) {
4157 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4158 if (Operation.IsSymbolic)
4159 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4160 else
4161 Error(S, "invalid/unsupported code of SYSMSG_OP");
4162 break;
4164 Imm16Val |= (Operation.Id << OP_SHIFT_);
4166 // Validate and encode stream ID.
4167 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4168 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4169 Error(S, "invalid stream id: only 2-bit values are legal");
4170 break;
4172 Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4174 } while (false);
4176 break;
4178 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4179 return MatchOperand_Success;
4182 bool AMDGPUOperand::isSendMsg() const {
4183 return isImmTy(ImmTySendMsg);
4186 //===----------------------------------------------------------------------===//
4187 // parser helpers
4188 //===----------------------------------------------------------------------===//
4190 bool
4191 AMDGPUAsmParser::trySkipId(const StringRef Id) {
4192 if (getLexer().getKind() == AsmToken::Identifier &&
4193 Parser.getTok().getString() == Id) {
4194 Parser.Lex();
4195 return true;
4197 return false;
4200 bool
4201 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
4202 if (getLexer().getKind() == Kind) {
4203 Parser.Lex();
4204 return true;
4206 return false;
4209 bool
4210 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
4211 const StringRef ErrMsg) {
4212 if (!trySkipToken(Kind)) {
4213 Error(Parser.getTok().getLoc(), ErrMsg);
4214 return false;
4216 return true;
4219 bool
4220 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
4221 return !getParser().parseAbsoluteExpression(Imm);
4224 bool
4225 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
4226 SMLoc S = Parser.getTok().getLoc();
4227 if (getLexer().getKind() == AsmToken::String) {
4228 Val = Parser.getTok().getStringContents();
4229 Parser.Lex();
4230 return true;
4231 } else {
4232 Error(S, ErrMsg);
4233 return false;
4237 //===----------------------------------------------------------------------===//
4238 // swizzle
4239 //===----------------------------------------------------------------------===//
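// The ds_swizzle_b32 offset can be given either as a plain 16-bit immediate or
// via the swizzle() macro parsed below. QUAD_PERM gets its own encoding, while
// BROADCAST, SWAP and REVERSE are all lowered onto the BITMASK_PERM encoding
// produced by encodeBitmaskPerm().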
4241 LLVM_READNONE
4242 static unsigned
4243 encodeBitmaskPerm(const unsigned AndMask,
4244 const unsigned OrMask,
4245 const unsigned XorMask) {
4246 using namespace llvm::AMDGPU::Swizzle;
4248 return BITMASK_PERM_ENC |
4249 (AndMask << BITMASK_AND_SHIFT) |
4250 (OrMask << BITMASK_OR_SHIFT) |
4251 (XorMask << BITMASK_XOR_SHIFT);
4254 bool
4255 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
4256 const unsigned MinVal,
4257 const unsigned MaxVal,
4258 const StringRef ErrMsg) {
4259 for (unsigned i = 0; i < OpNum; ++i) {
4260 if (!skipToken(AsmToken::Comma, "expected a comma")){
4261 return false;
4263 SMLoc ExprLoc = Parser.getTok().getLoc();
4264 if (!parseExpr(Op[i])) {
4265 return false;
4267 if (Op[i] < MinVal || Op[i] > MaxVal) {
4268 Error(ExprLoc, ErrMsg);
4269 return false;
4273 return true;
4276 bool
4277 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
4278 using namespace llvm::AMDGPU::Swizzle;
4280 int64_t Lane[LANE_NUM];
4281 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
4282 "expected a 2-bit lane id")) {
4283 Imm = QUAD_PERM_ENC;
4284 for (auto i = 0; i < LANE_NUM; ++i) {
4285 Imm |= Lane[i] << (LANE_SHIFT * i);
4287 return true;
4289 return false;
4292 bool
4293 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
4294 using namespace llvm::AMDGPU::Swizzle;
4296 SMLoc S = Parser.getTok().getLoc();
4297 int64_t GroupSize;
4298 int64_t LaneIdx;
4300 if (!parseSwizzleOperands(1, &GroupSize,
4301 2, 32,
4302 "group size must be in the interval [2,32]")) {
4303 return false;
4305 if (!isPowerOf2_64(GroupSize)) {
4306 Error(S, "group size must be a power of two");
4307 return false;
4309 if (parseSwizzleOperands(1, &LaneIdx,
4310 0, GroupSize - 1,
4311 "lane id must be in the interval [0,group size - 1]")) {
4312 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
4313 return true;
4315 return false;
4318 bool
4319 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
4320 using namespace llvm::AMDGPU::Swizzle;
4322 SMLoc S = Parser.getTok().getLoc();
4323 int64_t GroupSize;
4325 if (!parseSwizzleOperands(1, &GroupSize,
4326 2, 32, "group size must be in the interval [2,32]")) {
4327 return false;
4329 if (!isPowerOf2_64(GroupSize)) {
4330 Error(S, "group size must be a power of two");
4331 return false;
4334 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
4335 return true;
4338 bool
4339 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
4340 using namespace llvm::AMDGPU::Swizzle;
4342 SMLoc S = Parser.getTok().getLoc();
4343 int64_t GroupSize;
4345 if (!parseSwizzleOperands(1, &GroupSize,
4346 1, 16, "group size must be in the interval [1,16]")) {
4347 return false;
4349 if (!isPowerOf2_64(GroupSize)) {
4350 Error(S, "group size must be a power of two");
4351 return false;
4354 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
4355 return true;
4358 bool
4359 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
4360 using namespace llvm::AMDGPU::Swizzle;
4362 if (!skipToken(AsmToken::Comma, "expected a comma")) {
4363 return false;
4366 StringRef Ctl;
4367 SMLoc StrLoc = Parser.getTok().getLoc();
4368 if (!parseString(Ctl)) {
4369 return false;
4371 if (Ctl.size() != BITMASK_WIDTH) {
4372 Error(StrLoc, "expected a 5-character mask");
4373 return false;
4376 unsigned AndMask = 0;
4377 unsigned OrMask = 0;
4378 unsigned XorMask = 0;
4380 for (size_t i = 0; i < Ctl.size(); ++i) {
4381 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
4382 switch(Ctl[i]) {
4383 default:
4384 Error(StrLoc, "invalid mask");
4385 return false;
4386 case '0':
4387 break;
4388 case '1':
4389 OrMask |= Mask;
4390 break;
4391 case 'p':
4392 AndMask |= Mask;
4393 break;
4394 case 'i':
4395 AndMask |= Mask;
4396 XorMask |= Mask;
4397 break;
4401 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
4402 return true;
4405 bool
4406 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
4408 SMLoc OffsetLoc = Parser.getTok().getLoc();
4410 if (!parseExpr(Imm)) {
4411 return false;
4413 if (!isUInt<16>(Imm)) {
4414 Error(OffsetLoc, "expected a 16-bit offset");
4415 return false;
4417 return true;
4420 bool
4421 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
4422 using namespace llvm::AMDGPU::Swizzle;
4424   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
4426 SMLoc ModeLoc = Parser.getTok().getLoc();
4427 bool Ok = false;
4429 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
4430 Ok = parseSwizzleQuadPerm(Imm);
4431 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
4432 Ok = parseSwizzleBitmaskPerm(Imm);
4433 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
4434 Ok = parseSwizzleBroadcast(Imm);
4435 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
4436 Ok = parseSwizzleSwap(Imm);
4437 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
4438 Ok = parseSwizzleReverse(Imm);
4439 } else {
4440 Error(ModeLoc, "expected a swizzle mode");
4443     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
4446 return false;
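// parseSwizzleOp handles the optional "offset:" operand of ds_swizzle_b32.
// It accepts either a raw 16-bit offset, e.g. offset:0x8000, or one of the
// macro forms, e.g. offset:swizzle(SWAP, 16) (mode names come from the
// Swizzle::IdSymbolic table).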
4449 OperandMatchResultTy
4450 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
4451 SMLoc S = Parser.getTok().getLoc();
4452 int64_t Imm = 0;
4454 if (trySkipId("offset")) {
4456 bool Ok = false;
4457 if (skipToken(AsmToken::Colon, "expected a colon")) {
4458 if (trySkipId("swizzle")) {
4459 Ok = parseSwizzleMacro(Imm);
4460 } else {
4461 Ok = parseSwizzleOffset(Imm);
4465 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
4467 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
4468 } else {
4469 // Swizzle "offset" operand is optional.
4470 // If it is omitted, try parsing other optional operands.
4471 return parseOptionalOpr(Operands);
4475 bool
4476 AMDGPUOperand::isSwizzle() const {
4477 return isImmTy(ImmTySwizzle);
4480 //===----------------------------------------------------------------------===//
4481 // sopp branch targets
4482 //===----------------------------------------------------------------------===//
4484 OperandMatchResultTy
4485 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
4486 SMLoc S = Parser.getTok().getLoc();
4488 switch (getLexer().getKind()) {
4489 default: return MatchOperand_ParseFail;
4490 case AsmToken::Integer: {
4491 int64_t Imm;
4492 if (getParser().parseAbsoluteExpression(Imm))
4493 return MatchOperand_ParseFail;
4494 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
4495 return MatchOperand_Success;
4498 case AsmToken::Identifier:
4499 Operands.push_back(AMDGPUOperand::CreateExpr(this,
4500 MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
4501 Parser.getTok().getString()), getContext()), S));
4502 Parser.Lex();
4503 return MatchOperand_Success;
4507 //===----------------------------------------------------------------------===//
4508 // mubuf
4509 //===----------------------------------------------------------------------===//
4511 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
4512 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
4515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
4516 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
4519 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
4520 const OperandVector &Operands,
4521 bool IsAtomic,
4522 bool IsAtomicReturn,
4523 bool IsLds) {
4524 bool IsLdsOpcode = IsLds;
4525 bool HasLdsModifier = false;
4526 OptionalImmIndexMap OptionalIdx;
4527 assert(IsAtomicReturn ? IsAtomic : true);
4529 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4532 // Add the register arguments
4533 if (Op.isReg()) {
4534 Op.addRegOperands(Inst, 1);
4535 continue;
4538 // Handle the case where soffset is an immediate
4539 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4540 Op.addImmOperands(Inst, 1);
4541 continue;
4544 HasLdsModifier = Op.isLDS();
4546 // Handle tokens like 'offen' which are sometimes hard-coded into the
4547 // asm string. There are no MCInst operands for these.
4548 if (Op.isToken()) {
4549 continue;
4551 assert(Op.isImm());
4553 // Handle optional arguments
4554 OptionalIdx[Op.getImmTy()] = i;
4557   // This is a workaround for an llvm quirk which may result in an
4558   // incorrect instruction selection. Lds and non-lds versions of
4559   // MUBUF instructions are identical except that lds versions
4560   // have a mandatory 'lds' modifier. However, this modifier follows
4561   // the optional modifiers, and the llvm asm matcher regards this 'lds'
4562   // modifier as an optional one. As a result, an lds version
4563   // of the opcode may be selected even if it has no 'lds' modifier.
4564 if (IsLdsOpcode && !HasLdsModifier) {
4565 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
4566 if (NoLdsOpcode != -1) { // Got lds version - correct it.
4567 Inst.setOpcode(NoLdsOpcode);
4568 IsLdsOpcode = false;
4572 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
4573 if (IsAtomicReturn) {
4574 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
4575 Inst.insert(I, *I);
4578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
4579 if (!IsAtomic) { // glc is hard-coded.
4580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4582 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4584 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
4585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4589 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
4590 OptionalImmIndexMap OptionalIdx;
4592 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4595 // Add the register arguments
4596 if (Op.isReg()) {
4597 Op.addRegOperands(Inst, 1);
4598 continue;
4601 // Handle the case where soffset is an immediate
4602 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
4603 Op.addImmOperands(Inst, 1);
4604 continue;
4607 // Handle tokens like 'offen' which are sometimes hard-coded into the
4608 // asm string. There are no MCInst operands for these.
4609 if (Op.isToken()) {
4610 continue;
4612 assert(Op.isImm());
4614 // Handle optional arguments
4615 OptionalIdx[Op.getImmTy()] = i;
4618 addOptionalImmOperand(Inst, Operands, OptionalIdx,
4619 AMDGPUOperand::ImmTyOffset);
4620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
4621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
4622 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4624 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4627 //===----------------------------------------------------------------------===//
4628 // mimg
4629 //===----------------------------------------------------------------------===//
4631 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
4632 bool IsAtomic) {
4633 unsigned I = 1;
4634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4635 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4636 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4639 if (IsAtomic) {
4640 // Add src, same as dst
4641 assert(Desc.getNumDefs() == 1);
4642 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
4645 OptionalImmIndexMap OptionalIdx;
4647 for (unsigned E = Operands.size(); I != E; ++I) {
4648 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4650 // Add the register arguments
4651 if (Op.isReg()) {
4652 Op.addRegOperands(Inst, 1);
4653 } else if (Op.isImmModifier()) {
4654 OptionalIdx[Op.getImmTy()] = I;
4655 } else {
4656 llvm_unreachable("unexpected operand type");
4660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
4661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
4662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
4663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
4666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
4667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
4671 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
4672 cvtMIMG(Inst, Operands, true);
4675 //===----------------------------------------------------------------------===//
4676 // smrd
4677 //===----------------------------------------------------------------------===//
4679 bool AMDGPUOperand::isSMRDOffset8() const {
4680 return isImm() && isUInt<8>(getImm());
4683 bool AMDGPUOperand::isSMRDOffset20() const {
4684 return isImm() && isUInt<20>(getImm());
4687 bool AMDGPUOperand::isSMRDLiteralOffset() const {
4688 // 32-bit literals are only supported on CI and we only want to use them
4689 // when the offset is > 8-bits.
4690 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
4693 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
4694 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4697 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
4698 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4701 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
4702 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4705 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
4706 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4709 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
4710 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
4713 //===----------------------------------------------------------------------===//
4714 // vop3
4715 //===----------------------------------------------------------------------===//
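// The two converters below map the parsed "mul:" / "div:" values onto the
// 2-bit hardware OMOD field: mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:2 -> 3
// (div:1 also encodes as 0).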
4717 static bool ConvertOmodMul(int64_t &Mul) {
4718 if (Mul != 1 && Mul != 2 && Mul != 4)
4719 return false;
4721 Mul >>= 1;
4722 return true;
4725 static bool ConvertOmodDiv(int64_t &Div) {
4726 if (Div == 1) {
4727 Div = 0;
4728 return true;
4731 if (Div == 2) {
4732 Div = 3;
4733 return true;
4736 return false;
4739 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
4740 if (BoundCtrl == 0) {
4741 BoundCtrl = 1;
4742 return true;
4745 if (BoundCtrl == -1) {
4746 BoundCtrl = 0;
4747 return true;
4750 return false;
4753 // Note: the order in this table matches the order of operands in AsmString.
4754 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
4755 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
4756 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
4757 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
4758 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
4759 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
4760 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
4761 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
4762 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
4763 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4764 {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
4765 {"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
4766 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
4767 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
4768 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
4769 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
4770 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
4771 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
4772 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
4773 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
4774 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
4775 {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
4776 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
4777 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
4778 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
4779 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
4780 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
4781 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
4782 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
4783 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
4784 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
4785 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
4786 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
4787 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
4788 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
4789 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
4790 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
4791 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
4794 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
4795 unsigned size = Operands.size();
4796 assert(size > 0);
4798 OperandMatchResultTy res = parseOptionalOpr(Operands);
4800   // This is a hack to enable hardcoded mandatory operands which follow
4801   // optional operands.
4803   // The current design assumes that all operands after the first optional operand
4804   // are also optional. However, the implementation of some instructions violates
4805   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
4807   // To alleviate this problem, we have to (implicitly) parse extra operands
4808   // to make sure the autogenerated parser of custom operands never hits a
4809   // hardcoded mandatory operand.
4811 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
4813 // We have parsed the first optional operand.
4814 // Parse as many operands as necessary to skip all mandatory operands.
4816 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
4817 if (res != MatchOperand_Success ||
4818 getLexer().is(AsmToken::EndOfStatement)) break;
4819 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
4820 res = parseOptionalOpr(Operands);
4824 return res;
4827 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
4828 OperandMatchResultTy res;
4829 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
4830 // try to parse any optional operand here
4831 if (Op.IsBit) {
4832 res = parseNamedBit(Op.Name, Operands, Op.Type);
4833 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
4834 res = parseOModOperand(Operands);
4835 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
4836 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
4837 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
4838 res = parseSDWASel(Operands, Op.Name, Op.Type);
4839 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
4840 res = parseSDWADstUnused(Operands);
4841 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
4842 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
4843 Op.Type == AMDGPUOperand::ImmTyNegLo ||
4844 Op.Type == AMDGPUOperand::ImmTyNegHi) {
4845 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
4846 Op.ConvertResult);
4847 } else {
4848 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
4850 if (res != MatchOperand_NoMatch) {
4851 return res;
4854 return MatchOperand_NoMatch;
4857 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
4858 StringRef Name = Parser.getTok().getString();
4859 if (Name == "mul") {
4860 return parseIntWithPrefix("mul", Operands,
4861 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
4864 if (Name == "div") {
4865 return parseIntWithPrefix("div", Operands,
4866 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
4869 return MatchOperand_NoMatch;
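// For VOP3 opcodes with a destination op_sel bit, the bit that follows the
// last source in the parsed op_sel mask is folded into src0_modifiers as
// DST_OP_SEL.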
4872 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
4873 cvtVOP3P(Inst, Operands);
4875 int Opc = Inst.getOpcode();
4877 int SrcNum;
4878 const int Ops[] = { AMDGPU::OpName::src0,
4879 AMDGPU::OpName::src1,
4880 AMDGPU::OpName::src2 };
4881 for (SrcNum = 0;
4882 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
4883 ++SrcNum);
4884 assert(SrcNum > 0);
4886 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4887 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4889 if ((OpSel & (1 << SrcNum)) != 0) {
4890 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
4891 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
4892 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
4896 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
4897   // 1. This operand is an input-modifiers operand
4898   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
4899   // 2. This is not the last operand
4900       && Desc.NumOperands > (OpNum + 1)
4901   // 3. The next operand is a register class
4902       && Desc.OpInfo[OpNum + 1].RegClass != -1
4903   // 4. The next register is not tied to any other operand
4904 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
4907 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
4909 OptionalImmIndexMap OptionalIdx;
4910 unsigned Opc = Inst.getOpcode();
4912 unsigned I = 1;
4913 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4914 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4915 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4918 for (unsigned E = Operands.size(); I != E; ++I) {
4919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4920 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4921 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4922 } else if (Op.isInterpSlot() ||
4923 Op.isInterpAttr() ||
4924 Op.isAttrChan()) {
4925 Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
4926 } else if (Op.isImmModifier()) {
4927 OptionalIdx[Op.getImmTy()] = I;
4928 } else {
4929 llvm_unreachable("unhandled operand type");
4933 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
4934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
4937 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4941 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4946 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
4947 OptionalImmIndexMap &OptionalIdx) {
4948 unsigned Opc = Inst.getOpcode();
4950 unsigned I = 1;
4951 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4952 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
4953 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
4956 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
4957 // This instruction has src modifiers
4958 for (unsigned E = Operands.size(); I != E; ++I) {
4959 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4960 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
4961 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
4962 } else if (Op.isImmModifier()) {
4963 OptionalIdx[Op.getImmTy()] = I;
4964 } else if (Op.isRegOrImm()) {
4965 Op.addRegOrImmOperands(Inst, 1);
4966 } else {
4967 llvm_unreachable("unhandled operand type");
4970 } else {
4971 // No src modifiers
4972 for (unsigned E = Operands.size(); I != E; ++I) {
4973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
4974 if (Op.isMod()) {
4975 OptionalIdx[Op.getImmTy()] = I;
4976 } else {
4977 Op.addRegOrImmOperands(Inst, 1);
4982 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
4983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
4986 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
4987 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
4990   // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
4991   // they have a src2 register operand that is tied to the dst operand;
4992   // we don't allow modifiers for this operand in the assembler, so src2_modifiers
4993   // should be 0.
4994 if (Opc == AMDGPU::V_MAC_F32_e64_si ||
4995 Opc == AMDGPU::V_MAC_F32_e64_vi ||
4996 Opc == AMDGPU::V_MAC_F16_e64_vi ||
4997 Opc == AMDGPU::V_FMAC_F32_e64_vi) {
4998 auto it = Inst.begin();
4999 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
5000 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
5001 ++it;
5002 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5006 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
5007 OptionalImmIndexMap OptionalIdx;
5008 cvtVOP3(Inst, Operands, OptionalIdx);
5011 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
5012 const OperandVector &Operands) {
5013 OptionalImmIndexMap OptIdx;
5014 const int Opc = Inst.getOpcode();
5015 const MCInstrDesc &Desc = MII.get(Opc);
5017 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
5019 cvtVOP3(Inst, Operands, OptIdx);
5021 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
5022 assert(!IsPacked);
5023 Inst.addOperand(Inst.getOperand(0));
5026   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
5027   // instruction, and then figure out where to actually put the modifiers.
5029 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
5031 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
5032 if (OpSelHiIdx != -1) {
5033 int DefaultVal = IsPacked ? -1 : 0;
5034 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
5035 DefaultVal);
5038 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
5039 if (NegLoIdx != -1) {
5040 assert(IsPacked);
5041 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
5042 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
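// Distribute the per-source op_sel / op_sel_hi / neg_lo / neg_hi bits parsed
// above into the corresponding srcN_modifiers operands below.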
5045 const int Ops[] = { AMDGPU::OpName::src0,
5046 AMDGPU::OpName::src1,
5047 AMDGPU::OpName::src2 };
5048 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
5049 AMDGPU::OpName::src1_modifiers,
5050 AMDGPU::OpName::src2_modifiers };
5052 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
5054 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
5055 unsigned OpSelHi = 0;
5056 unsigned NegLo = 0;
5057 unsigned NegHi = 0;
5059 if (OpSelHiIdx != -1) {
5060 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
5063 if (NegLoIdx != -1) {
5064 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
5065 NegLo = Inst.getOperand(NegLoIdx).getImm();
5066 NegHi = Inst.getOperand(NegHiIdx).getImm();
5069 for (int J = 0; J < 3; ++J) {
5070 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
5071 if (OpIdx == -1)
5072 break;
5074 uint32_t ModVal = 0;
5076 if ((OpSel & (1 << J)) != 0)
5077 ModVal |= SISrcMods::OP_SEL_0;
5079 if ((OpSelHi & (1 << J)) != 0)
5080 ModVal |= SISrcMods::OP_SEL_1;
5082 if ((NegLo & (1 << J)) != 0)
5083 ModVal |= SISrcMods::NEG;
5085 if ((NegHi & (1 << J)) != 0)
5086 ModVal |= SISrcMods::NEG_HI;
5088 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
5090 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
5094 //===----------------------------------------------------------------------===//
5095 // dpp
5096 //===----------------------------------------------------------------------===//
5098 bool AMDGPUOperand::isDPPCtrl() const {
5099 using namespace AMDGPU::DPP;
5101 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
5102 if (result) {
5103 int64_t Imm = getImm();
5104 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
5105 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
5106 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
5107 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
5108 (Imm == DppCtrl::WAVE_SHL1) ||
5109 (Imm == DppCtrl::WAVE_ROL1) ||
5110 (Imm == DppCtrl::WAVE_SHR1) ||
5111 (Imm == DppCtrl::WAVE_ROR1) ||
5112 (Imm == DppCtrl::ROW_MIRROR) ||
5113 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
5114 (Imm == DppCtrl::BCAST15) ||
5115 (Imm == DppCtrl::BCAST31);
5117 return false;
5120 bool AMDGPUOperand::isGPRIdxMode() const {
5121 return isImm() && isUInt<4>(getImm());
5124 bool AMDGPUOperand::isS16Imm() const {
5125 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
5128 bool AMDGPUOperand::isU16Imm() const {
5129 return isImm() && isUInt<16>(getImm());
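// dpp_ctrl forms recognized below: quad_perm:[a,b,c,d], row_shl:N, row_shr:N,
// row_ror:N (N in 1..15), wave_shl:1, wave_rol:1, wave_shr:1, wave_ror:1,
// row_mirror, row_half_mirror, and row_bcast:15 / row_bcast:31.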
5132 OperandMatchResultTy
5133 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
5134 using namespace AMDGPU::DPP;
5136 SMLoc S = Parser.getTok().getLoc();
5137 StringRef Prefix;
5138 int64_t Int;
5140 if (getLexer().getKind() == AsmToken::Identifier) {
5141 Prefix = Parser.getTok().getString();
5142 } else {
5143 return MatchOperand_NoMatch;
5146 if (Prefix == "row_mirror") {
5147 Int = DppCtrl::ROW_MIRROR;
5148 Parser.Lex();
5149 } else if (Prefix == "row_half_mirror") {
5150 Int = DppCtrl::ROW_HALF_MIRROR;
5151 Parser.Lex();
5152 } else {
5153 // Check to prevent parseDPPCtrlOps from eating invalid tokens
5154 if (Prefix != "quad_perm"
5155 && Prefix != "row_shl"
5156 && Prefix != "row_shr"
5157 && Prefix != "row_ror"
5158 && Prefix != "wave_shl"
5159 && Prefix != "wave_rol"
5160 && Prefix != "wave_shr"
5161 && Prefix != "wave_ror"
5162 && Prefix != "row_bcast") {
5163 return MatchOperand_NoMatch;
5166 Parser.Lex();
5167 if (getLexer().isNot(AsmToken::Colon))
5168 return MatchOperand_ParseFail;
5170 if (Prefix == "quad_perm") {
5171 // quad_perm:[%d,%d,%d,%d]
5172 Parser.Lex();
5173 if (getLexer().isNot(AsmToken::LBrac))
5174 return MatchOperand_ParseFail;
5175 Parser.Lex();
5177 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
5178 return MatchOperand_ParseFail;
5180 for (int i = 0; i < 3; ++i) {
5181 if (getLexer().isNot(AsmToken::Comma))
5182 return MatchOperand_ParseFail;
5183 Parser.Lex();
5185 int64_t Temp;
5186 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
5187 return MatchOperand_ParseFail;
5188 const int shift = i*2 + 2;
5189 Int += (Temp << shift);
5192 if (getLexer().isNot(AsmToken::RBrac))
5193 return MatchOperand_ParseFail;
5194 Parser.Lex();
5195 } else {
5196 // sel:%d
5197 Parser.Lex();
5198 if (getParser().parseAbsoluteExpression(Int))
5199 return MatchOperand_ParseFail;
5201 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
5202 Int |= DppCtrl::ROW_SHL0;
5203 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
5204 Int |= DppCtrl::ROW_SHR0;
5205 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
5206 Int |= DppCtrl::ROW_ROR0;
5207 } else if (Prefix == "wave_shl" && 1 == Int) {
5208 Int = DppCtrl::WAVE_SHL1;
5209 } else if (Prefix == "wave_rol" && 1 == Int) {
5210 Int = DppCtrl::WAVE_ROL1;
5211 } else if (Prefix == "wave_shr" && 1 == Int) {
5212 Int = DppCtrl::WAVE_SHR1;
5213 } else if (Prefix == "wave_ror" && 1 == Int) {
5214 Int = DppCtrl::WAVE_ROR1;
5215 } else if (Prefix == "row_bcast") {
5216 if (Int == 15) {
5217 Int = DppCtrl::BCAST15;
5218 } else if (Int == 31) {
5219 Int = DppCtrl::BCAST31;
5220 } else {
5221 return MatchOperand_ParseFail;
5223 } else {
5224 return MatchOperand_ParseFail;
5229 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
5230 return MatchOperand_Success;
5233 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
5234 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
5237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
5238 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
5241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
5242 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
5245 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
5246 OptionalImmIndexMap OptionalIdx;
5248 unsigned I = 1;
5249 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5250 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5251 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5254 // All DPP instructions with at least one source operand have a fake "old"
5255 // source at the beginning that's tied to the dst operand. Handle it here.
5256 if (Desc.getNumOperands() >= 2)
5257 Inst.addOperand(Inst.getOperand(0));
5259 for (unsigned E = Operands.size(); I != E; ++I) {
5260 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5261 // Add the register arguments
5262 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5263       // VOP2b (v_add_u32, v_sub_u32 ...) dpp forms use the "vcc" token.
5264       // Skip it.
5265       continue;
5266     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5267 Op.addRegWithFPInputModsOperands(Inst, 2);
5268 } else if (Op.isDPPCtrl()) {
5269 Op.addImmOperands(Inst, 1);
5270 } else if (Op.isImm()) {
5271 // Handle optional arguments
5272 OptionalIdx[Op.getImmTy()] = I;
5273 } else {
5274 llvm_unreachable("Invalid operand type");
5278 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
5279 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
5280 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
5283 //===----------------------------------------------------------------------===//
5284 // sdwa
5285 //===----------------------------------------------------------------------===//
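// SDWA selector operands are written as <prefix>:<value>, e.g. dst_sel:WORD_1
// or src0_sel:BYTE_3, with the accepted values listed in the StringSwitch
// below; dst_unused takes UNUSED_PAD, UNUSED_SEXT or UNUSED_PRESERVE.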
5287 OperandMatchResultTy
5288 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
5289 AMDGPUOperand::ImmTy Type) {
5290 using namespace llvm::AMDGPU::SDWA;
5292 SMLoc S = Parser.getTok().getLoc();
5293 StringRef Value;
5294 OperandMatchResultTy res;
5296 res = parseStringWithPrefix(Prefix, Value);
5297 if (res != MatchOperand_Success) {
5298 return res;
5301 int64_t Int;
5302 Int = StringSwitch<int64_t>(Value)
5303 .Case("BYTE_0", SdwaSel::BYTE_0)
5304 .Case("BYTE_1", SdwaSel::BYTE_1)
5305 .Case("BYTE_2", SdwaSel::BYTE_2)
5306 .Case("BYTE_3", SdwaSel::BYTE_3)
5307 .Case("WORD_0", SdwaSel::WORD_0)
5308 .Case("WORD_1", SdwaSel::WORD_1)
5309 .Case("DWORD", SdwaSel::DWORD)
5310 .Default(0xffffffff);
5311 Parser.Lex(); // eat last token
5313 if (Int == 0xffffffff) {
5314 return MatchOperand_ParseFail;
5317 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
5318 return MatchOperand_Success;
5321 OperandMatchResultTy
5322 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
5323 using namespace llvm::AMDGPU::SDWA;
5325 SMLoc S = Parser.getTok().getLoc();
5326 StringRef Value;
5327 OperandMatchResultTy res;
5329 res = parseStringWithPrefix("dst_unused", Value);
5330 if (res != MatchOperand_Success) {
5331 return res;
5334 int64_t Int;
5335 Int = StringSwitch<int64_t>(Value)
5336 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
5337 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
5338 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
5339 .Default(0xffffffff);
5340 Parser.Lex(); // eat last token
5342 if (Int == 0xffffffff) {
5343 return MatchOperand_ParseFail;
5346 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
5347 return MatchOperand_Success;
5350 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
5351 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
5354 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
5355 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
5358 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
5359 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
5362 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
5363 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
5366 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
5367 uint64_t BasicInstType, bool skipVcc) {
5368 using namespace llvm::AMDGPU::SDWA;
5370 OptionalImmIndexMap OptionalIdx;
5371 bool skippedVcc = false;
5373 unsigned I = 1;
5374 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5375 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5376 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5379 for (unsigned E = Operands.size(); I != E; ++I) {
5380 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5381 if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
5382       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa forms use the "vcc" token as dst.
5383       // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
5384       // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
5385       // Skip VCC only if we didn't skip it on the previous iteration.
5386 if (BasicInstType == SIInstrFlags::VOP2 &&
5387 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
5388 skippedVcc = true;
5389 continue;
5390 } else if (BasicInstType == SIInstrFlags::VOPC &&
5391 Inst.getNumOperands() == 0) {
5392 skippedVcc = true;
5393 continue;
5396 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
5397 Op.addRegOrImmWithInputModsOperands(Inst, 2);
5398 } else if (Op.isImm()) {
5399 // Handle optional arguments
5400 OptionalIdx[Op.getImmTy()] = I;
5401 } else {
5402 llvm_unreachable("Invalid operand type");
5404 skippedVcc = false;
5407 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
5408 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
5409     // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
5410 switch (BasicInstType) {
5411 case SIInstrFlags::VOP1:
5412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5413 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5418 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5419 break;
5421 case SIInstrFlags::VOP2:
5422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5423 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
5424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
5426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
5427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5430 break;
5432 case SIInstrFlags::VOPC:
5433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
5434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
5435 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
5436 break;
5438 default:
5439 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
5443   // Special case for v_mac_{f16, f32}:
5444   // they have a src2 register operand that is tied to the dst operand.
5445 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
5446 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
5447 auto it = Inst.begin();
5448 std::advance(
5449 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
5450 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
5454 /// Force static initialization.
5455 extern "C" void LLVMInitializeAMDGPUAsmParser() {
5456 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
5457 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
5460 #define GET_REGISTER_MATCHER
5461 #define GET_MATCHER_IMPLEMENTATION
5462 #define GET_MNEMONIC_SPELL_CHECKER
5463 #include "AMDGPUGenAsmMatcher.inc"
5465 // This function should be defined after the auto-generated include so that the
5466 // MatchClassKind enum is defined.
5467 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
5468 unsigned Kind) {
5469   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
5470   // But MatchInstructionImpl() expects to see a token and fails to validate the
5471   // operand. This method checks whether we were given an immediate operand but
5472   // expected the corresponding token.
5473 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
5474 switch (Kind) {
5475 case MCK_addr64:
5476 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
5477 case MCK_gds:
5478 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
5479 case MCK_lds:
5480 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
5481 case MCK_glc:
5482 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
5483 case MCK_idxen:
5484 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
5485 case MCK_offen:
5486 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
5487 case MCK_SSrcB32:
5488 // When operands have expression values, they will return true for isToken,
5489 // because it is not possible to distinguish between a token and an
5490 // expression at parse time. MatchInstructionImpl() will always try to
5491 // match an operand as a token, when isToken returns true, and when the
5492 // name of the expression is not a valid token, the match will fail,
5493 // so we need to handle it here.
5494 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
5495 case MCK_SSrcF32:
5496 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
5497 case MCK_SoppBrTarget:
5498 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
5499 case MCK_VReg32OrOff:
5500 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
5501 case MCK_InterpSlot:
5502 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
5503 case MCK_Attr:
5504 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
5505 case MCK_AttrChan:
5506 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
5507 default:
5508 return Match_InvalidOperand;