[AMDGPU][AsmParser][NFC] Translate parsed MIMG instructions to MCInsts automatically.
llvm-project.git: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
47 namespace {
49 class AMDGPUAsmParser;
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
57 class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression
63 } Kind;
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
68 public:
69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
79 bool hasFPModifiers() const { return Abs || Neg; }
80 bool hasIntModifiers() const { return Sext; }
81 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 int64_t getFPModifiersOperand() const {
84 int64_t Operand = 0;
85 Operand |= Abs ? SISrcMods::ABS : 0u;
86 Operand |= Neg ? SISrcMods::NEG : 0u;
87 return Operand;
90 int64_t getIntModifiersOperand() const {
91 int64_t Operand = 0;
92 Operand |= Sext ? SISrcMods::SEXT : 0u;
93 return Operand;
96 int64_t getModifiersOperand() const {
97 assert(!(hasFPModifiers() && hasIntModifiers())
98 && "fp and int modifiers should not be used simultaneously");
99 if (hasFPModifiers()) {
100 return getFPModifiersOperand();
101 } else if (hasIntModifiers()) {
102 return getIntModifiersOperand();
103 } else {
104 return 0;
108 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
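// Illustrative examples: an operand written as "-|v0|" sets both Neg and Abs,
// so getModifiersOperand() yields SISrcMods::NEG | SISrcMods::ABS, while
// "sext(v0)" sets only Sext and is encoded via getIntModifiersOperand().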
111 enum ImmTy {
112 ImmTyNone,
113 ImmTyGDS,
114 ImmTyLDS,
115 ImmTyOffen,
116 ImmTyIdxen,
117 ImmTyAddr64,
118 ImmTyOffset,
119 ImmTyInstOffset,
120 ImmTyOffset0,
121 ImmTyOffset1,
122 ImmTySMEMOffsetMod,
123 ImmTyCPol,
124 ImmTyTFE,
125 ImmTyD16,
126 ImmTyClampSI,
127 ImmTyOModSI,
128 ImmTySDWADstSel,
129 ImmTySDWASrc0Sel,
130 ImmTySDWASrc1Sel,
131 ImmTySDWADstUnused,
132 ImmTyDMask,
133 ImmTyDim,
134 ImmTyUNorm,
135 ImmTyDA,
136 ImmTyR128A16,
137 ImmTyA16,
138 ImmTyLWE,
139 ImmTyExpTgt,
140 ImmTyExpCompr,
141 ImmTyExpVM,
142 ImmTyFORMAT,
143 ImmTyHwreg,
144 ImmTyOff,
145 ImmTySendMsg,
146 ImmTyInterpSlot,
147 ImmTyInterpAttr,
148 ImmTyInterpAttrChan,
149 ImmTyOpSel,
150 ImmTyOpSelHi,
151 ImmTyNegLo,
152 ImmTyNegHi,
153 ImmTyDPP8,
154 ImmTyDppCtrl,
155 ImmTyDppRowMask,
156 ImmTyDppBankMask,
157 ImmTyDppBoundCtrl,
158 ImmTyDppFI,
159 ImmTySwizzle,
160 ImmTyGprIdxMode,
161 ImmTyHigh,
162 ImmTyBLGP,
163 ImmTyCBSZ,
164 ImmTyABID,
165 ImmTyEndpgm,
166 ImmTyWaitVDST,
167 ImmTyWaitEXP,
170 // Immediate operand kind.
171 // It helps to identify the location of an offending operand after an error.
172 // Note that regular literals and mandatory literals (KImm) must be handled
173 // differently. When looking for an offending operand, we should usually
174 // ignore mandatory literals because they are part of the instruction and
175 // cannot be changed. Report the location of mandatory literals only for VOPD,
176 // when both OpX and OpY have a KImm and there are no other literals.
177 enum ImmKindTy {
178 ImmKindTyNone,
179 ImmKindTyLiteral,
180 ImmKindTyMandatoryLiteral,
181 ImmKindTyConst,
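// For example, the trailing 32-bit constant of v_fmamk_f32 is a mandatory
// literal (ImmKindTyMandatoryLiteral), whereas a non-inlinable constant used as
// a regular source operand is ImmKindTyLiteral.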
184 private:
185 struct TokOp {
186 const char *Data;
187 unsigned Length;
190 struct ImmOp {
191 int64_t Val;
192 ImmTy Type;
193 bool IsFPImm;
194 mutable ImmKindTy Kind;
195 Modifiers Mods;
198 struct RegOp {
199 unsigned RegNo;
200 Modifiers Mods;
203 union {
204 TokOp Tok;
205 ImmOp Imm;
206 RegOp Reg;
207 const MCExpr *Expr;
210 public:
211 bool isToken() const override { return Kind == Token; }
213 bool isSymbolRefExpr() const {
214 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
217 bool isImm() const override {
218 return Kind == Immediate;
221 void setImmKindNone() const {
222 assert(isImm());
223 Imm.Kind = ImmKindTyNone;
226 void setImmKindLiteral() const {
227 assert(isImm());
228 Imm.Kind = ImmKindTyLiteral;
231 void setImmKindMandatoryLiteral() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyMandatoryLiteral;
236 void setImmKindConst() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyConst;
241 bool IsImmKindLiteral() const {
242 return isImm() && Imm.Kind == ImmKindTyLiteral;
245 bool IsImmKindMandatoryLiteral() const {
246 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
249 bool isImmKindConst() const {
250 return isImm() && Imm.Kind == ImmKindTyConst;
253 bool isInlinableImm(MVT type) const;
254 bool isLiteralImm(MVT type) const;
256 bool isRegKind() const {
257 return Kind == Register;
260 bool isReg() const override {
261 return isRegKind() && !hasModifiers();
264 bool isRegOrInline(unsigned RCID, MVT type) const {
265 return isRegClass(RCID) || isInlinableImm(type);
268 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269 return isRegOrInline(RCID, type) || isLiteralImm(type);
272 bool isRegOrImmWithInt16InputMods() const {
273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
276 bool isRegOrImmWithInt32InputMods() const {
277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
280 bool isRegOrInlineImmWithInt16InputMods() const {
281 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
284 bool isRegOrInlineImmWithInt32InputMods() const {
285 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
288 bool isRegOrImmWithInt64InputMods() const {
289 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
292 bool isRegOrImmWithFP16InputMods() const {
293 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
296 bool isRegOrImmWithFP32InputMods() const {
297 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
300 bool isRegOrImmWithFP64InputMods() const {
301 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
304 bool isRegOrInlineImmWithFP16InputMods() const {
305 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
308 bool isRegOrInlineImmWithFP32InputMods() const {
309 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
313 bool isVReg() const {
314 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315 isRegClass(AMDGPU::VReg_64RegClassID) ||
316 isRegClass(AMDGPU::VReg_96RegClassID) ||
317 isRegClass(AMDGPU::VReg_128RegClassID) ||
318 isRegClass(AMDGPU::VReg_160RegClassID) ||
319 isRegClass(AMDGPU::VReg_192RegClassID) ||
320 isRegClass(AMDGPU::VReg_256RegClassID) ||
321 isRegClass(AMDGPU::VReg_512RegClassID) ||
322 isRegClass(AMDGPU::VReg_1024RegClassID);
325 bool isVReg32() const {
326 return isRegClass(AMDGPU::VGPR_32RegClassID);
329 bool isVReg32OrOff() const {
330 return isOff() || isVReg32();
333 bool isNull() const {
334 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
337 bool isVRegWithInputMods() const;
338 bool isT16VRegWithInputMods() const;
340 bool isSDWAOperand(MVT type) const;
341 bool isSDWAFP16Operand() const;
342 bool isSDWAFP32Operand() const;
343 bool isSDWAInt16Operand() const;
344 bool isSDWAInt32Operand() const;
346 bool isImmTy(ImmTy ImmT) const {
347 return isImm() && Imm.Type == ImmT;
350 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
352 bool isImmModifier() const {
353 return isImm() && Imm.Type != ImmTyNone;
356 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
357 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
358 bool isDMask() const { return isImmTy(ImmTyDMask); }
359 bool isDim() const { return isImmTy(ImmTyDim); }
360 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
361 bool isDA() const { return isImmTy(ImmTyDA); }
362 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
363 bool isA16() const { return isImmTy(ImmTyA16); }
364 bool isLWE() const { return isImmTy(ImmTyLWE); }
365 bool isOff() const { return isImmTy(ImmTyOff); }
366 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
367 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
368 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
369 bool isOffen() const { return isImmTy(ImmTyOffen); }
370 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
371 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
372 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
373 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
374 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
375 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
376 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
377 bool isGDS() const { return isImmTy(ImmTyGDS); }
378 bool isLDS() const { return isImmTy(ImmTyLDS); }
379 bool isCPol() const { return isImmTy(ImmTyCPol); }
380 bool isTFE() const { return isImmTy(ImmTyTFE); }
381 bool isD16() const { return isImmTy(ImmTyD16); }
382 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
383 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
384 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
385 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
386 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
387 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
388 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
389 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
390 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
391 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
392 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
393 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
394 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
395 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
396 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
397 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
398 bool isHigh() const { return isImmTy(ImmTyHigh); }
400 bool isRegOrImm() const {
401 return isReg() || isImm();
404 bool isRegClass(unsigned RCID) const;
406 bool isInlineValue() const;
408 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
409 return isRegOrInline(RCID, type) && !hasModifiers();
412 bool isSCSrcB16() const {
413 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416 bool isSCSrcV2B16() const {
417 return isSCSrcB16();
420 bool isSCSrcB32() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424 bool isSCSrcB64() const {
425 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428 bool isBoolReg() const;
430 bool isSCSrcF16() const {
431 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434 bool isSCSrcV2F16() const {
435 return isSCSrcF16();
438 bool isSCSrcF32() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442 bool isSCSrcF64() const {
443 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446 bool isSSrcB32() const {
447 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450 bool isSSrcB16() const {
451 return isSCSrcB16() || isLiteralImm(MVT::i16);
454 bool isSSrcV2B16() const {
455 llvm_unreachable("cannot happen");
456 return isSSrcB16();
459 bool isSSrcB64() const {
460 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
461 // See isVSrc64().
462 return isSCSrcB64() || isLiteralImm(MVT::i64);
465 bool isSSrcF32() const {
466 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469 bool isSSrcF64() const {
470 return isSCSrcB64() || isLiteralImm(MVT::f64);
473 bool isSSrcF16() const {
474 return isSCSrcB16() || isLiteralImm(MVT::f16);
477 bool isSSrcV2F16() const {
478 llvm_unreachable("cannot happen");
479 return isSSrcF16();
482 bool isSSrcV2FP32() const {
483 llvm_unreachable("cannot happen");
484 return isSSrcF32();
487 bool isSCSrcV2FP32() const {
488 llvm_unreachable("cannot happen");
489 return isSCSrcF32();
492 bool isSSrcV2INT32() const {
493 llvm_unreachable("cannot happen");
494 return isSSrcB32();
497 bool isSCSrcV2INT32() const {
498 llvm_unreachable("cannot happen");
499 return isSCSrcB32();
502 bool isSSrcOrLdsB32() const {
503 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
504 isLiteralImm(MVT::i32) || isExpr();
507 bool isVCSrcB32() const {
508 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511 bool isVCSrcB64() const {
512 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515 bool isVCSrcTB16_Lo128() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
519 bool isVCSrcB16() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
523 bool isVCSrcV2B16() const {
524 return isVCSrcB16();
527 bool isVCSrcF32() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
531 bool isVCSrcF64() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
535 bool isVCSrcTF16_Lo128() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
539 bool isVCSrcF16() const {
540 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
543 bool isVCSrcV2F16() const {
544 return isVCSrcF16();
547 bool isVSrcB32() const {
548 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
551 bool isVSrcB64() const {
552 return isVCSrcF64() || isLiteralImm(MVT::i64);
555 bool isVSrcTB16_Lo128() const {
556 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
559 bool isVSrcB16() const {
560 return isVCSrcB16() || isLiteralImm(MVT::i16);
563 bool isVSrcV2B16() const {
564 return isVSrcB16() || isLiteralImm(MVT::v2i16);
567 bool isVCSrcV2FP32() const {
568 return isVCSrcF64();
571 bool isVSrcV2FP32() const {
572 return isVSrcF64() || isLiteralImm(MVT::v2f32);
575 bool isVCSrcV2INT32() const {
576 return isVCSrcB64();
579 bool isVSrcV2INT32() const {
580 return isVSrcB64() || isLiteralImm(MVT::v2i32);
583 bool isVSrcF32() const {
584 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
587 bool isVSrcF64() const {
588 return isVCSrcF64() || isLiteralImm(MVT::f64);
591 bool isVSrcTF16_Lo128() const {
592 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
595 bool isVSrcF16() const {
596 return isVCSrcF16() || isLiteralImm(MVT::f16);
599 bool isVSrcV2F16() const {
600 return isVSrcF16() || isLiteralImm(MVT::v2f16);
603 bool isVISrcB32() const {
604 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
607 bool isVISrcB16() const {
608 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
611 bool isVISrcV2B16() const {
612 return isVISrcB16();
615 bool isVISrcF32() const {
616 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
619 bool isVISrcF16() const {
620 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
623 bool isVISrcV2F16() const {
624 return isVISrcF16() || isVISrcB32();
627 bool isVISrc_64B64() const {
628 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
631 bool isVISrc_64F64() const {
632 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
635 bool isVISrc_64V2FP32() const {
636 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
639 bool isVISrc_64V2INT32() const {
640 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
643 bool isVISrc_256B64() const {
644 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
647 bool isVISrc_256F64() const {
648 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
651 bool isVISrc_128B16() const {
652 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
655 bool isVISrc_128V2B16() const {
656 return isVISrc_128B16();
659 bool isVISrc_128B32() const {
660 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
663 bool isVISrc_128F32() const {
664 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
667 bool isVISrc_256V2FP32() const {
668 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
671 bool isVISrc_256V2INT32() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
675 bool isVISrc_512B32() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
679 bool isVISrc_512B16() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
683 bool isVISrc_512V2B16() const {
684 return isVISrc_512B16();
687 bool isVISrc_512F32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
691 bool isVISrc_512F16() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
695 bool isVISrc_512V2F16() const {
696 return isVISrc_512F16() || isVISrc_512B32();
699 bool isVISrc_1024B32() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
703 bool isVISrc_1024B16() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
707 bool isVISrc_1024V2B16() const {
708 return isVISrc_1024B16();
711 bool isVISrc_1024F32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
715 bool isVISrc_1024F16() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
719 bool isVISrc_1024V2F16() const {
720 return isVISrc_1024F16() || isVISrc_1024B32();
723 bool isAISrcB32() const {
724 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
727 bool isAISrcB16() const {
728 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
731 bool isAISrcV2B16() const {
732 return isAISrcB16();
735 bool isAISrcF32() const {
736 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
739 bool isAISrcF16() const {
740 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
743 bool isAISrcV2F16() const {
744 return isAISrcF16() || isAISrcB32();
747 bool isAISrc_64B64() const {
748 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
751 bool isAISrc_64F64() const {
752 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
755 bool isAISrc_128B32() const {
756 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
759 bool isAISrc_128B16() const {
760 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
763 bool isAISrc_128V2B16() const {
764 return isAISrc_128B16();
767 bool isAISrc_128F32() const {
768 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
771 bool isAISrc_128F16() const {
772 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
775 bool isAISrc_128V2F16() const {
776 return isAISrc_128F16() || isAISrc_128B32();
779 bool isVISrc_128F16() const {
780 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
783 bool isVISrc_128V2F16() const {
784 return isVISrc_128F16() || isVISrc_128B32();
787 bool isAISrc_256B64() const {
788 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
791 bool isAISrc_256F64() const {
792 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
795 bool isAISrc_512B32() const {
796 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
799 bool isAISrc_512B16() const {
800 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
803 bool isAISrc_512V2B16() const {
804 return isAISrc_512B16();
807 bool isAISrc_512F32() const {
808 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
811 bool isAISrc_512F16() const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
815 bool isAISrc_512V2F16() const {
816 return isAISrc_512F16() || isAISrc_512B32();
819 bool isAISrc_1024B32() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
823 bool isAISrc_1024B16() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
827 bool isAISrc_1024V2B16() const {
828 return isAISrc_1024B16();
831 bool isAISrc_1024F32() const {
832 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
835 bool isAISrc_1024F16() const {
836 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
839 bool isAISrc_1024V2F16() const {
840 return isAISrc_1024F16() || isAISrc_1024B32();
843 bool isKImmFP32() const {
844 return isLiteralImm(MVT::f32);
847 bool isKImmFP16() const {
848 return isLiteralImm(MVT::f16);
851 bool isMem() const override {
852 return false;
855 bool isExpr() const {
856 return Kind == Expression;
859 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
861 bool isSWaitCnt() const;
862 bool isDepCtr() const;
863 bool isSDelayALU() const;
864 bool isHwreg() const;
865 bool isSendMsg() const;
866 bool isSwizzle() const;
867 bool isSMRDOffset8() const;
868 bool isSMEMOffset() const;
869 bool isSMRDLiteralOffset() const;
870 bool isDPP8() const;
871 bool isDPPCtrl() const;
872 bool isBLGP() const;
873 bool isCBSZ() const;
874 bool isABID() const;
875 bool isGPRIdxMode() const;
876 bool isS16Imm() const;
877 bool isU16Imm() const;
878 bool isEndpgm() const;
879 bool isWaitVDST() const;
880 bool isWaitEXP() const;
882 StringRef getToken() const {
883 assert(isToken());
884 return StringRef(Tok.Data, Tok.Length);
887 int64_t getImm() const {
888 assert(isImm());
889 return Imm.Val;
892 void setImm(int64_t Val) {
893 assert(isImm());
894 Imm.Val = Val;
897 ImmTy getImmTy() const {
898 assert(isImm());
899 return Imm.Type;
902 unsigned getReg() const override {
903 assert(isRegKind());
904 return Reg.RegNo;
907 SMLoc getStartLoc() const override {
908 return StartLoc;
911 SMLoc getEndLoc() const override {
912 return EndLoc;
915 SMRange getLocRange() const {
916 return SMRange(StartLoc, EndLoc);
919 Modifiers getModifiers() const {
920 assert(isRegKind() || isImmTy(ImmTyNone));
921 return isRegKind() ? Reg.Mods : Imm.Mods;
924 void setModifiers(Modifiers Mods) {
925 assert(isRegKind() || isImmTy(ImmTyNone));
926 if (isRegKind())
927 Reg.Mods = Mods;
928 else
929 Imm.Mods = Mods;
932 bool hasModifiers() const {
933 return getModifiers().hasModifiers();
936 bool hasFPModifiers() const {
937 return getModifiers().hasFPModifiers();
940 bool hasIntModifiers() const {
941 return getModifiers().hasIntModifiers();
944 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
946 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
948 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
950 void addRegOperands(MCInst &Inst, unsigned N) const;
952 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
953 if (isRegKind())
954 addRegOperands(Inst, N);
955 else
956 addImmOperands(Inst, N);
959 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
960 Modifiers Mods = getModifiers();
961 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
962 if (isRegKind()) {
963 addRegOperands(Inst, N);
964 } else {
965 addImmOperands(Inst, N, false);
969 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970 assert(!hasIntModifiers());
971 addRegOrImmWithInputModsOperands(Inst, N);
974 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975 assert(!hasFPModifiers());
976 addRegOrImmWithInputModsOperands(Inst, N);
979 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
980 Modifiers Mods = getModifiers();
981 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
982 assert(isRegKind());
983 addRegOperands(Inst, N);
986 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
987 assert(!hasIntModifiers());
988 addRegWithInputModsOperands(Inst, N);
991 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
992 assert(!hasFPModifiers());
993 addRegWithInputModsOperands(Inst, N);
996 static void printImmTy(raw_ostream& OS, ImmTy Type) {
997 switch (Type) {
998 case ImmTyNone: OS << "None"; break;
999 case ImmTyGDS: OS << "GDS"; break;
1000 case ImmTyLDS: OS << "LDS"; break;
1001 case ImmTyOffen: OS << "Offen"; break;
1002 case ImmTyIdxen: OS << "Idxen"; break;
1003 case ImmTyAddr64: OS << "Addr64"; break;
1004 case ImmTyOffset: OS << "Offset"; break;
1005 case ImmTyInstOffset: OS << "InstOffset"; break;
1006 case ImmTyOffset0: OS << "Offset0"; break;
1007 case ImmTyOffset1: OS << "Offset1"; break;
1008 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1009 case ImmTyCPol: OS << "CPol"; break;
1010 case ImmTyTFE: OS << "TFE"; break;
1011 case ImmTyD16: OS << "D16"; break;
1012 case ImmTyFORMAT: OS << "FORMAT"; break;
1013 case ImmTyClampSI: OS << "ClampSI"; break;
1014 case ImmTyOModSI: OS << "OModSI"; break;
1015 case ImmTyDPP8: OS << "DPP8"; break;
1016 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1017 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1018 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1019 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1020 case ImmTyDppFI: OS << "DppFI"; break;
1021 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1022 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1023 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1024 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1025 case ImmTyDMask: OS << "DMask"; break;
1026 case ImmTyDim: OS << "Dim"; break;
1027 case ImmTyUNorm: OS << "UNorm"; break;
1028 case ImmTyDA: OS << "DA"; break;
1029 case ImmTyR128A16: OS << "R128A16"; break;
1030 case ImmTyA16: OS << "A16"; break;
1031 case ImmTyLWE: OS << "LWE"; break;
1032 case ImmTyOff: OS << "Off"; break;
1033 case ImmTyExpTgt: OS << "ExpTgt"; break;
1034 case ImmTyExpCompr: OS << "ExpCompr"; break;
1035 case ImmTyExpVM: OS << "ExpVM"; break;
1036 case ImmTyHwreg: OS << "Hwreg"; break;
1037 case ImmTySendMsg: OS << "SendMsg"; break;
1038 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1039 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1040 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1041 case ImmTyOpSel: OS << "OpSel"; break;
1042 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1043 case ImmTyNegLo: OS << "NegLo"; break;
1044 case ImmTyNegHi: OS << "NegHi"; break;
1045 case ImmTySwizzle: OS << "Swizzle"; break;
1046 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1047 case ImmTyHigh: OS << "High"; break;
1048 case ImmTyBLGP: OS << "BLGP"; break;
1049 case ImmTyCBSZ: OS << "CBSZ"; break;
1050 case ImmTyABID: OS << "ABID"; break;
1051 case ImmTyEndpgm: OS << "Endpgm"; break;
1052 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1053 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1057 void print(raw_ostream &OS) const override {
1058 switch (Kind) {
1059 case Register:
1060 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1061 break;
1062 case Immediate:
1063 OS << '<' << getImm();
1064 if (getImmTy() != ImmTyNone) {
1065 OS << " type: "; printImmTy(OS, getImmTy());
1067 OS << " mods: " << Imm.Mods << '>';
1068 break;
1069 case Token:
1070 OS << '\'' << getToken() << '\'';
1071 break;
1072 case Expression:
1073 OS << "<expr " << *Expr << '>';
1074 break;
1078 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1079 int64_t Val, SMLoc Loc,
1080 ImmTy Type = ImmTyNone,
1081 bool IsFPImm = false) {
1082 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1083 Op->Imm.Val = Val;
1084 Op->Imm.IsFPImm = IsFPImm;
1085 Op->Imm.Kind = ImmKindTyNone;
1086 Op->Imm.Type = Type;
1087 Op->Imm.Mods = Modifiers();
1088 Op->StartLoc = Loc;
1089 Op->EndLoc = Loc;
1090 return Op;
1093 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1094 StringRef Str, SMLoc Loc,
1095 bool HasExplicitEncodingSize = true) {
1096 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1097 Res->Tok.Data = Str.data();
1098 Res->Tok.Length = Str.size();
1099 Res->StartLoc = Loc;
1100 Res->EndLoc = Loc;
1101 return Res;
1104 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1105 unsigned RegNo, SMLoc S,
1106 SMLoc E) {
1107 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1108 Op->Reg.RegNo = RegNo;
1109 Op->Reg.Mods = Modifiers();
1110 Op->StartLoc = S;
1111 Op->EndLoc = E;
1112 return Op;
1115 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1116 const class MCExpr *Expr, SMLoc S) {
1117 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1118 Op->Expr = Expr;
1119 Op->StartLoc = S;
1120 Op->EndLoc = S;
1121 return Op;
1125 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1126 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1127 return OS;
1130 //===----------------------------------------------------------------------===//
1131 // AsmParser
1132 //===----------------------------------------------------------------------===//
1134 // Holds info related to the current kernel, e.g. count of SGPRs used.
1135 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
1136 // next .amdgpu_hsa_kernel directive or at EOF.
1137 class KernelScopeInfo {
1138 int SgprIndexUnusedMin = -1;
1139 int VgprIndexUnusedMin = -1;
1140 int AgprIndexUnusedMin = -1;
1141 MCContext *Ctx = nullptr;
1142 MCSubtargetInfo const *MSTI = nullptr;
1144 void usesSgprAt(int i) {
1145 if (i >= SgprIndexUnusedMin) {
1146 SgprIndexUnusedMin = ++i;
1147 if (Ctx) {
1148 MCSymbol* const Sym =
1149 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1150 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1155 void usesVgprAt(int i) {
1156 if (i >= VgprIndexUnusedMin) {
1157 VgprIndexUnusedMin = ++i;
1158 if (Ctx) {
1159 MCSymbol* const Sym =
1160 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1161 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1162 VgprIndexUnusedMin);
1163 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1168 void usesAgprAt(int i) {
1169 // The instruction will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction anyway.
1170 if (!hasMAIInsts(*MSTI))
1171 return;
1173 if (i >= AgprIndexUnusedMin) {
1174 AgprIndexUnusedMin = ++i;
1175 if (Ctx) {
1176 MCSymbol* const Sym =
1177 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1178 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1180 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1181 MCSymbol* const vSym =
1182 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1183 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1184 VgprIndexUnusedMin);
1185 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1190 public:
1191 KernelScopeInfo() = default;
1193 void initialize(MCContext &Context) {
1194 Ctx = &Context;
1195 MSTI = Ctx->getSubtargetInfo();
1197 usesSgprAt(SgprIndexUnusedMin = -1);
1198 usesVgprAt(VgprIndexUnusedMin = -1);
1199 if (hasMAIInsts(*MSTI)) {
1200 usesAgprAt(AgprIndexUnusedMin = -1);
1204 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1205 unsigned RegWidth) {
1206 switch (RegKind) {
1207 case IS_SGPR:
1208 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209 break;
1210 case IS_AGPR:
1211 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212 break;
1213 case IS_VGPR:
1214 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1215 break;
1216 default:
1217 break;
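// usesRegister example: a reference to v[4:7] (RegWidth == 128) calls
// usesVgprAt(7), marking VGPRs up to index 7 as used, so .kernel.vgpr_count
// becomes at least 8.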
1222 class AMDGPUAsmParser : public MCTargetAsmParser {
1223 MCAsmParser &Parser;
1225 unsigned ForcedEncodingSize = 0;
1226 bool ForcedDPP = false;
1227 bool ForcedSDWA = false;
1228 KernelScopeInfo KernelScope;
1230 /// @name Auto-generated Match Functions
1231 /// {
1233 #define GET_ASSEMBLER_HEADER
1234 #include "AMDGPUGenAsmMatcher.inc"
1236 /// }
1238 private:
1239 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1240 bool OutOfRangeError(SMRange Range);
1241 /// Calculate VGPR/SGPR blocks required for given target, reserved
1242 /// registers, and user-specified NextFreeXGPR values.
1244 /// \param Features [in] Target features, used for bug corrections.
1245 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1246 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1247 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1248 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1249 /// descriptor field, if valid.
1250 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1251 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1252 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1253 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1254 /// \param VGPRBlocks [out] Result VGPR block count.
1255 /// \param SGPRBlocks [out] Result SGPR block count.
1256 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1257 bool FlatScrUsed, bool XNACKUsed,
1258 std::optional<bool> EnableWavefrontSize32,
1259 unsigned NextFreeVGPR, SMRange VGPRRange,
1260 unsigned NextFreeSGPR, SMRange SGPRRange,
1261 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1262 bool ParseDirectiveAMDGCNTarget();
1263 bool ParseDirectiveAMDHSAKernel();
1264 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1265 bool ParseDirectiveHSACodeObjectVersion();
1266 bool ParseDirectiveHSACodeObjectISA();
1267 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1268 bool ParseDirectiveAMDKernelCodeT();
1269 // TODO: Possibly make subtargetHasRegister const.
1270 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1271 bool ParseDirectiveAMDGPUHsaKernel();
1273 bool ParseDirectiveISAVersion();
1274 bool ParseDirectiveHSAMetadata();
1275 bool ParseDirectivePALMetadataBegin();
1276 bool ParseDirectivePALMetadata();
1277 bool ParseDirectiveAMDGPULDS();
1279 /// Common code to parse out a block of text (typically YAML) between start and
1280 /// end directives.
1281 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1282 const char *AssemblerDirectiveEnd,
1283 std::string &CollectString);
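// Used, for example, by ParseDirectiveHSAMetadata and
// ParseDirectivePALMetadataBegin, which collect a metadata blob between
// matching begin/end directives.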
1285 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1286 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1287 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1288 unsigned &RegNum, unsigned &RegWidth,
1289 bool RestoreOnFailure = false);
1290 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1291 unsigned &RegNum, unsigned &RegWidth,
1292 SmallVectorImpl<AsmToken> &Tokens);
1293 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1294 unsigned &RegWidth,
1295 SmallVectorImpl<AsmToken> &Tokens);
1296 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1297 unsigned &RegWidth,
1298 SmallVectorImpl<AsmToken> &Tokens);
1299 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1300 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1301 bool ParseRegRange(unsigned& Num, unsigned& Width);
1302 unsigned getRegularReg(RegisterKind RegKind,
1303 unsigned RegNum,
1304 unsigned RegWidth,
1305 SMLoc Loc);
1307 bool isRegister();
1308 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1309 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1310 void initializeGprCountSymbol(RegisterKind RegKind);
1311 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1312 unsigned RegWidth);
1313 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1314 bool IsAtomic);
1315 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1316 bool IsGdsHardcoded);
1318 public:
1319 enum AMDGPUMatchResultTy {
1320 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1322 enum OperandMode {
1323 OperandMode_Default,
1324 OperandMode_NSA,
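// OperandMode_NSA is used when parsing MIMG non-sequential-address (NSA) forms,
// where the address is written as a list of individual VGPRs rather than a
// single contiguous register tuple.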
1327 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1329 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1330 const MCInstrInfo &MII,
1331 const MCTargetOptions &Options)
1332 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1333 MCAsmParserExtension::Initialize(Parser);
1335 if (getFeatureBits().none()) {
1336 // Set default features.
1337 copySTI().ToggleFeature("southern-islands");
1340 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1343 // TODO: make these pre-defined variables read-only.
1344 // Currently there is no suitable machinery in core llvm-mc for this.
1345 // MCSymbol::isRedefinable is intended for another purpose, and
1346 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1347 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1348 MCContext &Ctx = getContext();
1349 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1350 MCSymbol *Sym =
1351 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1352 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1353 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1355 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1357 } else {
1358 MCSymbol *Sym =
1359 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1360 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1361 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1362 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1363 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1364 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
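// With -mcpu=gfx900, for instance, Major/Minor/Stepping are 9/0/0, so the
// version symbols above evaluate to those constants.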
1366 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1367 initializeGprCountSymbol(IS_VGPR);
1368 initializeGprCountSymbol(IS_SGPR);
1369 } else
1370 KernelScope.initialize(getContext());
1374 bool hasMIMG_R128() const {
1375 return AMDGPU::hasMIMG_R128(getSTI());
1378 bool hasPackedD16() const {
1379 return AMDGPU::hasPackedD16(getSTI());
1382 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1384 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1386 bool isSI() const {
1387 return AMDGPU::isSI(getSTI());
1390 bool isCI() const {
1391 return AMDGPU::isCI(getSTI());
1394 bool isVI() const {
1395 return AMDGPU::isVI(getSTI());
1398 bool isGFX9() const {
1399 return AMDGPU::isGFX9(getSTI());
1402 // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1403 bool isGFX90A() const {
1404 return AMDGPU::isGFX90A(getSTI());
1407 bool isGFX940() const {
1408 return AMDGPU::isGFX940(getSTI());
1411 bool isGFX9Plus() const {
1412 return AMDGPU::isGFX9Plus(getSTI());
1415 bool isGFX10() const {
1416 return AMDGPU::isGFX10(getSTI());
1419 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1421 bool isGFX11() const {
1422 return AMDGPU::isGFX11(getSTI());
1425 bool isGFX11Plus() const {
1426 return AMDGPU::isGFX11Plus(getSTI());
1429 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1431 bool isGFX10_BEncoding() const {
1432 return AMDGPU::isGFX10_BEncoding(getSTI());
1435 bool hasInv2PiInlineImm() const {
1436 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1439 bool hasFlatOffsets() const {
1440 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1443 bool hasArchitectedFlatScratch() const {
1444 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1447 bool hasSGPR102_SGPR103() const {
1448 return !isVI() && !isGFX9();
1451 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1453 bool hasIntClamp() const {
1454 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1457 bool hasPartialNSAEncoding() const {
1458 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1461 unsigned getNSAMaxSize() const {
1462 return AMDGPU::getNSAMaxSize(getSTI());
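// For MIMG NSA encodings, getNSAMaxSize() bounds how many individual address
// VGPRs can be encoded; on targets with only partial NSA support, remaining
// addresses are expected in a contiguous VGPR range (see OperandMode_NSA above).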
1465 AMDGPUTargetStreamer &getTargetStreamer() {
1466 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1467 return static_cast<AMDGPUTargetStreamer &>(TS);
1470 const MCRegisterInfo *getMRI() const {
1471 // We need this const_cast because for some reason getContext() is not const
1472 // in MCAsmParser.
1473 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1476 const MCInstrInfo *getMII() const {
1477 return &MII;
1480 const FeatureBitset &getFeatureBits() const {
1481 return getSTI().getFeatureBits();
1484 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1485 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1486 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1488 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1489 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1490 bool isForcedDPP() const { return ForcedDPP; }
1491 bool isForcedSDWA() const { return ForcedSDWA; }
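// These are typically set from a mnemonic suffix (see parseMnemonicSuffix):
// e.g. "_e64" forces the 64-bit VOP3 encoding (isForcedVOP3()), while "_dpp"
// and "_sdwa" force the DPP and SDWA variants.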
1492 ArrayRef<unsigned> getMatchedVariants() const;
1493 StringRef getMatchedVariantName() const;
1495 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1496 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1497 bool RestoreOnFailure);
1498 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1499 SMLoc &EndLoc) override;
1500 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1501 SMLoc &EndLoc) override;
1502 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1503 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1504 unsigned Kind) override;
1505 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1506 OperandVector &Operands, MCStreamer &Out,
1507 uint64_t &ErrorInfo,
1508 bool MatchingInlineAsm) override;
1509 bool ParseDirective(AsmToken DirectiveID) override;
1510 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1511 OperandMode Mode = OperandMode_Default);
1512 StringRef parseMnemonicSuffix(StringRef Name);
1513 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1514 SMLoc NameLoc, OperandVector &Operands) override;
1515 //bool ProcessInstruction(MCInst &Inst);
1517 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1519 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1521 ParseStatus
1522 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1523 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1524 std::function<bool(int64_t &)> ConvertResult = nullptr);
1526 ParseStatus parseOperandArrayWithPrefix(
1527 const char *Prefix, OperandVector &Operands,
1528 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1529 bool (*ConvertResult)(int64_t &) = nullptr);
1531 ParseStatus
1532 parseNamedBit(StringRef Name, OperandVector &Operands,
1533 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1534 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1535 ParseStatus parseCPol(OperandVector &Operands);
1536 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1537 SMLoc &StringLoc);
1539 bool isModifier();
1540 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1541 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1542 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1543 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1544 bool parseSP3NegModifier();
1545 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1546 ParseStatus parseReg(OperandVector &Operands);
1547 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1548 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1549 bool AllowImm = true);
1550 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1551 bool AllowImm = true);
1552 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1553 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1554 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1555 ParseStatus parseDfmtNfmt(int64_t &Format);
1556 ParseStatus parseUfmt(int64_t &Format);
1557 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1558 int64_t &Format);
1559 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1560 int64_t &Format);
1561 ParseStatus parseFORMAT(OperandVector &Operands);
1562 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1563 ParseStatus parseNumericFormat(int64_t &Format);
1564 ParseStatus parseFlatOffset(OperandVector &Operands);
1565 ParseStatus parseR128A16(OperandVector &Operands);
1566 ParseStatus parseBLGP(OperandVector &Operands);
1567 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1568 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1570 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1571 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1572 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1573 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1575 bool parseCnt(int64_t &IntVal);
1576 ParseStatus parseSWaitCnt(OperandVector &Operands);
1578 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1579 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1580 ParseStatus parseDepCtr(OperandVector &Operands);
1582 bool parseDelay(int64_t &Delay);
1583 ParseStatus parseSDelayALU(OperandVector &Operands);
1585 ParseStatus parseHwreg(OperandVector &Operands);
1587 private:
1588 struct OperandInfoTy {
1589 SMLoc Loc;
1590 int64_t Id;
1591 bool IsSymbolic = false;
1592 bool IsDefined = false;
1594 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1597 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1598 bool validateSendMsg(const OperandInfoTy &Msg,
1599 const OperandInfoTy &Op,
1600 const OperandInfoTy &Stream);
1602 bool parseHwregBody(OperandInfoTy &HwReg,
1603 OperandInfoTy &Offset,
1604 OperandInfoTy &Width);
1605 bool validateHwreg(const OperandInfoTy &HwReg,
1606 const OperandInfoTy &Offset,
1607 const OperandInfoTy &Width);
1609 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1610 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1611 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1613 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1614 const OperandVector &Operands) const;
1615 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1616 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1617 SMLoc getLitLoc(const OperandVector &Operands,
1618 bool SearchMandatoryLiterals = false) const;
1619 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1620 SMLoc getConstLoc(const OperandVector &Operands) const;
1621 SMLoc getInstLoc(const OperandVector &Operands) const;
1623 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626 bool validateSOPLiteral(const MCInst &Inst) const;
1627 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1629 const OperandVector &Operands);
1630 bool validateIntClampSupported(const MCInst &Inst);
1631 bool validateMIMGAtomicDMask(const MCInst &Inst);
1632 bool validateMIMGGatherDMask(const MCInst &Inst);
1633 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1634 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1635 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1636 bool validateMIMGD16(const MCInst &Inst);
1637 bool validateMIMGMSAA(const MCInst &Inst);
1638 bool validateOpSel(const MCInst &Inst);
1639 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640 bool validateVccOperand(unsigned Reg) const;
1641 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1644 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1645 bool validateAGPRLdSt(const MCInst &Inst) const;
1646 bool validateVGPRAlign(const MCInst &Inst) const;
1647 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1648 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1649 bool validateDivScale(const MCInst &Inst);
1650 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1651 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1652 const SMLoc &IDLoc);
1653 bool validateExeczVcczOperands(const OperandVector &Operands);
1654 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1655 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1656 unsigned getConstantBusLimit(unsigned Opcode) const;
1657 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1658 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1659 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1661 bool isSupportedMnemo(StringRef Mnemo,
1662 const FeatureBitset &FBS);
1663 bool isSupportedMnemo(StringRef Mnemo,
1664 const FeatureBitset &FBS,
1665 ArrayRef<unsigned> Variants);
1666 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1668 bool isId(const StringRef Id) const;
1669 bool isId(const AsmToken &Token, const StringRef Id) const;
1670 bool isToken(const AsmToken::TokenKind Kind) const;
1671 StringRef getId() const;
1672 bool trySkipId(const StringRef Id);
1673 bool trySkipId(const StringRef Pref, const StringRef Id);
1674 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1675 bool trySkipToken(const AsmToken::TokenKind Kind);
1676 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1677 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1678 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1680 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1681 AsmToken::TokenKind getTokenKind() const;
1682 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1683 bool parseExpr(OperandVector &Operands);
1684 StringRef getTokenStr() const;
1685 AsmToken peekToken(bool ShouldSkipSpace = true);
1686 AsmToken getToken() const;
1687 SMLoc getLoc() const;
1688 void lex();
1690 public:
1691 void onBeginOfFile() override;
1693 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1695 ParseStatus parseExpTgt(OperandVector &Operands);
1696 ParseStatus parseSendMsg(OperandVector &Operands);
1697 ParseStatus parseInterpSlot(OperandVector &Operands);
1698 ParseStatus parseInterpAttr(OperandVector &Operands);
1699 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1700 ParseStatus parseBoolReg(OperandVector &Operands);
1702 bool parseSwizzleOperand(int64_t &Op,
1703 const unsigned MinVal,
1704 const unsigned MaxVal,
1705 const StringRef ErrMsg,
1706 SMLoc &Loc);
1707 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1708 const unsigned MinVal,
1709 const unsigned MaxVal,
1710 const StringRef ErrMsg);
1711 ParseStatus parseSwizzle(OperandVector &Operands);
1712 bool parseSwizzleOffset(int64_t &Imm);
1713 bool parseSwizzleMacro(int64_t &Imm);
1714 bool parseSwizzleQuadPerm(int64_t &Imm);
1715 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1716 bool parseSwizzleBroadcast(int64_t &Imm);
1717 bool parseSwizzleSwap(int64_t &Imm);
1718 bool parseSwizzleReverse(int64_t &Imm);
1720 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1721 int64_t parseGPRIdxMacro();
1723 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1724 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1726 ParseStatus parseOModSI(OperandVector &Operands);
1728 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1729 OptionalImmIndexMap &OptionalIdx);
1730 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1731 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1732 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1733 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1734 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1735 OptionalImmIndexMap &OptionalIdx);
1736 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1737 OptionalImmIndexMap &OptionalIdx);
1739 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1740 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1741 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1743 bool parseDimId(unsigned &Encoding);
1744 ParseStatus parseDim(OperandVector &Operands);
1745 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1746 ParseStatus parseDPP8(OperandVector &Operands);
1747 ParseStatus parseDPPCtrl(OperandVector &Operands);
1748 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1749 int64_t parseDPPCtrlSel(StringRef Ctrl);
1750 int64_t parseDPPCtrlPerm();
1751 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1752 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1753 cvtDPP(Inst, Operands, true);
1755 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1756 bool IsDPP8 = false);
1757 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1758 cvtVOP3DPP(Inst, Operands, true);
1761 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1762 AMDGPUOperand::ImmTy Type);
1763 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1764 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1765 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1766 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1767 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1768 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1769 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1770 uint64_t BasicInstType,
1771 bool SkipDstVcc = false,
1772 bool SkipSrcVcc = false);
1774 ParseStatus parseEndpgm(OperandVector &Operands);
1776 ParseStatus parseVOPD(OperandVector &Operands);
1779 } // end anonymous namespace
1781 // May be called with an integer type of equivalent bit width.
1782 static const fltSemantics *getFltSemantics(unsigned Size) {
1783 switch (Size) {
1784 case 4:
1785 return &APFloat::IEEEsingle();
1786 case 8:
1787 return &APFloat::IEEEdouble();
1788 case 2:
1789 return &APFloat::IEEEhalf();
1790 default:
1791 llvm_unreachable("unsupported fp type");
1795 static const fltSemantics *getFltSemantics(MVT VT) {
1796 return getFltSemantics(VT.getSizeInBits() / 8);
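// For example, both MVT::f16 and MVT::i16 map to APFloat::IEEEhalf(); see the
// note above about integer types of equivalent bit width.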
1799 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1800 switch (OperandType) {
1801 case AMDGPU::OPERAND_REG_IMM_INT32:
1802 case AMDGPU::OPERAND_REG_IMM_FP32:
1803 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1804 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1805 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1806 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1807 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1809 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1810 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1811 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1812 case AMDGPU::OPERAND_KIMM32:
1813 return &APFloat::IEEEsingle();
1814 case AMDGPU::OPERAND_REG_IMM_INT64:
1815 case AMDGPU::OPERAND_REG_IMM_FP64:
1816 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1817 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1818 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1819 return &APFloat::IEEEdouble();
1820 case AMDGPU::OPERAND_REG_IMM_INT16:
1821 case AMDGPU::OPERAND_REG_IMM_FP16:
1822 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1823 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1824 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1825 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1826 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1827 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1828 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1829 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1831 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1832 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1833 case AMDGPU::OPERAND_KIMM16:
1834 return &APFloat::IEEEhalf();
1835 default:
1836 llvm_unreachable("unsupported fp type");
1840 //===----------------------------------------------------------------------===//
1841 // Operand
1842 //===----------------------------------------------------------------------===//
1844 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1845 bool Lost;
1847 // Convert literal to single precision
1848 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1849 APFloat::rmNearestTiesToEven,
1850 &Lost);
1851   // We allow precision loss but not overflow or underflow
1852 if (Status != APFloat::opOK &&
1853 Lost &&
1854 ((Status & APFloat::opOverflow) != 0 ||
1855 (Status & APFloat::opUnderflow) != 0)) {
1856 return false;
1859 return true;
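// Check whether Val can be truncated to Size bits without losing
// information, i.e. it is representable as either an unsigned or a signed
// Size-bit integer.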
1862 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1863 return isUIntN(Size, Val) || isIntN(Size, Val);
1866 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1867 if (VT.getScalarType() == MVT::i16) {
1868 // FP immediate values are broken.
1869 return isInlinableIntLiteral(Val);
1872 // f16/v2f16 operands work correctly for all values.
1873 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1876 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1878 // This is a hack to enable named inline values like
1879 // shared_base with both 32-bit and 64-bit operands.
1880 // Note that these values are defined as
1881 // 32-bit operands only.
1882 if (isInlineValue()) {
1883 return true;
1886 if (!isImmTy(ImmTyNone)) {
1887 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1888 return false;
1890 // TODO: We should avoid using host float here. It would be better to
1891 // check the float bit values which is what a few other places do.
1892 // We've had bot failures before due to weird NaN support on mips hosts.
1894 APInt Literal(64, Imm.Val);
1896 if (Imm.IsFPImm) { // We got fp literal token
1897 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1898 return AMDGPU::isInlinableLiteral64(Imm.Val,
1899 AsmParser->hasInv2PiInlineImm());
1902 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1903 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1904 return false;
1906 if (type.getScalarSizeInBits() == 16) {
1907 return isInlineableLiteralOp16(
1908 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1909 type, AsmParser->hasInv2PiInlineImm());
1912 // Check if single precision literal is inlinable
1913 return AMDGPU::isInlinableLiteral32(
1914 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1915 AsmParser->hasInv2PiInlineImm());
1918 // We got int literal token.
1919 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1920 return AMDGPU::isInlinableLiteral64(Imm.Val,
1921 AsmParser->hasInv2PiInlineImm());
1924 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1925 return false;
1928 if (type.getScalarSizeInBits() == 16) {
1929 return isInlineableLiteralOp16(
1930 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1931 type, AsmParser->hasInv2PiInlineImm());
1934 return AMDGPU::isInlinableLiteral32(
1935 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1936 AsmParser->hasInv2PiInlineImm());
1939 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1940 // Check that this immediate can be added as literal
1941 if (!isImmTy(ImmTyNone)) {
1942 return false;
1945 if (!Imm.IsFPImm) {
1946 // We got int literal token.
1948 if (type == MVT::f64 && hasFPModifiers()) {
1949 // Cannot apply fp modifiers to int literals preserving the same semantics
1950 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1951 // disable these cases.
1952 return false;
1955 unsigned Size = type.getSizeInBits();
1956 if (Size == 64)
1957 Size = 32;
1959 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1960 // types.
1961 return isSafeTruncation(Imm.Val, Size);
1964 // We got fp literal token
1965 if (type == MVT::f64) { // Expected 64-bit fp operand
1966     // We would set the low 32 bits of the literal to zeroes, but we accept such literals
1967 return true;
1970 if (type == MVT::i64) { // Expected 64-bit int operand
1971 // We don't allow fp literals in 64-bit integer instructions. It is
1972 // unclear how we should encode them.
1973 return false;
1976 // We allow fp literals with f16x2 operands assuming that the specified
1977 // literal goes into the lower half and the upper half is zero. We also
1978 // require that the literal may be losslessly converted to f16.
1979 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1980 (type == MVT::v2i16)? MVT::i16 :
1981 (type == MVT::v2f32)? MVT::f32 : type;
1983 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1984 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1987 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1988 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1991 bool AMDGPUOperand::isVRegWithInputMods() const {
1992 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1993 // GFX90A allows DPP on 64-bit operands.
1994 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1995 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1998 bool AMDGPUOperand::isT16VRegWithInputMods() const {
1999 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2002 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2003 if (AsmParser->isVI())
2004 return isVReg32();
2005 else if (AsmParser->isGFX9Plus())
2006 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2007 else
2008 return false;
2011 bool AMDGPUOperand::isSDWAFP16Operand() const {
2012 return isSDWAOperand(MVT::f16);
2015 bool AMDGPUOperand::isSDWAFP32Operand() const {
2016 return isSDWAOperand(MVT::f32);
2019 bool AMDGPUOperand::isSDWAInt16Operand() const {
2020 return isSDWAOperand(MVT::i16);
2023 bool AMDGPUOperand::isSDWAInt32Operand() const {
2024 return isSDWAOperand(MVT::i32);
2027 bool AMDGPUOperand::isBoolReg() const {
2028 auto FB = AsmParser->getFeatureBits();
2029 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2030 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
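// Apply parsed abs/neg source modifiers directly to an FP literal of the
// given size in bytes by clearing or flipping its sign bit.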
2033 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2035 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2036 assert(Size == 2 || Size == 4 || Size == 8);
2038 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2040 if (Imm.Mods.Abs) {
2041 Val &= ~FpSignMask;
2043 if (Imm.Mods.Neg) {
2044 Val ^= FpSignMask;
2047 return Val;
2050 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2051 if (isExpr()) {
2052 Inst.addOperand(MCOperand::createExpr(Expr));
2053 return;
2056 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2057 Inst.getNumOperands())) {
2058 addLiteralImmOperand(Inst, Imm.Val,
2059 ApplyModifiers &
2060 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2061 } else {
2062 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2063 Inst.addOperand(MCOperand::createImm(Imm.Val));
2064 setImmKindNone();
2068 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2069 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2070 auto OpNum = Inst.getNumOperands();
2071 // Check that this operand accepts literals
2072 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2074 if (ApplyModifiers) {
2075 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2076 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2077 Val = applyInputFPModifiers(Val, Size);
2080 APInt Literal(64, Val);
2081 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2083 if (Imm.IsFPImm) { // We got fp literal token
2084 switch (OpTy) {
2085 case AMDGPU::OPERAND_REG_IMM_INT64:
2086 case AMDGPU::OPERAND_REG_IMM_FP64:
2087 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2088 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2089 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2090 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2091 AsmParser->hasInv2PiInlineImm())) {
2092 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2093 setImmKindConst();
2094 return;
2097 // Non-inlineable
2098 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2099 // For fp operands we check if low 32 bits are zeros
2100 if (Literal.getLoBits(32) != 0) {
2101 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2102 "Can't encode literal as exact 64-bit floating-point operand. "
2103 "Low 32-bits will be set to zero");
2106 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2107 setImmKindLiteral();
2108 return;
2111 // We don't allow fp literals in 64-bit integer instructions. It is
2112 // unclear how we should encode them. This case should be checked earlier
2113 // in predicate methods (isLiteralImm())
2114 llvm_unreachable("fp literal in 64-bit integer instruction.");
2116 case AMDGPU::OPERAND_REG_IMM_INT32:
2117 case AMDGPU::OPERAND_REG_IMM_FP32:
2118 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2119 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2120 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2121 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2122 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2123 case AMDGPU::OPERAND_REG_IMM_INT16:
2124 case AMDGPU::OPERAND_REG_IMM_FP16:
2125 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2126 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2127 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2128 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2129 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2130 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2131 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2132 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2133 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2134 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2135 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2136 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2137 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2138 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2139 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2140 case AMDGPU::OPERAND_KIMM32:
2141 case AMDGPU::OPERAND_KIMM16: {
2142 bool lost;
2143 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2144 // Convert literal to single precision
2145 FPLiteral.convert(*getOpFltSemantics(OpTy),
2146 APFloat::rmNearestTiesToEven, &lost);
2147     // We allow precision loss but not overflow or underflow. This should be
2148 // checked earlier in isLiteralImm()
2150 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2151 Inst.addOperand(MCOperand::createImm(ImmVal));
2152 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2153 setImmKindMandatoryLiteral();
2154 } else {
2155 setImmKindLiteral();
2157 return;
2159 default:
2160 llvm_unreachable("invalid operand size");
2163 return;
2166 // We got int literal token.
2167 // Only sign extend inline immediates.
2168 switch (OpTy) {
2169 case AMDGPU::OPERAND_REG_IMM_INT32:
2170 case AMDGPU::OPERAND_REG_IMM_FP32:
2171 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2172 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2173 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2174 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2175 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2176 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2177 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2178 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2179 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2180 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2181 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2182 if (isSafeTruncation(Val, 32) &&
2183 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2184 AsmParser->hasInv2PiInlineImm())) {
2185 Inst.addOperand(MCOperand::createImm(Val));
2186 setImmKindConst();
2187 return;
2190 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2191 setImmKindLiteral();
2192 return;
2194 case AMDGPU::OPERAND_REG_IMM_INT64:
2195 case AMDGPU::OPERAND_REG_IMM_FP64:
2196 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2197 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2198 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2199 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2200 Inst.addOperand(MCOperand::createImm(Val));
2201 setImmKindConst();
2202 return;
2205 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2206 setImmKindLiteral();
2207 return;
2209 case AMDGPU::OPERAND_REG_IMM_INT16:
2210 case AMDGPU::OPERAND_REG_IMM_FP16:
2211 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2212 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2213 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2214 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2215 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2216 if (isSafeTruncation(Val, 16) &&
2217 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2218 AsmParser->hasInv2PiInlineImm())) {
2219 Inst.addOperand(MCOperand::createImm(Val));
2220 setImmKindConst();
2221 return;
2224 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2225 setImmKindLiteral();
2226 return;
2228 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2229 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2230 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2231 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2232 assert(isSafeTruncation(Val, 16));
2233 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2234 AsmParser->hasInv2PiInlineImm()));
2236 Inst.addOperand(MCOperand::createImm(Val));
2237 return;
2239 case AMDGPU::OPERAND_KIMM32:
2240 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2241 setImmKindMandatoryLiteral();
2242 return;
2243 case AMDGPU::OPERAND_KIMM16:
2244 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2245 setImmKindMandatoryLiteral();
2246 return;
2247 default:
2248 llvm_unreachable("invalid operand size");
2252 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2253 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2256 bool AMDGPUOperand::isInlineValue() const {
2257 return isRegKind() && ::isInlineValue(getReg());
2260 //===----------------------------------------------------------------------===//
2261 // AsmParser
2262 //===----------------------------------------------------------------------===//
2264 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2265 if (Is == IS_VGPR) {
2266 switch (RegWidth) {
2267 default: return -1;
2268 case 32:
2269 return AMDGPU::VGPR_32RegClassID;
2270 case 64:
2271 return AMDGPU::VReg_64RegClassID;
2272 case 96:
2273 return AMDGPU::VReg_96RegClassID;
2274 case 128:
2275 return AMDGPU::VReg_128RegClassID;
2276 case 160:
2277 return AMDGPU::VReg_160RegClassID;
2278 case 192:
2279 return AMDGPU::VReg_192RegClassID;
2280 case 224:
2281 return AMDGPU::VReg_224RegClassID;
2282 case 256:
2283 return AMDGPU::VReg_256RegClassID;
2284 case 288:
2285 return AMDGPU::VReg_288RegClassID;
2286 case 320:
2287 return AMDGPU::VReg_320RegClassID;
2288 case 352:
2289 return AMDGPU::VReg_352RegClassID;
2290 case 384:
2291 return AMDGPU::VReg_384RegClassID;
2292 case 512:
2293 return AMDGPU::VReg_512RegClassID;
2294 case 1024:
2295 return AMDGPU::VReg_1024RegClassID;
2297 } else if (Is == IS_TTMP) {
2298 switch (RegWidth) {
2299 default: return -1;
2300 case 32:
2301 return AMDGPU::TTMP_32RegClassID;
2302 case 64:
2303 return AMDGPU::TTMP_64RegClassID;
2304 case 128:
2305 return AMDGPU::TTMP_128RegClassID;
2306 case 256:
2307 return AMDGPU::TTMP_256RegClassID;
2308 case 512:
2309 return AMDGPU::TTMP_512RegClassID;
2311 } else if (Is == IS_SGPR) {
2312 switch (RegWidth) {
2313 default: return -1;
2314 case 32:
2315 return AMDGPU::SGPR_32RegClassID;
2316 case 64:
2317 return AMDGPU::SGPR_64RegClassID;
2318 case 96:
2319 return AMDGPU::SGPR_96RegClassID;
2320 case 128:
2321 return AMDGPU::SGPR_128RegClassID;
2322 case 160:
2323 return AMDGPU::SGPR_160RegClassID;
2324 case 192:
2325 return AMDGPU::SGPR_192RegClassID;
2326 case 224:
2327 return AMDGPU::SGPR_224RegClassID;
2328 case 256:
2329 return AMDGPU::SGPR_256RegClassID;
2330 case 288:
2331 return AMDGPU::SGPR_288RegClassID;
2332 case 320:
2333 return AMDGPU::SGPR_320RegClassID;
2334 case 352:
2335 return AMDGPU::SGPR_352RegClassID;
2336 case 384:
2337 return AMDGPU::SGPR_384RegClassID;
2338 case 512:
2339 return AMDGPU::SGPR_512RegClassID;
2341 } else if (Is == IS_AGPR) {
2342 switch (RegWidth) {
2343 default: return -1;
2344 case 32:
2345 return AMDGPU::AGPR_32RegClassID;
2346 case 64:
2347 return AMDGPU::AReg_64RegClassID;
2348 case 96:
2349 return AMDGPU::AReg_96RegClassID;
2350 case 128:
2351 return AMDGPU::AReg_128RegClassID;
2352 case 160:
2353 return AMDGPU::AReg_160RegClassID;
2354 case 192:
2355 return AMDGPU::AReg_192RegClassID;
2356 case 224:
2357 return AMDGPU::AReg_224RegClassID;
2358 case 256:
2359 return AMDGPU::AReg_256RegClassID;
2360 case 288:
2361 return AMDGPU::AReg_288RegClassID;
2362 case 320:
2363 return AMDGPU::AReg_320RegClassID;
2364 case 352:
2365 return AMDGPU::AReg_352RegClassID;
2366 case 384:
2367 return AMDGPU::AReg_384RegClassID;
2368 case 512:
2369 return AMDGPU::AReg_512RegClassID;
2370 case 1024:
2371 return AMDGPU::AReg_1024RegClassID;
2374 return -1;
2377 static unsigned getSpecialRegForName(StringRef RegName) {
2378 return StringSwitch<unsigned>(RegName)
2379 .Case("exec", AMDGPU::EXEC)
2380 .Case("vcc", AMDGPU::VCC)
2381 .Case("flat_scratch", AMDGPU::FLAT_SCR)
2382 .Case("xnack_mask", AMDGPU::XNACK_MASK)
2383 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2384 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2385 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2386 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2387 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2388 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2389 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2390 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2391 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2392 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2393 .Case("lds_direct", AMDGPU::LDS_DIRECT)
2394 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2395 .Case("m0", AMDGPU::M0)
2396 .Case("vccz", AMDGPU::SRC_VCCZ)
2397 .Case("src_vccz", AMDGPU::SRC_VCCZ)
2398 .Case("execz", AMDGPU::SRC_EXECZ)
2399 .Case("src_execz", AMDGPU::SRC_EXECZ)
2400 .Case("scc", AMDGPU::SRC_SCC)
2401 .Case("src_scc", AMDGPU::SRC_SCC)
2402 .Case("tba", AMDGPU::TBA)
2403 .Case("tma", AMDGPU::TMA)
2404 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2405 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2406 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2407 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2408 .Case("vcc_lo", AMDGPU::VCC_LO)
2409 .Case("vcc_hi", AMDGPU::VCC_HI)
2410 .Case("exec_lo", AMDGPU::EXEC_LO)
2411 .Case("exec_hi", AMDGPU::EXEC_HI)
2412 .Case("tma_lo", AMDGPU::TMA_LO)
2413 .Case("tma_hi", AMDGPU::TMA_HI)
2414 .Case("tba_lo", AMDGPU::TBA_LO)
2415 .Case("tba_hi", AMDGPU::TBA_HI)
2416 .Case("pc", AMDGPU::PC_REG)
2417 .Case("null", AMDGPU::SGPR_NULL)
2418 .Default(AMDGPU::NoRegister);
2421 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2422 SMLoc &EndLoc, bool RestoreOnFailure) {
2423 auto R = parseRegister();
2424 if (!R) return true;
2425 assert(R->isReg());
2426 RegNo = R->getReg();
2427 StartLoc = R->getStartLoc();
2428 EndLoc = R->getEndLoc();
2429 return false;
2432 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2433 SMLoc &EndLoc) {
2434 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2437 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2438 SMLoc &StartLoc,
2439 SMLoc &EndLoc) {
2440 bool Result =
2441 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2442 bool PendingErrors = getParser().hasPendingError();
2443 getParser().clearPendingErrors();
2444 if (PendingErrors)
2445 return MatchOperand_ParseFail;
2446 if (Result)
2447 return MatchOperand_NoMatch;
2448 return MatchOperand_Success;
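// Append the next register to a register list being parsed. Known lo/hi
// pairs of special registers are merged (e.g. exec_lo followed by exec_hi
// yields exec); regular registers must have consecutive indices and extend
// the list width by 32 bits.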
2451 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2452 RegisterKind RegKind, unsigned Reg1,
2453 SMLoc Loc) {
2454 switch (RegKind) {
2455 case IS_SPECIAL:
2456 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2457 Reg = AMDGPU::EXEC;
2458 RegWidth = 64;
2459 return true;
2461 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2462 Reg = AMDGPU::FLAT_SCR;
2463 RegWidth = 64;
2464 return true;
2466 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2467 Reg = AMDGPU::XNACK_MASK;
2468 RegWidth = 64;
2469 return true;
2471 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2472 Reg = AMDGPU::VCC;
2473 RegWidth = 64;
2474 return true;
2476 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2477 Reg = AMDGPU::TBA;
2478 RegWidth = 64;
2479 return true;
2481 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2482 Reg = AMDGPU::TMA;
2483 RegWidth = 64;
2484 return true;
2486 Error(Loc, "register does not fit in the list");
2487 return false;
2488 case IS_VGPR:
2489 case IS_SGPR:
2490 case IS_AGPR:
2491 case IS_TTMP:
2492 if (Reg1 != Reg + RegWidth / 32) {
2493 Error(Loc, "registers in a list must have consecutive indices");
2494 return false;
2496 RegWidth += 32;
2497 return true;
2498 default:
2499 llvm_unreachable("unexpected register kind");
2503 struct RegInfo {
2504 StringLiteral Name;
2505 RegisterKind Kind;
2508 static constexpr RegInfo RegularRegisters[] = {
2509 {{"v"}, IS_VGPR},
2510 {{"s"}, IS_SGPR},
2511 {{"ttmp"}, IS_TTMP},
2512 {{"acc"}, IS_AGPR},
2513 {{"a"}, IS_AGPR},
2516 static bool isRegularReg(RegisterKind Kind) {
2517 return Kind == IS_VGPR ||
2518 Kind == IS_SGPR ||
2519 Kind == IS_TTMP ||
2520 Kind == IS_AGPR;
2523 static const RegInfo* getRegularRegInfo(StringRef Str) {
2524 for (const RegInfo &Reg : RegularRegisters)
2525 if (Str.startswith(Reg.Name))
2526 return &Reg;
2527 return nullptr;
2530 static bool getRegNum(StringRef Str, unsigned& Num) {
2531 return !Str.getAsInteger(10, Num);
2534 bool
2535 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2536 const AsmToken &NextToken) const {
2538 // A list of consecutive registers: [s0,s1,s2,s3]
2539 if (Token.is(AsmToken::LBrac))
2540 return true;
2542 if (!Token.is(AsmToken::Identifier))
2543 return false;
2545 // A single register like s0 or a range of registers like s[0:1]
2547 StringRef Str = Token.getString();
2548 const RegInfo *Reg = getRegularRegInfo(Str);
2549 if (Reg) {
2550 StringRef RegName = Reg->Name;
2551 StringRef RegSuffix = Str.substr(RegName.size());
2552 if (!RegSuffix.empty()) {
2553 unsigned Num;
2554 // A single register with an index: rXX
2555 if (getRegNum(RegSuffix, Num))
2556 return true;
2557 } else {
2558 // A range of registers: r[XX:YY].
2559 if (NextToken.is(AsmToken::LBrac))
2560 return true;
2564 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2567 bool
2568 AMDGPUAsmParser::isRegister()
2570 return isRegister(getToken(), peekToken());
2573 unsigned
2574 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2575 unsigned RegNum,
2576 unsigned RegWidth,
2577 SMLoc Loc) {
2579 assert(isRegularReg(RegKind));
2581 unsigned AlignSize = 1;
2582 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2583 // SGPR and TTMP registers must be aligned.
2584 // Max required alignment is 4 dwords.
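    // E.g. a 64-bit range must start at an even index (s[2:3] is valid,
    // s[1:2] is not), and 128-bit or wider ranges must start at a multiple of 4.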
2585 AlignSize = std::min(RegWidth / 32, 4u);
2588 if (RegNum % AlignSize != 0) {
2589 Error(Loc, "invalid register alignment");
2590 return AMDGPU::NoRegister;
2593 unsigned RegIdx = RegNum / AlignSize;
2594 int RCID = getRegClass(RegKind, RegWidth);
2595 if (RCID == -1) {
2596 Error(Loc, "invalid or unsupported register size");
2597 return AMDGPU::NoRegister;
2600 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2601 const MCRegisterClass RC = TRI->getRegClass(RCID);
2602 if (RegIdx >= RC.getNumRegs()) {
2603 Error(Loc, "register index is out of range");
2604 return AMDGPU::NoRegister;
2607 return RC.getRegister(RegIdx);
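// Parse a bracketed register index range, either "[XX]" or "[XX:YY]".
// On success, Num holds the first index and RegWidth the total width in
// bits (32 per register).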
2610 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2611 int64_t RegLo, RegHi;
2612 if (!skipToken(AsmToken::LBrac, "missing register index"))
2613 return false;
2615 SMLoc FirstIdxLoc = getLoc();
2616 SMLoc SecondIdxLoc;
2618 if (!parseExpr(RegLo))
2619 return false;
2621 if (trySkipToken(AsmToken::Colon)) {
2622 SecondIdxLoc = getLoc();
2623 if (!parseExpr(RegHi))
2624 return false;
2625 } else {
2626 RegHi = RegLo;
2629 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2630 return false;
2632 if (!isUInt<32>(RegLo)) {
2633 Error(FirstIdxLoc, "invalid register index");
2634 return false;
2637 if (!isUInt<32>(RegHi)) {
2638 Error(SecondIdxLoc, "invalid register index");
2639 return false;
2642 if (RegLo > RegHi) {
2643 Error(FirstIdxLoc, "first register index should not exceed second index");
2644 return false;
2647 Num = static_cast<unsigned>(RegLo);
2648 RegWidth = 32 * ((RegHi - RegLo) + 1);
2649 return true;
2652 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2653 unsigned &RegNum, unsigned &RegWidth,
2654 SmallVectorImpl<AsmToken> &Tokens) {
2655 assert(isToken(AsmToken::Identifier));
2656 unsigned Reg = getSpecialRegForName(getTokenStr());
2657 if (Reg) {
2658 RegNum = 0;
2659 RegWidth = 32;
2660 RegKind = IS_SPECIAL;
2661 Tokens.push_back(getToken());
2662 lex(); // skip register name
2664 return Reg;
2667 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2668 unsigned &RegNum, unsigned &RegWidth,
2669 SmallVectorImpl<AsmToken> &Tokens) {
2670 assert(isToken(AsmToken::Identifier));
2671 StringRef RegName = getTokenStr();
2672 auto Loc = getLoc();
2674 const RegInfo *RI = getRegularRegInfo(RegName);
2675 if (!RI) {
2676 Error(Loc, "invalid register name");
2677 return AMDGPU::NoRegister;
2680 Tokens.push_back(getToken());
2681 lex(); // skip register name
2683 RegKind = RI->Kind;
2684 StringRef RegSuffix = RegName.substr(RI->Name.size());
2685 if (!RegSuffix.empty()) {
2686 // Single 32-bit register: vXX.
2687 if (!getRegNum(RegSuffix, RegNum)) {
2688 Error(Loc, "invalid register index");
2689 return AMDGPU::NoRegister;
2691 RegWidth = 32;
2692 } else {
2693 // Range of registers: v[XX:YY]. ":YY" is optional.
2694 if (!ParseRegRange(RegNum, RegWidth))
2695 return AMDGPU::NoRegister;
2698 return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2701 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2702 unsigned &RegWidth,
2703 SmallVectorImpl<AsmToken> &Tokens) {
2704 unsigned Reg = AMDGPU::NoRegister;
2705 auto ListLoc = getLoc();
2707 if (!skipToken(AsmToken::LBrac,
2708 "expected a register or a list of registers")) {
2709 return AMDGPU::NoRegister;
2712 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2714 auto Loc = getLoc();
2715 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2716 return AMDGPU::NoRegister;
2717 if (RegWidth != 32) {
2718 Error(Loc, "expected a single 32-bit register");
2719 return AMDGPU::NoRegister;
2722 for (; trySkipToken(AsmToken::Comma); ) {
2723 RegisterKind NextRegKind;
2724 unsigned NextReg, NextRegNum, NextRegWidth;
2725 Loc = getLoc();
2727 if (!ParseAMDGPURegister(NextRegKind, NextReg,
2728 NextRegNum, NextRegWidth,
2729 Tokens)) {
2730 return AMDGPU::NoRegister;
2732 if (NextRegWidth != 32) {
2733 Error(Loc, "expected a single 32-bit register");
2734 return AMDGPU::NoRegister;
2736 if (NextRegKind != RegKind) {
2737 Error(Loc, "registers in a list must be of the same kind");
2738 return AMDGPU::NoRegister;
2740 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2741 return AMDGPU::NoRegister;
2744 if (!skipToken(AsmToken::RBrac,
2745 "expected a comma or a closing square bracket")) {
2746 return AMDGPU::NoRegister;
2749 if (isRegularReg(RegKind))
2750 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2752 return Reg;
2755 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2756 unsigned &RegNum, unsigned &RegWidth,
2757 SmallVectorImpl<AsmToken> &Tokens) {
2758 auto Loc = getLoc();
2759 Reg = AMDGPU::NoRegister;
2761 if (isToken(AsmToken::Identifier)) {
2762 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2763 if (Reg == AMDGPU::NoRegister)
2764 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2765 } else {
2766 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2769 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2770 if (Reg == AMDGPU::NoRegister) {
2771 assert(Parser.hasPendingError());
2772 return false;
2775 if (!subtargetHasRegister(*TRI, Reg)) {
2776 if (Reg == AMDGPU::SGPR_NULL) {
2777 Error(Loc, "'null' operand is not supported on this GPU");
2778 } else {
2779 Error(Loc, "register not available on this GPU");
2781 return false;
2784 return true;
2787 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2788 unsigned &RegNum, unsigned &RegWidth,
2789 bool RestoreOnFailure /*=false*/) {
2790 Reg = AMDGPU::NoRegister;
2792 SmallVector<AsmToken, 1> Tokens;
2793 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2794 if (RestoreOnFailure) {
2795 while (!Tokens.empty()) {
2796 getLexer().UnLex(Tokens.pop_back_val());
2799 return true;
2801 return false;
2804 std::optional<StringRef>
2805 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2806 switch (RegKind) {
2807 case IS_VGPR:
2808 return StringRef(".amdgcn.next_free_vgpr");
2809 case IS_SGPR:
2810 return StringRef(".amdgcn.next_free_sgpr");
2811 default:
2812 return std::nullopt;
2816 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2817 auto SymbolName = getGprCountSymbolName(RegKind);
2818 assert(SymbolName && "initializing invalid register kind");
2819 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2820 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
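// Track the highest register index used so far by raising the
// .amdgcn.next_free_{v,s}gpr symbol. E.g. parsing v[8:9] bumps
// .amdgcn.next_free_vgpr to at least 10.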
2823 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2824 unsigned DwordRegIndex,
2825 unsigned RegWidth) {
2826 // Symbols are only defined for GCN targets
2827 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2828 return true;
2830 auto SymbolName = getGprCountSymbolName(RegKind);
2831 if (!SymbolName)
2832 return true;
2833 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2835 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2836 int64_t OldCount;
2838 if (!Sym->isVariable())
2839 return !Error(getLoc(),
2840 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2841 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2842 return !Error(
2843 getLoc(),
2844 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2846 if (OldCount <= NewMax)
2847 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2849 return true;
2852 std::unique_ptr<AMDGPUOperand>
2853 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2854 const auto &Tok = getToken();
2855 SMLoc StartLoc = Tok.getLoc();
2856 SMLoc EndLoc = Tok.getEndLoc();
2857 RegisterKind RegKind;
2858 unsigned Reg, RegNum, RegWidth;
2860 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2861 return nullptr;
2863 if (isHsaAbiVersion3AndAbove(&getSTI())) {
2864 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2865 return nullptr;
2866 } else
2867 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2868 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2871 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2872 bool HasSP3AbsModifier) {
2873 // TODO: add syntactic sugar for 1/(2*PI)
2875 if (isRegister())
2876 return ParseStatus::NoMatch;
2877 assert(!isModifier());
2879 const auto& Tok = getToken();
2880 const auto& NextTok = peekToken();
2881 bool IsReal = Tok.is(AsmToken::Real);
2882 SMLoc S = getLoc();
2883 bool Negate = false;
2885 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2886 lex();
2887 IsReal = true;
2888 Negate = true;
2891 if (IsReal) {
2892 // Floating-point expressions are not supported.
2893 // Can only allow floating-point literals with an
2894 // optional sign.
2896 StringRef Num = getTokenStr();
2897 lex();
2899 APFloat RealVal(APFloat::IEEEdouble());
2900 auto roundMode = APFloat::rmNearestTiesToEven;
2901 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2902 return ParseStatus::Failure;
2903 if (Negate)
2904 RealVal.changeSign();
2906 Operands.push_back(
2907 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2908 AMDGPUOperand::ImmTyNone, true));
2910 return ParseStatus::Success;
2912 } else {
2913 int64_t IntVal;
2914 const MCExpr *Expr;
2915 SMLoc S = getLoc();
2917 if (HasSP3AbsModifier) {
2918 // This is a workaround for handling expressions
2919 // as arguments of SP3 'abs' modifier, for example:
2920 // |1.0|
2921 // |-1|
2922 // |1+x|
2923       // This syntax is not compatible with the syntax of standard
2924       // MC expressions (due to the trailing '|').
2925 SMLoc EndLoc;
2926 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2927 return ParseStatus::Failure;
2928 } else {
2929 if (Parser.parseExpression(Expr))
2930 return ParseStatus::Failure;
2933 if (Expr->evaluateAsAbsolute(IntVal)) {
2934 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2935 } else {
2936 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2939 return ParseStatus::Success;
2942 return ParseStatus::NoMatch;
2945 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2946 if (!isRegister())
2947 return ParseStatus::NoMatch;
2949 if (auto R = parseRegister()) {
2950 assert(R->isReg());
2951 Operands.push_back(std::move(R));
2952 return ParseStatus::Success;
2954 return ParseStatus::Failure;
2957 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
2958 bool HasSP3AbsMod) {
2959 ParseStatus Res = parseReg(Operands);
2960 if (!Res.isNoMatch())
2961 return Res;
2962 if (isModifier())
2963 return ParseStatus::NoMatch;
2964 return parseImm(Operands, HasSP3AbsMod);
2967 bool
2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2969 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2970 const auto &str = Token.getString();
2971 return str == "abs" || str == "neg" || str == "sext";
2973 return false;
2976 bool
2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2978 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2981 bool
2982 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2983 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2986 bool
2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2988 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2991 // Check if this is an operand modifier or an opcode modifier
2992 // which may look like an expression but is not. We should
2993 // avoid parsing these modifiers as expressions. Currently
2994 // recognized sequences are:
2995 // |...|
2996 // abs(...)
2997 // neg(...)
2998 // sext(...)
2999 // -reg
3000 // -|...|
3001 // -abs(...)
3002 // name:...
3004 bool
3005 AMDGPUAsmParser::isModifier() {
3007 AsmToken Tok = getToken();
3008 AsmToken NextToken[2];
3009 peekTokens(NextToken);
3011 return isOperandModifier(Tok, NextToken[0]) ||
3012 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3013 isOpcodeModifierWithVal(Tok, NextToken[0]);
3016 // Check if the current token is an SP3 'neg' modifier.
3017 // Currently this modifier is allowed in the following context:
3019 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3020 // 2. Before an 'abs' modifier: -abs(...)
3021 // 3. Before an SP3 'abs' modifier: -|...|
3023 // In all other cases "-" is handled as a part
3024 // of an expression that follows the sign.
3026 // Note: When "-" is followed by an integer literal N,
3027 // this is interpreted as integer negation rather
3028 // than a floating-point NEG modifier applied to N.
3029 // Besides being counter-intuitive, such use of the floating-point
3030 // NEG modifier would have resulted in different meanings
3031 // of integer literals used with VOP1/2/C and VOP3,
3032 // for example:
3033 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3034 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3035 // Negative fp literals with preceding "-" are
3036 // handled likewise for uniformity
3038 bool
3039 AMDGPUAsmParser::parseSP3NegModifier() {
3041 AsmToken NextToken[2];
3042 peekTokens(NextToken);
3044 if (isToken(AsmToken::Minus) &&
3045 (isRegister(NextToken[0], NextToken[1]) ||
3046 NextToken[0].is(AsmToken::Pipe) ||
3047 isId(NextToken[0], "abs"))) {
3048 lex();
3049 return true;
3052 return false;
3055 ParseStatus
3056 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3057 bool AllowImm) {
3058 bool Neg, SP3Neg;
3059 bool Abs, SP3Abs;
3060 SMLoc Loc;
3062 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3063 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3064 return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3066 SP3Neg = parseSP3NegModifier();
3068 Loc = getLoc();
3069 Neg = trySkipId("neg");
3070 if (Neg && SP3Neg)
3071 return Error(Loc, "expected register or immediate");
3072 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3073 return ParseStatus::Failure;
3075 Abs = trySkipId("abs");
3076 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3077 return ParseStatus::Failure;
3079 Loc = getLoc();
3080 SP3Abs = trySkipToken(AsmToken::Pipe);
3081 if (Abs && SP3Abs)
3082 return Error(Loc, "expected register or immediate");
3084 ParseStatus Res;
3085 if (AllowImm) {
3086 Res = parseRegOrImm(Operands, SP3Abs);
3087 } else {
3088 Res = parseReg(Operands);
3090 if (!Res.isSuccess())
3091 return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
3093 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3094 return ParseStatus::Failure;
3095 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3096 return ParseStatus::Failure;
3097 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3098 return ParseStatus::Failure;
3100 AMDGPUOperand::Modifiers Mods;
3101 Mods.Abs = Abs || SP3Abs;
3102 Mods.Neg = Neg || SP3Neg;
3104 if (Mods.hasFPModifiers()) {
3105 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3106 if (Op.isExpr())
3107 return Error(Op.getStartLoc(), "expected an absolute expression");
3108 Op.setModifiers(Mods);
3110 return ParseStatus::Success;
3113 ParseStatus
3114 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3115 bool AllowImm) {
3116 bool Sext = trySkipId("sext");
3117 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3118 return ParseStatus::Failure;
3120 ParseStatus Res;
3121 if (AllowImm) {
3122 Res = parseRegOrImm(Operands);
3123 } else {
3124 Res = parseReg(Operands);
3126 if (!Res.isSuccess())
3127 return Sext ? ParseStatus::Failure : Res;
3129 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3130 return ParseStatus::Failure;
3132 AMDGPUOperand::Modifiers Mods;
3133 Mods.Sext = Sext;
3135 if (Mods.hasIntModifiers()) {
3136 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3137 if (Op.isExpr())
3138 return Error(Op.getStartLoc(), "expected an absolute expression");
3139 Op.setModifiers(Mods);
3142 return ParseStatus::Success;
3145 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3146 return parseRegOrImmWithFPInputMods(Operands, false);
3149 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3150 return parseRegOrImmWithIntInputMods(Operands, false);
3153 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3154 auto Loc = getLoc();
3155 if (trySkipId("off")) {
3156 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3157 AMDGPUOperand::ImmTyOff, false));
3158 return ParseStatus::Success;
3161 if (!isRegister())
3162 return ParseStatus::NoMatch;
3164 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3165 if (Reg) {
3166 Operands.push_back(std::move(Reg));
3167 return ParseStatus::Success;
3170 return ParseStatus::Failure;
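// Reject candidate opcodes whose encoding does not match an explicitly
// forced suffix (_e32/_e64/_dpp/_sdwa), and prefer the 32-bit encoding for
// opcodes marked VOPAsmPrefer32Bit unless a 64-bit encoding was forced.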
3173 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3174 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3176 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3177 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3178 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3179 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3180 return Match_InvalidOperand;
3182 if ((TSFlags & SIInstrFlags::VOP3) &&
3183 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3184 getForcedEncodingSize() != 64)
3185 return Match_PreferE32;
3187 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3188 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3189 // v_mac_f32/16 allow only dst_sel == DWORD;
3190 auto OpNum =
3191 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3192 const auto &Op = Inst.getOperand(OpNum);
3193 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3194 return Match_InvalidOperand;
3198 return Match_Success;
3201 static ArrayRef<unsigned> getAllVariants() {
3202 static const unsigned Variants[] = {
3203 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3204 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3205 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3208 return ArrayRef(Variants);
3211 // What asm variants we should check
3212 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3213 if (isForcedDPP() && isForcedVOP3()) {
3214 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3215 return ArrayRef(Variants);
3217 if (getForcedEncodingSize() == 32) {
3218 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3219 return ArrayRef(Variants);
3222 if (isForcedVOP3()) {
3223 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3224 return ArrayRef(Variants);
3227 if (isForcedSDWA()) {
3228 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3229 AMDGPUAsmVariants::SDWA9};
3230 return ArrayRef(Variants);
3233 if (isForcedDPP()) {
3234 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3235 return ArrayRef(Variants);
3238 return getAllVariants();
3241 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3242 if (isForcedDPP() && isForcedVOP3())
3243 return "e64_dpp";
3245 if (getForcedEncodingSize() == 32)
3246 return "e32";
3248 if (isForcedVOP3())
3249 return "e64";
3251 if (isForcedSDWA())
3252 return "sdwa";
3254 if (isForcedDPP())
3255 return "dpp";
3257 return "";
3260 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3261 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3262 for (MCPhysReg Reg : Desc.implicit_uses()) {
3263 switch (Reg) {
3264 case AMDGPU::FLAT_SCR:
3265 case AMDGPU::VCC:
3266 case AMDGPU::VCC_LO:
3267 case AMDGPU::VCC_HI:
3268 case AMDGPU::M0:
3269 return Reg;
3270 default:
3271 break;
3274 return AMDGPU::NoRegister;
3277 // NB: This code is correct only when used to check constant
3278 // bus limitations because GFX7 supports no f16 inline constants.
3279 // Note that there are no cases when a GFX7 opcode violates
3280 // constant bus limitations due to the use of an f16 constant.
3281 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3282 unsigned OpIdx) const {
3283 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3285 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3286 AMDGPU::isKImmOperand(Desc, OpIdx)) {
3287 return false;
3290 const MCOperand &MO = Inst.getOperand(OpIdx);
3292 int64_t Val = MO.getImm();
3293 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3295 switch (OpSize) { // expected operand size
3296 case 8:
3297 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3298 case 4:
3299 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3300 case 2: {
3301 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3302 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3303 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3304 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3305 return AMDGPU::isInlinableIntLiteral(Val);
3307 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3308 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3309 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3310 return AMDGPU::isInlinableIntLiteralV216(Val);
3312 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3313 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3314 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3315 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3317 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3319 default:
3320 llvm_unreachable("invalid operand size");
3324 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3325 if (!isGFX10Plus())
3326 return 1;
3328 switch (Opcode) {
3329 // 64-bit shift instructions can use only one scalar value input
3330 case AMDGPU::V_LSHLREV_B64_e64:
3331 case AMDGPU::V_LSHLREV_B64_gfx10:
3332 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3333 case AMDGPU::V_LSHRREV_B64_e64:
3334 case AMDGPU::V_LSHRREV_B64_gfx10:
3335 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3336 case AMDGPU::V_ASHRREV_I64_e64:
3337 case AMDGPU::V_ASHRREV_I64_gfx10:
3338 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3339 case AMDGPU::V_LSHL_B64_e64:
3340 case AMDGPU::V_LSHR_B64_e64:
3341 case AMDGPU::V_ASHR_I64_e64:
3342 return 1;
3343 default:
3344 return 2;
3348 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3349 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3351 // Get regular operand indices in the same order as specified
3352 // in the instruction (but append mandatory literals to the end).
3353 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3354 bool AddMandatoryLiterals = false) {
3356 int16_t ImmIdx =
3357 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3359 if (isVOPD(Opcode)) {
3360 int16_t ImmDeferredIdx =
3361 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3362 : -1;
3364 return {getNamedOperandIdx(Opcode, OpName::src0X),
3365 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3366 getNamedOperandIdx(Opcode, OpName::src0Y),
3367 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3368 ImmDeferredIdx,
3369 ImmIdx};
3372 return {getNamedOperandIdx(Opcode, OpName::src0),
3373 getNamedOperandIdx(Opcode, OpName::src1),
3374 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
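// An operand reads the constant bus if it is a non-inline immediate, an
// SGPR other than null, or an expression.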
3377 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3378 const MCOperand &MO = Inst.getOperand(OpIdx);
3379 if (MO.isImm()) {
3380 return !isInlineConstant(Inst, OpIdx);
3381 } else if (MO.isReg()) {
3382 auto Reg = MO.getReg();
3383 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3384 auto PReg = mc2PseudoReg(Reg);
3385 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3386 } else {
3387 return true;
3391 bool AMDGPUAsmParser::validateConstantBusLimitations(
3392 const MCInst &Inst, const OperandVector &Operands) {
3393 const unsigned Opcode = Inst.getOpcode();
3394 const MCInstrDesc &Desc = MII.get(Opcode);
3395 unsigned LastSGPR = AMDGPU::NoRegister;
3396 unsigned ConstantBusUseCount = 0;
3397 unsigned NumLiterals = 0;
3398 unsigned LiteralSize;
3400 if (!(Desc.TSFlags &
3401 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3402 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3403 !isVOPD(Opcode))
3404 return true;
3406 // Check special imm operands (used by madmk, etc)
3407 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3408 ++NumLiterals;
3409 LiteralSize = 4;
3412 SmallDenseSet<unsigned> SGPRsUsed;
3413 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3414 if (SGPRUsed != AMDGPU::NoRegister) {
3415 SGPRsUsed.insert(SGPRUsed);
3416 ++ConstantBusUseCount;
3419 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3421 for (int OpIdx : OpIndices) {
3422 if (OpIdx == -1)
3423 continue;
3425 const MCOperand &MO = Inst.getOperand(OpIdx);
3426 if (usesConstantBus(Inst, OpIdx)) {
3427 if (MO.isReg()) {
3428 LastSGPR = mc2PseudoReg(MO.getReg());
3429         // Pairs of registers with partial intersections like these
3430 // s0, s[0:1]
3431 // flat_scratch_lo, flat_scratch
3432 // flat_scratch_lo, flat_scratch_hi
3433 // are theoretically valid but they are disabled anyway.
3434 // Note that this code mimics SIInstrInfo::verifyInstruction
3435 if (SGPRsUsed.insert(LastSGPR).second) {
3436 ++ConstantBusUseCount;
3438 } else { // Expression or a literal
3440 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3441 continue; // special operand like VINTERP attr_chan
3443 // An instruction may use only one literal.
3444         // This has been validated in a previous step.
3445 // See validateVOPLiteral.
3446 // This literal may be used as more than one operand.
3447 // If all these operands are of the same size,
3448 // this literal counts as one scalar value.
3449 // Otherwise it counts as 2 scalar values.
3450 // See "GFX10 Shader Programming", section 3.6.2.3.
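        // E.g. if the same literal value feeds both a 32-bit and a 64-bit
        // source operand, it counts as two scalar values; if all of its uses
        // have the same size, it counts as one.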
3452 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3453 if (Size < 4)
3454 Size = 4;
3456 if (NumLiterals == 0) {
3457 NumLiterals = 1;
3458 LiteralSize = Size;
3459 } else if (LiteralSize != Size) {
3460 NumLiterals = 2;
3465 ConstantBusUseCount += NumLiterals;
3467 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3468 return true;
3470 SMLoc LitLoc = getLitLoc(Operands);
3471 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3472 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3473 Error(Loc, "invalid operand (violates constant bus restrictions)");
3474 return false;
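// VOPD: corresponding VGPR source operands of the X and Y components must
// come from different register banks, and their dst registers must have
// different parity (one even, one odd).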
3477 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3478 const MCInst &Inst, const OperandVector &Operands) {
3480 const unsigned Opcode = Inst.getOpcode();
3481 if (!isVOPD(Opcode))
3482 return true;
3484 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3486 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3487 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3488 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3489 ? Opr.getReg()
3490 : MCRegister::NoRegister;
3493 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3494 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3495 if (!InvalidCompOprIdx)
3496 return true;
3498 auto CompOprIdx = *InvalidCompOprIdx;
3499 auto ParsedIdx =
3500 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3501 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3502 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3504 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3505 if (CompOprIdx == VOPD::Component::DST) {
3506 Error(Loc, "one dst register must be even and the other odd");
3507 } else {
3508 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3509 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3510 " operands must use different VGPR banks");
3513 return false;
3516 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3518 const unsigned Opc = Inst.getOpcode();
3519 const MCInstrDesc &Desc = MII.get(Opc);
3521 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3522 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3523 assert(ClampIdx != -1);
3524 return Inst.getOperand(ClampIdx).getImm() == 0;
3527 return true;
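// Check that the vdata operand of a MIMG instruction is wide enough for
// the enabled dmask channels, plus one extra dword when tfe is set; packed
// d16 halves the required width.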
3530 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3531 const SMLoc &IDLoc) {
3533 const unsigned Opc = Inst.getOpcode();
3534 const MCInstrDesc &Desc = MII.get(Opc);
3536 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3537 return true;
3539 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3540 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3541 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3543 assert(VDataIdx != -1);
3545 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3546 return true;
3548 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3549 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3550 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3551 if (DMask == 0)
3552 DMask = 1;
3554 bool IsPackedD16 = false;
3555 unsigned DataSize =
3556 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3557 if (hasPackedD16()) {
3558 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3559 IsPackedD16 = D16Idx >= 0;
3560 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3561 DataSize = (DataSize + 1) / 2;
3564 if ((VDataSize / 4) == DataSize + TFESize)
3565 return true;
3567 StringRef Modifiers;
3568 if (isGFX90A())
3569 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3570 else
3571 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3573 Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3574 return false;
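// Check that the combined width of the vaddr operands matches the address
// size implied by dim, a16 and g16 (GFX10+ only). For NSA encodings each
// vaddr operand is counted individually.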
3577 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3578 const SMLoc &IDLoc) {
3579 const unsigned Opc = Inst.getOpcode();
3580 const MCInstrDesc &Desc = MII.get(Opc);
3582 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3583 return true;
3585 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3587 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3588 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3589 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3590 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3591 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3592 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3594 assert(VAddr0Idx != -1);
3595 assert(SrsrcIdx != -1);
3596 assert(SrsrcIdx > VAddr0Idx);
3598 bool IsA16 = Inst.getOperand(A16Idx).getImm();
3599 if (BaseOpcode->BVH) {
3600 if (IsA16 == BaseOpcode->A16)
3601 return true;
3602 Error(IDLoc, "image address size does not match a16");
3603 return false;
3606 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3607 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3608 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3609 unsigned ActualAddrSize =
3610 IsNSA ? SrsrcIdx - VAddr0Idx
3611 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3613 unsigned ExpectedAddrSize =
3614 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3616 if (IsNSA) {
3617 if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3618 int VAddrLastIdx = SrsrcIdx - 1;
3619 unsigned VAddrLastSize =
3620 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3622 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3624 } else {
3625 if (ExpectedAddrSize > 12)
3626 ExpectedAddrSize = 16;
3628     // Allow an oversized 8-VGPR vaddr when only 5/6/7 VGPRs are required.
3629 // This provides backward compatibility for assembly created
3630 // before 160b/192b/224b types were directly supported.
3631 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3632 return true;
3635 if (ActualAddrSize == ExpectedAddrSize)
3636 return true;
3638 Error(IDLoc, "image address size does not match dim and a16");
3639 return false;
3642 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3644 const unsigned Opc = Inst.getOpcode();
3645 const MCInstrDesc &Desc = MII.get(Opc);
3647 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3648 return true;
3649 if (!Desc.mayLoad() || !Desc.mayStore())
3650 return true; // Not atomic
3652 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3653 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3655 // This is an incomplete check because image_atomic_cmpswap
3656 // may only use 0x3 and 0xf while other atomic operations
3657 // may use 0x1 and 0x3. However these limitations are
3658 // verified when we check that dmask matches dst size.
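// Illustrative (derived from the comment above; exact syntax may vary):
//   image_atomic_add     ... dmask:0x1   ; 32-bit atomic
//   image_atomic_cmpswap ... dmask:0x3   ; 32-bit cmpswap (data + cmp)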
3659 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3662 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3664 const unsigned Opc = Inst.getOpcode();
3665 const MCInstrDesc &Desc = MII.get(Opc);
3667 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3668 return true;
3670 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3671 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3673 // GATHER4 instructions use dmask in a different fashion compared to
3674 // other MIMG instructions. The only useful DMASK values are
3675 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3676 // (red,red,red,red) etc.) The ISA document doesn't mention
3677 // this.
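// Illustrative (syntax may vary): image_gather4 with dmask:0x2 gathers the
// green channel; a multi-bit value such as dmask:0x3 is rejected here.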
3678 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3681 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3682 const unsigned Opc = Inst.getOpcode();
3683 const MCInstrDesc &Desc = MII.get(Opc);
3685 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3686 return true;
3688 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3689 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3690 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3692 if (!BaseOpcode->MSAA)
3693 return true;
3695 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3696 assert(DimIdx != -1);
3698 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3699 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3701 return DimInfo->MSAA;
3704 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3706 switch (Opcode) {
3707 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3708 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3709 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3710 return true;
3711 default:
3712 return false;
3716 // movrels* opcodes should only allow VGPRs as src0.
3717 // This is specified in .td description for vop1/vop3,
3718 // but sdwa is handled differently. See isSDWAOperand.
3719 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3720 const OperandVector &Operands) {
3722 const unsigned Opc = Inst.getOpcode();
3723 const MCInstrDesc &Desc = MII.get(Opc);
3725 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3726 return true;
3728 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3729 assert(Src0Idx != -1);
3731 SMLoc ErrLoc;
3732 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3733 if (Src0.isReg()) {
3734 auto Reg = mc2PseudoReg(Src0.getReg());
3735 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3736 if (!isSGPR(Reg, TRI))
3737 return true;
3738 ErrLoc = getRegLoc(Reg, Operands);
3739 } else {
3740 ErrLoc = getConstLoc(Operands);
3743 Error(ErrLoc, "source operand must be a VGPR");
3744 return false;
3747 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3748 const OperandVector &Operands) {
3750 const unsigned Opc = Inst.getOpcode();
3752 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3753 return true;
3755 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3756 assert(Src0Idx != -1);
3758 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3759 if (!Src0.isReg())
3760 return true;
3762 auto Reg = mc2PseudoReg(Src0.getReg());
3763 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3764 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3765 Error(getRegLoc(Reg, Operands),
3766 "source operand must be either a VGPR or an inline constant");
3767 return false;
3770 return true;
3773 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3774 const OperandVector &Operands) {
3775 unsigned Opcode = Inst.getOpcode();
3776 const MCInstrDesc &Desc = MII.get(Opcode);
3778 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3779 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3780 return true;
3782 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3783 if (Src2Idx == -1)
3784 return true;
3786 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3787 Error(getConstLoc(Operands),
3788 "inline constants are not allowed for this operand");
3789 return false;
3792 return true;
3795 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3796 const OperandVector &Operands) {
3797 const unsigned Opc = Inst.getOpcode();
3798 const MCInstrDesc &Desc = MII.get(Opc);
3800 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3801 return true;
3803 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3804 if (Src2Idx == -1)
3805 return true;
3807 const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3808 if (!Src2.isReg())
3809 return true;
3811 MCRegister Src2Reg = Src2.getReg();
3812 MCRegister DstReg = Inst.getOperand(0).getReg();
3813 if (Src2Reg == DstReg)
3814 return true;
3816 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3817 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3818 return true;
3820 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3821 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3822 "source 2 operand must not partially overlap with dst");
3823 return false;
3826 return true;
3829 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3830 switch (Inst.getOpcode()) {
3831 default:
3832 return true;
3833 case V_DIV_SCALE_F32_gfx6_gfx7:
3834 case V_DIV_SCALE_F32_vi:
3835 case V_DIV_SCALE_F32_gfx10:
3836 case V_DIV_SCALE_F64_gfx6_gfx7:
3837 case V_DIV_SCALE_F64_vi:
3838 case V_DIV_SCALE_F64_gfx10:
3839 break;
3842 // TODO: Check that src0 = src1 or src2.
3844 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3845                     AMDGPU::OpName::src1_modifiers,
3846 AMDGPU::OpName::src2_modifiers}) {
3847 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3848 .getImm() &
3849 SISrcMods::ABS) {
3850 return false;
3854 return true;
3857 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3859 const unsigned Opc = Inst.getOpcode();
3860 const MCInstrDesc &Desc = MII.get(Opc);
3862 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3863 return true;
3865 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3866 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3867 if (isCI() || isSI())
3868 return false;
3871 return true;
3874 static bool IsRevOpcode(const unsigned Opcode)
3876 switch (Opcode) {
3877 case AMDGPU::V_SUBREV_F32_e32:
3878 case AMDGPU::V_SUBREV_F32_e64:
3879 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3880 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3881 case AMDGPU::V_SUBREV_F32_e32_vi:
3882 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3883 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3884 case AMDGPU::V_SUBREV_F32_e64_vi:
3886 case AMDGPU::V_SUBREV_CO_U32_e32:
3887 case AMDGPU::V_SUBREV_CO_U32_e64:
3888 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3889 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3891 case AMDGPU::V_SUBBREV_U32_e32:
3892 case AMDGPU::V_SUBBREV_U32_e64:
3893 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3894 case AMDGPU::V_SUBBREV_U32_e32_vi:
3895 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3896 case AMDGPU::V_SUBBREV_U32_e64_vi:
3898 case AMDGPU::V_SUBREV_U32_e32:
3899 case AMDGPU::V_SUBREV_U32_e64:
3900 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3901 case AMDGPU::V_SUBREV_U32_e32_vi:
3902 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3903 case AMDGPU::V_SUBREV_U32_e64_vi:
3905 case AMDGPU::V_SUBREV_F16_e32:
3906 case AMDGPU::V_SUBREV_F16_e64:
3907 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3908 case AMDGPU::V_SUBREV_F16_e32_vi:
3909 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3910 case AMDGPU::V_SUBREV_F16_e64_vi:
3912 case AMDGPU::V_SUBREV_U16_e32:
3913 case AMDGPU::V_SUBREV_U16_e64:
3914 case AMDGPU::V_SUBREV_U16_e32_vi:
3915 case AMDGPU::V_SUBREV_U16_e64_vi:
3917 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3918 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3919 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3921 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3922 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3924 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3925 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3927 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3928 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3930 case AMDGPU::V_LSHRREV_B32_e32:
3931 case AMDGPU::V_LSHRREV_B32_e64:
3932 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3933 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3934 case AMDGPU::V_LSHRREV_B32_e32_vi:
3935 case AMDGPU::V_LSHRREV_B32_e64_vi:
3936 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3937 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3939 case AMDGPU::V_ASHRREV_I32_e32:
3940 case AMDGPU::V_ASHRREV_I32_e64:
3941 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3942 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3943 case AMDGPU::V_ASHRREV_I32_e32_vi:
3944 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3945 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3946 case AMDGPU::V_ASHRREV_I32_e64_vi:
3948 case AMDGPU::V_LSHLREV_B32_e32:
3949 case AMDGPU::V_LSHLREV_B32_e64:
3950 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3951 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3952 case AMDGPU::V_LSHLREV_B32_e32_vi:
3953 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3954 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3955 case AMDGPU::V_LSHLREV_B32_e64_vi:
3957 case AMDGPU::V_LSHLREV_B16_e32:
3958 case AMDGPU::V_LSHLREV_B16_e64:
3959 case AMDGPU::V_LSHLREV_B16_e32_vi:
3960 case AMDGPU::V_LSHLREV_B16_e64_vi:
3961 case AMDGPU::V_LSHLREV_B16_gfx10:
3963 case AMDGPU::V_LSHRREV_B16_e32:
3964 case AMDGPU::V_LSHRREV_B16_e64:
3965 case AMDGPU::V_LSHRREV_B16_e32_vi:
3966 case AMDGPU::V_LSHRREV_B16_e64_vi:
3967 case AMDGPU::V_LSHRREV_B16_gfx10:
3969 case AMDGPU::V_ASHRREV_I16_e32:
3970 case AMDGPU::V_ASHRREV_I16_e64:
3971 case AMDGPU::V_ASHRREV_I16_e32_vi:
3972 case AMDGPU::V_ASHRREV_I16_e64_vi:
3973 case AMDGPU::V_ASHRREV_I16_gfx10:
3975 case AMDGPU::V_LSHLREV_B64_e64:
3976 case AMDGPU::V_LSHLREV_B64_gfx10:
3977 case AMDGPU::V_LSHLREV_B64_vi:
3979 case AMDGPU::V_LSHRREV_B64_e64:
3980 case AMDGPU::V_LSHRREV_B64_gfx10:
3981 case AMDGPU::V_LSHRREV_B64_vi:
3983 case AMDGPU::V_ASHRREV_I64_e64:
3984 case AMDGPU::V_ASHRREV_I64_gfx10:
3985 case AMDGPU::V_ASHRREV_I64_vi:
3987 case AMDGPU::V_PK_LSHLREV_B16:
3988 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3989 case AMDGPU::V_PK_LSHLREV_B16_vi:
3991 case AMDGPU::V_PK_LSHRREV_B16:
3992 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3993 case AMDGPU::V_PK_LSHRREV_B16_vi:
3994 case AMDGPU::V_PK_ASHRREV_I16:
3995 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3996 case AMDGPU::V_PK_ASHRREV_I16_vi:
3997 return true;
3998 default:
3999 return false;
4003 std::optional<StringRef>
4004 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4006 using namespace SIInstrFlags;
4007 const unsigned Opcode = Inst.getOpcode();
4008 const MCInstrDesc &Desc = MII.get(Opcode);
4010 // lds_direct register is defined so that it can be used
4011 // with 9-bit operands only. Ignore encodings which do not accept these.
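// Illustrative (exact mnemonics may vary): 'v_mov_b32 v0, lds_direct' is the
// intended use; lds_direct appearing as src1/src2, or in an SDWA or *rev
// opcode, is rejected below.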
4012 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4013 if ((Desc.TSFlags & Enc) == 0)
4014 return std::nullopt;
4016 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4017 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4018 if (SrcIdx == -1)
4019 break;
4020 const auto &Src = Inst.getOperand(SrcIdx);
4021 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4023 if (isGFX90A() || isGFX11Plus())
4024 return StringRef("lds_direct is not supported on this GPU");
4026 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4027 return StringRef("lds_direct cannot be used with this instruction");
4029 if (SrcName != OpName::src0)
4030 return StringRef("lds_direct may be used as src0 only");
4034 return std::nullopt;
4037 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4038 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4039 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4040 if (Op.isFlatOffset())
4041 return Op.getStartLoc();
4043 return getLoc();
4046 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4047 const OperandVector &Operands) {
4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4049 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4050 return true;
4052 auto Opcode = Inst.getOpcode();
4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4054 assert(OpNum != -1);
4056 const auto &Op = Inst.getOperand(OpNum);
4057 if (!hasFlatOffsets() && Op.getImm() != 0) {
4058 Error(getFlatOffsetLoc(Operands),
4059 "flat offset modifier is not supported on this GPU");
4060 return false;
4063 // For FLAT segment the offset must be positive;
4064 // MSB is ignored and forced to zero.
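// Illustrative (bit width depends on the subtarget): with 12 offset bits,
// global/scratch accept a signed offset in [-2048, 2047], while plain flat
// accepts an unsigned offset in [0, 2047].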
4065 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4066 bool AllowNegative =
4067 TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4068 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4069 Error(getFlatOffsetLoc(Operands),
4070 Twine("expected a ") +
4071 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4072 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4073 return false;
4076 return true;
4079 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4080 // Start with second operand because SMEM Offset cannot be dst or src0.
4081 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4082 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4083 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4084 return Op.getStartLoc();
4086 return getLoc();
4089 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4090 const OperandVector &Operands) {
4091 if (isCI() || isSI())
4092 return true;
4094 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4095 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4096 return true;
4098 auto Opcode = Inst.getOpcode();
4099 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4100 if (OpNum == -1)
4101 return true;
4103 const auto &Op = Inst.getOperand(OpNum);
4104 if (!Op.isImm())
4105 return true;
4107 uint64_t Offset = Op.getImm();
4108 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4109 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4110 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4111 return true;
4113 Error(getSMEMOffsetLoc(Operands),
4114 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4115 "expected a 21-bit signed offset");
4117 return false;
4120 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4121 unsigned Opcode = Inst.getOpcode();
4122 const MCInstrDesc &Desc = MII.get(Opcode);
4123 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4124 return true;
4126 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4127 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4129 const int OpIndices[] = { Src0Idx, Src1Idx };
4131 unsigned NumExprs = 0;
4132 unsigned NumLiterals = 0;
4133 uint32_t LiteralValue;
4135 for (int OpIdx : OpIndices) {
4136 if (OpIdx == -1) break;
4138 const MCOperand &MO = Inst.getOperand(OpIdx);
4139 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4140 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4141 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4142 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4143 if (NumLiterals == 0 || LiteralValue != Value) {
4144 LiteralValue = Value;
4145 ++NumLiterals;
4147 } else if (MO.isExpr()) {
4148 ++NumExprs;
4153 return NumLiterals + NumExprs <= 1;
4156 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4157 const unsigned Opc = Inst.getOpcode();
4158 if (isPermlane16(Opc)) {
4159 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4160 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4162 if (OpSel & ~3)
4163 return false;
4166 uint64_t TSFlags = MII.get(Opc).TSFlags;
4168 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4169 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4170 if (OpSelIdx != -1) {
4171 if (Inst.getOperand(OpSelIdx).getImm() != 0)
4172 return false;
4174 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4175 if (OpSelHiIdx != -1) {
4176 if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4177 return false;
4181 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4182 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4183 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4184 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4185 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4186 if (OpSel & 3)
4187 return false;
4190 return true;
4193 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4194 const OperandVector &Operands) {
4195 const unsigned Opc = Inst.getOpcode();
4196 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4197 if (DppCtrlIdx < 0)
4198 return true;
4199 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4201 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4202 // DPP64 is supported for row_newbcast only.
4203 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4204 if (Src0Idx >= 0 &&
4205 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4206 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4207 Error(S, "64 bit dpp only supports row_newbcast");
4208 return false;
4212 return true;
4215 // Check if VCC register matches wavefront size
4216 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4217 auto FB = getFeatureBits();
4218 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4219 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4222 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
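// Illustrative (assumed GFX10+ syntax): 'v_add3_u32 v0, 0x1234, 0x1234, v1'
// reuses a single literal and is accepted; two distinct literals are rejected.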
4223 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4224 const OperandVector &Operands) {
4225 unsigned Opcode = Inst.getOpcode();
4226 const MCInstrDesc &Desc = MII.get(Opcode);
4227 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4228 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4229 !HasMandatoryLiteral && !isVOPD(Opcode))
4230 return true;
4232 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4234 unsigned NumExprs = 0;
4235 unsigned NumLiterals = 0;
4236 uint32_t LiteralValue;
4238 for (int OpIdx : OpIndices) {
4239 if (OpIdx == -1)
4240 continue;
4242 const MCOperand &MO = Inst.getOperand(OpIdx);
4243 if (!MO.isImm() && !MO.isExpr())
4244 continue;
4245 if (!isSISrcOperand(Desc, OpIdx))
4246 continue;
4248 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4249 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4250 if (NumLiterals == 0 || LiteralValue != Value) {
4251 LiteralValue = Value;
4252 ++NumLiterals;
4254 } else if (MO.isExpr()) {
4255 ++NumExprs;
4258 NumLiterals += NumExprs;
4260 if (!NumLiterals)
4261 return true;
4263 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4264 Error(getLitLoc(Operands), "literal operands are not supported");
4265 return false;
4268 if (NumLiterals > 1) {
4269 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4270 return false;
4273 return true;
4276 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4277 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4278 const MCRegisterInfo *MRI) {
4279 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4280 if (OpIdx < 0)
4281 return -1;
4283 const MCOperand &Op = Inst.getOperand(OpIdx);
4284 if (!Op.isReg())
4285 return -1;
4287 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4288 auto Reg = Sub ? Sub : Op.getReg();
4289 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4290 return AGPR32.contains(Reg) ? 1 : 0;
4293 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4294 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4295 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4296 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4297 SIInstrFlags::DS)) == 0)
4298 return true;
4300 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4301 : AMDGPU::OpName::vdata;
4303 const MCRegisterInfo *MRI = getMRI();
4304 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4305 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4307 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4308 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4309 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4310 return false;
4313 auto FB = getFeatureBits();
4314 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4315 if (DataAreg < 0 || DstAreg < 0)
4316 return true;
4317 return DstAreg == DataAreg;
4320 return DstAreg < 1 && DataAreg < 1;
4323 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4324 auto FB = getFeatureBits();
4325 if (!FB[AMDGPU::FeatureGFX90AInsts])
4326 return true;
4328 const MCRegisterInfo *MRI = getMRI();
4329 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4330 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4331 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4332 const MCOperand &Op = Inst.getOperand(I);
4333 if (!Op.isReg())
4334 continue;
4336 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4337 if (!Sub)
4338 continue;
4340 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4341 return false;
4342 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4343 return false;
4346 return true;
4349 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4350 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4351 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4352 if (Op.isBLGP())
4353 return Op.getStartLoc();
4355 return SMLoc();
4358 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4359 const OperandVector &Operands) {
4360 unsigned Opc = Inst.getOpcode();
4361 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4362 if (BlgpIdx == -1)
4363 return true;
4364 SMLoc BLGPLoc = getBLGPLoc(Operands);
4365 if (!BLGPLoc.isValid())
4366 return true;
4367 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4368 auto FB = getFeatureBits();
4369 bool UsesNeg = false;
4370 if (FB[AMDGPU::FeatureGFX940Insts]) {
4371 switch (Opc) {
4372 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4373 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4374 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4375 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4376 UsesNeg = true;
4380 if (IsNeg == UsesNeg)
4381 return true;
4383 Error(BLGPLoc,
4384 UsesNeg ? "invalid modifier: blgp is not supported"
4385 : "invalid modifier: neg is not supported");
4387 return false;
4390 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4391 const OperandVector &Operands) {
4392 if (!isGFX11Plus())
4393 return true;
4395 unsigned Opc = Inst.getOpcode();
4396 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4397 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4398 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4399 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4400 return true;
4402 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4403 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4404 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4405 if (Reg == AMDGPU::SGPR_NULL)
4406 return true;
4408 SMLoc RegLoc = getRegLoc(Reg, Operands);
4409 Error(RegLoc, "src0 must be null");
4410 return false;
4413 // gfx90a has an undocumented limitation:
4414 // DS_GWS opcodes must use even aligned registers.
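// Illustrative (assumed syntax): on gfx90a 'ds_gws_init v2 gds' is accepted,
// while 'ds_gws_init v1 gds' is rejected because v1 is odd-aligned.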
4415 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4416 const OperandVector &Operands) {
4417 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4418 return true;
4420 int Opc = Inst.getOpcode();
4421 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4422 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4423 return true;
4425 const MCRegisterInfo *MRI = getMRI();
4426 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4427 int Data0Pos =
4428 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4429 assert(Data0Pos != -1);
4430 auto Reg = Inst.getOperand(Data0Pos).getReg();
4431 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4432 if (RegIdx & 1) {
4433 SMLoc RegLoc = getRegLoc(Reg, Operands);
4434 Error(RegLoc, "vgpr must be even aligned");
4435 return false;
4438 return true;
4441 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4442 const OperandVector &Operands,
4443 const SMLoc &IDLoc) {
4444 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4445 AMDGPU::OpName::cpol);
4446 if (CPolPos == -1)
4447 return true;
4449 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4451 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4452 if (TSFlags & SIInstrFlags::SMRD) {
4453 if (CPol && (isSI() || isCI())) {
4454 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4455 Error(S, "cache policy is not supported for SMRD instructions");
4456 return false;
4458 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4459 Error(IDLoc, "invalid cache policy for SMEM instruction");
4460 return false;
4464 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4465 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4466 StringRef CStr(S.getPointer());
4467 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4468 Error(S, "scc is not supported on this GPU");
4469 return false;
4472 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4473 return true;
4475 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4476 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4477 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4478 : "instruction must use glc");
4479 return false;
4481 } else {
4482 if (CPol & CPol::GLC) {
4483 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4484 StringRef CStr(S.getPointer());
4485 S = SMLoc::getFromPointer(
4486 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4487 Error(S, isGFX940() ? "instruction must not use sc0"
4488 : "instruction must not use glc");
4489 return false;
4493 return true;
4496 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4497 if (!isGFX11Plus())
4498 return true;
4499 for (auto &Operand : Operands) {
4500 if (!Operand->isReg())
4501 continue;
4502 unsigned Reg = Operand->getReg();
4503 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4504 Error(getRegLoc(Reg, Operands),
4505 "execz and vccz are not supported on this GPU");
4506 return false;
4509 return true;
4512 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4513 const OperandVector &Operands) {
4514 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4515 if (Desc.mayStore() &&
4516 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4517 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4518 if (Loc != getInstLoc(Operands)) {
4519 Error(Loc, "TFE modifier has no meaning for store instructions");
4520 return false;
4524 return true;
4527 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4528 const SMLoc &IDLoc,
4529 const OperandVector &Operands) {
4530 if (auto ErrMsg = validateLdsDirect(Inst)) {
4531 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4532 return false;
4534 if (!validateSOPLiteral(Inst)) {
4535 Error(getLitLoc(Operands),
4536 "only one unique literal operand is allowed");
4537 return false;
4539 if (!validateVOPLiteral(Inst, Operands)) {
4540 return false;
4542 if (!validateConstantBusLimitations(Inst, Operands)) {
4543 return false;
4545 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4546 return false;
4548 if (!validateIntClampSupported(Inst)) {
4549 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4550 "integer clamping is not supported on this GPU");
4551 return false;
4553 if (!validateOpSel(Inst)) {
4554 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4555 "invalid op_sel operand");
4556 return false;
4558 if (!validateDPP(Inst, Operands)) {
4559 return false;
4561 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4562 if (!validateMIMGD16(Inst)) {
4563 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4564 "d16 modifier is not supported on this GPU");
4565 return false;
4567 if (!validateMIMGMSAA(Inst)) {
4568 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4569 "invalid dim; must be MSAA type");
4570 return false;
4572 if (!validateMIMGDataSize(Inst, IDLoc)) {
4573 return false;
4575 if (!validateMIMGAddrSize(Inst, IDLoc))
4576 return false;
4577 if (!validateMIMGAtomicDMask(Inst)) {
4578 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4579 "invalid atomic image dmask");
4580 return false;
4582 if (!validateMIMGGatherDMask(Inst)) {
4583 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4584 "invalid image_gather dmask: only one bit must be set");
4585 return false;
4587 if (!validateMovrels(Inst, Operands)) {
4588 return false;
4590 if (!validateFlatOffset(Inst, Operands)) {
4591 return false;
4593 if (!validateSMEMOffset(Inst, Operands)) {
4594 return false;
4596 if (!validateMAIAccWrite(Inst, Operands)) {
4597 return false;
4599 if (!validateMAISrc2(Inst, Operands)) {
4600 return false;
4602 if (!validateMFMA(Inst, Operands)) {
4603 return false;
4605 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4606 return false;
4609 if (!validateAGPRLdSt(Inst)) {
4610 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4611 ? "invalid register class: data and dst should be all VGPR or AGPR"
4612 : "invalid register class: agpr loads and stores not supported on this GPU"
4614 return false;
4616 if (!validateVGPRAlign(Inst)) {
4617 Error(IDLoc,
4618 "invalid register class: vgpr tuples must be 64 bit aligned");
4619 return false;
4621 if (!validateGWS(Inst, Operands)) {
4622 return false;
4625 if (!validateBLGP(Inst, Operands)) {
4626 return false;
4629 if (!validateDivScale(Inst)) {
4630 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4631 return false;
4633 if (!validateWaitCnt(Inst, Operands)) {
4634 return false;
4636 if (!validateExeczVcczOperands(Operands)) {
4637 return false;
4639 if (!validateTFE(Inst, Operands)) {
4640 return false;
4643 return true;
4646 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4647 const FeatureBitset &FBS,
4648 unsigned VariantID = 0);
4650 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4651 const FeatureBitset &AvailableFeatures,
4652 unsigned VariantID);
4654 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4655 const FeatureBitset &FBS) {
4656 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4659 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4660 const FeatureBitset &FBS,
4661 ArrayRef<unsigned> Variants) {
4662 for (auto Variant : Variants) {
4663 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4664 return true;
4667 return false;
4670 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4671 const SMLoc &IDLoc) {
4672 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4674 // Check if requested instruction variant is supported.
4675 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4676 return false;
4678 // This instruction is not supported.
4679 // Clear any other pending errors because they are no longer relevant.
4680 getParser().clearPendingErrors();
4682 // Requested instruction variant is not supported.
4683 // Check if any other variants are supported.
4684 StringRef VariantName = getMatchedVariantName();
4685 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4686 return Error(IDLoc,
4687 Twine(VariantName,
4688 " variant of this instruction is not supported"));
4691 // Check if this instruction may be used with a different wavesize.
4692 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4693 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4695 FeatureBitset FeaturesWS32 = getFeatureBits();
4696 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4697 .flip(AMDGPU::FeatureWavefrontSize32);
4698 FeatureBitset AvailableFeaturesWS32 =
4699 ComputeAvailableFeatures(FeaturesWS32);
4701 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4702 return Error(IDLoc, "instruction requires wavesize=32");
4705 // Finally check if this instruction is supported on any other GPU.
4706 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4707 return Error(IDLoc, "instruction not supported on this GPU");
4710 // Instruction not supported on any GPU. Probably a typo.
4711 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4712 return Error(IDLoc, "invalid instruction" + Suggestion);
4715 static bool isInvalidVOPDY(const OperandVector &Operands,
4716 uint64_t InvalidOprIdx) {
4717 assert(InvalidOprIdx < Operands.size());
4718 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4719 if (Op.isToken() && InvalidOprIdx > 1) {
4720 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4721 return PrevOp.isToken() && PrevOp.getToken() == "::";
4723 return false;
4726 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4727 OperandVector &Operands,
4728 MCStreamer &Out,
4729 uint64_t &ErrorInfo,
4730 bool MatchingInlineAsm) {
4731 MCInst Inst;
4732 unsigned Result = Match_Success;
4733 for (auto Variant : getMatchedVariants()) {
4734 uint64_t EI;
4735 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4736 Variant);
4737 // We order match statuses from least to most specific. We use the most
4738 // specific status as the result:
4739 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4740 if ((R == Match_Success) ||
4741 (R == Match_PreferE32) ||
4742 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4743 (R == Match_InvalidOperand && Result != Match_MissingFeature
4744 && Result != Match_PreferE32) ||
4745 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4746 && Result != Match_MissingFeature
4747 && Result != Match_PreferE32)) {
4748 Result = R;
4749 ErrorInfo = EI;
4751 if (R == Match_Success)
4752 break;
4755 if (Result == Match_Success) {
4756 if (!validateInstruction(Inst, IDLoc, Operands)) {
4757 return true;
4759 Inst.setLoc(IDLoc);
4760 Out.emitInstruction(Inst, getSTI());
4761 return false;
4764 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4765 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4766 return true;
4769 switch (Result) {
4770 default: break;
4771 case Match_MissingFeature:
4772 // It has been verified that the specified instruction
4773 // mnemonic is valid. A match was found but it requires
4774 // features which are not supported on this GPU.
4775 return Error(IDLoc, "operands are not valid for this GPU or mode");
4777 case Match_InvalidOperand: {
4778 SMLoc ErrorLoc = IDLoc;
4779 if (ErrorInfo != ~0ULL) {
4780 if (ErrorInfo >= Operands.size()) {
4781 return Error(IDLoc, "too few operands for instruction");
4783 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4784 if (ErrorLoc == SMLoc())
4785 ErrorLoc = IDLoc;
4787 if (isInvalidVOPDY(Operands, ErrorInfo))
4788 return Error(ErrorLoc, "invalid VOPDY instruction");
4790 return Error(ErrorLoc, "invalid operand for instruction");
4793 case Match_PreferE32:
4794 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4795 "should be encoded as e32");
4796 case Match_MnemonicFail:
4797 llvm_unreachable("Invalid instructions should have been handled already");
4799 llvm_unreachable("Implement any new match types added!");
4802 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4803 int64_t Tmp = -1;
4804 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4805 return true;
4807 if (getParser().parseAbsoluteExpression(Tmp)) {
4808 return true;
4810 Ret = static_cast<uint32_t>(Tmp);
4811 return false;
4814 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4815 uint32_t &Minor) {
4816 if (ParseAsAbsoluteExpression(Major))
4817 return TokError("invalid major version");
4819 if (!trySkipToken(AsmToken::Comma))
4820 return TokError("minor version number required, comma expected");
4822 if (ParseAsAbsoluteExpression(Minor))
4823 return TokError("invalid minor version");
4825 return false;
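// Illustrative directive usage (assumed target id):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"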
4828 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4829 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4830 return TokError("directive only supported for amdgcn architecture");
4832 std::string TargetIDDirective;
4833 SMLoc TargetStart = getTok().getLoc();
4834 if (getParser().parseEscapedString(TargetIDDirective))
4835 return true;
4837 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4838 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4839 return getParser().Error(TargetRange.Start,
4840 (Twine(".amdgcn_target directive's target id ") +
4841 Twine(TargetIDDirective) +
4842 Twine(" does not match the specified target id ") +
4843 Twine(getTargetStreamer().getTargetID()->toString())).str());
4845 return false;
4848 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4849 return Error(Range.Start, "value out of range", Range);
4852 bool AMDGPUAsmParser::calculateGPRBlocks(
4853 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4854 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4855 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4856 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4857 // TODO(scott.linder): These calculations are duplicated from
4858 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
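// Illustrative (granule size depends on the subtarget): with a 4-VGPR
// allocation granule, 9 VGPRs round up to 12, i.e. 3 granules, which the
// IsaInfo helpers below return in granulated (count-minus-one) form.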
4859 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4861 unsigned NumVGPRs = NextFreeVGPR;
4862 unsigned NumSGPRs = NextFreeSGPR;
4864 if (Version.Major >= 10)
4865 NumSGPRs = 0;
4866 else {
4867 unsigned MaxAddressableNumSGPRs =
4868 IsaInfo::getAddressableNumSGPRs(&getSTI());
4870 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4871 NumSGPRs > MaxAddressableNumSGPRs)
4872 return OutOfRangeError(SGPRRange);
4874 NumSGPRs +=
4875 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4877 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4878 NumSGPRs > MaxAddressableNumSGPRs)
4879 return OutOfRangeError(SGPRRange);
4881 if (Features.test(FeatureSGPRInitBug))
4882 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4885 VGPRBlocks =
4886 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4887 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4889 return false;
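// Illustrative kernel descriptor block (minimal, assumed values):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory; see the
// checks after the parsing loop below.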
4892 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4893 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4894 return TokError("directive only supported for amdgcn architecture");
4896 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4897 return TokError("directive only supported for amdhsa OS");
4899 StringRef KernelName;
4900 if (getParser().parseIdentifier(KernelName))
4901 return true;
4903 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4905 StringSet<> Seen;
4907 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4909 SMRange VGPRRange;
4910 uint64_t NextFreeVGPR = 0;
4911 uint64_t AccumOffset = 0;
4912 uint64_t SharedVGPRCount = 0;
4913 SMRange SGPRRange;
4914 uint64_t NextFreeSGPR = 0;
4916 // Count the number of user SGPRs implied from the enabled feature bits.
4917 unsigned ImpliedUserSGPRCount = 0;
4919 // Track if the asm explicitly contains the directive for the user SGPR
4920 // count.
4921 std::optional<unsigned> ExplicitUserSGPRCount;
4922 bool ReserveVCC = true;
4923 bool ReserveFlatScr = true;
4924 std::optional<bool> EnableWavefrontSize32;
4926 while (true) {
4927 while (trySkipToken(AsmToken::EndOfStatement));
4929 StringRef ID;
4930 SMRange IDRange = getTok().getLocRange();
4931 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4932 return true;
4934 if (ID == ".end_amdhsa_kernel")
4935 break;
4937 if (!Seen.insert(ID).second)
4938 return TokError(".amdhsa_ directives cannot be repeated");
4940 SMLoc ValStart = getLoc();
4941 int64_t IVal;
4942 if (getParser().parseAbsoluteExpression(IVal))
4943 return true;
4944 SMLoc ValEnd = getLoc();
4945 SMRange ValRange = SMRange(ValStart, ValEnd);
4947 if (IVal < 0)
4948 return OutOfRangeError(ValRange);
4950 uint64_t Val = IVal;
4952 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4953 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4954 return OutOfRangeError(RANGE); \
4955 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4957 if (ID == ".amdhsa_group_segment_fixed_size") {
4958 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4959 return OutOfRangeError(ValRange);
4960 KD.group_segment_fixed_size = Val;
4961 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4962 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4963 return OutOfRangeError(ValRange);
4964 KD.private_segment_fixed_size = Val;
4965 } else if (ID == ".amdhsa_kernarg_size") {
4966 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4967 return OutOfRangeError(ValRange);
4968 KD.kernarg_size = Val;
4969 } else if (ID == ".amdhsa_user_sgpr_count") {
4970 ExplicitUserSGPRCount = Val;
4971 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4972 if (hasArchitectedFlatScratch())
4973 return Error(IDRange.Start,
4974 "directive is not supported with architected flat scratch",
4975 IDRange);
4976 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4977 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4978 Val, ValRange);
4979 if (Val)
4980 ImpliedUserSGPRCount += 4;
4981 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4982 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4983 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4984 ValRange);
4985 if (Val)
4986 ImpliedUserSGPRCount += 2;
4987 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4988 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4989 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4990 ValRange);
4991 if (Val)
4992 ImpliedUserSGPRCount += 2;
4993 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4994 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4995 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4996 Val, ValRange);
4997 if (Val)
4998 ImpliedUserSGPRCount += 2;
4999 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5000 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5001 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5002 ValRange);
5003 if (Val)
5004 ImpliedUserSGPRCount += 2;
5005 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5006 if (hasArchitectedFlatScratch())
5007 return Error(IDRange.Start,
5008 "directive is not supported with architected flat scratch",
5009 IDRange);
5010 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5011 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5012 ValRange);
5013 if (Val)
5014 ImpliedUserSGPRCount += 2;
5015 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5016 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5017 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5018 Val, ValRange);
5019 if (Val)
5020 ImpliedUserSGPRCount += 1;
5021 } else if (ID == ".amdhsa_wavefront_size32") {
5022 if (IVersion.Major < 10)
5023 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5024 EnableWavefrontSize32 = Val;
5025 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5026 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5027 Val, ValRange);
5028 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5029 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5030 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5031 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5032 if (hasArchitectedFlatScratch())
5033 return Error(IDRange.Start,
5034 "directive is not supported with architected flat scratch",
5035 IDRange);
5036 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5037 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5038 } else if (ID == ".amdhsa_enable_private_segment") {
5039 if (!hasArchitectedFlatScratch())
5040 return Error(
5041 IDRange.Start,
5042 "directive is not supported without architected flat scratch",
5043 IDRange);
5044 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5045 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5046 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5047 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5048 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5049 ValRange);
5050 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5052 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5053 ValRange);
5054 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5055 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5056 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5057 ValRange);
5058 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5059 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5060 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5061 ValRange);
5062 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5063 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5064 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5065 ValRange);
5066 } else if (ID == ".amdhsa_next_free_vgpr") {
5067 VGPRRange = ValRange;
5068 NextFreeVGPR = Val;
5069 } else if (ID == ".amdhsa_next_free_sgpr") {
5070 SGPRRange = ValRange;
5071 NextFreeSGPR = Val;
5072 } else if (ID == ".amdhsa_accum_offset") {
5073 if (!isGFX90A())
5074 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5075 AccumOffset = Val;
5076 } else if (ID == ".amdhsa_reserve_vcc") {
5077 if (!isUInt<1>(Val))
5078 return OutOfRangeError(ValRange);
5079 ReserveVCC = Val;
5080 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5081 if (IVersion.Major < 7)
5082 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5083 if (hasArchitectedFlatScratch())
5084 return Error(IDRange.Start,
5085 "directive is not supported with architected flat scratch",
5086 IDRange);
5087 if (!isUInt<1>(Val))
5088 return OutOfRangeError(ValRange);
5089 ReserveFlatScr = Val;
5090 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5091 if (IVersion.Major < 8)
5092 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5093 if (!isUInt<1>(Val))
5094 return OutOfRangeError(ValRange);
5095 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5096 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5097 IDRange);
5098 } else if (ID == ".amdhsa_float_round_mode_32") {
5099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5100 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5101 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5102 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5103 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5104 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5105 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5106 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5107 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5108 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5109 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5110 ValRange);
5111 } else if (ID == ".amdhsa_dx10_clamp") {
5112 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5113 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5114 } else if (ID == ".amdhsa_ieee_mode") {
5115 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5116 Val, ValRange);
5117 } else if (ID == ".amdhsa_fp16_overflow") {
5118 if (IVersion.Major < 9)
5119 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5120 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5121 ValRange);
5122 } else if (ID == ".amdhsa_tg_split") {
5123 if (!isGFX90A())
5124 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5125 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5126 ValRange);
5127 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5128 if (IVersion.Major < 10)
5129 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5131 ValRange);
5132 } else if (ID == ".amdhsa_memory_ordered") {
5133 if (IVersion.Major < 10)
5134 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5135 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5136 ValRange);
5137 } else if (ID == ".amdhsa_forward_progress") {
5138 if (IVersion.Major < 10)
5139 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5141 ValRange);
5142 } else if (ID == ".amdhsa_shared_vgpr_count") {
5143 if (IVersion.Major < 10)
5144 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5145 SharedVGPRCount = Val;
5146 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5147 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5148 ValRange);
5149 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5150 PARSE_BITS_ENTRY(
5151 KD.compute_pgm_rsrc2,
5152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5153 ValRange);
5154 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5157 Val, ValRange);
5158 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5159 PARSE_BITS_ENTRY(
5160 KD.compute_pgm_rsrc2,
5161 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5162 ValRange);
5163 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5164 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5165 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5166 Val, ValRange);
5167 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5168 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5169 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5170 Val, ValRange);
5171 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5172 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5173 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5174 Val, ValRange);
5175 } else if (ID == ".amdhsa_exception_int_div_zero") {
5176 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5177 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5178 Val, ValRange);
5179 } else {
5180 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5183 #undef PARSE_BITS_ENTRY
5186 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5187 return TokError(".amdhsa_next_free_vgpr directive is required");
5189 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5190 return TokError(".amdhsa_next_free_sgpr directive is required");
5192 unsigned VGPRBlocks;
5193 unsigned SGPRBlocks;
5194 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5195 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5196 EnableWavefrontSize32, NextFreeVGPR,
5197 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5198 SGPRBlocks))
5199 return true;
5201 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5202 VGPRBlocks))
5203 return OutOfRangeError(VGPRRange);
5204 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5205 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5207 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5208 SGPRBlocks))
5209 return OutOfRangeError(SGPRRange);
5210 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5211 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5212 SGPRBlocks);
5214 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5215 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5216 "enabled user SGPRs");
5218 unsigned UserSGPRCount =
5219 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5221 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5222 return TokError("too many user SGPRs enabled");
5223 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5224 UserSGPRCount);
5226 if (isGFX90A()) {
5227 if (!Seen.contains(".amdhsa_accum_offset"))
5228 return TokError(".amdhsa_accum_offset directive is required");
5229 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5230 return TokError("accum_offset should be in range [4..256] in "
5231 "increments of 4");
5232 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5233 return TokError("accum_offset exceeds total VGPR allocation");
5234 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5235 (AccumOffset / 4 - 1));
5238 if (IVersion.Major >= 10) {
5239     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5240 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5241 return TokError("shared_vgpr_count directive not valid on "
5242 "wavefront size 32");
5244 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5245 return TokError("shared_vgpr_count*2 + "
5246 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5247 "exceed 63\n");
5251 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5252 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5253 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5254 return false;
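// Illustrative usage (assumed version numbers):
//   .hsa_code_object_version 2,1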
5257 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5258 uint32_t Major;
5259 uint32_t Minor;
5261 if (ParseDirectiveMajorMinor(Major, Minor))
5262 return true;
5264 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5265 return false;
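// Illustrative usage (assumed ISA and vendor values):
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// With no arguments the directive defaults to the targeted GPU's ISA version.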
5268 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5269 uint32_t Major;
5270 uint32_t Minor;
5271 uint32_t Stepping;
5272 StringRef VendorName;
5273 StringRef ArchName;
5275 // If this directive has no arguments, then use the ISA version for the
5276 // targeted GPU.
5277 if (isToken(AsmToken::EndOfStatement)) {
5278 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5279 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5280 ISA.Stepping,
5281 "AMD", "AMDGPU");
5282 return false;
5285 if (ParseDirectiveMajorMinor(Major, Minor))
5286 return true;
5288 if (!trySkipToken(AsmToken::Comma))
5289 return TokError("stepping version number required, comma expected");
5291 if (ParseAsAbsoluteExpression(Stepping))
5292 return TokError("invalid stepping version");
5294 if (!trySkipToken(AsmToken::Comma))
5295 return TokError("vendor name required, comma expected");
5297 if (!parseString(VendorName, "invalid vendor name"))
5298 return true;
5300 if (!trySkipToken(AsmToken::Comma))
5301 return TokError("arch name required, comma expected");
5303 if (!parseString(ArchName, "invalid arch name"))
5304 return true;
5306 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5307 VendorName, ArchName);
5308 return false;
5311 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5312 amd_kernel_code_t &Header) {
5313 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5314 // assembly for backwards compatibility.
5315 if (ID == "max_scratch_backing_memory_byte_size") {
5316 Parser.eatToEndOfStatement();
5317 return false;
5320 SmallString<40> ErrStr;
5321 raw_svector_ostream Err(ErrStr);
5322 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5323 return TokError(Err.str());
5325 Lex();
5327 if (ID == "enable_wavefront_size32") {
5328 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5329 if (!isGFX10Plus())
5330 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5331 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5332 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5333 } else {
5334 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5335 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5339 if (ID == "wavefront_size") {
5340 if (Header.wavefront_size == 5) {
5341 if (!isGFX10Plus())
5342 return TokError("wavefront_size=5 is only allowed on GFX10+");
5343 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5344 return TokError("wavefront_size=5 requires +WavefrontSize32");
5345 } else if (Header.wavefront_size == 6) {
5346 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5347 return TokError("wavefront_size=6 requires +WavefrontSize64");
5351 if (ID == "enable_wgp_mode") {
5352 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5353 !isGFX10Plus())
5354 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5357 if (ID == "enable_mem_ordered") {
5358 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5359 !isGFX10Plus())
5360 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5363 if (ID == "enable_fwd_progress") {
5364 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5365 !isGFX10Plus())
5366 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5369 return false;
5372 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5373 amd_kernel_code_t Header;
5374 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5376 while (true) {
5377 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5378 // will set the current token to EndOfStatement.
5379 while(trySkipToken(AsmToken::EndOfStatement));
5381 StringRef ID;
5382 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5383 return true;
5385 if (ID == ".end_amd_kernel_code_t")
5386 break;
5388 if (ParseAMDKernelCodeTValue(ID, Header))
5389 return true;
5392 getTargetStreamer().EmitAMDKernelCodeT(Header);
5394 return false;
5397 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5398 StringRef KernelName;
5399 if (!parseId(KernelName, "expected symbol name"))
5400 return true;
5402 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5403 ELF::STT_AMDGPU_HSA_KERNEL);
5405 KernelScope.initialize(getContext());
5406 return false;
5409 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5410 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5411 return Error(getLoc(),
5412 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5413 "architectures");
5416 auto TargetIDDirective = getLexer().getTok().getStringContents();
5417 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5418 return Error(getParser().getTok().getLoc(), "target id must match options");
5420 getTargetStreamer().EmitISAVersion();
5421 Lex();
5423 return false;
5426 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5427 const char *AssemblerDirectiveBegin;
5428 const char *AssemblerDirectiveEnd;
5429 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5430 isHsaAbiVersion3AndAbove(&getSTI())
5431 ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
5432 HSAMD::V3::AssemblerDirectiveEnd)
5433 : std::pair(HSAMD::AssemblerDirectiveBegin,
5434 HSAMD::AssemblerDirectiveEnd);
5436 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5437 return Error(getLoc(),
5438 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5439 "not available on non-amdhsa OSes")).str());
5442 std::string HSAMetadataString;
5443 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5444 HSAMetadataString))
5445 return true;
5447 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5448 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5449 return Error(getLoc(), "invalid HSA metadata");
5450 } else {
5451 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5452 return Error(getLoc(), "invalid HSA metadata");
5455 return false;
5458 /// Common code to parse out a block of text (typically YAML) between start and
5459 /// end directives.
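///
/// For example (the V3 directive names shown are an assumption here):
///   .amdgpu_metadata
///     <YAML document>
///   .end_amdgpu_metadata
/// Everything between the begin and end directives is collected verbatim
/// into CollectString.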
5460 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5461 const char *AssemblerDirectiveEnd,
5462 std::string &CollectString) {
5464 raw_string_ostream CollectStream(CollectString);
5466 getLexer().setSkipSpace(false);
5468 bool FoundEnd = false;
5469 while (!isToken(AsmToken::Eof)) {
5470 while (isToken(AsmToken::Space)) {
5471 CollectStream << getTokenStr();
5472 Lex();
5475 if (trySkipId(AssemblerDirectiveEnd)) {
5476 FoundEnd = true;
5477 break;
5480 CollectStream << Parser.parseStringToEndOfStatement()
5481 << getContext().getAsmInfo()->getSeparatorString();
5483 Parser.eatToEndOfStatement();
5486 getLexer().setSkipSpace(true);
5488 if (isToken(AsmToken::Eof) && !FoundEnd) {
5489 return TokError(Twine("expected directive ") +
5490 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5493 CollectStream.flush();
5494 return false;
5497 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5498 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5499 std::string String;
5500 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5501 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5502 return true;
5504 auto PALMetadata = getTargetStreamer().getPALMetadata();
5505 if (!PALMetadata->setFromString(String))
5506 return Error(getLoc(), "invalid PAL metadata");
5507 return false;
5510 /// Parse the assembler directive for old linear-format PAL metadata.
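/// A sketch of the expected form (the directive spelling is taken from
/// PALMD::AssemblerDirective and the register/value numbers are illustrative):
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42000000
/// Each key/value pair is forwarded to PALMetadata->setRegister().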
5511 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5512 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5513 return Error(getLoc(),
5514 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5515 "not available on non-amdpal OSes")).str());
5518 auto PALMetadata = getTargetStreamer().getPALMetadata();
5519 PALMetadata->setLegacy();
5520 for (;;) {
5521 uint32_t Key, Value;
5522 if (ParseAsAbsoluteExpression(Key)) {
5523 return TokError(Twine("invalid value in ") +
5524 Twine(PALMD::AssemblerDirective));
5526 if (!trySkipToken(AsmToken::Comma)) {
5527 return TokError(Twine("expected an even number of values in ") +
5528 Twine(PALMD::AssemblerDirective));
5530 if (ParseAsAbsoluteExpression(Value)) {
5531 return TokError(Twine("invalid value in ") +
5532 Twine(PALMD::AssemblerDirective));
5534 PALMetadata->setRegister(Key, Value);
5535 if (!trySkipToken(AsmToken::Comma))
5536 break;
5538 return false;
5541 /// ParseDirectiveAMDGPULDS
5542 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
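///
/// For example (illustrative values):
///   .amdgpu_lds lds_buffer, 4096, 16
/// reserves 4096 bytes of LDS for lds_buffer with 16-byte alignment; the
/// alignment defaults to 4 when omitted and must be a power of two.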
5543 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5544 if (getParser().checkForValidSection())
5545 return true;
5547 StringRef Name;
5548 SMLoc NameLoc = getLoc();
5549 if (getParser().parseIdentifier(Name))
5550 return TokError("expected identifier in directive");
5552 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5553 if (getParser().parseComma())
5554 return true;
5556 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5558 int64_t Size;
5559 SMLoc SizeLoc = getLoc();
5560 if (getParser().parseAbsoluteExpression(Size))
5561 return true;
5562 if (Size < 0)
5563 return Error(SizeLoc, "size must be non-negative");
5564 if (Size > LocalMemorySize)
5565 return Error(SizeLoc, "size is too large");
5567 int64_t Alignment = 4;
5568 if (trySkipToken(AsmToken::Comma)) {
5569 SMLoc AlignLoc = getLoc();
5570 if (getParser().parseAbsoluteExpression(Alignment))
5571 return true;
5572 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5573 return Error(AlignLoc, "alignment must be a power of two");
5575 // Alignment larger than the size of LDS is possible in theory, as long
5576     // as the linker manages to place the symbol at address 0, but we do want
5577 // to make sure the alignment fits nicely into a 32-bit integer.
5578 if (Alignment >= 1u << 31)
5579 return Error(AlignLoc, "alignment is too large");
5582 if (parseEOL())
5583 return true;
5585 Symbol->redefineIfPossible();
5586 if (!Symbol->isUndefined())
5587 return Error(NameLoc, "invalid symbol redefinition");
5589 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5590 return false;
5593 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5594 StringRef IDVal = DirectiveID.getString();
5596 if (isHsaAbiVersion3AndAbove(&getSTI())) {
5597 if (IDVal == ".amdhsa_kernel")
5598 return ParseDirectiveAMDHSAKernel();
5600 // TODO: Restructure/combine with PAL metadata directive.
5601 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5602 return ParseDirectiveHSAMetadata();
5603 } else {
5604 if (IDVal == ".hsa_code_object_version")
5605 return ParseDirectiveHSACodeObjectVersion();
5607 if (IDVal == ".hsa_code_object_isa")
5608 return ParseDirectiveHSACodeObjectISA();
5610 if (IDVal == ".amd_kernel_code_t")
5611 return ParseDirectiveAMDKernelCodeT();
5613 if (IDVal == ".amdgpu_hsa_kernel")
5614 return ParseDirectiveAMDGPUHsaKernel();
5616 if (IDVal == ".amd_amdgpu_isa")
5617 return ParseDirectiveISAVersion();
5619 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5620 return ParseDirectiveHSAMetadata();
5623 if (IDVal == ".amdgcn_target")
5624 return ParseDirectiveAMDGCNTarget();
5626 if (IDVal == ".amdgpu_lds")
5627 return ParseDirectiveAMDGPULDS();
5629 if (IDVal == PALMD::AssemblerDirectiveBegin)
5630 return ParseDirectivePALMetadataBegin();
5632 if (IDVal == PALMD::AssemblerDirective)
5633 return ParseDirectivePALMetadata();
5635 return true;
5638 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5639 unsigned RegNo) {
5641 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5642 return isGFX9Plus();
5644   // GFX10+ has 2 more SGPRs: 104 and 105.
5645 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5646 return hasSGPR104_SGPR105();
5648 switch (RegNo) {
5649 case AMDGPU::SRC_SHARED_BASE_LO:
5650 case AMDGPU::SRC_SHARED_BASE:
5651 case AMDGPU::SRC_SHARED_LIMIT_LO:
5652 case AMDGPU::SRC_SHARED_LIMIT:
5653 case AMDGPU::SRC_PRIVATE_BASE_LO:
5654 case AMDGPU::SRC_PRIVATE_BASE:
5655 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5656 case AMDGPU::SRC_PRIVATE_LIMIT:
5657 return isGFX9Plus();
5658 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5659 return isGFX9Plus() && !isGFX11Plus();
5660 case AMDGPU::TBA:
5661 case AMDGPU::TBA_LO:
5662 case AMDGPU::TBA_HI:
5663 case AMDGPU::TMA:
5664 case AMDGPU::TMA_LO:
5665 case AMDGPU::TMA_HI:
5666 return !isGFX9Plus();
5667 case AMDGPU::XNACK_MASK:
5668 case AMDGPU::XNACK_MASK_LO:
5669 case AMDGPU::XNACK_MASK_HI:
5670 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5671 case AMDGPU::SGPR_NULL:
5672 return isGFX10Plus();
5673 default:
5674 break;
5677 if (isCI())
5678 return true;
5680 if (isSI() || isGFX10Plus()) {
5681 // No flat_scr on SI.
5682 // On GFX10Plus flat scratch is not a valid register operand and can only be
5683 // accessed with s_setreg/s_getreg.
5684 switch (RegNo) {
5685 case AMDGPU::FLAT_SCR:
5686 case AMDGPU::FLAT_SCR_LO:
5687 case AMDGPU::FLAT_SCR_HI:
5688 return false;
5689 default:
5690 return true;
5694 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5695 // SI/CI have.
5696 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5697 return hasSGPR102_SGPR103();
5699 return true;
5702 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
5703 StringRef Mnemonic,
5704 OperandMode Mode) {
5705 ParseStatus Res = parseVOPD(Operands);
5706 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5707 return Res;
5709 // Try to parse with a custom parser
5710 Res = MatchOperandParserImpl(Operands, Mnemonic);
5712   // If we successfully parsed the operand or if there was an error parsing,
5713 // we are done.
5715 // If we are parsing after we reach EndOfStatement then this means we
5716 // are appending default values to the Operands list. This is only done
5717 // by custom parser, so we shouldn't continue on to the generic parsing.
5718 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5719 return Res;
5721 SMLoc RBraceLoc;
5722 SMLoc LBraceLoc = getLoc();
5723 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5724 unsigned Prefix = Operands.size();
5726 for (;;) {
5727 auto Loc = getLoc();
5728 Res = parseReg(Operands);
5729 if (Res.isNoMatch())
5730 Error(Loc, "expected a register");
5731 if (!Res.isSuccess())
5732 return ParseStatus::Failure;
5734 RBraceLoc = getLoc();
5735 if (trySkipToken(AsmToken::RBrac))
5736 break;
5738 if (!skipToken(AsmToken::Comma,
5739 "expected a comma or a closing square bracket"))
5740 return ParseStatus::Failure;
5743 if (Operands.size() - Prefix > 1) {
5744 Operands.insert(Operands.begin() + Prefix,
5745 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5746 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5749 return ParseStatus::Success;
5752 return parseRegOrImm(Operands);
5755 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5756 // Clear any forced encodings from the previous instruction.
5757 setForcedEncodingSize(0);
5758 setForcedDPP(false);
5759 setForcedSDWA(false);
5761 if (Name.endswith("_e64_dpp")) {
5762 setForcedDPP(true);
5763 setForcedEncodingSize(64);
5764 return Name.substr(0, Name.size() - 8);
5765 } else if (Name.endswith("_e64")) {
5766 setForcedEncodingSize(64);
5767 return Name.substr(0, Name.size() - 4);
5768 } else if (Name.endswith("_e32")) {
5769 setForcedEncodingSize(32);
5770 return Name.substr(0, Name.size() - 4);
5771 } else if (Name.endswith("_dpp")) {
5772 setForcedDPP(true);
5773 return Name.substr(0, Name.size() - 4);
5774 } else if (Name.endswith("_sdwa")) {
5775 setForcedSDWA(true);
5776 return Name.substr(0, Name.size() - 5);
5778 return Name;
5781 static void applyMnemonicAliases(StringRef &Mnemonic,
5782 const FeatureBitset &Features,
5783 unsigned VariantID);
5785 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5786 StringRef Name,
5787 SMLoc NameLoc, OperandVector &Operands) {
5788 // Add the instruction mnemonic
5789 Name = parseMnemonicSuffix(Name);
5791 // If the target architecture uses MnemonicAlias, call it here to parse
5792 // operands correctly.
5793 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5795 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5797 bool IsMIMG = Name.startswith("image_");
5799 while (!trySkipToken(AsmToken::EndOfStatement)) {
5800 OperandMode Mode = OperandMode_Default;
5801 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5802 Mode = OperandMode_NSA;
5803 ParseStatus Res = parseOperand(Operands, Name, Mode);
5805 if (!Res.isSuccess()) {
5806 checkUnsupportedInstruction(Name, NameLoc);
5807 if (!Parser.hasPendingError()) {
5808 // FIXME: use real operand location rather than the current location.
5809 StringRef Msg = Res.isFailure() ? "failed parsing operand."
5810 : "not a valid operand.";
5811 Error(getLoc(), Msg);
5813 while (!trySkipToken(AsmToken::EndOfStatement)) {
5814 lex();
5816 return true;
5819 // Eat the comma or space if there is one.
5820 trySkipToken(AsmToken::Comma);
5823 return false;
5826 //===----------------------------------------------------------------------===//
5827 // Utility functions
5828 //===----------------------------------------------------------------------===//
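// The helpers below handle the common "prefixed" operand forms, for example
// an integer with a prefix such as "offset:16" (parseIntWithPrefix), a small
// 0/1 array such as "neg:[0,1,1]" (parseOperandArrayWithPrefix), and named
// bits such as "r128" / "nor128" (parseNamedBit). The concrete values are
// illustrative only.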
5830 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
5831 OperandVector &Operands) {
5832 SMLoc S = getLoc();
5833 if (!trySkipId(Name))
5834 return ParseStatus::NoMatch;
5836 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
5837 return ParseStatus::Success;
5840 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
5841 int64_t &IntVal) {
5843 if (!trySkipId(Prefix, AsmToken::Colon))
5844 return ParseStatus::NoMatch;
5846 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
5849 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
5850 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5851 std::function<bool(int64_t &)> ConvertResult) {
5852 SMLoc S = getLoc();
5853 int64_t Value = 0;
5855 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
5856 if (!Res.isSuccess())
5857 return Res;
5859 if (ConvertResult && !ConvertResult(Value)) {
5860 Error(S, "invalid " + StringRef(Prefix) + " value.");
5863 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5864 return ParseStatus::Success;
5867 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
5868 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5869 bool (*ConvertResult)(int64_t &)) {
5870 SMLoc S = getLoc();
5871 if (!trySkipId(Prefix, AsmToken::Colon))
5872 return ParseStatus::NoMatch;
5874 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5875 return ParseStatus::Failure;
5877 unsigned Val = 0;
5878 const unsigned MaxSize = 4;
5880 // FIXME: How to verify the number of elements matches the number of src
5881 // operands?
5882 for (int I = 0; ; ++I) {
5883 int64_t Op;
5884 SMLoc Loc = getLoc();
5885 if (!parseExpr(Op))
5886 return ParseStatus::Failure;
5888 if (Op != 0 && Op != 1)
5889 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5891 Val |= (Op << I);
5893 if (trySkipToken(AsmToken::RBrac))
5894 break;
5896 if (I + 1 == MaxSize)
5897 return Error(getLoc(), "expected a closing square bracket");
5899 if (!skipToken(AsmToken::Comma, "expected a comma"))
5900 return ParseStatus::Failure;
5903 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5904 return ParseStatus::Success;
5907 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
5908 OperandVector &Operands,
5909 AMDGPUOperand::ImmTy ImmTy) {
5910 int64_t Bit;
5911 SMLoc S = getLoc();
5913 if (trySkipId(Name)) {
5914 Bit = 1;
5915 } else if (trySkipId("no", Name)) {
5916 Bit = 0;
5917 } else {
5918 return ParseStatus::NoMatch;
5921 if (Name == "r128" && !hasMIMG_R128())
5922 return Error(S, "r128 modifier is not supported on this GPU");
5923 if (Name == "a16" && !hasA16())
5924 return Error(S, "a16 modifier is not supported on this GPU");
5926 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5927 ImmTy = AMDGPUOperand::ImmTyR128A16;
5929 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5930 return ParseStatus::Success;
5933 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
5934 bool &Disabling) const {
5935 Disabling = Id.consume_front("no");
5937 if (isGFX940() && !Mnemo.startswith("s_")) {
5938 return StringSwitch<unsigned>(Id)
5939 .Case("nt", AMDGPU::CPol::NT)
5940 .Case("sc0", AMDGPU::CPol::SC0)
5941 .Case("sc1", AMDGPU::CPol::SC1)
5942 .Default(0);
5945 return StringSwitch<unsigned>(Id)
5946 .Case("dlc", AMDGPU::CPol::DLC)
5947 .Case("glc", AMDGPU::CPol::GLC)
5948 .Case("scc", AMDGPU::CPol::SCC)
5949 .Case("slc", AMDGPU::CPol::SLC)
5950 .Default(0);
5953 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5954 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5955 SMLoc OpLoc = getLoc();
5956 unsigned Enabled = 0, Seen = 0;
5957 for (;;) {
5958 SMLoc S = getLoc();
5959 bool Disabling;
5960 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
5961 if (!CPol)
5962 break;
5964 lex();
5966 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
5967 return Error(S, "dlc modifier is not supported on this GPU");
5969 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
5970 return Error(S, "scc modifier is not supported on this GPU");
5972 if (Seen & CPol)
5973 return Error(S, "duplicate cache policy modifier");
5975 if (!Disabling)
5976 Enabled |= CPol;
5978 Seen |= CPol;
5981 if (!Seen)
5982 return ParseStatus::NoMatch;
5984 Operands.push_back(
5985 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
5986 return ParseStatus::Success;
5989 static void addOptionalImmOperand(
5990 MCInst& Inst, const OperandVector& Operands,
5991 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5992 AMDGPUOperand::ImmTy ImmT,
5993 int64_t Default = 0) {
5994 auto i = OptionalIdx.find(ImmT);
5995 if (i != OptionalIdx.end()) {
5996 unsigned Idx = i->second;
5997 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5998 } else {
5999 Inst.addOperand(MCOperand::createImm(Default));
6003 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6004 StringRef &Value,
6005 SMLoc &StringLoc) {
6006 if (!trySkipId(Prefix, AsmToken::Colon))
6007 return ParseStatus::NoMatch;
6009 StringLoc = getLoc();
6010 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6011 : ParseStatus::Failure;
6014 //===----------------------------------------------------------------------===//
6015 // MTBUF format
6016 //===----------------------------------------------------------------------===//
6018 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6019 int64_t MaxVal,
6020 int64_t &Fmt) {
6021 int64_t Val;
6022 SMLoc Loc = getLoc();
6024 auto Res = parseIntWithPrefix(Pref, Val);
6025 if (Res.isFailure())
6026 return false;
6027 if (Res.isNoMatch())
6028 return true;
6030 if (Val < 0 || Val > MaxVal) {
6031 Error(Loc, Twine("out of range ", StringRef(Pref)));
6032 return false;
6035 Fmt = Val;
6036 return true;
6039 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6040 // values to live in a joint format operand in the MCInst encoding.
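// For example (illustrative values), "dfmt:5, nfmt:2" and "nfmt:2 dfmt:5" are
// both accepted and yield the same joint format value; either field may be
// omitted, in which case its default is used.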
6041 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6042 using namespace llvm::AMDGPU::MTBUFFormat;
6044 int64_t Dfmt = DFMT_UNDEF;
6045 int64_t Nfmt = NFMT_UNDEF;
6047 // dfmt and nfmt can appear in either order, and each is optional.
6048 for (int I = 0; I < 2; ++I) {
6049 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6050 return ParseStatus::Failure;
6052 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6053 return ParseStatus::Failure;
6055 // Skip optional comma between dfmt/nfmt
6056 // but guard against 2 commas following each other.
6057 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6058 !peekToken().is(AsmToken::Comma)) {
6059 trySkipToken(AsmToken::Comma);
6063 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6064 return ParseStatus::NoMatch;
6066 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6067 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6069 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6070 return ParseStatus::Success;
6073 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6074 using namespace llvm::AMDGPU::MTBUFFormat;
6076 int64_t Fmt = UFMT_UNDEF;
6078 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6079 return ParseStatus::Failure;
6081 if (Fmt == UFMT_UNDEF)
6082 return ParseStatus::NoMatch;
6084 Format = Fmt;
6085 return ParseStatus::Success;
6088 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6089 int64_t &Nfmt,
6090 StringRef FormatStr,
6091 SMLoc Loc) {
6092 using namespace llvm::AMDGPU::MTBUFFormat;
6093 int64_t Format;
6095 Format = getDfmt(FormatStr);
6096 if (Format != DFMT_UNDEF) {
6097 Dfmt = Format;
6098 return true;
6101 Format = getNfmt(FormatStr, getSTI());
6102 if (Format != NFMT_UNDEF) {
6103 Nfmt = Format;
6104 return true;
6107 Error(Loc, "unsupported format");
6108 return false;
6111 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6112 SMLoc FormatLoc,
6113 int64_t &Format) {
6114 using namespace llvm::AMDGPU::MTBUFFormat;
6116 int64_t Dfmt = DFMT_UNDEF;
6117 int64_t Nfmt = NFMT_UNDEF;
6118 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6119 return ParseStatus::Failure;
6121 if (trySkipToken(AsmToken::Comma)) {
6122 StringRef Str;
6123 SMLoc Loc = getLoc();
6124 if (!parseId(Str, "expected a format string") ||
6125 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6126 return ParseStatus::Failure;
6127 if (Dfmt == DFMT_UNDEF)
6128 return Error(Loc, "duplicate numeric format");
6129 if (Nfmt == NFMT_UNDEF)
6130 return Error(Loc, "duplicate data format");
6133 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6134 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6136 if (isGFX10Plus()) {
6137 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6138 if (Ufmt == UFMT_UNDEF)
6139 return Error(FormatLoc, "unsupported format");
6140 Format = Ufmt;
6141 } else {
6142 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6145 return ParseStatus::Success;
6148 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6149 SMLoc Loc,
6150 int64_t &Format) {
6151 using namespace llvm::AMDGPU::MTBUFFormat;
6153 auto Id = getUnifiedFormat(FormatStr, getSTI());
6154 if (Id == UFMT_UNDEF)
6155 return ParseStatus::NoMatch;
6157 if (!isGFX10Plus())
6158 return Error(Loc, "unified format is not supported on this GPU");
6160 Format = Id;
6161 return ParseStatus::Success;
6164 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6165 using namespace llvm::AMDGPU::MTBUFFormat;
6166 SMLoc Loc = getLoc();
6168 if (!parseExpr(Format))
6169 return ParseStatus::Failure;
6170 if (!isValidFormatEncoding(Format, getSTI()))
6171 return Error(Loc, "out of range format");
6173 return ParseStatus::Success;
6176 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6177 using namespace llvm::AMDGPU::MTBUFFormat;
6179 if (!trySkipId("format", AsmToken::Colon))
6180 return ParseStatus::NoMatch;
6182 if (trySkipToken(AsmToken::LBrac)) {
6183 StringRef FormatStr;
6184 SMLoc Loc = getLoc();
6185 if (!parseId(FormatStr, "expected a format string"))
6186 return ParseStatus::Failure;
6188 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6189 if (Res.isNoMatch())
6190 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6191 if (!Res.isSuccess())
6192 return Res;
6194 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6195 return ParseStatus::Failure;
6197 return ParseStatus::Success;
6200 return parseNumericFormat(Format);
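// Illustrative forms accepted above: a numeric format such as "format:22", or
// a symbolic one such as "format:[BUF_FMT_32_FLOAT]" on GFX10+ (unified) and
// "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]" on earlier targets
// (split). The symbolic names shown are assumptions; the accepted names come
// from getUnifiedFormat()/getDfmt()/getNfmt().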
6203 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6204 using namespace llvm::AMDGPU::MTBUFFormat;
6206 int64_t Format = getDefaultFormatEncoding(getSTI());
6207 ParseStatus Res;
6208 SMLoc Loc = getLoc();
6210 // Parse legacy format syntax.
6211 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6212 if (Res.isFailure())
6213 return Res;
6215 bool FormatFound = Res.isSuccess();
6217 Operands.push_back(
6218 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6220 if (FormatFound)
6221 trySkipToken(AsmToken::Comma);
6223 if (isToken(AsmToken::EndOfStatement)) {
6224 // We are expecting an soffset operand,
6225     // but let the matcher handle the error.
6226 return ParseStatus::Success;
6229 // Parse soffset.
6230 Res = parseRegOrImm(Operands);
6231 if (!Res.isSuccess())
6232 return Res;
6234 trySkipToken(AsmToken::Comma);
6236 if (!FormatFound) {
6237 Res = parseSymbolicOrNumericFormat(Format);
6238 if (Res.isFailure())
6239 return Res;
6240 if (Res.isSuccess()) {
6241 auto Size = Operands.size();
6242 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6243 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6244 Op.setImm(Format);
6246 return ParseStatus::Success;
6249 if (isId("format") && peekToken().is(AsmToken::Colon))
6250 return Error(getLoc(), "duplicate format");
6251 return ParseStatus::Success;
6254 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6255 ParseStatus Res =
6256 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6257 if (Res.isNoMatch()) {
6258 Res = parseIntWithPrefix("inst_offset", Operands,
6259 AMDGPUOperand::ImmTyInstOffset);
6261 return Res;
6264 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6265 ParseStatus Res =
6266 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6267 if (Res.isNoMatch())
6268 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6269 return Res;
6272 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6273 ParseStatus Res =
6274 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6275 if (Res.isNoMatch()) {
6276 Res =
6277 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6279 return Res;
6282 //===----------------------------------------------------------------------===//
6283 // ds
6284 //===----------------------------------------------------------------------===//
6286 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6287 const OperandVector &Operands) {
6288 OptionalImmIndexMap OptionalIdx;
6290 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6291 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6293 // Add the register arguments
6294 if (Op.isReg()) {
6295 Op.addRegOperands(Inst, 1);
6296 continue;
6299 // Handle optional arguments
6300 OptionalIdx[Op.getImmTy()] = i;
6303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6304 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6305 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6307 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6310 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6311 bool IsGdsHardcoded) {
6312 OptionalImmIndexMap OptionalIdx;
6313 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6314 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6316 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6317 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6319 auto TiedTo =
6320 Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
6322 if (TiedTo != -1) {
6323 assert((unsigned)TiedTo < Inst.getNumOperands());
6324 Inst.addOperand(Inst.getOperand(TiedTo));
6327 // Add the register arguments
6328 if (Op.isReg()) {
6329 Op.addRegOperands(Inst, 1);
6330 continue;
6333 if (Op.isToken() && Op.getToken() == "gds") {
6334 IsGdsHardcoded = true;
6335 continue;
6338 // Handle optional arguments
6339 OptionalIdx[Op.getImmTy()] = i;
6341 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6342 OffsetType = AMDGPUOperand::ImmTySwizzle;
6345 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6347 if (!IsGdsHardcoded) {
6348 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6350 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6353 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6354 OptionalImmIndexMap OptionalIdx;
6356 unsigned OperandIdx[4];
6357 unsigned EnMask = 0;
6358 int SrcIdx = 0;
6360 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6361 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6363 // Add the register arguments
6364 if (Op.isReg()) {
6365 assert(SrcIdx < 4);
6366 OperandIdx[SrcIdx] = Inst.size();
6367 Op.addRegOperands(Inst, 1);
6368 ++SrcIdx;
6369 continue;
6372 if (Op.isOff()) {
6373 assert(SrcIdx < 4);
6374 OperandIdx[SrcIdx] = Inst.size();
6375 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6376 ++SrcIdx;
6377 continue;
6380 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6381 Op.addImmOperands(Inst, 1);
6382 continue;
6385 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6386 continue;
6388 // Handle optional arguments
6389 OptionalIdx[Op.getImmTy()] = i;
6392 assert(SrcIdx == 4);
6394 bool Compr = false;
6395 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6396 Compr = true;
6397 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6398 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6399 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6402 for (auto i = 0; i < SrcIdx; ++i) {
6403 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6404 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6411 Inst.addOperand(MCOperand::createImm(EnMask));
6414 //===----------------------------------------------------------------------===//
6415 // s_waitcnt
6416 //===----------------------------------------------------------------------===//
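// parseSWaitCnt() accepts either a raw immediate or a list of named counters
// joined by '&' or ',', for example (illustrative):
//   s_waitcnt vmcnt(0) & expcnt(0), lgkmcnt(0)
// A "_sat" suffix (e.g. vmcnt_sat) clamps an out-of-range value instead of
// reporting an error.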
6418 static bool
6419 encodeCnt(
6420 const AMDGPU::IsaVersion ISA,
6421 int64_t &IntVal,
6422 int64_t CntVal,
6423 bool Saturate,
6424 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6425 unsigned (*decode)(const IsaVersion &Version, unsigned))
6427 bool Failed = false;
6429 IntVal = encode(ISA, IntVal, CntVal);
6430 if (CntVal != decode(ISA, IntVal)) {
6431 if (Saturate) {
6432 IntVal = encode(ISA, IntVal, -1);
6433 } else {
6434 Failed = true;
6437 return Failed;
6440 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6442 SMLoc CntLoc = getLoc();
6443 StringRef CntName = getTokenStr();
6445 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6446 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6447 return false;
6449 int64_t CntVal;
6450 SMLoc ValLoc = getLoc();
6451 if (!parseExpr(CntVal))
6452 return false;
6454 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6456 bool Failed = true;
6457 bool Sat = CntName.endswith("_sat");
6459 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6460 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6461 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6462 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6463 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6464 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6465 } else {
6466 Error(CntLoc, "invalid counter name " + CntName);
6467 return false;
6470 if (Failed) {
6471 Error(ValLoc, "too large value for " + CntName);
6472 return false;
6475 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6476 return false;
6478 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6479 if (isToken(AsmToken::EndOfStatement)) {
6480 Error(getLoc(), "expected a counter name");
6481 return false;
6485 return true;
6488 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6489 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6490 int64_t Waitcnt = getWaitcntBitMask(ISA);
6491 SMLoc S = getLoc();
6493 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6494 while (!isToken(AsmToken::EndOfStatement)) {
6495 if (!parseCnt(Waitcnt))
6496 return ParseStatus::Failure;
6498 } else {
6499 if (!parseExpr(Waitcnt))
6500 return ParseStatus::Failure;
6503 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6504 return ParseStatus::Success;
6507 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6508 SMLoc FieldLoc = getLoc();
6509 StringRef FieldName = getTokenStr();
6510 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6511 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6512 return false;
6514 SMLoc ValueLoc = getLoc();
6515 StringRef ValueName = getTokenStr();
6516 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6517 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6518 return false;
6520 unsigned Shift;
6521 if (FieldName == "instid0") {
6522 Shift = 0;
6523 } else if (FieldName == "instskip") {
6524 Shift = 4;
6525 } else if (FieldName == "instid1") {
6526 Shift = 7;
6527 } else {
6528 Error(FieldLoc, "invalid field name " + FieldName);
6529 return false;
6532 int Value;
6533 if (Shift == 4) {
6534 // Parse values for instskip.
6535 Value = StringSwitch<int>(ValueName)
6536 .Case("SAME", 0)
6537 .Case("NEXT", 1)
6538 .Case("SKIP_1", 2)
6539 .Case("SKIP_2", 3)
6540 .Case("SKIP_3", 4)
6541 .Case("SKIP_4", 5)
6542 .Default(-1);
6543 } else {
6544 // Parse values for instid0 and instid1.
6545 Value = StringSwitch<int>(ValueName)
6546 .Case("NO_DEP", 0)
6547 .Case("VALU_DEP_1", 1)
6548 .Case("VALU_DEP_2", 2)
6549 .Case("VALU_DEP_3", 3)
6550 .Case("VALU_DEP_4", 4)
6551 .Case("TRANS32_DEP_1", 5)
6552 .Case("TRANS32_DEP_2", 6)
6553 .Case("TRANS32_DEP_3", 7)
6554 .Case("FMA_ACCUM_CYCLE_1", 8)
6555 .Case("SALU_CYCLE_1", 9)
6556 .Case("SALU_CYCLE_2", 10)
6557 .Case("SALU_CYCLE_3", 11)
6558 .Default(-1);
6560 if (Value < 0) {
6561 Error(ValueLoc, "invalid value name " + ValueName);
6562 return false;
6565 Delay |= Value << Shift;
6566 return true;
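// Illustrative operand accepted by parseDelay()/parseSDelayALU():
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// Fields are separated by '|'; a plain integer expression is also accepted.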
6569 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6570 int64_t Delay = 0;
6571 SMLoc S = getLoc();
6573 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6574 do {
6575 if (!parseDelay(Delay))
6576 return ParseStatus::Failure;
6577 } while (trySkipToken(AsmToken::Pipe));
6578 } else {
6579 if (!parseExpr(Delay))
6580 return ParseStatus::Failure;
6583 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6584 return ParseStatus::Success;
6587 bool
6588 AMDGPUOperand::isSWaitCnt() const {
6589 return isImm();
6592 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6594 //===----------------------------------------------------------------------===//
6595 // DepCtr
6596 //===----------------------------------------------------------------------===//
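// Parses a dependency counter operand (as used by s_waitcnt_depctr; the
// instruction and counter names below are assumptions for the example):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_vm_vsrc(0)
// Counter names are validated by encodeDepCtr(); a plain integer expression
// is also accepted.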
6598 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6599 StringRef DepCtrName) {
6600 switch (ErrorId) {
6601 case OPR_ID_UNKNOWN:
6602 Error(Loc, Twine("invalid counter name ", DepCtrName));
6603 return;
6604 case OPR_ID_UNSUPPORTED:
6605 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6606 return;
6607 case OPR_ID_DUPLICATE:
6608 Error(Loc, Twine("duplicate counter name ", DepCtrName));
6609 return;
6610 case OPR_VAL_INVALID:
6611 Error(Loc, Twine("invalid value for ", DepCtrName));
6612 return;
6613 default:
6614 assert(false);
6618 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6620 using namespace llvm::AMDGPU::DepCtr;
6622 SMLoc DepCtrLoc = getLoc();
6623 StringRef DepCtrName = getTokenStr();
6625 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6626 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6627 return false;
6629 int64_t ExprVal;
6630 if (!parseExpr(ExprVal))
6631 return false;
6633 unsigned PrevOprMask = UsedOprMask;
6634 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6636 if (CntVal < 0) {
6637 depCtrError(DepCtrLoc, CntVal, DepCtrName);
6638 return false;
6641 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6642 return false;
6644 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6645 if (isToken(AsmToken::EndOfStatement)) {
6646 Error(getLoc(), "expected a counter name");
6647 return false;
6651 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6652 DepCtr = (DepCtr & ~CntValMask) | CntVal;
6653 return true;
6656 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
6657 using namespace llvm::AMDGPU::DepCtr;
6659 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6660 SMLoc Loc = getLoc();
6662 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6663 unsigned UsedOprMask = 0;
6664 while (!isToken(AsmToken::EndOfStatement)) {
6665 if (!parseDepCtr(DepCtr, UsedOprMask))
6666 return ParseStatus::Failure;
6668 } else {
6669 if (!parseExpr(DepCtr))
6670 return ParseStatus::Failure;
6673 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6674 return ParseStatus::Success;
6677 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6679 //===----------------------------------------------------------------------===//
6680 // hwreg
6681 //===----------------------------------------------------------------------===//
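// Parses hwreg(<name or code> [, <bit offset>, <bit width>]) as used by
// s_getreg_b32/s_setreg_b32; for example (the register name is an assumption):
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 0, 32)
// A bare 16-bit immediate is also accepted in place of the hwreg() macro.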
6683 bool
6684 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6685 OperandInfoTy &Offset,
6686 OperandInfoTy &Width) {
6687 using namespace llvm::AMDGPU::Hwreg;
6689 // The register may be specified by name or using a numeric code
6690 HwReg.Loc = getLoc();
6691 if (isToken(AsmToken::Identifier) &&
6692 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6693 HwReg.IsSymbolic = true;
6694 lex(); // skip register name
6695 } else if (!parseExpr(HwReg.Id, "a register name")) {
6696 return false;
6699 if (trySkipToken(AsmToken::RParen))
6700 return true;
6702 // parse optional params
6703 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6704 return false;
6706 Offset.Loc = getLoc();
6707 if (!parseExpr(Offset.Id))
6708 return false;
6710 if (!skipToken(AsmToken::Comma, "expected a comma"))
6711 return false;
6713 Width.Loc = getLoc();
6714 return parseExpr(Width.Id) &&
6715 skipToken(AsmToken::RParen, "expected a closing parenthesis");
6718 bool
6719 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6720 const OperandInfoTy &Offset,
6721 const OperandInfoTy &Width) {
6723 using namespace llvm::AMDGPU::Hwreg;
6725 if (HwReg.IsSymbolic) {
6726 if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6727 Error(HwReg.Loc,
6728 "specified hardware register is not supported on this GPU");
6729 return false;
6731 } else {
6732 if (!isValidHwreg(HwReg.Id)) {
6733 Error(HwReg.Loc,
6734 "invalid code of hardware register: only 6-bit values are legal");
6735 return false;
6738 if (!isValidHwregOffset(Offset.Id)) {
6739 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6740 return false;
6742 if (!isValidHwregWidth(Width.Id)) {
6743 Error(Width.Loc,
6744 "invalid bitfield width: only values from 1 to 32 are legal");
6745 return false;
6747 return true;
6750 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6751 using namespace llvm::AMDGPU::Hwreg;
6753 int64_t ImmVal = 0;
6754 SMLoc Loc = getLoc();
6756 if (trySkipId("hwreg", AsmToken::LParen)) {
6757 OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6758 OperandInfoTy Offset(OFFSET_DEFAULT_);
6759 OperandInfoTy Width(WIDTH_DEFAULT_);
6760 if (parseHwregBody(HwReg, Offset, Width) &&
6761 validateHwreg(HwReg, Offset, Width)) {
6762 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6763 } else {
6764 return ParseStatus::Failure;
6766 } else if (parseExpr(ImmVal, "a hwreg macro")) {
6767 if (ImmVal < 0 || !isUInt<16>(ImmVal))
6768 return Error(Loc, "invalid immediate: only 16-bit values are legal");
6769 } else {
6770 return ParseStatus::Failure;
6773 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6774 return ParseStatus::Success;
6777 bool AMDGPUOperand::isHwreg() const {
6778 return isImmTy(ImmTyHwreg);
6781 //===----------------------------------------------------------------------===//
6782 // sendmsg
6783 //===----------------------------------------------------------------------===//
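// Parses sendmsg(<msg> [, <operation> [, <stream>]]) for s_sendmsg; for
// example (the message and operation names are assumptions):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A bare 16-bit immediate is also accepted in place of the sendmsg() macro.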
6785 bool
6786 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6787 OperandInfoTy &Op,
6788 OperandInfoTy &Stream) {
6789 using namespace llvm::AMDGPU::SendMsg;
6791 Msg.Loc = getLoc();
6792 if (isToken(AsmToken::Identifier) &&
6793 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6794 Msg.IsSymbolic = true;
6795 lex(); // skip message name
6796 } else if (!parseExpr(Msg.Id, "a message name")) {
6797 return false;
6800 if (trySkipToken(AsmToken::Comma)) {
6801 Op.IsDefined = true;
6802 Op.Loc = getLoc();
6803 if (isToken(AsmToken::Identifier) &&
6804 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6805 lex(); // skip operation name
6806 } else if (!parseExpr(Op.Id, "an operation name")) {
6807 return false;
6810 if (trySkipToken(AsmToken::Comma)) {
6811 Stream.IsDefined = true;
6812 Stream.Loc = getLoc();
6813 if (!parseExpr(Stream.Id))
6814 return false;
6818 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6821 bool
6822 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6823 const OperandInfoTy &Op,
6824 const OperandInfoTy &Stream) {
6825 using namespace llvm::AMDGPU::SendMsg;
6827 // Validation strictness depends on whether message is specified
6828 // in a symbolic or in a numeric form. In the latter case
6829   // only whether the value can be encoded is checked.
6830 bool Strict = Msg.IsSymbolic;
6832 if (Strict) {
6833 if (Msg.Id == OPR_ID_UNSUPPORTED) {
6834 Error(Msg.Loc, "specified message id is not supported on this GPU");
6835 return false;
6837 } else {
6838 if (!isValidMsgId(Msg.Id, getSTI())) {
6839 Error(Msg.Loc, "invalid message id");
6840 return false;
6843 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6844 if (Op.IsDefined) {
6845 Error(Op.Loc, "message does not support operations");
6846 } else {
6847 Error(Msg.Loc, "missing message operation");
6849 return false;
6851 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6852 Error(Op.Loc, "invalid operation id");
6853 return false;
6855 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6856 Stream.IsDefined) {
6857 Error(Stream.Loc, "message operation does not support streams");
6858 return false;
6860 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6861 Error(Stream.Loc, "invalid message stream id");
6862 return false;
6864 return true;
6867 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
6868 using namespace llvm::AMDGPU::SendMsg;
6870 int64_t ImmVal = 0;
6871 SMLoc Loc = getLoc();
6873 if (trySkipId("sendmsg", AsmToken::LParen)) {
6874 OperandInfoTy Msg(OPR_ID_UNKNOWN);
6875 OperandInfoTy Op(OP_NONE_);
6876 OperandInfoTy Stream(STREAM_ID_NONE_);
6877 if (parseSendMsgBody(Msg, Op, Stream) &&
6878 validateSendMsg(Msg, Op, Stream)) {
6879 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6880 } else {
6881 return ParseStatus::Failure;
6883 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6884 if (ImmVal < 0 || !isUInt<16>(ImmVal))
6885 return Error(Loc, "invalid immediate: only 16-bit values are legal");
6886 } else {
6887 return ParseStatus::Failure;
6890 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6891 return ParseStatus::Success;
6894 bool AMDGPUOperand::isSendMsg() const {
6895 return isImmTy(ImmTySendMsg);
6898 //===----------------------------------------------------------------------===//
6899 // v_interp
6900 //===----------------------------------------------------------------------===//
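// Parses the interpolation slot (p10, p20 or p0) and attribute operands of
// the v_interp_* instructions, for example (illustrative):
//   v_interp_p1_f32 v0, v1, attr4.y
// The attribute channel suffix must be one of .x, .y, .z or .w.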
6902 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6903 StringRef Str;
6904 SMLoc S = getLoc();
6906 if (!parseId(Str))
6907 return ParseStatus::NoMatch;
6909 int Slot = StringSwitch<int>(Str)
6910 .Case("p10", 0)
6911 .Case("p20", 1)
6912 .Case("p0", 2)
6913 .Default(-1);
6915 if (Slot == -1)
6916 return Error(S, "invalid interpolation slot");
6918 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6919 AMDGPUOperand::ImmTyInterpSlot));
6920 return ParseStatus::Success;
6923 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6924 StringRef Str;
6925 SMLoc S = getLoc();
6927 if (!parseId(Str))
6928 return ParseStatus::NoMatch;
6930 if (!Str.startswith("attr"))
6931 return Error(S, "invalid interpolation attribute");
6933 StringRef Chan = Str.take_back(2);
6934 int AttrChan = StringSwitch<int>(Chan)
6935 .Case(".x", 0)
6936 .Case(".y", 1)
6937 .Case(".z", 2)
6938 .Case(".w", 3)
6939 .Default(-1);
6940 if (AttrChan == -1)
6941 return Error(S, "invalid or missing interpolation attribute channel");
6943 Str = Str.drop_back(2).drop_front(4);
6945 uint8_t Attr;
6946 if (Str.getAsInteger(10, Attr))
6947 return Error(S, "invalid or missing interpolation attribute number");
6949 if (Attr > 32)
6950 return Error(S, "out of bounds interpolation attribute number");
6952 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6954 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6955 AMDGPUOperand::ImmTyInterpAttr));
6956 Operands.push_back(AMDGPUOperand::CreateImm(
6957 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
6958 return ParseStatus::Success;
6961 //===----------------------------------------------------------------------===//
6962 // exp
6963 //===----------------------------------------------------------------------===//
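// Parses the target operand of export instructions, for example
// (illustrative):
//   exp mrt0 v0, v1, v2, v3 done
// Target names such as mrt0 or pos0 are resolved by getTgtId() and then
// checked against the subtarget.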
6965 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6966 using namespace llvm::AMDGPU::Exp;
6968 StringRef Str;
6969 SMLoc S = getLoc();
6971 if (!parseId(Str))
6972 return ParseStatus::NoMatch;
6974 unsigned Id = getTgtId(Str);
6975 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
6976 return Error(S, (Id == ET_INVALID)
6977 ? "invalid exp target"
6978 : "exp target is not supported on this GPU");
6980 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6981 AMDGPUOperand::ImmTyExpTgt));
6982 return ParseStatus::Success;
6985 //===----------------------------------------------------------------------===//
6986 // parser helpers
6987 //===----------------------------------------------------------------------===//
6989 bool
6990 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6991 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6994 bool
6995 AMDGPUAsmParser::isId(const StringRef Id) const {
6996 return isId(getToken(), Id);
6999 bool
7000 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7001 return getTokenKind() == Kind;
7004 StringRef AMDGPUAsmParser::getId() const {
7005 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7008 bool
7009 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7010 if (isId(Id)) {
7011 lex();
7012 return true;
7014 return false;
7017 bool
7018 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7019 if (isToken(AsmToken::Identifier)) {
7020 StringRef Tok = getTokenStr();
7021 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7022 lex();
7023 return true;
7026 return false;
7029 bool
7030 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7031 if (isId(Id) && peekToken().is(Kind)) {
7032 lex();
7033 lex();
7034 return true;
7036 return false;
7039 bool
7040 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7041 if (isToken(Kind)) {
7042 lex();
7043 return true;
7045 return false;
7048 bool
7049 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7050 const StringRef ErrMsg) {
7051 if (!trySkipToken(Kind)) {
7052 Error(getLoc(), ErrMsg);
7053 return false;
7055 return true;
7058 bool
7059 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7060 SMLoc S = getLoc();
7062 const MCExpr *Expr;
7063 if (Parser.parseExpression(Expr))
7064 return false;
7066 if (Expr->evaluateAsAbsolute(Imm))
7067 return true;
7069 if (Expected.empty()) {
7070 Error(S, "expected absolute expression");
7071 } else {
7072 Error(S, Twine("expected ", Expected) +
7073 Twine(" or an absolute expression"));
7075 return false;
7078 bool
7079 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7080 SMLoc S = getLoc();
7082 const MCExpr *Expr;
7083 if (Parser.parseExpression(Expr))
7084 return false;
7086 int64_t IntVal;
7087 if (Expr->evaluateAsAbsolute(IntVal)) {
7088 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7089 } else {
7090 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7092 return true;
7095 bool
7096 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7097 if (isToken(AsmToken::String)) {
7098 Val = getToken().getStringContents();
7099 lex();
7100 return true;
7101 } else {
7102 Error(getLoc(), ErrMsg);
7103 return false;
7107 bool
7108 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7109 if (isToken(AsmToken::Identifier)) {
7110 Val = getTokenStr();
7111 lex();
7112 return true;
7113 } else {
7114 if (!ErrMsg.empty())
7115 Error(getLoc(), ErrMsg);
7116 return false;
7120 AsmToken
7121 AMDGPUAsmParser::getToken() const {
7122 return Parser.getTok();
7125 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7126 return isToken(AsmToken::EndOfStatement)
7127 ? getToken()
7128 : getLexer().peekTok(ShouldSkipSpace);
7131 void
7132 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7133 auto TokCount = getLexer().peekTokens(Tokens);
7135 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7136 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7139 AsmToken::TokenKind
7140 AMDGPUAsmParser::getTokenKind() const {
7141 return getLexer().getKind();
7144 SMLoc
7145 AMDGPUAsmParser::getLoc() const {
7146 return getToken().getLoc();
7149 StringRef
7150 AMDGPUAsmParser::getTokenStr() const {
7151 return getToken().getString();
7154 void
7155 AMDGPUAsmParser::lex() {
7156 Parser.Lex();
7159 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7160 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7163 SMLoc
7164 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7165 const OperandVector &Operands) const {
7166 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7168 if (Test(Op))
7169 return Op.getStartLoc();
7171 return getInstLoc(Operands);
7174 SMLoc
7175 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7176 const OperandVector &Operands) const {
7177 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7178 return getOperandLoc(Test, Operands);
7181 SMLoc
7182 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7183 const OperandVector &Operands) const {
7184 auto Test = [=](const AMDGPUOperand& Op) {
7185 return Op.isRegKind() && Op.getReg() == Reg;
7187 return getOperandLoc(Test, Operands);
7190 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7191 bool SearchMandatoryLiterals) const {
7192 auto Test = [](const AMDGPUOperand& Op) {
7193 return Op.IsImmKindLiteral() || Op.isExpr();
7195 SMLoc Loc = getOperandLoc(Test, Operands);
7196 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7197 Loc = getMandatoryLitLoc(Operands);
7198 return Loc;
7201 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7202 auto Test = [](const AMDGPUOperand &Op) {
7203 return Op.IsImmKindMandatoryLiteral();
7205 return getOperandLoc(Test, Operands);
7208 SMLoc
7209 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7210 auto Test = [](const AMDGPUOperand& Op) {
7211 return Op.isImmKindConst();
7213 return getOperandLoc(Test, Operands);
7216 //===----------------------------------------------------------------------===//
7217 // swizzle
7218 //===----------------------------------------------------------------------===//
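// Parses the "offset:" operand of ds_swizzle_b32, either as a raw 16-bit
// value or via the swizzle() macro, for example (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
// The mode names come from the Swizzle::IdSymbolic table.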
7220 LLVM_READNONE
7221 static unsigned
7222 encodeBitmaskPerm(const unsigned AndMask,
7223 const unsigned OrMask,
7224 const unsigned XorMask) {
7225 using namespace llvm::AMDGPU::Swizzle;
7227 return BITMASK_PERM_ENC |
7228 (AndMask << BITMASK_AND_SHIFT) |
7229 (OrMask << BITMASK_OR_SHIFT) |
7230 (XorMask << BITMASK_XOR_SHIFT);
7233 bool
7234 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7235 const unsigned MinVal,
7236 const unsigned MaxVal,
7237 const StringRef ErrMsg,
7238 SMLoc &Loc) {
7239 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7240 return false;
7242 Loc = getLoc();
7243 if (!parseExpr(Op)) {
7244 return false;
7246 if (Op < MinVal || Op > MaxVal) {
7247 Error(Loc, ErrMsg);
7248 return false;
7251 return true;
7254 bool
7255 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7256 const unsigned MinVal,
7257 const unsigned MaxVal,
7258 const StringRef ErrMsg) {
7259 SMLoc Loc;
7260 for (unsigned i = 0; i < OpNum; ++i) {
7261 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7262 return false;
7265 return true;
7268 bool
7269 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7270 using namespace llvm::AMDGPU::Swizzle;
7272 int64_t Lane[LANE_NUM];
7273 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7274 "expected a 2-bit lane id")) {
7275 Imm = QUAD_PERM_ENC;
7276 for (unsigned I = 0; I < LANE_NUM; ++I) {
7277 Imm |= Lane[I] << (LANE_SHIFT * I);
7279 return true;
7281 return false;
7284 bool
7285 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7286 using namespace llvm::AMDGPU::Swizzle;
7288 SMLoc Loc;
7289 int64_t GroupSize;
7290 int64_t LaneIdx;
7292 if (!parseSwizzleOperand(GroupSize,
7293 2, 32,
7294 "group size must be in the interval [2,32]",
7295 Loc)) {
7296 return false;
7298 if (!isPowerOf2_64(GroupSize)) {
7299 Error(Loc, "group size must be a power of two");
7300 return false;
7302 if (parseSwizzleOperand(LaneIdx,
7303 0, GroupSize - 1,
7304 "lane id must be in the interval [0,group size - 1]",
7305 Loc)) {
7306 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7307 return true;
7309 return false;
7312 bool
7313 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7314 using namespace llvm::AMDGPU::Swizzle;
7316 SMLoc Loc;
7317 int64_t GroupSize;
7319 if (!parseSwizzleOperand(GroupSize,
7320 2, 32,
7321 "group size must be in the interval [2,32]",
7322 Loc)) {
7323 return false;
7325 if (!isPowerOf2_64(GroupSize)) {
7326 Error(Loc, "group size must be a power of two");
7327 return false;
7330 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7331 return true;
7334 bool
7335 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7336 using namespace llvm::AMDGPU::Swizzle;
7338 SMLoc Loc;
7339 int64_t GroupSize;
7341 if (!parseSwizzleOperand(GroupSize,
7342 1, 16,
7343 "group size must be in the interval [1,16]",
7344 Loc)) {
7345 return false;
7347 if (!isPowerOf2_64(GroupSize)) {
7348 Error(Loc, "group size must be a power of two");
7349 return false;
7352 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7353 return true;
7356 bool
7357 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7358 using namespace llvm::AMDGPU::Swizzle;
7360 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7361 return false;
7364 StringRef Ctl;
7365 SMLoc StrLoc = getLoc();
7366 if (!parseString(Ctl)) {
7367 return false;
7369 if (Ctl.size() != BITMASK_WIDTH) {
7370 Error(StrLoc, "expected a 5-character mask");
7371 return false;
7374 unsigned AndMask = 0;
7375 unsigned OrMask = 0;
7376 unsigned XorMask = 0;
7378 for (size_t i = 0; i < Ctl.size(); ++i) {
7379 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7380 switch(Ctl[i]) {
7381 default:
7382 Error(StrLoc, "invalid mask");
7383 return false;
7384 case '0':
7385 break;
7386 case '1':
7387 OrMask |= Mask;
7388 break;
7389 case 'p':
7390 AndMask |= Mask;
7391 break;
7392 case 'i':
7393 AndMask |= Mask;
7394 XorMask |= Mask;
7395 break;
7399 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7400 return true;
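// Worked example of the control-string decoding above: "pppii" (read from the
// most significant mask bit to the least) gives
//   AndMask == 0b11111, OrMask == 0b00000, XorMask == 0b00011
// i.e. preserve all five lane-id bits and invert the low two, reversing lanes
// within each group of four.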
7403 bool
7404 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7406 SMLoc OffsetLoc = getLoc();
7408 if (!parseExpr(Imm, "a swizzle macro")) {
7409 return false;
7411 if (!isUInt<16>(Imm)) {
7412 Error(OffsetLoc, "expected a 16-bit offset");
7413 return false;
7415 return true;
7418 bool
7419 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7420 using namespace llvm::AMDGPU::Swizzle;
7422 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7424 SMLoc ModeLoc = getLoc();
7425 bool Ok = false;
7427 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7428 Ok = parseSwizzleQuadPerm(Imm);
7429 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7430 Ok = parseSwizzleBitmaskPerm(Imm);
7431 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7432 Ok = parseSwizzleBroadcast(Imm);
7433 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7434 Ok = parseSwizzleSwap(Imm);
7435 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7436 Ok = parseSwizzleReverse(Imm);
7437 } else {
7438 Error(ModeLoc, "expected a swizzle mode");
7441 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7444 return false;
7447 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7448 SMLoc S = getLoc();
7449 int64_t Imm = 0;
7451 if (trySkipId("offset")) {
7453 bool Ok = false;
7454 if (skipToken(AsmToken::Colon, "expected a colon")) {
7455 if (trySkipId("swizzle")) {
7456 Ok = parseSwizzleMacro(Imm);
7457 } else {
7458 Ok = parseSwizzleOffset(Imm);
7462 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7464 return Ok ? ParseStatus::Success : ParseStatus::Failure;
7466 return ParseStatus::NoMatch;
7469 bool
7470 AMDGPUOperand::isSwizzle() const {
7471 return isImmTy(ImmTySwizzle);
7474 //===----------------------------------------------------------------------===//
7475 // VGPR Index Mode
7476 //===----------------------------------------------------------------------===//
7478 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7480 using namespace llvm::AMDGPU::VGPRIndexMode;
7482 if (trySkipToken(AsmToken::RParen)) {
7483 return OFF;
7486 int64_t Imm = 0;
7488 while (true) {
7489 unsigned Mode = 0;
7490 SMLoc S = getLoc();
7492 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7493 if (trySkipId(IdSymbolic[ModeId])) {
7494 Mode = 1 << ModeId;
7495 break;
7499 if (Mode == 0) {
7500 Error(S, (Imm == 0)?
7501 "expected a VGPR index mode or a closing parenthesis" :
7502 "expected a VGPR index mode");
7503 return UNDEF;
7506 if (Imm & Mode) {
7507 Error(S, "duplicate VGPR index mode");
7508 return UNDEF;
7510 Imm |= Mode;
7512 if (trySkipToken(AsmToken::RParen))
7513 break;
7514 if (!skipToken(AsmToken::Comma,
7515 "expected a comma or a closing parenthesis"))
7516 return UNDEF;
7519 return Imm;
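// Example of the macro form handled above (mode names are taken from
// VGPRIndexMode::IdSymbolic): "gpr_idx(SRC0,DST)" ORs one bit per listed mode,
// giving Imm == (1 << ID_SRC0) | (1 << ID_DST), while an empty "gpr_idx()"
// yields OFF.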
7522 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7524 using namespace llvm::AMDGPU::VGPRIndexMode;
7526 int64_t Imm = 0;
7527 SMLoc S = getLoc();
7529 if (trySkipId("gpr_idx", AsmToken::LParen)) {
7530 Imm = parseGPRIdxMacro();
7531 if (Imm == UNDEF)
7532 return ParseStatus::Failure;
7533 } else {
7534 if (getParser().parseAbsoluteExpression(Imm))
7535 return ParseStatus::Failure;
7536 if (Imm < 0 || !isUInt<4>(Imm))
7537 return Error(S, "invalid immediate: only 4-bit values are legal");
7540 Operands.push_back(
7541 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7542 return ParseStatus::Success;
7545 bool AMDGPUOperand::isGPRIdxMode() const {
7546 return isImmTy(ImmTyGprIdxMode);
7549 //===----------------------------------------------------------------------===//
7550 // sopp branch targets
7551 //===----------------------------------------------------------------------===//
7553 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7555 // Make sure we are not parsing something
7556 // that looks like a label or an expression but is not.
7557 // This will improve error messages.
7558 if (isRegister() || isModifier())
7559 return ParseStatus::NoMatch;
7561 if (!parseExpr(Operands))
7562 return ParseStatus::Failure;
7564 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7565 assert(Opr.isImm() || Opr.isExpr());
7566 SMLoc Loc = Opr.getStartLoc();
7568 // Currently we do not support arbitrary expressions as branch targets.
7569 // Only labels and absolute expressions are accepted.
7570 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7571 Error(Loc, "expected an absolute expression or a label");
7572 } else if (Opr.isImm() && !Opr.isS16Imm()) {
7573 Error(Loc, "expected a 16-bit signed jump offset");
7576 return ParseStatus::Success;
7579 //===----------------------------------------------------------------------===//
7580 // Boolean holding registers
7581 //===----------------------------------------------------------------------===//
7583 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7584 return parseReg(Operands);
7587 //===----------------------------------------------------------------------===//
7588 // mubuf
7589 //===----------------------------------------------------------------------===//
7591 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7592 const OperandVector &Operands,
7593 bool IsAtomic) {
7594 OptionalImmIndexMap OptionalIdx;
7595 unsigned FirstOperandIdx = 1;
7596 bool IsAtomicReturn = false;
7598 if (IsAtomic) {
7599 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7600 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7601 if (!Op.isCPol())
7602 continue;
7603 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7604 break;
7607 if (!IsAtomicReturn) {
7608 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7609 if (NewOpc != -1)
7610 Inst.setOpcode(NewOpc);
7613 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7614 SIInstrFlags::IsAtomicRet;
7617 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7618 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7620 // Add the register arguments
7621 if (Op.isReg()) {
7622 Op.addRegOperands(Inst, 1);
7623 // Insert a tied src for atomic return dst.
7624 // This cannot be postponed as subsequent calls to
7625 // addImmOperands rely on the correct number of MC operands.
7626 if (IsAtomicReturn && i == FirstOperandIdx)
7627 Op.addRegOperands(Inst, 1);
7628 continue;
7631 // Handle the case where soffset is an immediate
7632 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7633 Op.addImmOperands(Inst, 1);
7634 continue;
7637 // Handle tokens like 'offen' which are sometimes hard-coded into the
7638 // asm string. There are no MCInst operands for these.
7639 if (Op.isToken()) {
7640 continue;
7642 assert(Op.isImm());
7644 // Handle optional arguments
7645 OptionalIdx[Op.getImmTy()] = i;
7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7652 //===----------------------------------------------------------------------===//
7653 // SMEM
7654 //===----------------------------------------------------------------------===//
7656 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7657 OptionalImmIndexMap OptionalIdx;
7658 bool IsAtomicReturn = false;
7660 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7661 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7662 if (!Op.isCPol())
7663 continue;
7664 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7665 break;
7668 if (!IsAtomicReturn) {
7669 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7670 if (NewOpc != -1)
7671 Inst.setOpcode(NewOpc);
7674 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7675 SIInstrFlags::IsAtomicRet;
7677 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7678 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7680 // Add the register arguments
7681 if (Op.isReg()) {
7682 Op.addRegOperands(Inst, 1);
7683 if (IsAtomicReturn && i == 1)
7684 Op.addRegOperands(Inst, 1);
7685 continue;
7688 // Handle the case where soffset is an immediate
7689 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7690 Op.addImmOperands(Inst, 1);
7691 continue;
7694 // Handle tokens like 'offen' which are sometimes hard-coded into the
7695 // asm string. There are no MCInst operands for these.
7696 if (Op.isToken()) {
7697 continue;
7699 assert(Op.isImm());
7701 // Handle optional arguments
7702 OptionalIdx[Op.getImmTy()] = i;
7705 if ((int)Inst.getNumOperands() <=
7706 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7707 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7708 AMDGPUOperand::ImmTySMEMOffsetMod);
7709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7712 //===----------------------------------------------------------------------===//
7713 // smrd
7714 //===----------------------------------------------------------------------===//
7716 bool AMDGPUOperand::isSMRDOffset8() const {
7717 return isImmLiteral() && isUInt<8>(getImm());
7720 bool AMDGPUOperand::isSMEMOffset() const {
7721 // Offset range is checked later by validator.
7722 return isImmLiteral();
7725 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7726 // 32-bit literals are only supported on CI, and we only want to use them
7727 // when the offset does not fit in 8 bits.
7728 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7731 //===----------------------------------------------------------------------===//
7732 // vop3
7733 //===----------------------------------------------------------------------===//
7735 static bool ConvertOmodMul(int64_t &Mul) {
7736 if (Mul != 1 && Mul != 2 && Mul != 4)
7737 return false;
7739 Mul >>= 1;
7740 return true;
7743 static bool ConvertOmodDiv(int64_t &Div) {
7744 if (Div == 1) {
7745 Div = 0;
7746 return true;
7749 if (Div == 2) {
7750 Div = 3;
7751 return true;
7754 return false;
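// Taken together, the two converters above produce the usual omod field
// encoding: mul:2 -> 1, mul:4 -> 2, div:2 -> 3, and mul:1 / div:1 -> 0
// (no output modifier).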
7757 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7758 // This is intentional and ensures compatibility with sp3.
7759 // See bug 35397 for details.
7760 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
7761 if (BoundCtrl == 0 || BoundCtrl == 1) {
7762 if (!isGFX11Plus())
7763 BoundCtrl = 1;
7764 return true;
7766 return false;
7769 void AMDGPUAsmParser::onBeginOfFile() {
7770 if (!getParser().getStreamer().getTargetStreamer() ||
7771 getSTI().getTargetTriple().getArch() == Triple::r600)
7772 return;
7774 if (!getTargetStreamer().getTargetID())
7775 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
7776 // TODO: Should we check the code object version from the directive?
7777 AMDGPU::getAmdhsaCodeObjectVersion());
7779 if (isHsaAbiVersion3AndAbove(&getSTI()))
7780 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7783 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
7784 StringRef Name = getTokenStr();
7785 if (Name == "mul") {
7786 return parseIntWithPrefix("mul", Operands,
7787 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7790 if (Name == "div") {
7791 return parseIntWithPrefix("div", Operands,
7792 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7795 return ParseStatus::NoMatch;
7798 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
7799 // the number of src operands present, then copies that bit into src0_modifiers.
7800 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
7801 int Opc = Inst.getOpcode();
7802 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7803 if (OpSelIdx == -1)
7804 return;
7806 int SrcNum;
7807 const int Ops[] = { AMDGPU::OpName::src0,
7808 AMDGPU::OpName::src1,
7809 AMDGPU::OpName::src2 };
7810 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
7811 ++SrcNum)
7813 assert(SrcNum > 0);
7815 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7817 if ((OpSel & (1 << SrcNum)) != 0) {
7818 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7819 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7820 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
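// Sketch of the effect for a two-source opcode (e.g. the VOP3 form of
// v_add_f16): SrcNum == 2, so op_sel:[0,0,1] sets bit 2 of the op_sel
// operand, and the check above mirrors it into src0_modifiers as DST_OP_SEL.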
7824 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
7825 const OperandVector &Operands) {
7826 cvtVOP3P(Inst, Operands);
7827 cvtVOP3DstOpSelOnly(Inst);
7830 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
7831 OptionalImmIndexMap &OptionalIdx) {
7832 cvtVOP3P(Inst, Operands, OptionalIdx);
7833 cvtVOP3DstOpSelOnly(Inst);
7836 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7837 return
7838 // 1. This operand is input modifiers
7839 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7840 // 2. This is not last operand
7841 && Desc.NumOperands > (OpNum + 1)
7842 // 3. Next operand is register class
7843 && Desc.operands()[OpNum + 1].RegClass != -1
7844 // 4. Next register is not tied to any other operand
7845 && Desc.getOperandConstraint(OpNum + 1,
7846 MCOI::OperandConstraint::TIED_TO) == -1;
7849 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7851 OptionalImmIndexMap OptionalIdx;
7852 unsigned Opc = Inst.getOpcode();
7854 unsigned I = 1;
7855 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7856 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7857 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7860 for (unsigned E = Operands.size(); I != E; ++I) {
7861 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7862 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7863 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7864 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
7865 Op.isInterpAttrChan()) {
7866 Inst.addOperand(MCOperand::createImm(Op.getImm()));
7867 } else if (Op.isImmModifier()) {
7868 OptionalIdx[Op.getImmTy()] = I;
7869 } else {
7870 llvm_unreachable("unhandled operand type");
7874 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
7875 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7876 AMDGPUOperand::ImmTyHigh);
7878 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7879 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7880 AMDGPUOperand::ImmTyClampSI);
7882 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7883 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7884 AMDGPUOperand::ImmTyOModSI);
7887 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
7889 OptionalImmIndexMap OptionalIdx;
7890 unsigned Opc = Inst.getOpcode();
7892 unsigned I = 1;
7893 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7894 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7895 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7898 for (unsigned E = Operands.size(); I != E; ++I) {
7899 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7900 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7901 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7902 } else if (Op.isImmModifier()) {
7903 OptionalIdx[Op.getImmTy()] = I;
7904 } else {
7905 llvm_unreachable("unhandled operand type");
7909 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7911 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7912 if (OpSelIdx != -1)
7913 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
7915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
7917 if (OpSelIdx == -1)
7918 return;
7920 const int Ops[] = { AMDGPU::OpName::src0,
7921 AMDGPU::OpName::src1,
7922 AMDGPU::OpName::src2 };
7923 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7924 AMDGPU::OpName::src1_modifiers,
7925 AMDGPU::OpName::src2_modifiers };
7927 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7929 for (int J = 0; J < 3; ++J) {
7930 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7931 if (OpIdx == -1)
7932 break;
7934 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7935 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7937 if ((OpSel & (1 << J)) != 0)
7938 ModVal |= SISrcMods::OP_SEL_0;
7939 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
7940 (OpSel & (1 << 3)) != 0)
7941 ModVal |= SISrcMods::DST_OP_SEL;
7943 Inst.getOperand(ModIdx).setImm(ModVal);
7947 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7948 OptionalImmIndexMap &OptionalIdx) {
7949 unsigned Opc = Inst.getOpcode();
7951 unsigned I = 1;
7952 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7953 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7954 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7957 for (unsigned E = Operands.size(); I != E; ++I) {
7958 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7959 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7960 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7961 } else if (Op.isImmModifier()) {
7962 OptionalIdx[Op.getImmTy()] = I;
7963 } else if (Op.isRegOrImm()) {
7964 Op.addRegOrImmOperands(Inst, 1);
7965 } else {
7966 llvm_unreachable("unhandled operand type");
7970 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7971 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7972 AMDGPUOperand::ImmTyClampSI);
7974 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7975 addOptionalImmOperand(Inst, Operands, OptionalIdx,
7976 AMDGPUOperand::ImmTyOModSI);
7978 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7979 // they have a src2 register operand that is tied to the dst operand.
7980 // The assembler does not allow modifiers for this operand, so src2_modifiers
7981 // must be 0.
7982 if (isMAC(Opc)) {
7983 auto it = Inst.begin();
7984 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7985 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7986 ++it;
7987 // Copy the operand to ensure it's not invalidated when Inst grows.
7988 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
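// E.g. for the VOP3 form of v_mac_f32 ("v_mac_f32_e64 v0, v1, v2") the code
// above inserts an implicit src2_modifiers of 0 followed by a copy of the dst
// operand (v0) as the tied src2; only src0/src1 keep user-visible modifiers.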
7992 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7993 OptionalImmIndexMap OptionalIdx;
7994 cvtVOP3(Inst, Operands, OptionalIdx);
7997 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7998 OptionalImmIndexMap &OptIdx) {
7999 const int Opc = Inst.getOpcode();
8000 const MCInstrDesc &Desc = MII.get(Opc);
8002 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8004 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8005 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8006 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8007 Inst.addOperand(Inst.getOperand(0));
8010 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8011 assert(!IsPacked);
8012 Inst.addOperand(Inst.getOperand(0));
8015 // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
8016 // instruction, and then figure out where to actually put the modifiers.
8018 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8019 if (OpSelIdx != -1) {
8020 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8023 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8024 if (OpSelHiIdx != -1) {
8025 int DefaultVal = IsPacked ? -1 : 0;
8026 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8027 DefaultVal);
8030 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8031 if (NegLoIdx != -1) {
8032 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8033 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8036 const int Ops[] = { AMDGPU::OpName::src0,
8037 AMDGPU::OpName::src1,
8038 AMDGPU::OpName::src2 };
8039 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8040 AMDGPU::OpName::src1_modifiers,
8041 AMDGPU::OpName::src2_modifiers };
8043 unsigned OpSel = 0;
8044 unsigned OpSelHi = 0;
8045 unsigned NegLo = 0;
8046 unsigned NegHi = 0;
8048 if (OpSelIdx != -1)
8049 OpSel = Inst.getOperand(OpSelIdx).getImm();
8051 if (OpSelHiIdx != -1)
8052 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8054 if (NegLoIdx != -1) {
8055 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8056 NegLo = Inst.getOperand(NegLoIdx).getImm();
8057 NegHi = Inst.getOperand(NegHiIdx).getImm();
8060 for (int J = 0; J < 3; ++J) {
8061 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8062 if (OpIdx == -1)
8063 break;
8065 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8067 if (ModIdx == -1)
8068 continue;
8070 uint32_t ModVal = 0;
8072 if ((OpSel & (1 << J)) != 0)
8073 ModVal |= SISrcMods::OP_SEL_0;
8075 if ((OpSelHi & (1 << J)) != 0)
8076 ModVal |= SISrcMods::OP_SEL_1;
8078 if ((NegLo & (1 << J)) != 0)
8079 ModVal |= SISrcMods::NEG;
8081 if ((NegHi & (1 << J)) != 0)
8082 ModVal |= SISrcMods::NEG_HI;
8084 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
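// Net effect of the folding loop above, as a sketch: for a packed opcode
// parsed with op_sel:[1,0,0] and neg_lo:[0,1,0], src0_modifiers gains
// OP_SEL_0 and src1_modifiers gains NEG, while the separate op_sel / neg_lo
// immediate operands keep their parsed values.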
8088 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8089 OptionalImmIndexMap OptIdx;
8090 cvtVOP3(Inst, Operands, OptIdx);
8091 cvtVOP3P(Inst, Operands, OptIdx);
8094 //===----------------------------------------------------------------------===//
8095 // VOPD
8096 //===----------------------------------------------------------------------===//
8098 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8099 if (!hasVOPD(getSTI()))
8100 return ParseStatus::NoMatch;
8102 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8103 SMLoc S = getLoc();
8104 lex();
8105 lex();
8106 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8107 SMLoc OpYLoc = getLoc();
8108 StringRef OpYName;
8109 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8110 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8111 return ParseStatus::Success;
8113 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8115 return ParseStatus::NoMatch;
8118 // Create VOPD MCInst operands using parsed assembler operands.
8119 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8120 auto addOp = [&](uint16_t ParsedOprIdx) {
8121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8122 if (Op.isReg()) {
8123 Op.addRegOperands(Inst, 1);
8124 return;
8126 if (Op.isImm()) {
8127 Op.addImmOperands(Inst, 1);
8128 return;
8130 llvm_unreachable("Unhandled operand type in cvtVOPD");
8133 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8135 // MCInst operands are ordered as follows:
8136 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8138 for (auto CompIdx : VOPD::COMPONENTS) {
8139 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8142 for (auto CompIdx : VOPD::COMPONENTS) {
8143 const auto &CInfo = InstInfo[CompIdx];
8144 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8145 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8146 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8147 if (CInfo.hasSrc2Acc())
8148 addOp(CInfo.getIndexOfDstInParsedOperands());
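// Example of the resulting operand order, assuming a dual-issue pair such as
// "v_dual_mov_b32 v0, v2 :: v_dual_add_f32 v1, v3, v4": the MCInst operands
// become v0, v1, v2, v3, v4 (both destinations first, then the OpX sources,
// then the OpY sources).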
8152 //===----------------------------------------------------------------------===//
8153 // dpp
8154 //===----------------------------------------------------------------------===//
8156 bool AMDGPUOperand::isDPP8() const {
8157 return isImmTy(ImmTyDPP8);
8160 bool AMDGPUOperand::isDPPCtrl() const {
8161 using namespace AMDGPU::DPP;
8163 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8164 if (result) {
8165 int64_t Imm = getImm();
8166 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8167 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8168 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8169 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8170 (Imm == DppCtrl::WAVE_SHL1) ||
8171 (Imm == DppCtrl::WAVE_ROL1) ||
8172 (Imm == DppCtrl::WAVE_SHR1) ||
8173 (Imm == DppCtrl::WAVE_ROR1) ||
8174 (Imm == DppCtrl::ROW_MIRROR) ||
8175 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8176 (Imm == DppCtrl::BCAST15) ||
8177 (Imm == DppCtrl::BCAST31) ||
8178 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8179 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8181 return false;
8184 //===----------------------------------------------------------------------===//
8185 // mAI
8186 //===----------------------------------------------------------------------===//
8188 bool AMDGPUOperand::isBLGP() const {
8189 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8192 bool AMDGPUOperand::isCBSZ() const {
8193 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8196 bool AMDGPUOperand::isABID() const {
8197 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8200 bool AMDGPUOperand::isS16Imm() const {
8201 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8204 bool AMDGPUOperand::isU16Imm() const {
8205 return isImmLiteral() && isUInt<16>(getImm());
8208 //===----------------------------------------------------------------------===//
8209 // dim
8210 //===----------------------------------------------------------------------===//
8212 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8213 // We want to allow "dim:1D" etc.,
8214 // but the initial 1 is tokenized as an integer.
8215 std::string Token;
8216 if (isToken(AsmToken::Integer)) {
8217 SMLoc Loc = getToken().getEndLoc();
8218 Token = std::string(getTokenStr());
8219 lex();
8220 if (getLoc() != Loc)
8221 return false;
8224 StringRef Suffix;
8225 if (!parseId(Suffix))
8226 return false;
8227 Token += Suffix;
8229 StringRef DimId = Token;
8230 if (DimId.startswith("SQ_RSRC_IMG_"))
8231 DimId = DimId.drop_front(12);
8233 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8234 if (!DimInfo)
8235 return false;
8237 Encoding = DimInfo->Encoding;
8238 return true;
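// E.g. "dim:2D" arrives as the integer token "2" immediately followed by the
// identifier "D"; the two are re-joined above and looked up the same way as
// the long form "dim:SQ_RSRC_IMG_2D".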
8241 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8242 if (!isGFX10Plus())
8243 return ParseStatus::NoMatch;
8245 SMLoc S = getLoc();
8247 if (!trySkipId("dim", AsmToken::Colon))
8248 return ParseStatus::NoMatch;
8250 unsigned Encoding;
8251 SMLoc Loc = getLoc();
8252 if (!parseDimId(Encoding))
8253 return Error(Loc, "invalid dim value");
8255 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8256 AMDGPUOperand::ImmTyDim));
8257 return ParseStatus::Success;
8260 //===----------------------------------------------------------------------===//
8261 // dpp
8262 //===----------------------------------------------------------------------===//
8264 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8265 SMLoc S = getLoc();
8267 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8268 return ParseStatus::NoMatch;
8270 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8272 int64_t Sels[8];
8274 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8275 return ParseStatus::Failure;
8277 for (size_t i = 0; i < 8; ++i) {
8278 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8279 return ParseStatus::Failure;
8281 SMLoc Loc = getLoc();
8282 if (getParser().parseAbsoluteExpression(Sels[i]))
8283 return ParseStatus::Failure;
8284 if (Sels[i] < 0 || Sels[i] > 7)
8285 return Error(Loc, "expected a 3-bit value");
8288 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8289 return ParseStatus::Failure;
8291 unsigned DPP8 = 0;
8292 for (size_t i = 0; i < 8; ++i)
8293 DPP8 |= (Sels[i] << (i * 3));
8295 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8296 return ParseStatus::Success;
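// The packing above gives each lane selector 3 bits, with lane 0 in the
// lowest bits, so e.g. dpp8:[0,1,2,3,4,5,6,7] (every lane selects itself)
// encodes as octal 076543210 when read from lane 7 down to lane 0.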
8299 bool
8300 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8301 const OperandVector &Operands) {
8302 if (Ctrl == "row_newbcast")
8303 return isGFX90A();
8305 if (Ctrl == "row_share" ||
8306 Ctrl == "row_xmask")
8307 return isGFX10Plus();
8309 if (Ctrl == "wave_shl" ||
8310 Ctrl == "wave_shr" ||
8311 Ctrl == "wave_rol" ||
8312 Ctrl == "wave_ror" ||
8313 Ctrl == "row_bcast")
8314 return isVI() || isGFX9();
8316 return Ctrl == "row_mirror" ||
8317 Ctrl == "row_half_mirror" ||
8318 Ctrl == "quad_perm" ||
8319 Ctrl == "row_shl" ||
8320 Ctrl == "row_shr" ||
8321 Ctrl == "row_ror";
8324 int64_t
8325 AMDGPUAsmParser::parseDPPCtrlPerm() {
8326 // quad_perm:[%d,%d,%d,%d]
8328 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8329 return -1;
8331 int64_t Val = 0;
8332 for (int i = 0; i < 4; ++i) {
8333 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8334 return -1;
8336 int64_t Temp;
8337 SMLoc Loc = getLoc();
8338 if (getParser().parseAbsoluteExpression(Temp))
8339 return -1;
8340 if (Temp < 0 || Temp > 3) {
8341 Error(Loc, "expected a 2-bit value");
8342 return -1;
8345 Val += (Temp << i * 2);
8348 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8349 return -1;
8351 return Val;
8354 int64_t
8355 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8356 using namespace AMDGPU::DPP;
8358 // sel:%d
8360 int64_t Val;
8361 SMLoc Loc = getLoc();
8363 if (getParser().parseAbsoluteExpression(Val))
8364 return -1;
8366 struct DppCtrlCheck {
8367 int64_t Ctrl;
8368 int Lo;
8369 int Hi;
8372 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8373 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
8374 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
8375 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
8376 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
8377 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
8378 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
8379 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
8380 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8381 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8382 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8383 .Default({-1, 0, 0});
8385 bool Valid;
8386 if (Check.Ctrl == -1) {
8387 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8388 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8389 } else {
8390 Valid = Check.Lo <= Val && Val <= Check.Hi;
8391 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8394 if (!Valid) {
8395 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8396 return -1;
8399 return Val;
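// Sketch of the resulting encodings: for the wave_* controls Lo == Hi, so the
// value collapses to the fixed control code (e.g. "wave_shl:1" -> WAVE_SHL1),
// while for the row_* controls the parsed value is OR'ed onto the base
// (e.g. "row_shl:1" -> ROW_SHL0 | 1).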
8402 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8403 using namespace AMDGPU::DPP;
8405 if (!isToken(AsmToken::Identifier) ||
8406 !isSupportedDPPCtrl(getTokenStr(), Operands))
8407 return ParseStatus::NoMatch;
8409 SMLoc S = getLoc();
8410 int64_t Val = -1;
8411 StringRef Ctrl;
8413 parseId(Ctrl);
8415 if (Ctrl == "row_mirror") {
8416 Val = DppCtrl::ROW_MIRROR;
8417 } else if (Ctrl == "row_half_mirror") {
8418 Val = DppCtrl::ROW_HALF_MIRROR;
8419 } else {
8420 if (skipToken(AsmToken::Colon, "expected a colon")) {
8421 if (Ctrl == "quad_perm") {
8422 Val = parseDPPCtrlPerm();
8423 } else {
8424 Val = parseDPPCtrlSel(Ctrl);
8429 if (Val == -1)
8430 return ParseStatus::Failure;
8432 Operands.push_back(
8433 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8434 return ParseStatus::Success;
8437 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8438 bool IsDPP8) {
8439 OptionalImmIndexMap OptionalIdx;
8440 unsigned Opc = Inst.getOpcode();
8441 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8443 // MAC instructions are special because they have an 'old'
8444 // operand which is not tied to dst (but is assumed to be).
8445 // They also have a dummy, unused src2_modifiers operand.
8446 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8447 int Src2ModIdx =
8448 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8449 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8450 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8452 unsigned I = 1;
8453 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8454 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8457 int Fi = 0;
8458 for (unsigned E = Operands.size(); I != E; ++I) {
8460 if (IsMAC) {
8461 int NumOperands = Inst.getNumOperands();
8462 if (OldIdx == NumOperands) {
8463 // Handle old operand
8464 constexpr int DST_IDX = 0;
8465 Inst.addOperand(Inst.getOperand(DST_IDX));
8466 } else if (Src2ModIdx == NumOperands) {
8467 // Add unused dummy src2_modifiers
8468 Inst.addOperand(MCOperand::createImm(0));
8472 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8473 MCOI::TIED_TO);
8474 if (TiedTo != -1) {
8475 assert((unsigned)TiedTo < Inst.getNumOperands());
8476 // handle tied old or src2 for MAC instructions
8477 Inst.addOperand(Inst.getOperand(TiedTo));
8479 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8480 // Add the register arguments
8481 if (IsDPP8 && Op.isDppFI()) {
8482 Fi = Op.getImm();
8483 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8484 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8485 } else if (Op.isReg()) {
8486 Op.addRegOperands(Inst, 1);
8487 } else if (Op.isImm() &&
8488 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8489 assert(!Op.isImmKindLiteral() && "Cannot use literal with DPP");
8490 Op.addImmOperands(Inst, 1);
8491 } else if (Op.isImm()) {
8492 OptionalIdx[Op.getImmTy()] = I;
8493 } else {
8494 llvm_unreachable("unhandled operand type");
8497 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8500 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8503 if (Desc.TSFlags & SIInstrFlags::VOP3P)
8504 cvtVOP3P(Inst, Operands, OptionalIdx);
8505 else if (Desc.TSFlags & SIInstrFlags::VOP3)
8506 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8507 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8511 if (IsDPP8) {
8512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8513 using namespace llvm::AMDGPU::DPP;
8514 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8515 } else {
8516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8519 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8521 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8522 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8523 AMDGPUOperand::ImmTyDppFI);
8527 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8528 OptionalImmIndexMap OptionalIdx;
8530 unsigned I = 1;
8531 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8532 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8533 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8536 int Fi = 0;
8537 for (unsigned E = Operands.size(); I != E; ++I) {
8538 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8539 MCOI::TIED_TO);
8540 if (TiedTo != -1) {
8541 assert((unsigned)TiedTo < Inst.getNumOperands());
8542 // handle tied old or src2 for MAC instructions
8543 Inst.addOperand(Inst.getOperand(TiedTo));
8545 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8546 // Add the register arguments
8547 if (Op.isReg() && validateVccOperand(Op.getReg())) {
8548 // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
8549 // Skip it.
8550 continue;
8553 if (IsDPP8) {
8554 if (Op.isDPP8()) {
8555 Op.addImmOperands(Inst, 1);
8556 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8557 Op.addRegWithFPInputModsOperands(Inst, 2);
8558 } else if (Op.isDppFI()) {
8559 Fi = Op.getImm();
8560 } else if (Op.isReg()) {
8561 Op.addRegOperands(Inst, 1);
8562 } else {
8563 llvm_unreachable("Invalid operand type");
8565 } else {
8566 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8567 Op.addRegWithFPInputModsOperands(Inst, 2);
8568 } else if (Op.isReg()) {
8569 Op.addRegOperands(Inst, 1);
8570 } else if (Op.isDPPCtrl()) {
8571 Op.addImmOperands(Inst, 1);
8572 } else if (Op.isImm()) {
8573 // Handle optional arguments
8574 OptionalIdx[Op.getImmTy()] = I;
8575 } else {
8576 llvm_unreachable("Invalid operand type");
8581 if (IsDPP8) {
8582 using namespace llvm::AMDGPU::DPP;
8583 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8584 } else {
8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8587 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8588 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8589 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8590 AMDGPUOperand::ImmTyDppFI);
8595 //===----------------------------------------------------------------------===//
8596 // sdwa
8597 //===----------------------------------------------------------------------===//
8599 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8600 StringRef Prefix,
8601 AMDGPUOperand::ImmTy Type) {
8602 using namespace llvm::AMDGPU::SDWA;
8604 SMLoc S = getLoc();
8605 StringRef Value;
8607 SMLoc StringLoc;
8608 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8609 if (!Res.isSuccess())
8610 return Res;
8612 int64_t Int;
8613 Int = StringSwitch<int64_t>(Value)
8614 .Case("BYTE_0", SdwaSel::BYTE_0)
8615 .Case("BYTE_1", SdwaSel::BYTE_1)
8616 .Case("BYTE_2", SdwaSel::BYTE_2)
8617 .Case("BYTE_3", SdwaSel::BYTE_3)
8618 .Case("WORD_0", SdwaSel::WORD_0)
8619 .Case("WORD_1", SdwaSel::WORD_1)
8620 .Case("DWORD", SdwaSel::DWORD)
8621 .Default(0xffffffff);
8623 if (Int == 0xffffffff)
8624 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8626 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8627 return ParseStatus::Success;
8630 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8631 using namespace llvm::AMDGPU::SDWA;
8633 SMLoc S = getLoc();
8634 StringRef Value;
8636 SMLoc StringLoc;
8637 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8638 if (!Res.isSuccess())
8639 return Res;
8641 int64_t Int;
8642 Int = StringSwitch<int64_t>(Value)
8643 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8644 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8645 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8646 .Default(0xffffffff);
8648 if (Int == 0xffffffff)
8649 return Error(StringLoc, "invalid dst_unused value");
8651 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8652 return ParseStatus::Success;
8655 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8656 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8659 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8660 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8663 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8664 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8667 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8668 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8671 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8672 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8675 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8676 uint64_t BasicInstType,
8677 bool SkipDstVcc,
8678 bool SkipSrcVcc) {
8679 using namespace llvm::AMDGPU::SDWA;
8681 OptionalImmIndexMap OptionalIdx;
8682 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8683 bool SkippedVcc = false;
8685 unsigned I = 1;
8686 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8687 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8688 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8691 for (unsigned E = Operands.size(); I != E; ++I) {
8692 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8693 if (SkipVcc && !SkippedVcc && Op.isReg() &&
8694 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8695 // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
8696 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8697 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8698 // Skip VCC only if we didn't skip it on previous iteration.
8699 // Note that src0 and src1 occupy 2 slots each because of modifiers.
8700 if (BasicInstType == SIInstrFlags::VOP2 &&
8701 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8702 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8703 SkippedVcc = true;
8704 continue;
8705 } else if (BasicInstType == SIInstrFlags::VOPC &&
8706 Inst.getNumOperands() == 0) {
8707 SkippedVcc = true;
8708 continue;
8711 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8712 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8713 } else if (Op.isImm()) {
8714 // Handle optional arguments
8715 OptionalIdx[Op.getImmTy()] = I;
8716 } else {
8717 llvm_unreachable("Invalid operand type");
8719 SkippedVcc = false;
8722 const unsigned Opc = Inst.getOpcode();
8723 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
8724 Opc != AMDGPU::V_NOP_sdwa_vi) {
8725 // V_NOP_sdwa_{vi,gfx9,gfx10} has no optional sdwa arguments
8726 switch (BasicInstType) {
8727 case SIInstrFlags::VOP1:
8728 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8729 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8730 AMDGPUOperand::ImmTyClampSI, 0);
8732 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8733 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8734 AMDGPUOperand::ImmTyOModSI, 0);
8736 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
8737 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8738 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8740 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
8741 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8742 AMDGPUOperand::ImmTySDWADstUnused,
8743 DstUnused::UNUSED_PRESERVE);
8745 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8746 break;
8748 case SIInstrFlags::VOP2:
8749 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8751 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
8752 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8754 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8755 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
8756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8758 break;
8760 case SIInstrFlags::VOPC:
8761 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
8762 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8763 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8764 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8765 break;
8767 default:
8768 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8772 // Special case v_mac_{f16, f32}:
8773 // it has a src2 register operand that is tied to the dst operand.
8774 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8775 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8776 auto it = Inst.begin();
8777 std::advance(
8778 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8779 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8783 /// Force static initialization.
8784 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8785 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
8786 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8789 #define GET_REGISTER_MATCHER
8790 #define GET_MATCHER_IMPLEMENTATION
8791 #define GET_MNEMONIC_SPELL_CHECKER
8792 #define GET_MNEMONIC_CHECKER
8793 #include "AMDGPUGenAsmMatcher.inc"
8795 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
8796 unsigned MCK) {
8797 switch (MCK) {
8798 case MCK_addr64:
8799 return parseTokenOp("addr64", Operands);
8800 case MCK_done:
8801 return parseTokenOp("done", Operands);
8802 case MCK_idxen:
8803 return parseTokenOp("idxen", Operands);
8804 case MCK_lds:
8805 return parseTokenOp("lds", Operands);
8806 case MCK_offen:
8807 return parseTokenOp("offen", Operands);
8808 case MCK_off:
8809 return parseTokenOp("off", Operands);
8810 case MCK_row_95_en:
8811 return parseTokenOp("row_en", Operands);
8812 case MCK_gds:
8813 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
8814 case MCK_tfe:
8815 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
8817 return tryCustomParseOperand(Operands, MCK);
8820 // This function should be defined after the auto-generated include so that
8821 // the MatchClassKind enum is defined.
8822 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8823 unsigned Kind) {
8824 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8825 // But MatchInstructionImpl() expects a token and fails to validate the
8826 // operand. This method checks whether we were given an immediate operand
8827 // where the matcher expects the corresponding token.
8828 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8829 switch (Kind) {
8830 case MCK_addr64:
8831 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8832 case MCK_gds:
8833 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8834 case MCK_lds:
8835 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8836 case MCK_idxen:
8837 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8838 case MCK_offen:
8839 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8840 case MCK_tfe:
8841 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
8842 case MCK_SSrcB32:
8843 // When operands have expression values, they return true for isToken,
8844 // because it is not possible to distinguish between a token and an
8845 // expression at parse time. MatchInstructionImpl() always tries to
8846 // match an operand as a token when isToken returns true; if the
8847 // name of the expression is not a valid token, the match fails,
8848 // so we need to handle that case here.
8849 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8850 case MCK_SSrcF32:
8851 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8852 case MCK_SOPPBrTarget:
8853 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
8854 case MCK_VReg32OrOff:
8855 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8856 case MCK_InterpSlot:
8857 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8858 case MCK_InterpAttr:
8859 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8860 case MCK_InterpAttrChan:
8861 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
8862 case MCK_SReg_64:
8863 case MCK_SReg_64_XEXEC:
8864 // Null is defined as a 32-bit register but
8865 // it should also be enabled with 64-bit operands.
8866 // The following code enables it for SReg_64 operands
8867 // used as source and destination. Remaining source
8868 // operands are handled in isInlinableImm.
8869 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8870 default:
8871 return Match_InvalidOperand;
8875 //===----------------------------------------------------------------------===//
8876 // endpgm
8877 //===----------------------------------------------------------------------===//
8879 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
8880 SMLoc S = getLoc();
8881 int64_t Imm = 0;
8883 if (!parseExpr(Imm)) {
8884 // The operand is optional, if not present default to 0
8885 Imm = 0;
8888 if (!isUInt<16>(Imm))
8889 return Error(S, "expected a 16-bit value");
8891 Operands.push_back(
8892 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8893 return ParseStatus::Success;
8896 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8898 //===----------------------------------------------------------------------===//
8899 // LDSDIR
8900 //===----------------------------------------------------------------------===//
8902 bool AMDGPUOperand::isWaitVDST() const {
8903 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8906 //===----------------------------------------------------------------------===//
8907 // VINTERP
8908 //===----------------------------------------------------------------------===//
8910 bool AMDGPUOperand::isWaitEXP() const {
8911 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());