lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
68 namespace {
70 class AMDGPUAsmParser;
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
78 class AMDGPUOperand : public MCParsedAsmOperand {
79 enum KindTy {
80 Token,
81 Immediate,
82 Register,
83 Expression
84 } Kind;
86 SMLoc StartLoc, EndLoc;
87 const AMDGPUAsmParser *AsmParser;
89 public:
90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
93 using Ptr = std::unique_ptr<AMDGPUOperand>;
95 struct Modifiers {
96 bool Abs = false;
97 bool Neg = false;
98 bool Sext = false;
100 bool hasFPModifiers() const { return Abs || Neg; }
101 bool hasIntModifiers() const { return Sext; }
102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
104 int64_t getFPModifiersOperand() const {
105 int64_t Operand = 0;
106 Operand |= Abs ? SISrcMods::ABS : 0u;
107 Operand |= Neg ? SISrcMods::NEG : 0u;
108 return Operand;
111 int64_t getIntModifiersOperand() const {
112 int64_t Operand = 0;
113 Operand |= Sext ? SISrcMods::SEXT : 0u;
114 return Operand;
117 int64_t getModifiersOperand() const {
118 assert(!(hasFPModifiers() && hasIntModifiers())
119 && "fp and int modifiers should not be used simultaneously");
120 if (hasFPModifiers()) {
121 return getFPModifiersOperand();
122 } else if (hasIntModifiers()) {
123 return getIntModifiersOperand();
124 } else {
125 return 0;
129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
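// Illustrative example (not from the original source): an operand written as
// "-|v0|" has both Abs and Neg set, so getFPModifiersOperand() yields
// SISrcMods::ABS | SISrcMods::NEG, while an operand written as "sext(v0)"
// sets only Sext and is encoded via getIntModifiersOperand().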
132 enum ImmTy {
133 ImmTyNone,
134 ImmTyGDS,
135 ImmTyLDS,
136 ImmTyOffen,
137 ImmTyIdxen,
138 ImmTyAddr64,
139 ImmTyOffset,
140 ImmTyInstOffset,
141 ImmTyOffset0,
142 ImmTyOffset1,
143 ImmTyDLC,
144 ImmTyGLC,
145 ImmTySLC,
146 ImmTyTFE,
147 ImmTyD16,
148 ImmTyClampSI,
149 ImmTyOModSI,
150 ImmTyDPP8,
151 ImmTyDppCtrl,
152 ImmTyDppRowMask,
153 ImmTyDppBankMask,
154 ImmTyDppBoundCtrl,
155 ImmTyDppFi,
156 ImmTySdwaDstSel,
157 ImmTySdwaSrc0Sel,
158 ImmTySdwaSrc1Sel,
159 ImmTySdwaDstUnused,
160 ImmTyDMask,
161 ImmTyDim,
162 ImmTyUNorm,
163 ImmTyDA,
164 ImmTyR128A16,
165 ImmTyLWE,
166 ImmTyExpTgt,
167 ImmTyExpCompr,
168 ImmTyExpVM,
169 ImmTyFORMAT,
170 ImmTyHwreg,
171 ImmTyOff,
172 ImmTySendMsg,
173 ImmTyInterpSlot,
174 ImmTyInterpAttr,
175 ImmTyAttrChan,
176 ImmTyOpSel,
177 ImmTyOpSelHi,
178 ImmTyNegLo,
179 ImmTyNegHi,
180 ImmTySwizzle,
181 ImmTyGprIdxMode,
182 ImmTyHigh,
183 ImmTyBLGP,
184 ImmTyCBSZ,
185 ImmTyABID,
186 ImmTyEndpgm,
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 Modifiers Mods;
202 struct RegOp {
203 unsigned RegNo;
204 Modifiers Mods;
207 union {
208 TokOp Tok;
209 ImmOp Imm;
210 RegOp Reg;
211 const MCExpr *Expr;
214 public:
215 bool isToken() const override {
216 if (Kind == Token)
217 return true;
219 // When parsing operands, we can't always tell if something was meant to be
220 // a token, like 'gds', or an expression that references a global variable.
221 // In this case, we assume the string is an expression, and if we need to
222 // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr();
226 bool isSymbolRefExpr() const {
227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 bool isImm() const override {
231 return Kind == Immediate;
234 bool isInlinableImm(MVT type) const;
235 bool isLiteralImm(MVT type) const;
237 bool isRegKind() const {
238 return Kind == Register;
241 bool isReg() const override {
242 return isRegKind() && !hasModifiers();
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253 bool isRegOrImmWithInt32InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257 bool isRegOrImmWithInt64InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261 bool isRegOrImmWithFP16InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265 bool isRegOrImmWithFP32InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269 bool isRegOrImmWithFP64InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273 bool isVReg() const {
274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275 isRegClass(AMDGPU::VReg_64RegClassID) ||
276 isRegClass(AMDGPU::VReg_96RegClassID) ||
277 isRegClass(AMDGPU::VReg_128RegClassID) ||
278 isRegClass(AMDGPU::VReg_160RegClassID) ||
279 isRegClass(AMDGPU::VReg_256RegClassID) ||
280 isRegClass(AMDGPU::VReg_512RegClassID) ||
281 isRegClass(AMDGPU::VReg_1024RegClassID);
284 bool isVReg32() const {
285 return isRegClass(AMDGPU::VGPR_32RegClassID);
288 bool isVReg32OrOff() const {
289 return isOff() || isVReg32();
292 bool isSDWAOperand(MVT type) const;
293 bool isSDWAFP16Operand() const;
294 bool isSDWAFP32Operand() const;
295 bool isSDWAInt16Operand() const;
296 bool isSDWAInt32Operand() const;
298 bool isImmTy(ImmTy ImmT) const {
299 return isImm() && Imm.Type == ImmT;
302 bool isImmModifier() const {
303 return isImm() && Imm.Type != ImmTyNone;
306 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308 bool isDMask() const { return isImmTy(ImmTyDMask); }
309 bool isDim() const { return isImmTy(ImmTyDim); }
310 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311 bool isDA() const { return isImmTy(ImmTyDA); }
312 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313 bool isLWE() const { return isImmTy(ImmTyLWE); }
314 bool isOff() const { return isImmTy(ImmTyOff); }
315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318 bool isOffen() const { return isImmTy(ImmTyOffen); }
319 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326 bool isGDS() const { return isImmTy(ImmTyGDS); }
327 bool isLDS() const { return isImmTy(ImmTyLDS); }
328 bool isDLC() const { return isImmTy(ImmTyDLC); }
329 bool isGLC() const { return isImmTy(ImmTyGLC); }
330 bool isSLC() const { return isImmTy(ImmTySLC); }
331 bool isTFE() const { return isImmTy(ImmTyTFE); }
332 bool isD16() const { return isImmTy(ImmTyD16); }
333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337 bool isFI() const { return isImmTy(ImmTyDppFi); }
338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349 bool isHigh() const { return isImmTy(ImmTyHigh); }
351 bool isMod() const {
352 return isClampSI() || isOModSI();
355 bool isRegOrImm() const {
356 return isReg() || isImm();
359 bool isRegClass(unsigned RCID) const;
361 bool isInlineValue() const;
363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367 bool isSCSrcB16() const {
368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371 bool isSCSrcV2B16() const {
372 return isSCSrcB16();
375 bool isSCSrcB32() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379 bool isSCSrcB64() const {
380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383 bool isBoolReg() const;
385 bool isSCSrcF16() const {
386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389 bool isSCSrcV2F16() const {
390 return isSCSrcF16();
393 bool isSCSrcF32() const {
394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397 bool isSCSrcF64() const {
398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401 bool isSSrcB32() const {
402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405 bool isSSrcB16() const {
406 return isSCSrcB16() || isLiteralImm(MVT::i16);
409 bool isSSrcV2B16() const {
410 llvm_unreachable("cannot happen");
411 return isSSrcB16();
414 bool isSSrcB64() const {
415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416 // See isVSrc64().
417 return isSCSrcB64() || isLiteralImm(MVT::i64);
420 bool isSSrcF32() const {
421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424 bool isSSrcF64() const {
425 return isSCSrcB64() || isLiteralImm(MVT::f64);
428 bool isSSrcF16() const {
429 return isSCSrcB16() || isLiteralImm(MVT::f16);
432 bool isSSrcV2F16() const {
433 llvm_unreachable("cannot happen");
434 return isSSrcF16();
437 bool isSSrcOrLdsB32() const {
438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439 isLiteralImm(MVT::i32) || isExpr();
442 bool isVCSrcB32() const {
443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446 bool isVCSrcB64() const {
447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450 bool isVCSrcB16() const {
451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454 bool isVCSrcV2B16() const {
455 return isVCSrcB16();
458 bool isVCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462 bool isVCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466 bool isVCSrcF16() const {
467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470 bool isVCSrcV2F16() const {
471 return isVCSrcF16();
474 bool isVSrcB32() const {
475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478 bool isVSrcB64() const {
479 return isVCSrcF64() || isLiteralImm(MVT::i64);
482 bool isVSrcB16() const {
483 return isVCSrcF16() || isLiteralImm(MVT::i16);
486 bool isVSrcV2B16() const {
487 return isVSrcB16() || isLiteralImm(MVT::v2i16);
490 bool isVSrcF32() const {
491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494 bool isVSrcF64() const {
495 return isVCSrcF64() || isLiteralImm(MVT::f64);
498 bool isVSrcF16() const {
499 return isVCSrcF16() || isLiteralImm(MVT::f16);
502 bool isVSrcV2F16() const {
503 return isVSrcF16() || isLiteralImm(MVT::v2f16);
506 bool isVISrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510 bool isVISrcB16() const {
511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514 bool isVISrcV2B16() const {
515 return isVISrcB16();
518 bool isVISrcF32() const {
519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522 bool isVISrcF16() const {
523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526 bool isVISrcV2F16() const {
527 return isVISrcF16() || isVISrcB32();
530 bool isAISrcB32() const {
531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534 bool isAISrcB16() const {
535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538 bool isAISrcV2B16() const {
539 return isAISrcB16();
542 bool isAISrcF32() const {
543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546 bool isAISrcF16() const {
547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550 bool isAISrcV2F16() const {
551 return isAISrcF16() || isAISrcB32();
554 bool isAISrc_128B32() const {
555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558 bool isAISrc_128B16() const {
559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562 bool isAISrc_128V2B16() const {
563 return isAISrc_128B16();
566 bool isAISrc_128F32() const {
567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570 bool isAISrc_128F16() const {
571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574 bool isAISrc_128V2F16() const {
575 return isAISrc_128F16() || isAISrc_128B32();
578 bool isAISrc_512B32() const {
579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582 bool isAISrc_512B16() const {
583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586 bool isAISrc_512V2B16() const {
587 return isAISrc_512B16();
590 bool isAISrc_512F32() const {
591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594 bool isAISrc_512F16() const {
595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598 bool isAISrc_512V2F16() const {
599 return isAISrc_512F16() || isAISrc_512B32();
602 bool isAISrc_1024B32() const {
603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606 bool isAISrc_1024B16() const {
607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610 bool isAISrc_1024V2B16() const {
611 return isAISrc_1024B16();
614 bool isAISrc_1024F32() const {
615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618 bool isAISrc_1024F16() const {
619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622 bool isAISrc_1024V2F16() const {
623 return isAISrc_1024F16() || isAISrc_1024B32();
626 bool isKImmFP32() const {
627 return isLiteralImm(MVT::f32);
630 bool isKImmFP16() const {
631 return isLiteralImm(MVT::f16);
634 bool isMem() const override {
635 return false;
638 bool isExpr() const {
639 return Kind == Expression;
642 bool isSoppBrTarget() const {
643 return isExpr() || isImm();
646 bool isSWaitCnt() const;
647 bool isHwreg() const;
648 bool isSendMsg() const;
649 bool isSwizzle() const;
650 bool isSMRDOffset8() const;
651 bool isSMRDOffset20() const;
652 bool isSMRDLiteralOffset() const;
653 bool isDPP8() const;
654 bool isDPPCtrl() const;
655 bool isBLGP() const;
656 bool isCBSZ() const;
657 bool isABID() const;
658 bool isGPRIdxMode() const;
659 bool isS16Imm() const;
660 bool isU16Imm() const;
661 bool isEndpgm() const;
663 StringRef getExpressionAsToken() const {
664 assert(isExpr());
665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666 return S->getSymbol().getName();
669 StringRef getToken() const {
670 assert(isToken());
672 if (Kind == Expression)
673 return getExpressionAsToken();
675 return StringRef(Tok.Data, Tok.Length);
678 int64_t getImm() const {
679 assert(isImm());
680 return Imm.Val;
683 ImmTy getImmTy() const {
684 assert(isImm());
685 return Imm.Type;
688 unsigned getReg() const override {
689 assert(isRegKind());
690 return Reg.RegNo;
693 SMLoc getStartLoc() const override {
694 return StartLoc;
697 SMLoc getEndLoc() const override {
698 return EndLoc;
701 SMRange getLocRange() const {
702 return SMRange(StartLoc, EndLoc);
705 Modifiers getModifiers() const {
706 assert(isRegKind() || isImmTy(ImmTyNone));
707 return isRegKind() ? Reg.Mods : Imm.Mods;
710 void setModifiers(Modifiers Mods) {
711 assert(isRegKind() || isImmTy(ImmTyNone));
712 if (isRegKind())
713 Reg.Mods = Mods;
714 else
715 Imm.Mods = Mods;
718 bool hasModifiers() const {
719 return getModifiers().hasModifiers();
722 bool hasFPModifiers() const {
723 return getModifiers().hasFPModifiers();
726 bool hasIntModifiers() const {
727 return getModifiers().hasIntModifiers();
730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
736 template <unsigned Bitwidth>
737 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740 addKImmFPOperands<16>(Inst, N);
743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744 addKImmFPOperands<32>(Inst, N);
747 void addRegOperands(MCInst &Inst, unsigned N) const;
749 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750 addRegOperands(Inst, N);
753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754 if (isRegKind())
755 addRegOperands(Inst, N);
756 else if (isExpr())
757 Inst.addOperand(MCOperand::createExpr(Expr));
758 else
759 addImmOperands(Inst, N);
762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763 Modifiers Mods = getModifiers();
764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765 if (isRegKind()) {
766 addRegOperands(Inst, N);
767 } else {
768 addImmOperands(Inst, N, false);
772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773 assert(!hasIntModifiers());
774 addRegOrImmWithInputModsOperands(Inst, N);
777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778 assert(!hasFPModifiers());
779 addRegOrImmWithInputModsOperands(Inst, N);
782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783 Modifiers Mods = getModifiers();
784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785 assert(isRegKind());
786 addRegOperands(Inst, N);
789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790 assert(!hasIntModifiers());
791 addRegWithInputModsOperands(Inst, N);
794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795 assert(!hasFPModifiers());
796 addRegWithInputModsOperands(Inst, N);
799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800 if (isImm())
801 addImmOperands(Inst, N);
802 else {
803 assert(isExpr());
804 Inst.addOperand(MCOperand::createExpr(Expr));
808 static void printImmTy(raw_ostream& OS, ImmTy Type) {
809 switch (Type) {
810 case ImmTyNone: OS << "None"; break;
811 case ImmTyGDS: OS << "GDS"; break;
812 case ImmTyLDS: OS << "LDS"; break;
813 case ImmTyOffen: OS << "Offen"; break;
814 case ImmTyIdxen: OS << "Idxen"; break;
815 case ImmTyAddr64: OS << "Addr64"; break;
816 case ImmTyOffset: OS << "Offset"; break;
817 case ImmTyInstOffset: OS << "InstOffset"; break;
818 case ImmTyOffset0: OS << "Offset0"; break;
819 case ImmTyOffset1: OS << "Offset1"; break;
820 case ImmTyDLC: OS << "DLC"; break;
821 case ImmTyGLC: OS << "GLC"; break;
822 case ImmTySLC: OS << "SLC"; break;
823 case ImmTyTFE: OS << "TFE"; break;
824 case ImmTyD16: OS << "D16"; break;
825 case ImmTyFORMAT: OS << "FORMAT"; break;
826 case ImmTyClampSI: OS << "ClampSI"; break;
827 case ImmTyOModSI: OS << "OModSI"; break;
828 case ImmTyDPP8: OS << "DPP8"; break;
829 case ImmTyDppCtrl: OS << "DppCtrl"; break;
830 case ImmTyDppRowMask: OS << "DppRowMask"; break;
831 case ImmTyDppBankMask: OS << "DppBankMask"; break;
832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833 case ImmTyDppFi: OS << "FI"; break;
834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838 case ImmTyDMask: OS << "DMask"; break;
839 case ImmTyDim: OS << "Dim"; break;
840 case ImmTyUNorm: OS << "UNorm"; break;
841 case ImmTyDA: OS << "DA"; break;
842 case ImmTyR128A16: OS << "R128A16"; break;
843 case ImmTyLWE: OS << "LWE"; break;
844 case ImmTyOff: OS << "Off"; break;
845 case ImmTyExpTgt: OS << "ExpTgt"; break;
846 case ImmTyExpCompr: OS << "ExpCompr"; break;
847 case ImmTyExpVM: OS << "ExpVM"; break;
848 case ImmTyHwreg: OS << "Hwreg"; break;
849 case ImmTySendMsg: OS << "SendMsg"; break;
850 case ImmTyInterpSlot: OS << "InterpSlot"; break;
851 case ImmTyInterpAttr: OS << "InterpAttr"; break;
852 case ImmTyAttrChan: OS << "AttrChan"; break;
853 case ImmTyOpSel: OS << "OpSel"; break;
854 case ImmTyOpSelHi: OS << "OpSelHi"; break;
855 case ImmTyNegLo: OS << "NegLo"; break;
856 case ImmTyNegHi: OS << "NegHi"; break;
857 case ImmTySwizzle: OS << "Swizzle"; break;
858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859 case ImmTyHigh: OS << "High"; break;
860 case ImmTyBLGP: OS << "BLGP"; break;
861 case ImmTyCBSZ: OS << "CBSZ"; break;
862 case ImmTyABID: OS << "ABID"; break;
863 case ImmTyEndpgm: OS << "Endpgm"; break;
867 void print(raw_ostream &OS) const override {
868 switch (Kind) {
869 case Register:
870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871 break;
872 case Immediate:
873 OS << '<' << getImm();
874 if (getImmTy() != ImmTyNone) {
875 OS << " type: "; printImmTy(OS, getImmTy());
877 OS << " mods: " << Imm.Mods << '>';
878 break;
879 case Token:
880 OS << '\'' << getToken() << '\'';
881 break;
882 case Expression:
883 OS << "<expr " << *Expr << '>';
884 break;
888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889 int64_t Val, SMLoc Loc,
890 ImmTy Type = ImmTyNone,
891 bool IsFPImm = false) {
892 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893 Op->Imm.Val = Val;
894 Op->Imm.IsFPImm = IsFPImm;
895 Op->Imm.Type = Type;
896 Op->Imm.Mods = Modifiers();
897 Op->StartLoc = Loc;
898 Op->EndLoc = Loc;
899 return Op;
902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903 StringRef Str, SMLoc Loc,
904 bool HasExplicitEncodingSize = true) {
905 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
906 Res->Tok.Data = Str.data();
907 Res->Tok.Length = Str.size();
908 Res->StartLoc = Loc;
909 Res->EndLoc = Loc;
910 return Res;
913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914 unsigned RegNo, SMLoc S,
915 SMLoc E) {
916 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
917 Op->Reg.RegNo = RegNo;
918 Op->Reg.Mods = Modifiers();
919 Op->StartLoc = S;
920 Op->EndLoc = E;
921 return Op;
924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925 const class MCExpr *Expr, SMLoc S) {
926 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
927 Op->Expr = Expr;
928 Op->StartLoc = S;
929 Op->EndLoc = S;
930 return Op;
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936 return OS;
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
945 // next .amdgpu_hsa_kernel directive or at EOF.
946 class KernelScopeInfo {
947 int SgprIndexUnusedMin = -1;
948 int VgprIndexUnusedMin = -1;
949 MCContext *Ctx = nullptr;
951 void usesSgprAt(int i) {
952 if (i >= SgprIndexUnusedMin) {
953 SgprIndexUnusedMin = ++i;
954 if (Ctx) {
955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
961 void usesVgprAt(int i) {
962 if (i >= VgprIndexUnusedMin) {
963 VgprIndexUnusedMin = ++i;
964 if (Ctx) {
965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
971 public:
972 KernelScopeInfo() = default;
974 void initialize(MCContext &Context) {
975 Ctx = &Context;
976 usesSgprAt(SgprIndexUnusedMin = -1);
977 usesVgprAt(VgprIndexUnusedMin = -1);
980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981 switch (RegKind) {
982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983 case IS_AGPR: // fall through
984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985 default: break;
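// Illustrative trace (assuming the register list s[4:7] has already been
// parsed): usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/4) calls
// usesSgprAt(7), which bumps SgprIndexUnusedMin to 8 and updates the
// .kernel.sgpr_count symbol accordingly.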
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991 MCAsmParser &Parser;
993 // Number of extra operands parsed after the first optional operand.
994 // This may be necessary to skip hardcoded mandatory operands.
995 static const unsigned MAX_OPR_LOOKAHEAD = 8;
997 unsigned ForcedEncodingSize = 0;
998 bool ForcedDPP = false;
999 bool ForcedSDWA = false;
1000 KernelScopeInfo KernelScope;
1002 /// @name Auto-generated Match Functions
1003 /// {
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1008 /// }
1010 private:
1011 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012 bool OutOfRangeError(SMRange Range);
1013 /// Calculate VGPR/SGPR blocks required for a given target, reserved
1014 /// registers, and user-specified NextFreeXGPR values.
1016 /// \param Features [in] Target features, used for bug corrections.
1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021 /// descriptor field, if valid.
1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026 /// \param VGPRBlocks [out] Result VGPR block count.
1027 /// \param SGPRBlocks [out] Result SGPR block count.
1028 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029 bool FlatScrUsed, bool XNACKUsed,
1030 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031 SMRange VGPRRange, unsigned NextFreeSGPR,
1032 SMRange SGPRRange, unsigned &VGPRBlocks,
1033 unsigned &SGPRBlocks);
1034 bool ParseDirectiveAMDGCNTarget();
1035 bool ParseDirectiveAMDHSAKernel();
1036 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037 bool ParseDirectiveHSACodeObjectVersion();
1038 bool ParseDirectiveHSACodeObjectISA();
1039 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040 bool ParseDirectiveAMDKernelCodeT();
1041 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042 bool ParseDirectiveAMDGPUHsaKernel();
1044 bool ParseDirectiveISAVersion();
1045 bool ParseDirectiveHSAMetadata();
1046 bool ParseDirectivePALMetadataBegin();
1047 bool ParseDirectivePALMetadata();
1048 bool ParseDirectiveAMDGPULDS();
1050 /// Common code to parse out a block of text (typically YAML) between start and
1051 /// end directives.
1052 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053 const char *AssemblerDirectiveEnd,
1054 std::string &CollectString);
1056 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057 RegisterKind RegKind, unsigned Reg1,
1058 unsigned RegNum);
1059 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060 unsigned& RegNum, unsigned& RegWidth,
1061 unsigned *DwordRegIndex);
1062 bool isRegister();
1063 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065 void initializeGprCountSymbol(RegisterKind RegKind);
1066 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067 unsigned RegWidth);
1068 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071 bool IsGdsHardcoded);
1073 public:
1074 enum AMDGPUMatchResultTy {
1075 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1077 enum OperandMode {
1078 OperandMode_Default,
1079 OperandMode_NSA,
1082 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1084 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085 const MCInstrInfo &MII,
1086 const MCTargetOptions &Options)
1087 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088 MCAsmParserExtension::Initialize(Parser);
1090 if (getFeatureBits().none()) {
1091 // Set default features.
1092 copySTI().ToggleFeature("southern-islands");
1095 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1098 // TODO: make those pre-defined variables read-only.
1099 // Currently there is no suitable machinery in the core llvm-mc for this.
1100 // MCSymbol::isRedefinable is intended for another purpose, and
1101 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103 MCContext &Ctx = getContext();
1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105 MCSymbol *Sym =
1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112 } else {
1113 MCSymbol *Sym =
1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122 initializeGprCountSymbol(IS_VGPR);
1123 initializeGprCountSymbol(IS_SGPR);
1124 } else
1125 KernelScope.initialize(getContext());
1129 bool hasXNACK() const {
1130 return AMDGPU::hasXNACK(getSTI());
1133 bool hasMIMG_R128() const {
1134 return AMDGPU::hasMIMG_R128(getSTI());
1137 bool hasPackedD16() const {
1138 return AMDGPU::hasPackedD16(getSTI());
1141 bool isSI() const {
1142 return AMDGPU::isSI(getSTI());
1145 bool isCI() const {
1146 return AMDGPU::isCI(getSTI());
1149 bool isVI() const {
1150 return AMDGPU::isVI(getSTI());
1153 bool isGFX9() const {
1154 return AMDGPU::isGFX9(getSTI());
1157 bool isGFX10() const {
1158 return AMDGPU::isGFX10(getSTI());
1161 bool hasInv2PiInlineImm() const {
1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1165 bool hasFlatOffsets() const {
1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1169 bool hasSGPR102_SGPR103() const {
1170 return !isVI() && !isGFX9();
1173 bool hasSGPR104_SGPR105() const {
1174 return isGFX10();
1177 bool hasIntClamp() const {
1178 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1181 AMDGPUTargetStreamer &getTargetStreamer() {
1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183 return static_cast<AMDGPUTargetStreamer &>(TS);
1186 const MCRegisterInfo *getMRI() const {
1187 // We need this const_cast because for some reason getContext() is not const
1188 // in MCAsmParser.
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1192 const MCInstrInfo *getMII() const {
1193 return &MII;
1196 const FeatureBitset &getFeatureBits() const {
1197 return getSTI().getFeatureBits();
1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206 bool isForcedDPP() const { return ForcedDPP; }
1207 bool isForcedSDWA() const { return ForcedSDWA; }
1208 ArrayRef<unsigned> getMatchedVariants() const;
1210 std::unique_ptr<AMDGPUOperand> parseRegister();
1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214 unsigned Kind) override;
1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216 OperandVector &Operands, MCStreamer &Out,
1217 uint64_t &ErrorInfo,
1218 bool MatchingInlineAsm) override;
1219 bool ParseDirective(AsmToken DirectiveID) override;
1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221 OperandMode Mode = OperandMode_Default);
1222 StringRef parseMnemonicSuffix(StringRef Name);
1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224 SMLoc NameLoc, OperandVector &Operands) override;
1225 //bool ProcessInstruction(MCInst &Inst);
1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1229 OperandMatchResultTy
1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232 bool (*ConvertResult)(int64_t &) = nullptr);
1234 OperandMatchResultTy
1235 parseOperandArrayWithPrefix(const char *Prefix,
1236 OperandVector &Operands,
1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238 bool (*ConvertResult)(int64_t&) = nullptr);
1240 OperandMatchResultTy
1241 parseNamedBit(const char *Name, OperandVector &Operands,
1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244 StringRef &Value);
1246 bool isModifier();
1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251 bool parseSP3NegModifier();
1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253 OperandMatchResultTy parseReg(OperandVector &Operands);
1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1267 bool parseCnt(int64_t &IntVal);
1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1271 private:
1272 struct OperandInfoTy {
1273 int64_t Id;
1274 bool IsSymbolic = false;
1275 bool IsDefined = false;
1277 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281 bool validateSendMsg(const OperandInfoTy &Msg,
1282 const OperandInfoTy &Op,
1283 const OperandInfoTy &Stream,
1284 const SMLoc Loc);
1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287 bool validateHwreg(const OperandInfoTy &HwReg,
1288 const int64_t Offset,
1289 const int64_t Width,
1290 const SMLoc Loc);
1292 void errorExpTgt();
1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298 bool validateSOPLiteral(const MCInst &Inst) const;
1299 bool validateConstantBusLimitations(const MCInst &Inst);
1300 bool validateEarlyClobberLimitations(const MCInst &Inst);
1301 bool validateIntClampSupported(const MCInst &Inst);
1302 bool validateMIMGAtomicDMask(const MCInst &Inst);
1303 bool validateMIMGGatherDMask(const MCInst &Inst);
1304 bool validateMIMGDataSize(const MCInst &Inst);
1305 bool validateMIMGAddrSize(const MCInst &Inst);
1306 bool validateMIMGD16(const MCInst &Inst);
1307 bool validateMIMGDim(const MCInst &Inst);
1308 bool validateLdsDirect(const MCInst &Inst);
1309 bool validateOpSel(const MCInst &Inst);
1310 bool validateVccOperand(unsigned Reg) const;
1311 bool validateVOP3Literal(const MCInst &Inst) const;
1312 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1313 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1314 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 bool isId(const StringRef Id) const;
1317 bool isId(const AsmToken &Token, const StringRef Id) const;
1318 bool isToken(const AsmToken::TokenKind Kind) const;
1319 bool trySkipId(const StringRef Id);
1320 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1321 bool trySkipToken(const AsmToken::TokenKind Kind);
1322 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1323 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1324 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1325 AsmToken::TokenKind getTokenKind() const;
1326 bool parseExpr(int64_t &Imm);
1327 bool parseExpr(OperandVector &Operands);
1328 StringRef getTokenStr() const;
1329 AsmToken peekToken();
1330 AsmToken getToken() const;
1331 SMLoc getLoc() const;
1332 void lex();
1334 public:
1335 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1336 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1339 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1340 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1341 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1342 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1343 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1346 const unsigned MinVal,
1347 const unsigned MaxVal,
1348 const StringRef ErrMsg);
1349 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1350 bool parseSwizzleOffset(int64_t &Imm);
1351 bool parseSwizzleMacro(int64_t &Imm);
1352 bool parseSwizzleQuadPerm(int64_t &Imm);
1353 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1354 bool parseSwizzleBroadcast(int64_t &Imm);
1355 bool parseSwizzleSwap(int64_t &Imm);
1356 bool parseSwizzleReverse(int64_t &Imm);
1358 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1359 int64_t parseGPRIdxMacro();
1361 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1362 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1363 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1364 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1365 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 AMDGPUOperand::Ptr defaultDLC() const;
1368 AMDGPUOperand::Ptr defaultGLC() const;
1369 AMDGPUOperand::Ptr defaultSLC() const;
1371 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1372 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1373 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1374 AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1379 OptionalImmIndexMap &OptionalIdx);
1380 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1381 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1382 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1387 bool IsAtomic = false);
1388 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 OperandMatchResultTy parseDim(OperandVector &Operands);
1391 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1392 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1393 AMDGPUOperand::Ptr defaultRowMask() const;
1394 AMDGPUOperand::Ptr defaultBankMask() const;
1395 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1396 AMDGPUOperand::Ptr defaultFI() const;
1397 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1398 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1401 AMDGPUOperand::ImmTy Type);
1402 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1403 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1404 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1405 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1406 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1407 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1408 uint64_t BasicInstType, bool skipVcc = false);
1410 AMDGPUOperand::Ptr defaultBLGP() const;
1411 AMDGPUOperand::Ptr defaultCBSZ() const;
1412 AMDGPUOperand::Ptr defaultABID() const;
1414 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1415 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1418 struct OptionalOperand {
1419 const char *Name;
1420 AMDGPUOperand::ImmTy Type;
1421 bool IsBit;
1422 bool (*ConvertResult)(int64_t&);
1425 } // end anonymous namespace
1427 // May be called with an integer type of equivalent bit width.
1428 static const fltSemantics *getFltSemantics(unsigned Size) {
1429 switch (Size) {
1430 case 4:
1431 return &APFloat::IEEEsingle();
1432 case 8:
1433 return &APFloat::IEEEdouble();
1434 case 2:
1435 return &APFloat::IEEEhalf();
1436 default:
1437 llvm_unreachable("unsupported fp type");
1441 static const fltSemantics *getFltSemantics(MVT VT) {
1442 return getFltSemantics(VT.getSizeInBits() / 8);
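// For example, getFltSemantics(MVT::f16) yields APFloat::IEEEhalf(), while an
// integer type of the same width, such as MVT::i32, maps to the FP semantics
// of the equivalent size (here APFloat::IEEEsingle()).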
1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1446 switch (OperandType) {
1447 case AMDGPU::OPERAND_REG_IMM_INT32:
1448 case AMDGPU::OPERAND_REG_IMM_FP32:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1451 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1452 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1453 return &APFloat::IEEEsingle();
1454 case AMDGPU::OPERAND_REG_IMM_INT64:
1455 case AMDGPU::OPERAND_REG_IMM_FP64:
1456 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1457 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1458 return &APFloat::IEEEdouble();
1459 case AMDGPU::OPERAND_REG_IMM_INT16:
1460 case AMDGPU::OPERAND_REG_IMM_FP16:
1461 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1462 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1463 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1464 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1465 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1466 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1467 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1469 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1470 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1471 return &APFloat::IEEEhalf();
1472 default:
1473 llvm_unreachable("unsupported fp type");
1477 //===----------------------------------------------------------------------===//
1478 // Operand
1479 //===----------------------------------------------------------------------===//
1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1482 bool Lost;
1484 // Convert the literal to the FP semantics of the requested type
1485 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1486 APFloat::rmNearestTiesToEven,
1487 &Lost);
1488 // We allow precision loss but not overflow or underflow
1489 if (Status != APFloat::opOK &&
1490 Lost &&
1491 ((Status & APFloat::opOverflow) != 0 ||
1492 (Status & APFloat::opUnderflow) != 0)) {
1493 return false;
1496 return true;
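// Illustrative behaviour: converting 0.1 to f16 merely loses precision and is
// accepted, whereas converting 1.0e10 to f16 overflows the target semantics
// and is rejected.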
1499 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1500 return isUIntN(Size, Val) || isIntN(Size, Val);
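// For example, isSafeTruncation(-1, 16) and isSafeTruncation(0xFFFF, 16) are
// true (the value fits as a signed or unsigned 16-bit integer, respectively),
// while isSafeTruncation(0x10000, 16) is false.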
1503 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 // This is a hack to enable named inline values like
1506 // shared_base with both 32-bit and 64-bit operands.
1507 // Note that these values are defined as
1508 // 32-bit operands only.
1509 if (isInlineValue()) {
1510 return true;
1513 if (!isImmTy(ImmTyNone)) {
1514 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1515 return false;
1517 // TODO: We should avoid using host float here. It would be better to
1518 // check the float bit values which is what a few other places do.
1519 // We've had bot failures before due to weird NaN support on mips hosts.
1521 APInt Literal(64, Imm.Val);
1523 if (Imm.IsFPImm) { // We got fp literal token
1524 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1525 return AMDGPU::isInlinableLiteral64(Imm.Val,
1526 AsmParser->hasInv2PiInlineImm());
1529 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1530 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1531 return false;
1533 if (type.getScalarSizeInBits() == 16) {
1534 return AMDGPU::isInlinableLiteral16(
1535 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1536 AsmParser->hasInv2PiInlineImm());
1539 // Check if single precision literal is inlinable
1540 return AMDGPU::isInlinableLiteral32(
1541 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1542 AsmParser->hasInv2PiInlineImm());
1545 // We got int literal token.
1546 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1547 return AMDGPU::isInlinableLiteral64(Imm.Val,
1548 AsmParser->hasInv2PiInlineImm());
1551 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1552 return false;
1555 if (type.getScalarSizeInBits() == 16) {
1556 return AMDGPU::isInlinableLiteral16(
1557 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1558 AsmParser->hasInv2PiInlineImm());
1561 return AMDGPU::isInlinableLiteral32(
1562 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1563 AsmParser->hasInv2PiInlineImm());
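// Illustrative note: for a 32-bit operand, integer literals in the AMDGPU
// inline-constant range (-16..64) are reported as inlinable here, while a
// value such as 65 falls through to the literal-constant path (isLiteralImm).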
1566 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1567 // Check that this immediate can be added as literal
1568 if (!isImmTy(ImmTyNone)) {
1569 return false;
1572 if (!Imm.IsFPImm) {
1573 // We got int literal token.
1575 if (type == MVT::f64 && hasFPModifiers()) {
1576 // FP modifiers cannot be applied to int literals while preserving the same
1577 // semantics for VOP1/2/C and VOP3, because of integer truncation. To avoid
1578 // ambiguity, disable these cases.
1579 return false;
1582 unsigned Size = type.getSizeInBits();
1583 if (Size == 64)
1584 Size = 32;
1586 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1587 // types.
1588 return isSafeTruncation(Imm.Val, Size);
1591 // We got fp literal token
1592 if (type == MVT::f64) { // Expected 64-bit fp operand
1593 // We would set the low 32 bits of the literal to zero, but we accept such literals
1594 return true;
1597 if (type == MVT::i64) { // Expected 64-bit int operand
1598 // We don't allow fp literals in 64-bit integer instructions. It is
1599 // unclear how we should encode them.
1600 return false;
1603 // We allow fp literals with f16x2 operands assuming that the specified
1604 // literal goes into the lower half and the upper half is zero. We also
1605 // require that the literal may be losslessly converted to f16.
1606 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1607 (type == MVT::v2i16)? MVT::i16 : type;
1609 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1610 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
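// For instance, the FP literal 1.5 is acceptable for a v2f16 operand (it
// converts losslessly to f16 and is assumed to occupy the low half), whereas
// a value that overflows f16, such as 1.0e10, is rejected.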
1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1614 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1618 if (AsmParser->isVI())
1619 return isVReg32();
1620 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1621 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1622 else
1623 return false;
1626 bool AMDGPUOperand::isSDWAFP16Operand() const {
1627 return isSDWAOperand(MVT::f16);
1630 bool AMDGPUOperand::isSDWAFP32Operand() const {
1631 return isSDWAOperand(MVT::f32);
1634 bool AMDGPUOperand::isSDWAInt16Operand() const {
1635 return isSDWAOperand(MVT::i16);
1638 bool AMDGPUOperand::isSDWAInt32Operand() const {
1639 return isSDWAOperand(MVT::i32);
1642 bool AMDGPUOperand::isBoolReg() const {
1643 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1644 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
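// In other words (illustrative summary of the checks above): a boolean source
// is a 64-bit SGPR pair such as vcc or s[0:1] on wave64 targets, and a single
// 32-bit SGPR such as vcc_lo on wave32 targets.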
1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1650 assert(Size == 2 || Size == 4 || Size == 8);
1652 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 if (Imm.Mods.Abs) {
1655 Val &= ~FpSignMask;
1657 if (Imm.Mods.Neg) {
1658 Val ^= FpSignMask;
1661 return Val;
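// Example: with Size == 4, FpSignMask above is 0x80000000, so "abs" clears the
// IEEE sign bit of the literal and "neg" toggles it.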
1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1665 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1666 Inst.getNumOperands())) {
1667 addLiteralImmOperand(Inst, Imm.Val,
1668 ApplyModifiers &
1669 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670 } else {
1671 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1672 Inst.addOperand(MCOperand::createImm(Imm.Val));
1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1677 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1678 auto OpNum = Inst.getNumOperands();
1679 // Check that this operand accepts literals
1680 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 if (ApplyModifiers) {
1683 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1684 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1685 Val = applyInputFPModifiers(Val, Size);
1688 APInt Literal(64, Val);
1689 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 if (Imm.IsFPImm) { // We got fp literal token
1692 switch (OpTy) {
1693 case AMDGPU::OPERAND_REG_IMM_INT64:
1694 case AMDGPU::OPERAND_REG_IMM_FP64:
1695 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1696 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1697 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1698 AsmParser->hasInv2PiInlineImm())) {
1699 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1700 return;
1703 // Non-inlineable
1704 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1705 // For fp operands we check whether the low 32 bits are zero
1706 if (Literal.getLoBits(32) != 0) {
1707 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1708 "Can't encode literal as exact 64-bit floating-point operand. "
1709 "Low 32-bits will be set to zero");
1712 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1713 return;
1716 // We don't allow fp literals in 64-bit integer instructions. It is
1717 // unclear how we should encode them. This case should be checked earlier
1718 // in predicate methods (isLiteralImm())
1719 llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 case AMDGPU::OPERAND_REG_IMM_INT32:
1722 case AMDGPU::OPERAND_REG_IMM_FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727 case AMDGPU::OPERAND_REG_IMM_INT16:
1728 case AMDGPU::OPERAND_REG_IMM_FP16:
1729 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1730 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1731 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1733 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1734 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1737 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1738 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1739 bool lost;
1740 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1741 // Convert the literal to the FP semantics of the operand type
1742 FPLiteral.convert(*getOpFltSemantics(OpTy),
1743 APFloat::rmNearestTiesToEven, &lost);
1744 // We allow precision loss but not overflow or underflow. This should be
1745 // checked earlier in isLiteralImm()
1747 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1748 Inst.addOperand(MCOperand::createImm(ImmVal));
1749 return;
1751 default:
1752 llvm_unreachable("invalid operand size");
1755 return;
1758 // We got int literal token.
1759 // Only sign extend inline immediates.
1760 switch (OpTy) {
1761 case AMDGPU::OPERAND_REG_IMM_INT32:
1762 case AMDGPU::OPERAND_REG_IMM_FP32:
1763 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1764 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1765 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1767 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1768 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1769 if (isSafeTruncation(Val, 32) &&
1770 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1771 AsmParser->hasInv2PiInlineImm())) {
1772 Inst.addOperand(MCOperand::createImm(Val));
1773 return;
1776 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1777 return;
1779 case AMDGPU::OPERAND_REG_IMM_INT64:
1780 case AMDGPU::OPERAND_REG_IMM_FP64:
1781 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1782 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1783 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1784 Inst.addOperand(MCOperand::createImm(Val));
1785 return;
1788 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1789 return;
1791 case AMDGPU::OPERAND_REG_IMM_INT16:
1792 case AMDGPU::OPERAND_REG_IMM_FP16:
1793 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1794 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1797 if (isSafeTruncation(Val, 16) &&
1798 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1799 AsmParser->hasInv2PiInlineImm())) {
1800 Inst.addOperand(MCOperand::createImm(Val));
1801 return;
1804 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1805 return;
1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1809 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1811 assert(isSafeTruncation(Val, 16));
1812 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1813 AsmParser->hasInv2PiInlineImm()));
1815 Inst.addOperand(MCOperand::createImm(Val));
1816 return;
1818 default:
1819 llvm_unreachable("invalid operand size");
1823 template <unsigned Bitwidth>
1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1825 APInt Literal(64, Imm.Val);
1827 if (!Imm.IsFPImm) {
1828 // We got int literal token.
1829 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1830 return;
1833 bool Lost;
1834 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1835 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1836 APFloat::rmNearestTiesToEven, &Lost);
1837 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1841 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1844 static bool isInlineValue(unsigned Reg) {
1845 switch (Reg) {
1846 case AMDGPU::SRC_SHARED_BASE:
1847 case AMDGPU::SRC_SHARED_LIMIT:
1848 case AMDGPU::SRC_PRIVATE_BASE:
1849 case AMDGPU::SRC_PRIVATE_LIMIT:
1850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1851 return true;
1852 case AMDGPU::SRC_VCCZ:
1853 case AMDGPU::SRC_EXECZ:
1854 case AMDGPU::SRC_SCC:
1855 return true;
1856 default:
1857 return false;
1861 bool AMDGPUOperand::isInlineValue() const {
1862 return isRegKind() && ::isInlineValue(getReg());
1865 //===----------------------------------------------------------------------===//
1866 // AsmParser
1867 //===----------------------------------------------------------------------===//
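// Map a register kind and a width in 32-bit dwords to a register class ID;
// e.g. a two-dword VGPR tuple such as v[0:1] selects VReg_64.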
1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1870 if (Is == IS_VGPR) {
1871 switch (RegWidth) {
1872 default: return -1;
1873 case 1: return AMDGPU::VGPR_32RegClassID;
1874 case 2: return AMDGPU::VReg_64RegClassID;
1875 case 3: return AMDGPU::VReg_96RegClassID;
1876 case 4: return AMDGPU::VReg_128RegClassID;
1877 case 5: return AMDGPU::VReg_160RegClassID;
1878 case 8: return AMDGPU::VReg_256RegClassID;
1879 case 16: return AMDGPU::VReg_512RegClassID;
1880 case 32: return AMDGPU::VReg_1024RegClassID;
1882 } else if (Is == IS_TTMP) {
1883 switch (RegWidth) {
1884 default: return -1;
1885 case 1: return AMDGPU::TTMP_32RegClassID;
1886 case 2: return AMDGPU::TTMP_64RegClassID;
1887 case 4: return AMDGPU::TTMP_128RegClassID;
1888 case 8: return AMDGPU::TTMP_256RegClassID;
1889 case 16: return AMDGPU::TTMP_512RegClassID;
1891 } else if (Is == IS_SGPR) {
1892 switch (RegWidth) {
1893 default: return -1;
1894 case 1: return AMDGPU::SGPR_32RegClassID;
1895 case 2: return AMDGPU::SGPR_64RegClassID;
1896 case 4: return AMDGPU::SGPR_128RegClassID;
1897 case 8: return AMDGPU::SGPR_256RegClassID;
1898 case 16: return AMDGPU::SGPR_512RegClassID;
1900 } else if (Is == IS_AGPR) {
1901 switch (RegWidth) {
1902 default: return -1;
1903 case 1: return AMDGPU::AGPR_32RegClassID;
1904 case 2: return AMDGPU::AReg_64RegClassID;
1905 case 4: return AMDGPU::AReg_128RegClassID;
1906 case 16: return AMDGPU::AReg_512RegClassID;
1907 case 32: return AMDGPU::AReg_1024RegClassID;
1910 return -1;
1913 static unsigned getSpecialRegForName(StringRef RegName) {
1914 return StringSwitch<unsigned>(RegName)
1915 .Case("exec", AMDGPU::EXEC)
1916 .Case("vcc", AMDGPU::VCC)
1917 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1918 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1919 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1920 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1921 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1922 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1923 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1924 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1925 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1926 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1927 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1928 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1929 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1930 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1931 .Case("m0", AMDGPU::M0)
1932 .Case("vccz", AMDGPU::SRC_VCCZ)
1933 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1934 .Case("execz", AMDGPU::SRC_EXECZ)
1935 .Case("src_execz", AMDGPU::SRC_EXECZ)
1936 .Case("scc", AMDGPU::SRC_SCC)
1937 .Case("src_scc", AMDGPU::SRC_SCC)
1938 .Case("tba", AMDGPU::TBA)
1939 .Case("tma", AMDGPU::TMA)
1940 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1941 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1942 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1943 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1944 .Case("vcc_lo", AMDGPU::VCC_LO)
1945 .Case("vcc_hi", AMDGPU::VCC_HI)
1946 .Case("exec_lo", AMDGPU::EXEC_LO)
1947 .Case("exec_hi", AMDGPU::EXEC_HI)
1948 .Case("tma_lo", AMDGPU::TMA_LO)
1949 .Case("tma_hi", AMDGPU::TMA_HI)
1950 .Case("tba_lo", AMDGPU::TBA_LO)
1951 .Case("tba_hi", AMDGPU::TBA_HI)
1952 .Case("null", AMDGPU::SGPR_NULL)
1953 .Default(0);
1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1957 SMLoc &EndLoc) {
1958 auto R = parseRegister();
1959 if (!R) return true;
1960 assert(R->isReg());
1961 RegNo = R->getReg();
1962 StartLoc = R->getStartLoc();
1963 EndLoc = R->getEndLoc();
1964 return false;
1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1968 RegisterKind RegKind, unsigned Reg1,
1969 unsigned RegNum) {
1970 switch (RegKind) {
1971 case IS_SPECIAL:
1972 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1973 Reg = AMDGPU::EXEC;
1974 RegWidth = 2;
1975 return true;
1977 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1978 Reg = AMDGPU::FLAT_SCR;
1979 RegWidth = 2;
1980 return true;
1982 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1983 Reg = AMDGPU::XNACK_MASK;
1984 RegWidth = 2;
1985 return true;
1987 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1988 Reg = AMDGPU::VCC;
1989 RegWidth = 2;
1990 return true;
1992 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1993 Reg = AMDGPU::TBA;
1994 RegWidth = 2;
1995 return true;
1997 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1998 Reg = AMDGPU::TMA;
1999 RegWidth = 2;
2000 return true;
2002 return false;
2003 case IS_VGPR:
2004 case IS_SGPR:
2005 case IS_AGPR:
2006 case IS_TTMP:
2007 if (Reg1 != Reg + RegWidth) {
2008 return false;
2010 RegWidth++;
2011 return true;
2012 default:
2013 llvm_unreachable("unexpected register kind");
2017 static const StringRef Registers[] = {
2018 { "v" },
2019 { "s" },
2020 { "ttmp" },
2021 { "acc" },
2022 { "a" },
2025 bool
2026 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2027 const AsmToken &NextToken) const {
2029 // A list of consecutive registers: [s0,s1,s2,s3]
2030 if (Token.is(AsmToken::LBrac))
2031 return true;
2033 if (!Token.is(AsmToken::Identifier))
2034 return false;
2036 // A single register like s0 or a range of registers like s[0:1]
2038 StringRef RegName = Token.getString();
2040 for (StringRef Reg : Registers) {
2041 if (RegName.startswith(Reg)) {
2042 if (Reg.size() < RegName.size()) {
2043 unsigned RegNum;
2044 // A single register with an index: rXX
2045 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2046 return true;
2047 } else {
2048 // A range of registers: r[XX:YY].
2049 if (NextToken.is(AsmToken::LBrac))
2050 return true;
2055 return getSpecialRegForName(RegName);
2058 bool
2059 AMDGPUAsmParser::isRegister()
2061 return isRegister(getToken(), peekToken());
2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2065 unsigned &RegNum, unsigned &RegWidth,
2066 unsigned *DwordRegIndex) {
2067 if (DwordRegIndex) { *DwordRegIndex = 0; }
2068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2069 if (getLexer().is(AsmToken::Identifier)) {
2070 StringRef RegName = Parser.getTok().getString();
2071 if ((Reg = getSpecialRegForName(RegName))) {
2072 Parser.Lex();
2073 RegKind = IS_SPECIAL;
2074 } else {
2075 unsigned RegNumIndex = 0;
2076 if (RegName[0] == 'v') {
2077 RegNumIndex = 1;
2078 RegKind = IS_VGPR;
2079 } else if (RegName[0] == 's') {
2080 RegNumIndex = 1;
2081 RegKind = IS_SGPR;
2082 } else if (RegName[0] == 'a') {
2083 RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2084 RegKind = IS_AGPR;
2085 } else if (RegName.startswith("ttmp")) {
2086 RegNumIndex = strlen("ttmp");
2087 RegKind = IS_TTMP;
2088 } else {
2089 return false;
2091 if (RegName.size() > RegNumIndex) {
2092 // Single 32-bit register: vXX.
2093 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2094 return false;
2095 Parser.Lex();
2096 RegWidth = 1;
2097 } else {
2098 // Range of registers: v[XX:YY]. ":YY" is optional.
2099 Parser.Lex();
2100 int64_t RegLo, RegHi;
2101 if (getLexer().isNot(AsmToken::LBrac))
2102 return false;
2103 Parser.Lex();
2105 if (getParser().parseAbsoluteExpression(RegLo))
2106 return false;
2108 const bool isRBrace = getLexer().is(AsmToken::RBrac);
2109 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2110 return false;
2111 Parser.Lex();
2113 if (isRBrace) {
2114 RegHi = RegLo;
2115 } else {
2116 if (getParser().parseAbsoluteExpression(RegHi))
2117 return false;
2119 if (getLexer().isNot(AsmToken::RBrac))
2120 return false;
2121 Parser.Lex();
2123 RegNum = (unsigned) RegLo;
2124 RegWidth = (RegHi - RegLo) + 1;
2127 } else if (getLexer().is(AsmToken::LBrac)) {
2128 // List of consecutive registers: [s0,s1,s2,s3]
2129 Parser.Lex();
2130 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2131 return false;
2132 if (RegWidth != 1)
2133 return false;
2134 RegisterKind RegKind1;
2135 unsigned Reg1, RegNum1, RegWidth1;
2136 do {
2137 if (getLexer().is(AsmToken::Comma)) {
2138 Parser.Lex();
2139 } else if (getLexer().is(AsmToken::RBrac)) {
2140 Parser.Lex();
2141 break;
2142 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2143 if (RegWidth1 != 1) {
2144 return false;
2146 if (RegKind1 != RegKind) {
2147 return false;
2149 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2150 return false;
2152 } else {
2153 return false;
2155 } while (true);
2156 } else {
2157 return false;
2159 switch (RegKind) {
2160 case IS_SPECIAL:
2161 RegNum = 0;
2162 RegWidth = 1;
2163 break;
2164 case IS_VGPR:
2165 case IS_SGPR:
2166 case IS_AGPR:
2167 case IS_TTMP:
2169 unsigned Size = 1;
2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
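// For example, a 64-bit pair must start at an even index: s[2:3] is
// accepted below while s[1:2] is rejected.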
2172 Size = std::min(RegWidth, 4u);
2174 if (RegNum % Size != 0)
2175 return false;
2176 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2177 RegNum = RegNum / Size;
2178 int RCID = getRegClass(RegKind, RegWidth);
2179 if (RCID == -1)
2180 return false;
2181 const MCRegisterClass RC = TRI->getRegClass(RCID);
2182 if (RegNum >= RC.getNumRegs())
2183 return false;
2184 Reg = RC.getRegister(RegNum);
2185 break;
2188 default:
2189 llvm_unreachable("unexpected register kind");
2192 if (!subtargetHasRegister(*TRI, Reg))
2193 return false;
2194 return true;
2197 Optional<StringRef>
2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2199 switch (RegKind) {
2200 case IS_VGPR:
2201 return StringRef(".amdgcn.next_free_vgpr");
2202 case IS_SGPR:
2203 return StringRef(".amdgcn.next_free_sgpr");
2204 default:
2205 return None;
2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2210 auto SymbolName = getGprCountSymbolName(RegKind);
2211 assert(SymbolName && "initializing invalid register kind");
2212 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2213 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2217 unsigned DwordRegIndex,
2218 unsigned RegWidth) {
2219 // Symbols are only defined for GCN targets
2220 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2221 return true;
2223 auto SymbolName = getGprCountSymbolName(RegKind);
2224 if (!SymbolName)
2225 return true;
2226 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
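// Track the highest register index referenced so far; e.g. after parsing
// v[0:3] the .amdgcn.next_free_vgpr symbol is raised to at least 4.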
2228 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2229 int64_t OldCount;
2231 if (!Sym->isVariable())
2232 return !Error(getParser().getTok().getLoc(),
2233 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2234 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2235 return !Error(
2236 getParser().getTok().getLoc(),
2237 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2239 if (OldCount <= NewMax)
2240 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2242 return true;
2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2246 const auto &Tok = Parser.getTok();
2247 SMLoc StartLoc = Tok.getLoc();
2248 SMLoc EndLoc = Tok.getEndLoc();
2249 RegisterKind RegKind;
2250 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2252 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2253 //FIXME: improve error messages (bug 41303).
2254 Error(StartLoc, "not a valid operand.");
2255 return nullptr;
2257 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2258 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2259 return nullptr;
2260 } else
2261 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2262 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2267 // TODO: add syntactic sugar for 1/(2*PI)
2269 assert(!isRegister());
2270 assert(!isModifier());
2272 const auto& Tok = getToken();
2273 const auto& NextTok = peekToken();
2274 bool IsReal = Tok.is(AsmToken::Real);
2275 SMLoc S = getLoc();
2276 bool Negate = false;
2278 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2279 lex();
2280 IsReal = true;
2281 Negate = true;
2284 if (IsReal) {
2285 // Floating-point expressions are not supported.
2286 // Only floating-point literals with an
2287 // optional sign are accepted here.
2289 StringRef Num = getTokenStr();
2290 lex();
2292 APFloat RealVal(APFloat::IEEEdouble());
2293 auto roundMode = APFloat::rmNearestTiesToEven;
2294 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2295 return MatchOperand_ParseFail;
2297 if (Negate)
2298 RealVal.changeSign();
2300 Operands.push_back(
2301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2302 AMDGPUOperand::ImmTyNone, true));
2304 return MatchOperand_Success;
2306 } else {
2307 int64_t IntVal;
2308 const MCExpr *Expr;
2309 SMLoc S = getLoc();
2311 if (HasSP3AbsModifier) {
2312 // This is a workaround for handling expressions
2313 // as arguments of SP3 'abs' modifier, for example:
2314 // |1.0|
2315 // |-1|
2316 // |1+x|
2317 // This syntax is not compatible with syntax of standard
2318 // MC expressions (due to the trailing '|').
2319 SMLoc EndLoc;
2320 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2321 return MatchOperand_ParseFail;
2322 } else {
2323 if (Parser.parseExpression(Expr))
2324 return MatchOperand_ParseFail;
2327 if (Expr->evaluateAsAbsolute(IntVal)) {
2328 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2329 } else {
2330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2333 return MatchOperand_Success;
2336 return MatchOperand_NoMatch;
2339 OperandMatchResultTy
2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2341 if (!isRegister())
2342 return MatchOperand_NoMatch;
2344 if (auto R = parseRegister()) {
2345 assert(R->isReg());
2346 Operands.push_back(std::move(R));
2347 return MatchOperand_Success;
2349 return MatchOperand_ParseFail;
2352 OperandMatchResultTy
2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2354 auto res = parseReg(Operands);
2355 if (res != MatchOperand_NoMatch) {
2356 return res;
2357 } else if (isModifier()) {
2358 return MatchOperand_NoMatch;
2359 } else {
2360 return parseImm(Operands, HasSP3AbsMod);
2364 bool
2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2366 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2367 const auto &str = Token.getString();
2368 return str == "abs" || str == "neg" || str == "sext";
2370 return false;
2373 bool
2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2375 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2378 bool
2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2380 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2383 bool
2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2385 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2388 // Check if this is an operand modifier or an opcode modifier
2389 // which may look like an expression but is not. We should
2390 // avoid parsing these modifiers as expressions. Currently
2391 // recognized sequences are:
2392 // |...|
2393 // abs(...)
2394 // neg(...)
2395 // sext(...)
2396 // -reg
2397 // -|...|
2398 // -abs(...)
2399 // name:...
2400 // Note that simple opcode modifiers like 'gds' may be parsed as
2401 // expressions; this is a special case. See getExpressionAsToken.
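// For illustration only (not tied to a particular instruction), operands
// written as -v1, -|v2|, abs(v3), neg(v4), sext(v5) or offset:16 are
// recognized here rather than being parsed as expressions.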
2403 bool
2404 AMDGPUAsmParser::isModifier() {
2406 AsmToken Tok = getToken();
2407 AsmToken NextToken[2];
2408 peekTokens(NextToken);
2410 return isOperandModifier(Tok, NextToken[0]) ||
2411 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2412 isOpcodeModifierWithVal(Tok, NextToken[0]);
2415 // Check if the current token is an SP3 'neg' modifier.
2416 // Currently this modifier is allowed in the following contexts:
2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2419 // 2. Before an 'abs' modifier: -abs(...)
2420 // 3. Before an SP3 'abs' modifier: -|...|
2422 // In all other cases "-" is handled as a part
2423 // of an expression that follows the sign.
2425 // Note: When "-" is followed by an integer literal,
2426 // this is interpreted as integer negation rather
2427 // than a floating-point NEG modifier applied to the literal.
2428 // Besides being counter-intuitive, such use of a floating-point
2429 // NEG modifier would have resulted in different meanings
2430 // of integer literals used with VOP1/2/C and VOP3,
2431 // for example:
2432 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2433 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2434 // Negative fp literals with a preceding "-" are
2435 // handled likewise for uniformity.
2437 bool
2438 AMDGPUAsmParser::parseSP3NegModifier() {
2440 AsmToken NextToken[2];
2441 peekTokens(NextToken);
2443 if (isToken(AsmToken::Minus) &&
2444 (isRegister(NextToken[0], NextToken[1]) ||
2445 NextToken[0].is(AsmToken::Pipe) ||
2446 isId(NextToken[0], "abs"))) {
2447 lex();
2448 return true;
2451 return false;
2454 OperandMatchResultTy
2455 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2456 bool AllowImm) {
2457 bool Neg, SP3Neg;
2458 bool Abs, SP3Abs;
2459 SMLoc Loc;
2461 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2462 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2463 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2464 return MatchOperand_ParseFail;
2467 SP3Neg = parseSP3NegModifier();
2469 Loc = getLoc();
2470 Neg = trySkipId("neg");
2471 if (Neg && SP3Neg) {
2472 Error(Loc, "expected register or immediate");
2473 return MatchOperand_ParseFail;
2475 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2476 return MatchOperand_ParseFail;
2478 Abs = trySkipId("abs");
2479 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2480 return MatchOperand_ParseFail;
2482 Loc = getLoc();
2483 SP3Abs = trySkipToken(AsmToken::Pipe);
2484 if (Abs && SP3Abs) {
2485 Error(Loc, "expected register or immediate");
2486 return MatchOperand_ParseFail;
2489 OperandMatchResultTy Res;
2490 if (AllowImm) {
2491 Res = parseRegOrImm(Operands, SP3Abs);
2492 } else {
2493 Res = parseReg(Operands);
2495 if (Res != MatchOperand_Success) {
2496 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2499 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2500 return MatchOperand_ParseFail;
2501 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2502 return MatchOperand_ParseFail;
2503 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2504 return MatchOperand_ParseFail;
2506 AMDGPUOperand::Modifiers Mods;
2507 Mods.Abs = Abs || SP3Abs;
2508 Mods.Neg = Neg || SP3Neg;
2510 if (Mods.hasFPModifiers()) {
2511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2512 if (Op.isExpr()) {
2513 Error(Op.getStartLoc(), "expected an absolute expression");
2514 return MatchOperand_ParseFail;
2516 Op.setModifiers(Mods);
2518 return MatchOperand_Success;
2521 OperandMatchResultTy
2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2523 bool AllowImm) {
2524 bool Sext = trySkipId("sext");
2525 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2526 return MatchOperand_ParseFail;
2528 OperandMatchResultTy Res;
2529 if (AllowImm) {
2530 Res = parseRegOrImm(Operands);
2531 } else {
2532 Res = parseReg(Operands);
2534 if (Res != MatchOperand_Success) {
2535 return Sext? MatchOperand_ParseFail : Res;
2538 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2539 return MatchOperand_ParseFail;
2541 AMDGPUOperand::Modifiers Mods;
2542 Mods.Sext = Sext;
2544 if (Mods.hasIntModifiers()) {
2545 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2546 if (Op.isExpr()) {
2547 Error(Op.getStartLoc(), "expected an absolute expression");
2548 return MatchOperand_ParseFail;
2550 Op.setModifiers(Mods);
2553 return MatchOperand_Success;
2556 OperandMatchResultTy
2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2558 return parseRegOrImmWithFPInputMods(Operands, false);
2561 OperandMatchResultTy
2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2563 return parseRegOrImmWithIntInputMods(Operands, false);
2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2567 auto Loc = getLoc();
2568 if (trySkipId("off")) {
2569 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2570 AMDGPUOperand::ImmTyOff, false));
2571 return MatchOperand_Success;
2574 if (!isRegister())
2575 return MatchOperand_NoMatch;
2577 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2578 if (Reg) {
2579 Operands.push_back(std::move(Reg));
2580 return MatchOperand_Success;
2583 return MatchOperand_ParseFail;
2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2588 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2590 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2591 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2592 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2593 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2594 return Match_InvalidOperand;
2596 if ((TSFlags & SIInstrFlags::VOP3) &&
2597 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2598 getForcedEncodingSize() != 64)
2599 return Match_PreferE32;
2601 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2602 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2603 // v_mac_f32/16 allow only dst_sel == DWORD;
2604 auto OpNum =
2605 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2606 const auto &Op = Inst.getOperand(OpNum);
2607 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2608 return Match_InvalidOperand;
2612 return Match_Success;
2615 // What asm variants we should check
2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2617 if (getForcedEncodingSize() == 32) {
2618 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2619 return makeArrayRef(Variants);
2622 if (isForcedVOP3()) {
2623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2624 return makeArrayRef(Variants);
2627 if (isForcedSDWA()) {
2628 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2629 AMDGPUAsmVariants::SDWA9};
2630 return makeArrayRef(Variants);
2633 if (isForcedDPP()) {
2634 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2635 return makeArrayRef(Variants);
2638 static const unsigned Variants[] = {
2639 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2640 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2643 return makeArrayRef(Variants);
2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2648 const unsigned Num = Desc.getNumImplicitUses();
2649 for (unsigned i = 0; i < Num; ++i) {
2650 unsigned Reg = Desc.ImplicitUses[i];
2651 switch (Reg) {
2652 case AMDGPU::FLAT_SCR:
2653 case AMDGPU::VCC:
2654 case AMDGPU::VCC_LO:
2655 case AMDGPU::VCC_HI:
2656 case AMDGPU::M0:
2657 case AMDGPU::SGPR_NULL:
2658 return Reg;
2659 default:
2660 break;
2663 return AMDGPU::NoRegister;
2666 // NB: This code is correct only when used to check constant
2667 // bus limitations because GFX7 supports no f16 inline constants.
2668 // Note that there are no cases when a GFX7 opcode violates
2669 // constant bus limitations due to the use of an f16 constant.
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2671 unsigned OpIdx) const {
2672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2674 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2675 return false;
2678 const MCOperand &MO = Inst.getOperand(OpIdx);
2680 int64_t Val = MO.getImm();
2681 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2683 switch (OpSize) { // expected operand size
2684 case 8:
2685 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2686 case 4:
2687 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2688 case 2: {
2689 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2690 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2691 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2692 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2693 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2694 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2695 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2696 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2697 } else {
2698 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701 default:
2702 llvm_unreachable("invalid operand size");
2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2707 const MCOperand &MO = Inst.getOperand(OpIdx);
2708 if (MO.isImm()) {
2709 return !isInlineConstant(Inst, OpIdx);
2711 return !MO.isReg() ||
2712 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2716 const unsigned Opcode = Inst.getOpcode();
2717 const MCInstrDesc &Desc = MII.get(Opcode);
2718 unsigned ConstantBusUseCount = 0;
2719 unsigned NumLiterals = 0;
2720 unsigned LiteralSize;
2722 if (Desc.TSFlags &
2723 (SIInstrFlags::VOPC |
2724 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2725 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2726 SIInstrFlags::SDWA)) {
2727 // Check special imm operands (used by madmk, etc)
2728 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2729 ++ConstantBusUseCount;
2732 SmallDenseSet<unsigned> SGPRsUsed;
2733 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2734 if (SGPRUsed != AMDGPU::NoRegister) {
2735 SGPRsUsed.insert(SGPRUsed);
2736 ++ConstantBusUseCount;
2739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2740 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2743 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2745 for (int OpIdx : OpIndices) {
2746 if (OpIdx == -1) break;
2748 const MCOperand &MO = Inst.getOperand(OpIdx);
2749 if (usesConstantBus(Inst, OpIdx)) {
2750 if (MO.isReg()) {
2751 const unsigned Reg = mc2PseudoReg(MO.getReg());
2752 // Pairs of registers with a partial intersection like these
2753 // s0, s[0:1]
2754 // flat_scratch_lo, flat_scratch
2755 // flat_scratch_lo, flat_scratch_hi
2756 // are theoretically valid but they are disabled anyway.
2757 // Note that this code mimics SIInstrInfo::verifyInstruction
2758 if (!SGPRsUsed.count(Reg)) {
2759 SGPRsUsed.insert(Reg);
2760 ++ConstantBusUseCount;
2762 } else { // Expression or a literal
2764 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2765 continue; // special operand like VINTERP attr_chan
2767 // An instruction may use only one literal.
2768 // This has been validated in a previous step.
2769 // See validateVOP3Literal.
2770 // This literal may be used as more than one operand.
2771 // If all these operands are of the same size,
2772 // this literal counts as one scalar value.
2773 // Otherwise it counts as 2 scalar values.
2774 // See "GFX10 Shader Programming", section 3.6.2.3.
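// For example, if the same literal feeds a 64-bit src0 and a 32-bit src1,
// it is counted as two scalar values; two 32-bit uses count as one.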
2776 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2777 if (Size < 4) Size = 4;
2779 if (NumLiterals == 0) {
2780 NumLiterals = 1;
2781 LiteralSize = Size;
2782 } else if (LiteralSize != Size) {
2783 NumLiterals = 2;
2789 ConstantBusUseCount += NumLiterals;
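// GFX10 relaxes the limit and allows up to two scalar values on the
// constant bus; earlier targets allow only one.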
2791 if (isGFX10())
2792 return ConstantBusUseCount <= 2;
2794 return ConstantBusUseCount <= 1;
2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2798 const unsigned Opcode = Inst.getOpcode();
2799 const MCInstrDesc &Desc = MII.get(Opcode);
2801 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2802 if (DstIdx == -1 ||
2803 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2804 return true;
2807 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2811 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2813 assert(DstIdx != -1);
2814 const MCOperand &Dst = Inst.getOperand(DstIdx);
2815 assert(Dst.isReg());
2816 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2818 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2820 for (int SrcIdx : SrcIndices) {
2821 if (SrcIdx == -1) break;
2822 const MCOperand &Src = Inst.getOperand(SrcIdx);
2823 if (Src.isReg()) {
2824 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2825 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2826 return false;
2831 return true;
2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2836 const unsigned Opc = Inst.getOpcode();
2837 const MCInstrDesc &Desc = MII.get(Opc);
2839 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2840 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2841 assert(ClampIdx != -1);
2842 return Inst.getOperand(ClampIdx).getImm() == 0;
2845 return true;
2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2850 const unsigned Opc = Inst.getOpcode();
2851 const MCInstrDesc &Desc = MII.get(Opc);
2853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2854 return true;
2856 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2857 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2858 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2860 assert(VDataIdx != -1);
2861 assert(DMaskIdx != -1);
2862 assert(TFEIdx != -1);
2864 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2865 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2866 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2867 if (DMask == 0)
2868 DMask = 1;
2870 unsigned DataSize =
2871 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2872 if (hasPackedD16()) {
2873 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2874 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2875 DataSize = (DataSize + 1) / 2;
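// E.g. an illustrative dmask of 0x7 selects three components, so vdata must
// be a 3-dword tuple (4 with tfe), or 2 dwords when packed d16 is enabled.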
2878 return (VDataSize / 4) == DataSize + TFESize;
2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2882 const unsigned Opc = Inst.getOpcode();
2883 const MCInstrDesc &Desc = MII.get(Opc);
2885 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2886 return true;
2888 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2889 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2890 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2891 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2892 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2893 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2895 assert(VAddr0Idx != -1);
2896 assert(SrsrcIdx != -1);
2897 assert(DimIdx != -1);
2898 assert(SrsrcIdx > VAddr0Idx);
2900 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2901 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2902 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2903 unsigned VAddrSize =
2904 IsNSA ? SrsrcIdx - VAddr0Idx
2905 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2907 unsigned AddrSize = BaseOpcode->NumExtraArgs +
2908 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2909 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2910 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2911 if (!IsNSA) {
2912 if (AddrSize > 8)
2913 AddrSize = 16;
2914 else if (AddrSize > 4)
2915 AddrSize = 8;
2918 return VAddrSize == AddrSize;
2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2923 const unsigned Opc = Inst.getOpcode();
2924 const MCInstrDesc &Desc = MII.get(Opc);
2926 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2927 return true;
2928 if (!Desc.mayLoad() || !Desc.mayStore())
2929 return true; // Not atomic
2931 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2932 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2934 // This is an incomplete check because image_atomic_cmpswap
2935 // may only use 0x3 and 0xf while other atomic operations
2936 // may use 0x1 and 0x3. However these limitations are
2937 // verified when we check that dmask matches dst size.
2938 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2943 const unsigned Opc = Inst.getOpcode();
2944 const MCInstrDesc &Desc = MII.get(Opc);
2946 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2947 return true;
2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952 // GATHER4 instructions use dmask in a different fashion compared to
2953 // other MIMG instructions. The only useful DMASK values are
2954 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2955 // (red,red,red,red) etc.) The ISA document doesn't mention
2956 // this.
2957 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2962 const unsigned Opc = Inst.getOpcode();
2963 const MCInstrDesc &Desc = MII.get(Opc);
2965 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2966 return true;
2968 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2969 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2970 if (isCI() || isSI())
2971 return false;
2974 return true;
2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2978 const unsigned Opc = Inst.getOpcode();
2979 const MCInstrDesc &Desc = MII.get(Opc);
2981 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2982 return true;
2984 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2985 if (DimIdx < 0)
2986 return true;
2988 long Imm = Inst.getOperand(DimIdx).getImm();
2989 if (Imm < 0 || Imm >= 8)
2990 return false;
2992 return true;
2995 static bool IsRevOpcode(const unsigned Opcode)
2997 switch (Opcode) {
2998 case AMDGPU::V_SUBREV_F32_e32:
2999 case AMDGPU::V_SUBREV_F32_e64:
3000 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3001 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3002 case AMDGPU::V_SUBREV_F32_e32_vi:
3003 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3004 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3005 case AMDGPU::V_SUBREV_F32_e64_vi:
3007 case AMDGPU::V_SUBREV_I32_e32:
3008 case AMDGPU::V_SUBREV_I32_e64:
3009 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3010 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3012 case AMDGPU::V_SUBBREV_U32_e32:
3013 case AMDGPU::V_SUBBREV_U32_e64:
3014 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3015 case AMDGPU::V_SUBBREV_U32_e32_vi:
3016 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3017 case AMDGPU::V_SUBBREV_U32_e64_vi:
3019 case AMDGPU::V_SUBREV_U32_e32:
3020 case AMDGPU::V_SUBREV_U32_e64:
3021 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3022 case AMDGPU::V_SUBREV_U32_e32_vi:
3023 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3024 case AMDGPU::V_SUBREV_U32_e64_vi:
3026 case AMDGPU::V_SUBREV_F16_e32:
3027 case AMDGPU::V_SUBREV_F16_e64:
3028 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3029 case AMDGPU::V_SUBREV_F16_e32_vi:
3030 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3031 case AMDGPU::V_SUBREV_F16_e64_vi:
3033 case AMDGPU::V_SUBREV_U16_e32:
3034 case AMDGPU::V_SUBREV_U16_e64:
3035 case AMDGPU::V_SUBREV_U16_e32_vi:
3036 case AMDGPU::V_SUBREV_U16_e64_vi:
3038 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3039 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3040 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3042 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3043 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3045 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3046 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3048 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3049 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3051 case AMDGPU::V_LSHRREV_B32_e32:
3052 case AMDGPU::V_LSHRREV_B32_e64:
3053 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3054 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3055 case AMDGPU::V_LSHRREV_B32_e32_vi:
3056 case AMDGPU::V_LSHRREV_B32_e64_vi:
3057 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3058 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3060 case AMDGPU::V_ASHRREV_I32_e32:
3061 case AMDGPU::V_ASHRREV_I32_e64:
3062 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3063 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3064 case AMDGPU::V_ASHRREV_I32_e32_vi:
3065 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3066 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3067 case AMDGPU::V_ASHRREV_I32_e64_vi:
3069 case AMDGPU::V_LSHLREV_B32_e32:
3070 case AMDGPU::V_LSHLREV_B32_e64:
3071 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3072 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3073 case AMDGPU::V_LSHLREV_B32_e32_vi:
3074 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3075 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3076 case AMDGPU::V_LSHLREV_B32_e64_vi:
3078 case AMDGPU::V_LSHLREV_B16_e32:
3079 case AMDGPU::V_LSHLREV_B16_e64:
3080 case AMDGPU::V_LSHLREV_B16_e32_vi:
3081 case AMDGPU::V_LSHLREV_B16_e64_vi:
3082 case AMDGPU::V_LSHLREV_B16_gfx10:
3084 case AMDGPU::V_LSHRREV_B16_e32:
3085 case AMDGPU::V_LSHRREV_B16_e64:
3086 case AMDGPU::V_LSHRREV_B16_e32_vi:
3087 case AMDGPU::V_LSHRREV_B16_e64_vi:
3088 case AMDGPU::V_LSHRREV_B16_gfx10:
3090 case AMDGPU::V_ASHRREV_I16_e32:
3091 case AMDGPU::V_ASHRREV_I16_e64:
3092 case AMDGPU::V_ASHRREV_I16_e32_vi:
3093 case AMDGPU::V_ASHRREV_I16_e64_vi:
3094 case AMDGPU::V_ASHRREV_I16_gfx10:
3096 case AMDGPU::V_LSHLREV_B64:
3097 case AMDGPU::V_LSHLREV_B64_gfx10:
3098 case AMDGPU::V_LSHLREV_B64_vi:
3100 case AMDGPU::V_LSHRREV_B64:
3101 case AMDGPU::V_LSHRREV_B64_gfx10:
3102 case AMDGPU::V_LSHRREV_B64_vi:
3104 case AMDGPU::V_ASHRREV_I64:
3105 case AMDGPU::V_ASHRREV_I64_gfx10:
3106 case AMDGPU::V_ASHRREV_I64_vi:
3108 case AMDGPU::V_PK_LSHLREV_B16:
3109 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3110 case AMDGPU::V_PK_LSHLREV_B16_vi:
3112 case AMDGPU::V_PK_LSHRREV_B16:
3113 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3114 case AMDGPU::V_PK_LSHRREV_B16_vi:
3115 case AMDGPU::V_PK_ASHRREV_I16:
3116 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3117 case AMDGPU::V_PK_ASHRREV_I16_vi:
3118 return true;
3119 default:
3120 return false;
3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3126 using namespace SIInstrFlags;
3127 const unsigned Opcode = Inst.getOpcode();
3128 const MCInstrDesc &Desc = MII.get(Opcode);
3130 // lds_direct register is defined so that it can be used
3131 // with 9-bit operands only. Ignore encodings which do not accept these.
3132 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3133 return true;
3135 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3136 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3137 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3139 const int SrcIndices[] = { Src1Idx, Src2Idx };
3141 // lds_direct cannot be specified as either src1 or src2.
3142 for (int SrcIdx : SrcIndices) {
3143 if (SrcIdx == -1) break;
3144 const MCOperand &Src = Inst.getOperand(SrcIdx);
3145 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3146 return false;
3150 if (Src0Idx == -1)
3151 return true;
3153 const MCOperand &Src = Inst.getOperand(Src0Idx);
3154 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3155 return true;
3157 // lds_direct is specified as src0. Check additional limitations.
3158 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3162 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3164 if (Op.isFlatOffset())
3165 return Op.getStartLoc();
3167 return getLoc();
3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3171 const OperandVector &Operands) {
3172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3173 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3174 return true;
3176 auto Opcode = Inst.getOpcode();
3177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3178 assert(OpNum != -1);
3180 const auto &Op = Inst.getOperand(OpNum);
3181 if (!hasFlatOffsets() && Op.getImm() != 0) {
3182 Error(getFlatOffsetLoc(Operands),
3183 "flat offset modifier is not supported on this GPU");
3184 return false;
3187 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3188 // For FLAT segment the offset must be positive;
3189 // MSB is ignored and forced to zero.
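// The resulting ranges, as enforced by the checks below:
//   GFX9:  13-bit signed (segment instructions), 12-bit unsigned (FLAT)
//   GFX10: 12-bit signed (segment instructions), 11-bit unsigned (FLAT)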
3190 unsigned OffsetSize = isGFX9() ? 13 : 12;
3191 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3192 if (!isIntN(OffsetSize, Op.getImm())) {
3193 Error(getFlatOffsetLoc(Operands),
3194 isGFX9() ? "expected a 13-bit signed offset" :
3195 "expected a 12-bit signed offset");
3196 return false;
3198 } else {
3199 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3200 Error(getFlatOffsetLoc(Operands),
3201 isGFX9() ? "expected a 12-bit unsigned offset" :
3202 "expected an 11-bit unsigned offset");
3203 return false;
3207 return true;
3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3211 unsigned Opcode = Inst.getOpcode();
3212 const MCInstrDesc &Desc = MII.get(Opcode);
3213 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3214 return true;
3216 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3217 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3219 const int OpIndices[] = { Src0Idx, Src1Idx };
3221 unsigned NumLiterals = 0;
3222 uint32_t LiteralValue;
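// Repeated uses of the same literal value count as one literal; e.g. an
// illustrative s_add_u32 s0, 0x12345678, 0x12345678 is accepted, while two
// different literal values are rejected.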
3224 for (int OpIdx : OpIndices) {
3225 if (OpIdx == -1) break;
3227 const MCOperand &MO = Inst.getOperand(OpIdx);
3228 if (MO.isImm() &&
3229 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3230 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3231 !isInlineConstant(Inst, OpIdx)) {
3232 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3233 if (NumLiterals == 0 || LiteralValue != Value) {
3234 LiteralValue = Value;
3235 ++NumLiterals;
3240 return NumLiterals <= 1;
3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3244 const unsigned Opc = Inst.getOpcode();
3245 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3246 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3248 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3250 if (OpSel & ~3)
3251 return false;
3253 return true;
3256 // Check if VCC register matches wavefront size
3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3258 auto FB = getFeatureBits();
3259 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3260 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3263 // VOP3 literal is only allowed in GFX10+ and only one can be used
3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3265 unsigned Opcode = Inst.getOpcode();
3266 const MCInstrDesc &Desc = MII.get(Opcode);
3267 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3268 return true;
3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3274 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3276 unsigned NumLiterals = 0;
3277 uint32_t LiteralValue;
3279 for (int OpIdx : OpIndices) {
3280 if (OpIdx == -1) break;
3282 const MCOperand &MO = Inst.getOperand(OpIdx);
3283 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3284 continue;
3286 if (!isInlineConstant(Inst, OpIdx)) {
3287 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3288 if (NumLiterals == 0 || LiteralValue != Value) {
3289 LiteralValue = Value;
3290 ++NumLiterals;
3295 return !NumLiterals ||
3296 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3299 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3300 const SMLoc &IDLoc,
3301 const OperandVector &Operands) {
3302 if (!validateLdsDirect(Inst)) {
3303 Error(IDLoc,
3304 "invalid use of lds_direct");
3305 return false;
3307 if (!validateSOPLiteral(Inst)) {
3308 Error(IDLoc,
3309 "only one literal operand is allowed");
3310 return false;
3312 if (!validateVOP3Literal(Inst)) {
3313 Error(IDLoc,
3314 "invalid literal operand");
3315 return false;
3317 if (!validateConstantBusLimitations(Inst)) {
3318 Error(IDLoc,
3319 "invalid operand (violates constant bus restrictions)");
3320 return false;
3322 if (!validateEarlyClobberLimitations(Inst)) {
3323 Error(IDLoc,
3324 "destination must be different than all sources");
3325 return false;
3327 if (!validateIntClampSupported(Inst)) {
3328 Error(IDLoc,
3329 "integer clamping is not supported on this GPU");
3330 return false;
3332 if (!validateOpSel(Inst)) {
3333 Error(IDLoc,
3334 "invalid op_sel operand");
3335 return false;
3337 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3338 if (!validateMIMGD16(Inst)) {
3339 Error(IDLoc,
3340 "d16 modifier is not supported on this GPU");
3341 return false;
3343 if (!validateMIMGDim(Inst)) {
3344 Error(IDLoc, "dim modifier is required on this GPU");
3345 return false;
3347 if (!validateMIMGDataSize(Inst)) {
3348 Error(IDLoc,
3349 "image data size does not match dmask and tfe");
3350 return false;
3352 if (!validateMIMGAddrSize(Inst)) {
3353 Error(IDLoc,
3354 "image address size does not match dim and a16");
3355 return false;
3357 if (!validateMIMGAtomicDMask(Inst)) {
3358 Error(IDLoc,
3359 "invalid atomic image dmask");
3360 return false;
3362 if (!validateMIMGGatherDMask(Inst)) {
3363 Error(IDLoc,
3364 "invalid image_gather dmask: only one bit must be set");
3365 return false;
3367 if (!validateFlatOffset(Inst, Operands)) {
3368 return false;
3371 return true;
3374 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3375 const FeatureBitset &FBS,
3376 unsigned VariantID = 0);
3378 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3379 OperandVector &Operands,
3380 MCStreamer &Out,
3381 uint64_t &ErrorInfo,
3382 bool MatchingInlineAsm) {
3383 MCInst Inst;
3384 unsigned Result = Match_Success;
3385 for (auto Variant : getMatchedVariants()) {
3386 uint64_t EI;
3387 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3388 Variant);
3389 // We order match statuses from least to most specific. We use the most
3390 // specific status as the result:
3391 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3392 if ((R == Match_Success) ||
3393 (R == Match_PreferE32) ||
3394 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3395 (R == Match_InvalidOperand && Result != Match_MissingFeature
3396 && Result != Match_PreferE32) ||
3397 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3398 && Result != Match_MissingFeature
3399 && Result != Match_PreferE32)) {
3400 Result = R;
3401 ErrorInfo = EI;
3403 if (R == Match_Success)
3404 break;
3407 switch (Result) {
3408 default: break;
3409 case Match_Success:
3410 if (!validateInstruction(Inst, IDLoc, Operands)) {
3411 return true;
3413 Inst.setLoc(IDLoc);
3414 Out.EmitInstruction(Inst, getSTI());
3415 return false;
3417 case Match_MissingFeature:
3418 return Error(IDLoc, "instruction not supported on this GPU");
3420 case Match_MnemonicFail: {
3421 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3422 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3423 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3424 return Error(IDLoc, "invalid instruction" + Suggestion,
3425 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3428 case Match_InvalidOperand: {
3429 SMLoc ErrorLoc = IDLoc;
3430 if (ErrorInfo != ~0ULL) {
3431 if (ErrorInfo >= Operands.size()) {
3432 return Error(IDLoc, "too few operands for instruction");
3434 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3435 if (ErrorLoc == SMLoc())
3436 ErrorLoc = IDLoc;
3438 return Error(ErrorLoc, "invalid operand for instruction");
3441 case Match_PreferE32:
3442 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3443 "should be encoded as e32");
3445 llvm_unreachable("Implement any new match types added!");
3448 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3449 int64_t Tmp = -1;
3450 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3451 return true;
3453 if (getParser().parseAbsoluteExpression(Tmp)) {
3454 return true;
3456 Ret = static_cast<uint32_t>(Tmp);
3457 return false;
3460 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3461 uint32_t &Minor) {
3462 if (ParseAsAbsoluteExpression(Major))
3463 return TokError("invalid major version");
3465 if (getLexer().isNot(AsmToken::Comma))
3466 return TokError("minor version number required, comma expected");
3467 Lex();
3469 if (ParseAsAbsoluteExpression(Minor))
3470 return TokError("invalid minor version");
3472 return false;
3475 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3476 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3477 return TokError("directive only supported for amdgcn architecture");
3479 std::string Target;
3481 SMLoc TargetStart = getTok().getLoc();
3482 if (getParser().parseEscapedString(Target))
3483 return true;
3484 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3486 std::string ExpectedTarget;
3487 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3488 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3490 if (Target != ExpectedTargetOS.str())
3491 return getParser().Error(TargetRange.Start, "target must match options",
3492 TargetRange);
3494 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3495 return false;
3498 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3499 return getParser().Error(Range.Start, "value out of range", Range);
3502 bool AMDGPUAsmParser::calculateGPRBlocks(
3503 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3504 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3505 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3506 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3507 // TODO(scott.linder): These calculations are duplicated from
3508 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3509 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3511 unsigned NumVGPRs = NextFreeVGPR;
3512 unsigned NumSGPRs = NextFreeSGPR;
3514 if (Version.Major >= 10)
3515 NumSGPRs = 0;
3516 else {
3517 unsigned MaxAddressableNumSGPRs =
3518 IsaInfo::getAddressableNumSGPRs(&getSTI());
3520 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3521 NumSGPRs > MaxAddressableNumSGPRs)
3522 return OutOfRangeError(SGPRRange);
3524 NumSGPRs +=
3525 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3527 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3528 NumSGPRs > MaxAddressableNumSGPRs)
3529 return OutOfRangeError(SGPRRange);
3531 if (Features.test(FeatureSGPRInitBug))
3532 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3535 VGPRBlocks =
3536 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3537 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3539 return false;
3542 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3543 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3544 return TokError("directive only supported for amdgcn architecture");
3546 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3547 return TokError("directive only supported for amdhsa OS");
3549 StringRef KernelName;
3550 if (getParser().parseIdentifier(KernelName))
3551 return true;
3553 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3555 StringSet<> Seen;
3557 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3559 SMRange VGPRRange;
3560 uint64_t NextFreeVGPR = 0;
3561 SMRange SGPRRange;
3562 uint64_t NextFreeSGPR = 0;
3563 unsigned UserSGPRCount = 0;
3564 bool ReserveVCC = true;
3565 bool ReserveFlatScr = true;
3566 bool ReserveXNACK = hasXNACK();
3567 Optional<bool> EnableWavefrontSize32;
3569 while (true) {
3570 while (getLexer().is(AsmToken::EndOfStatement))
3571 Lex();
3573 if (getLexer().isNot(AsmToken::Identifier))
3574 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3576 StringRef ID = getTok().getIdentifier();
3577 SMRange IDRange = getTok().getLocRange();
3578 Lex();
3580 if (ID == ".end_amdhsa_kernel")
3581 break;
3583 if (Seen.find(ID) != Seen.end())
3584 return TokError(".amdhsa_ directives cannot be repeated");
3585 Seen.insert(ID);
3587 SMLoc ValStart = getTok().getLoc();
3588 int64_t IVal;
3589 if (getParser().parseAbsoluteExpression(IVal))
3590 return true;
3591 SMLoc ValEnd = getTok().getLoc();
3592 SMRange ValRange = SMRange(ValStart, ValEnd);
3594 if (IVal < 0)
3595 return OutOfRangeError(ValRange);
3597 uint64_t Val = IVal;
3599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3600 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3601 return OutOfRangeError(RANGE); \
3602 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3604 if (ID == ".amdhsa_group_segment_fixed_size") {
3605 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3606 return OutOfRangeError(ValRange);
3607 KD.group_segment_fixed_size = Val;
3608 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3609 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3610 return OutOfRangeError(ValRange);
3611 KD.private_segment_fixed_size = Val;
3612 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3613 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3615 Val, ValRange);
3616 UserSGPRCount += 4;
3617 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3618 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3619 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3620 ValRange);
3621 UserSGPRCount += 2;
3622 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3623 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3624 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3625 ValRange);
3626 UserSGPRCount += 2;
3627 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3628 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3629 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3630 Val, ValRange);
3631 UserSGPRCount += 2;
3632 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3633 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3635 ValRange);
3636 UserSGPRCount += 2;
3637 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3638 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3640 ValRange);
3641 UserSGPRCount += 2;
3642 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3643 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3644 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3645 Val, ValRange);
3646 UserSGPRCount += 1;
3647 } else if (ID == ".amdhsa_wavefront_size32") {
3648 if (IVersion.Major < 10)
3649 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3650 IDRange);
3651 EnableWavefrontSize32 = Val;
3652 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3653 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3654 Val, ValRange);
3655 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3656 PARSE_BITS_ENTRY(
3657 KD.compute_pgm_rsrc2,
3658 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3659 ValRange);
3660 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3661 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3663 ValRange);
3664 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3667 ValRange);
3668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3671 ValRange);
3672 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3675 ValRange);
3676 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3678 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3679 ValRange);
3680 } else if (ID == ".amdhsa_next_free_vgpr") {
3681 VGPRRange = ValRange;
3682 NextFreeVGPR = Val;
3683 } else if (ID == ".amdhsa_next_free_sgpr") {
3684 SGPRRange = ValRange;
3685 NextFreeSGPR = Val;
3686 } else if (ID == ".amdhsa_reserve_vcc") {
3687 if (!isUInt<1>(Val))
3688 return OutOfRangeError(ValRange);
3689 ReserveVCC = Val;
3690 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3691 if (IVersion.Major < 7)
3692 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3693 IDRange);
3694 if (!isUInt<1>(Val))
3695 return OutOfRangeError(ValRange);
3696 ReserveFlatScr = Val;
3697 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3698 if (IVersion.Major < 8)
3699 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3700 IDRange);
3701 if (!isUInt<1>(Val))
3702 return OutOfRangeError(ValRange);
3703 ReserveXNACK = Val;
3704 } else if (ID == ".amdhsa_float_round_mode_32") {
3705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3706 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3707 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3709 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3710 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3712 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3713 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3714 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3715 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3716 ValRange);
3717 } else if (ID == ".amdhsa_dx10_clamp") {
3718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3719 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3720 } else if (ID == ".amdhsa_ieee_mode") {
3721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3722 Val, ValRange);
3723 } else if (ID == ".amdhsa_fp16_overflow") {
3724 if (IVersion.Major < 9)
3725 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3726 IDRange);
3727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3728 ValRange);
3729 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3730 if (IVersion.Major < 10)
3731 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3732 IDRange);
3733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3734 ValRange);
3735 } else if (ID == ".amdhsa_memory_ordered") {
3736 if (IVersion.Major < 10)
3737 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3738 IDRange);
3739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3740 ValRange);
3741 } else if (ID == ".amdhsa_forward_progress") {
3742 if (IVersion.Major < 10)
3743 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3744 IDRange);
3745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3746 ValRange);
3747 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3748 PARSE_BITS_ENTRY(
3749 KD.compute_pgm_rsrc2,
3750 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3751 ValRange);
3752 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3755 Val, ValRange);
3756 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3757 PARSE_BITS_ENTRY(
3758 KD.compute_pgm_rsrc2,
3759 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3760 ValRange);
3761 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3764 Val, ValRange);
3765 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3768 Val, ValRange);
3769 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3772 Val, ValRange);
3773 } else if (ID == ".amdhsa_exception_int_div_zero") {
3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3776 Val, ValRange);
3777 } else {
3778 return getParser().Error(IDRange.Start,
3779 "unknown .amdhsa_kernel directive", IDRange);
3782 #undef PARSE_BITS_ENTRY
3785 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3786 return TokError(".amdhsa_next_free_vgpr directive is required");
3788 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3789 return TokError(".amdhsa_next_free_sgpr directive is required");
3791 unsigned VGPRBlocks;
3792 unsigned SGPRBlocks;
3793 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3794 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3795 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3796 SGPRBlocks))
3797 return true;
3799 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3800 VGPRBlocks))
3801 return OutOfRangeError(VGPRRange);
3802 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3803 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3805 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3806 SGPRBlocks))
3807 return OutOfRangeError(SGPRRange);
3808 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3809 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3810 SGPRBlocks);
3812 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3813 return TokError("too many user SGPRs enabled");
3814 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3815 UserSGPRCount);
3817 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3818 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3819 ReserveFlatScr, ReserveXNACK);
3820 return false;
3823 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3824 uint32_t Major;
3825 uint32_t Minor;
3827 if (ParseDirectiveMajorMinor(Major, Minor))
3828 return true;
3830 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3831 return false;
3834 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3835 uint32_t Major;
3836 uint32_t Minor;
3837 uint32_t Stepping;
3838 StringRef VendorName;
3839 StringRef ArchName;
3841 // If this directive has no arguments, then use the ISA version for the
3842 // targeted GPU.
3843 if (getLexer().is(AsmToken::EndOfStatement)) {
3844 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3845 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3846 ISA.Stepping,
3847 "AMD", "AMDGPU");
3848 return false;
3851 if (ParseDirectiveMajorMinor(Major, Minor))
3852 return true;
3854 if (getLexer().isNot(AsmToken::Comma))
3855 return TokError("stepping version number required, comma expected");
3856 Lex();
3858 if (ParseAsAbsoluteExpression(Stepping))
3859 return TokError("invalid stepping version");
3861 if (getLexer().isNot(AsmToken::Comma))
3862 return TokError("vendor name required, comma expected");
3863 Lex();
3865 if (getLexer().isNot(AsmToken::String))
3866 return TokError("invalid vendor name");
3868 VendorName = getLexer().getTok().getStringContents();
3869 Lex();
3871 if (getLexer().isNot(AsmToken::Comma))
3872 return TokError("arch name required, comma expected");
3873 Lex();
3875 if (getLexer().isNot(AsmToken::String))
3876 return TokError("invalid arch name");
3878 ArchName = getLexer().getTok().getStringContents();
3879 Lex();
3881 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3882 VendorName, ArchName);
3883 return false;
3886 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3887 amd_kernel_code_t &Header) {
3888 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3889 // assembly for backwards compatibility.
3890 if (ID == "max_scratch_backing_memory_byte_size") {
3891 Parser.eatToEndOfStatement();
3892 return false;
3895 SmallString<40> ErrStr;
3896 raw_svector_ostream Err(ErrStr);
3897 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3898 return TokError(Err.str());
3900 Lex();
3902 if (ID == "enable_wavefront_size32") {
3903 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3904 if (!isGFX10())
3905 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3906 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3907 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3908 } else {
3909 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3910 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3914 if (ID == "wavefront_size") {
3915 if (Header.wavefront_size == 5) {
3916 if (!isGFX10())
3917 return TokError("wavefront_size=5 is only allowed on GFX10+");
3918 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3919 return TokError("wavefront_size=5 requires +WavefrontSize32");
3920 } else if (Header.wavefront_size == 6) {
3921 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3922 return TokError("wavefront_size=6 requires +WavefrontSize64");
3926 if (ID == "enable_wgp_mode") {
3927 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3928 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3931 if (ID == "enable_mem_ordered") {
3932 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3933 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3936 if (ID == "enable_fwd_progress") {
3937 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3938 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3941 return false;
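// Illustrative sketch of the legacy directive block parsed below; the field
// shown is one of those validated above and the value is made up:
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t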
3944 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3945 amd_kernel_code_t Header;
3946 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3948 while (true) {
3949 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3950 // will set the current token to EndOfStatement.
3951 while(getLexer().is(AsmToken::EndOfStatement))
3952 Lex();
3954 if (getLexer().isNot(AsmToken::Identifier))
3955 return TokError("expected value identifier or .end_amd_kernel_code_t");
3957 StringRef ID = getLexer().getTok().getIdentifier();
3958 Lex();
3960 if (ID == ".end_amd_kernel_code_t")
3961 break;
3963 if (ParseAMDKernelCodeTValue(ID, Header))
3964 return true;
3967 getTargetStreamer().EmitAMDKernelCodeT(Header);
3969 return false;
3972 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3973 if (getLexer().isNot(AsmToken::Identifier))
3974 return TokError("expected symbol name");
3976 StringRef KernelName = Parser.getTok().getString();
3978 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3979 ELF::STT_AMDGPU_HSA_KERNEL);
3980 Lex();
3981 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3982 KernelScope.initialize(getContext());
3983 return false;
3986 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3987 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3988 return Error(getParser().getTok().getLoc(),
3989 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3990 "architectures");
3993 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3995 std::string ISAVersionStringFromSTI;
3996 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3997 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3999 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4000 return Error(getParser().getTok().getLoc(),
4001 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4002 "arguments specified through the command line");
4005 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4006 Lex();
4008 return false;
4011 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4012 const char *AssemblerDirectiveBegin;
4013 const char *AssemblerDirectiveEnd;
4014 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4015 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4016 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4017 HSAMD::V3::AssemblerDirectiveEnd)
4018 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4019 HSAMD::AssemblerDirectiveEnd);
4021 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4022 return Error(getParser().getTok().getLoc(),
4023 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4024 "not available on non-amdhsa OSes")).str());
4027 std::string HSAMetadataString;
4028 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4029 HSAMetadataString))
4030 return true;
4032 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4033 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4034 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4035 } else {
4036 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4037 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4040 return false;
4043 /// Common code to parse out a block of text (typically YAML) between start and
4044 /// end directives.
4045 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4046 const char *AssemblerDirectiveEnd,
4047 std::string &CollectString) {
4049 raw_string_ostream CollectStream(CollectString);
4051 getLexer().setSkipSpace(false);
4053 bool FoundEnd = false;
4054 while (!getLexer().is(AsmToken::Eof)) {
4055 while (getLexer().is(AsmToken::Space)) {
4056 CollectStream << getLexer().getTok().getString();
4057 Lex();
4060 if (getLexer().is(AsmToken::Identifier)) {
4061 StringRef ID = getLexer().getTok().getIdentifier();
4062 if (ID == AssemblerDirectiveEnd) {
4063 Lex();
4064 FoundEnd = true;
4065 break;
4069 CollectStream << Parser.parseStringToEndOfStatement()
4070 << getContext().getAsmInfo()->getSeparatorString();
4072 Parser.eatToEndOfStatement();
4075 getLexer().setSkipSpace(true);
4077 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4078 return TokError(Twine("expected directive ") +
4079 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4082 CollectStream.flush();
4083 return false;
4086 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4087 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4088 std::string String;
4089 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4090 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4091 return true;
4093 auto PALMetadata = getTargetStreamer().getPALMetadata();
4094 if (!PALMetadata->setFromString(String))
4095 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4096 return false;
4099 /// Parse the assembler directive for old linear-format PAL metadata.
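/// The directive body is a comma-separated list of register/value pairs,
/// e.g. (illustrative, made-up values):
///   <PALMD::AssemblerDirective> 0x2c0a, 0x0, 0x2e12, 0x80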
4100 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4101 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4102 return Error(getParser().getTok().getLoc(),
4103 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4104 "not available on non-amdpal OSes")).str());
4107 auto PALMetadata = getTargetStreamer().getPALMetadata();
4108 PALMetadata->setLegacy();
4109 for (;;) {
4110 uint32_t Key, Value;
4111 if (ParseAsAbsoluteExpression(Key)) {
4112 return TokError(Twine("invalid value in ") +
4113 Twine(PALMD::AssemblerDirective));
4115 if (getLexer().isNot(AsmToken::Comma)) {
4116 return TokError(Twine("expected an even number of values in ") +
4117 Twine(PALMD::AssemblerDirective));
4119 Lex();
4120 if (ParseAsAbsoluteExpression(Value)) {
4121 return TokError(Twine("invalid value in ") +
4122 Twine(PALMD::AssemblerDirective));
4124 PALMetadata->setRegister(Key, Value);
4125 if (getLexer().isNot(AsmToken::Comma))
4126 break;
4127 Lex();
4129 return false;
4132 /// ParseDirectiveAMDGPULDS
4133 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
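/// e.g. (illustrative): .amdgpu_lds lds_block, 4096, 16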
4134 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4135 if (getParser().checkForValidSection())
4136 return true;
4138 StringRef Name;
4139 SMLoc NameLoc = getLexer().getLoc();
4140 if (getParser().parseIdentifier(Name))
4141 return TokError("expected identifier in directive");
4143 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4144 if (parseToken(AsmToken::Comma, "expected ','"))
4145 return true;
4147 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4149 int64_t Size;
4150 SMLoc SizeLoc = getLexer().getLoc();
4151 if (getParser().parseAbsoluteExpression(Size))
4152 return true;
4153 if (Size < 0)
4154 return Error(SizeLoc, "size must be non-negative");
4155 if (Size > LocalMemorySize)
4156 return Error(SizeLoc, "size is too large");
4158 int64_t Align = 4;
4159 if (getLexer().is(AsmToken::Comma)) {
4160 Lex();
4161 SMLoc AlignLoc = getLexer().getLoc();
4162 if (getParser().parseAbsoluteExpression(Align))
4163 return true;
4164 if (Align < 0 || !isPowerOf2_64(Align))
4165 return Error(AlignLoc, "alignment must be a power of two");
4167 // Alignment larger than the size of LDS is possible in theory, as long
4168 // as the linker manages to place the symbol at address 0, but we do want
4169 // to make sure the alignment fits nicely into a 32-bit integer.
4170 if (Align >= 1u << 31)
4171 return Error(AlignLoc, "alignment is too large");
4174 if (parseToken(AsmToken::EndOfStatement,
4175 "unexpected token in '.amdgpu_lds' directive"))
4176 return true;
4178 Symbol->redefineIfPossible();
4179 if (!Symbol->isUndefined())
4180 return Error(NameLoc, "invalid symbol redefinition");
4182 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4183 return false;
4186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4187 StringRef IDVal = DirectiveID.getString();
4189 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4190 if (IDVal == ".amdgcn_target")
4191 return ParseDirectiveAMDGCNTarget();
4193 if (IDVal == ".amdhsa_kernel")
4194 return ParseDirectiveAMDHSAKernel();
4196 // TODO: Restructure/combine with PAL metadata directive.
4197 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4198 return ParseDirectiveHSAMetadata();
4199 } else {
4200 if (IDVal == ".hsa_code_object_version")
4201 return ParseDirectiveHSACodeObjectVersion();
4203 if (IDVal == ".hsa_code_object_isa")
4204 return ParseDirectiveHSACodeObjectISA();
4206 if (IDVal == ".amd_kernel_code_t")
4207 return ParseDirectiveAMDKernelCodeT();
4209 if (IDVal == ".amdgpu_hsa_kernel")
4210 return ParseDirectiveAMDGPUHsaKernel();
4212 if (IDVal == ".amd_amdgpu_isa")
4213 return ParseDirectiveISAVersion();
4215 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4216 return ParseDirectiveHSAMetadata();
4219 if (IDVal == ".amdgpu_lds")
4220 return ParseDirectiveAMDGPULDS();
4222 if (IDVal == PALMD::AssemblerDirectiveBegin)
4223 return ParseDirectivePALMetadataBegin();
4225 if (IDVal == PALMD::AssemblerDirective)
4226 return ParseDirectivePALMetadata();
4228 return true;
4231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4232 unsigned RegNo) const {
4234 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4235 R.isValid(); ++R) {
4236 if (*R == RegNo)
4237 return isGFX9() || isGFX10();
4240 // GFX10 has 2 more SGPRs: 104 and 105.
4241 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4242 R.isValid(); ++R) {
4243 if (*R == RegNo)
4244 return hasSGPR104_SGPR105();
4247 switch (RegNo) {
4248 case AMDGPU::SRC_SHARED_BASE:
4249 case AMDGPU::SRC_SHARED_LIMIT:
4250 case AMDGPU::SRC_PRIVATE_BASE:
4251 case AMDGPU::SRC_PRIVATE_LIMIT:
4252 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4253 return !isCI() && !isSI() && !isVI();
4254 case AMDGPU::TBA:
4255 case AMDGPU::TBA_LO:
4256 case AMDGPU::TBA_HI:
4257 case AMDGPU::TMA:
4258 case AMDGPU::TMA_LO:
4259 case AMDGPU::TMA_HI:
4260 return !isGFX9() && !isGFX10();
4261 case AMDGPU::XNACK_MASK:
4262 case AMDGPU::XNACK_MASK_LO:
4263 case AMDGPU::XNACK_MASK_HI:
4264 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4265 case AMDGPU::SGPR_NULL:
4266 return isGFX10();
4267 default:
4268 break;
4271 if (isCI())
4272 return true;
4274 if (isSI() || isGFX10()) {
4275 // No flat_scr on SI.
4276 // On GFX10 flat scratch is not a valid register operand and can only be
4277 // accessed with s_setreg/s_getreg.
4278 switch (RegNo) {
4279 case AMDGPU::FLAT_SCR:
4280 case AMDGPU::FLAT_SCR_LO:
4281 case AMDGPU::FLAT_SCR_HI:
4282 return false;
4283 default:
4284 return true;
4288 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4289 // SI/CI have.
4290 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4291 R.isValid(); ++R) {
4292 if (*R == RegNo)
4293 return hasSGPR102_SGPR103();
4296 return true;
4299 OperandMatchResultTy
4300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4301 OperandMode Mode) {
4302 // Try to parse with a custom parser
4303 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4305 // If we successfully parsed the operand or if there was an error parsing,
4306 // we are done.
4308 // If we are parsing after we reach EndOfStatement then this means we
4309 // are appending default values to the Operands list. This is only done
4310 // by a custom parser, so we shouldn't continue on to the generic parsing.
4311 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4312 getLexer().is(AsmToken::EndOfStatement))
4313 return ResTy;
4315 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4316 unsigned Prefix = Operands.size();
4317 SMLoc LBraceLoc = getTok().getLoc();
4318 Parser.Lex(); // eat the '['
4320 for (;;) {
4321 ResTy = parseReg(Operands);
4322 if (ResTy != MatchOperand_Success)
4323 return ResTy;
4325 if (getLexer().is(AsmToken::RBrac))
4326 break;
4328 if (getLexer().isNot(AsmToken::Comma))
4329 return MatchOperand_ParseFail;
4330 Parser.Lex();
4333 if (Operands.size() - Prefix > 1) {
4334 Operands.insert(Operands.begin() + Prefix,
4335 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4336 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4337 getTok().getLoc()));
4340 Parser.Lex(); // eat the ']'
4341 return MatchOperand_Success;
4344 return parseRegOrImm(Operands);
4347 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4348 // Clear any forced encodings from the previous instruction.
4349 setForcedEncodingSize(0);
4350 setForcedDPP(false);
4351 setForcedSDWA(false);
4353 if (Name.endswith("_e64")) {
4354 setForcedEncodingSize(64);
4355 return Name.substr(0, Name.size() - 4);
4356 } else if (Name.endswith("_e32")) {
4357 setForcedEncodingSize(32);
4358 return Name.substr(0, Name.size() - 4);
4359 } else if (Name.endswith("_dpp")) {
4360 setForcedDPP(true);
4361 return Name.substr(0, Name.size() - 4);
4362 } else if (Name.endswith("_sdwa")) {
4363 setForcedSDWA(true);
4364 return Name.substr(0, Name.size() - 5);
4366 return Name;
4369 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4370 StringRef Name,
4371 SMLoc NameLoc, OperandVector &Operands) {
4372 // Add the instruction mnemonic
4373 Name = parseMnemonicSuffix(Name);
4374 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4376 bool IsMIMG = Name.startswith("image_");
4378 while (!getLexer().is(AsmToken::EndOfStatement)) {
4379 OperandMode Mode = OperandMode_Default;
4380 if (IsMIMG && isGFX10() && Operands.size() == 2)
4381 Mode = OperandMode_NSA;
4382 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4384 // Eat the comma or space if there is one.
4385 if (getLexer().is(AsmToken::Comma))
4386 Parser.Lex();
4388 switch (Res) {
4389 case MatchOperand_Success: break;
4390 case MatchOperand_ParseFail:
4391 // FIXME: use real operand location rather than the current location.
4392 Error(getLexer().getLoc(), "failed parsing operand.");
4393 while (!getLexer().is(AsmToken::EndOfStatement)) {
4394 Parser.Lex();
4396 return true;
4397 case MatchOperand_NoMatch:
4398 // FIXME: use real operand location rather than the current location.
4399 Error(getLexer().getLoc(), "not a valid operand.");
4400 while (!getLexer().is(AsmToken::EndOfStatement)) {
4401 Parser.Lex();
4403 return true;
4407 return false;
4410 //===----------------------------------------------------------------------===//
4411 // Utility functions
4412 //===----------------------------------------------------------------------===//
4414 OperandMatchResultTy
4415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4417 if (!trySkipId(Prefix, AsmToken::Colon))
4418 return MatchOperand_NoMatch;
4420 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4423 OperandMatchResultTy
4424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4425 AMDGPUOperand::ImmTy ImmTy,
4426 bool (*ConvertResult)(int64_t&)) {
4427 SMLoc S = getLoc();
4428 int64_t Value = 0;
4430 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4431 if (Res != MatchOperand_Success)
4432 return Res;
4434 if (ConvertResult && !ConvertResult(Value)) {
4435 Error(S, "invalid " + StringRef(Prefix) + " value.");
4438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4439 return MatchOperand_Success;
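// Parses a bracketed list of 0/1 values with a prefix, as used by operands
// such as op_sel. For example (illustrative), "op_sel:[0,1,1]" is packed
// little-endian into the immediate 0b110.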
4442 OperandMatchResultTy
4443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4444 OperandVector &Operands,
4445 AMDGPUOperand::ImmTy ImmTy,
4446 bool (*ConvertResult)(int64_t&)) {
4447 SMLoc S = getLoc();
4448 if (!trySkipId(Prefix, AsmToken::Colon))
4449 return MatchOperand_NoMatch;
4451 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4452 return MatchOperand_ParseFail;
4454 unsigned Val = 0;
4455 const unsigned MaxSize = 4;
4457 // FIXME: How to verify the number of elements matches the number of src
4458 // operands?
4459 for (int I = 0; ; ++I) {
4460 int64_t Op;
4461 SMLoc Loc = getLoc();
4462 if (!parseExpr(Op))
4463 return MatchOperand_ParseFail;
4465 if (Op != 0 && Op != 1) {
4466 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4467 return MatchOperand_ParseFail;
4470 Val |= (Op << I);
4472 if (trySkipToken(AsmToken::RBrac))
4473 break;
4475 if (I + 1 == MaxSize) {
4476 Error(getLoc(), "expected a closing square bracket");
4477 return MatchOperand_ParseFail;
4480 if (!skipToken(AsmToken::Comma, "expected a comma"))
4481 return MatchOperand_ParseFail;
4484 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4485 return MatchOperand_Success;
4488 OperandMatchResultTy
4489 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4490 AMDGPUOperand::ImmTy ImmTy) {
4491 int64_t Bit = 0;
4492 SMLoc S = Parser.getTok().getLoc();
4494 // If we are already at the end of the statement, this is a default argument,
4495 // so keep the default value.
4496 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4497 switch(getLexer().getKind()) {
4498 case AsmToken::Identifier: {
4499 StringRef Tok = Parser.getTok().getString();
4500 if (Tok == Name) {
4501 if (Tok == "r128" && isGFX9())
4502 Error(S, "r128 modifier is not supported on this GPU");
4503 if (Tok == "a16" && !isGFX9() && !isGFX10())
4504 Error(S, "a16 modifier is not supported on this GPU");
4505 Bit = 1;
4506 Parser.Lex();
4507 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4508 Bit = 0;
4509 Parser.Lex();
4510 } else {
4511 return MatchOperand_NoMatch;
4513 break;
4515 default:
4516 return MatchOperand_NoMatch;
4520 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4521 return MatchOperand_ParseFail;
4523 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4524 return MatchOperand_Success;
4527 static void addOptionalImmOperand(
4528 MCInst& Inst, const OperandVector& Operands,
4529 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4530 AMDGPUOperand::ImmTy ImmT,
4531 int64_t Default = 0) {
4532 auto i = OptionalIdx.find(ImmT);
4533 if (i != OptionalIdx.end()) {
4534 unsigned Idx = i->second;
4535 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4536 } else {
4537 Inst.addOperand(MCOperand::createImm(Default));
4541 OperandMatchResultTy
4542 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4543 if (getLexer().isNot(AsmToken::Identifier)) {
4544 return MatchOperand_NoMatch;
4546 StringRef Tok = Parser.getTok().getString();
4547 if (Tok != Prefix) {
4548 return MatchOperand_NoMatch;
4551 Parser.Lex();
4552 if (getLexer().isNot(AsmToken::Colon)) {
4553 return MatchOperand_ParseFail;
4556 Parser.Lex();
4557 if (getLexer().isNot(AsmToken::Identifier)) {
4558 return MatchOperand_ParseFail;
4561 Value = Parser.getTok().getString();
4562 return MatchOperand_Success;
4565 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4566 // values to live in a joint format operand in the MCInst encoding.
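// For example (illustrative), "dfmt:13, nfmt:2" is packed as 13 | (2 << 4),
// i.e. 0x2d.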
4567 OperandMatchResultTy
4568 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4569 SMLoc S = Parser.getTok().getLoc();
4570 int64_t Dfmt = 0, Nfmt = 0;
4571 // dfmt and nfmt can appear in either order, and each is optional.
4572 bool GotDfmt = false, GotNfmt = false;
4573 while (!GotDfmt || !GotNfmt) {
4574 if (!GotDfmt) {
4575 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4576 if (Res != MatchOperand_NoMatch) {
4577 if (Res != MatchOperand_Success)
4578 return Res;
4579 if (Dfmt >= 16) {
4580 Error(Parser.getTok().getLoc(), "out of range dfmt");
4581 return MatchOperand_ParseFail;
4583 GotDfmt = true;
4584 Parser.Lex();
4585 continue;
4588 if (!GotNfmt) {
4589 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4590 if (Res != MatchOperand_NoMatch) {
4591 if (Res != MatchOperand_Success)
4592 return Res;
4593 if (Nfmt >= 8) {
4594 Error(Parser.getTok().getLoc(), "out of range nfmt");
4595 return MatchOperand_ParseFail;
4597 GotNfmt = true;
4598 Parser.Lex();
4599 continue;
4602 break;
4604 if (!GotDfmt && !GotNfmt)
4605 return MatchOperand_NoMatch;
4606 auto Format = Dfmt | Nfmt << 4;
4607 Operands.push_back(
4608 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4609 return MatchOperand_Success;
4612 //===----------------------------------------------------------------------===//
4613 // ds
4614 //===----------------------------------------------------------------------===//
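// Illustrative forms handled by the converters below (values made up):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8
//   ds_write_b32 v1, v2 offset:16 gds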
4616 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4617 const OperandVector &Operands) {
4618 OptionalImmIndexMap OptionalIdx;
4620 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4621 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4623 // Add the register arguments
4624 if (Op.isReg()) {
4625 Op.addRegOperands(Inst, 1);
4626 continue;
4629 // Handle optional arguments
4630 OptionalIdx[Op.getImmTy()] = i;
4633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4637 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4640 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4641 bool IsGdsHardcoded) {
4642 OptionalImmIndexMap OptionalIdx;
4644 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4645 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4647 // Add the register arguments
4648 if (Op.isReg()) {
4649 Op.addRegOperands(Inst, 1);
4650 continue;
4653 if (Op.isToken() && Op.getToken() == "gds") {
4654 IsGdsHardcoded = true;
4655 continue;
4658 // Handle optional arguments
4659 OptionalIdx[Op.getImmTy()] = i;
4662 AMDGPUOperand::ImmTy OffsetType =
4663 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4664 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4665 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4666 AMDGPUOperand::ImmTyOffset;
4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4670 if (!IsGdsHardcoded) {
4671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4673 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4676 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4677 OptionalImmIndexMap OptionalIdx;
4679 unsigned OperandIdx[4];
4680 unsigned EnMask = 0;
4681 int SrcIdx = 0;
4683 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4684 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4686 // Add the register arguments
4687 if (Op.isReg()) {
4688 assert(SrcIdx < 4);
4689 OperandIdx[SrcIdx] = Inst.size();
4690 Op.addRegOperands(Inst, 1);
4691 ++SrcIdx;
4692 continue;
4695 if (Op.isOff()) {
4696 assert(SrcIdx < 4);
4697 OperandIdx[SrcIdx] = Inst.size();
4698 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4699 ++SrcIdx;
4700 continue;
4703 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4704 Op.addImmOperands(Inst, 1);
4705 continue;
4708 if (Op.isToken() && Op.getToken() == "done")
4709 continue;
4711 // Handle optional arguments
4712 OptionalIdx[Op.getImmTy()] = i;
4715 assert(SrcIdx == 4);
4717 bool Compr = false;
4718 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4719 Compr = true;
4720 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4721 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4722 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4725 for (auto i = 0; i < SrcIdx; ++i) {
4726 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4727 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4734 Inst.addOperand(MCOperand::createImm(EnMask));
4737 //===----------------------------------------------------------------------===//
4738 // s_waitcnt
4739 //===----------------------------------------------------------------------===//
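// The s_waitcnt operand is either a raw immediate or a list of named counters
// joined by '&' or ',', e.g. (illustrative):
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt expcnt(0)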
4741 static bool
4742 encodeCnt(
4743 const AMDGPU::IsaVersion ISA,
4744 int64_t &IntVal,
4745 int64_t CntVal,
4746 bool Saturate,
4747 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4748 unsigned (*decode)(const IsaVersion &Version, unsigned))
4750 bool Failed = false;
4752 IntVal = encode(ISA, IntVal, CntVal);
4753 if (CntVal != decode(ISA, IntVal)) {
4754 if (Saturate) {
4755 IntVal = encode(ISA, IntVal, -1);
4756 } else {
4757 Failed = true;
4760 return Failed;
4763 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4765 SMLoc CntLoc = getLoc();
4766 StringRef CntName = getTokenStr();
4768 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4769 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4770 return false;
4772 int64_t CntVal;
4773 SMLoc ValLoc = getLoc();
4774 if (!parseExpr(CntVal))
4775 return false;
4777 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4779 bool Failed = true;
4780 bool Sat = CntName.endswith("_sat");
4782 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4783 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4784 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4785 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4786 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4787 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4788 } else {
4789 Error(CntLoc, "invalid counter name " + CntName);
4790 return false;
4793 if (Failed) {
4794 Error(ValLoc, "too large value for " + CntName);
4795 return false;
4798 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4799 return false;
4801 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4802 if (isToken(AsmToken::EndOfStatement)) {
4803 Error(getLoc(), "expected a counter name");
4804 return false;
4808 return true;
4811 OperandMatchResultTy
4812 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4813 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4814 int64_t Waitcnt = getWaitcntBitMask(ISA);
4815 SMLoc S = getLoc();
4817 // If parsing failed, do not return an error code
4818 // to avoid excessive error messages.
4819 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4820 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4821 } else {
4822 parseExpr(Waitcnt);
4825 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4826 return MatchOperand_Success;
4829 bool
4830 AMDGPUOperand::isSWaitCnt() const {
4831 return isImm();
4834 //===----------------------------------------------------------------------===//
4835 // hwreg
4836 //===----------------------------------------------------------------------===//
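// A hwreg operand is either a raw 16-bit immediate or a macro of the form
// hwreg(<name or id>[, <bit offset>, <bit width>]), e.g. (illustrative):
//   s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 2)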
4838 bool
4839 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4840 int64_t &Offset,
4841 int64_t &Width) {
4842 using namespace llvm::AMDGPU::Hwreg;
4844 // The register may be specified by name or using a numeric code
4845 if (isToken(AsmToken::Identifier) &&
4846 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4847 HwReg.IsSymbolic = true;
4848 lex(); // skip message name
4849 } else if (!parseExpr(HwReg.Id)) {
4850 return false;
4853 if (trySkipToken(AsmToken::RParen))
4854 return true;
4856 // parse optional params
4857 return
4858 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4859 parseExpr(Offset) &&
4860 skipToken(AsmToken::Comma, "expected a comma") &&
4861 parseExpr(Width) &&
4862 skipToken(AsmToken::RParen, "expected a closing parenthesis");
4865 bool
4866 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4867 const int64_t Offset,
4868 const int64_t Width,
4869 const SMLoc Loc) {
4871 using namespace llvm::AMDGPU::Hwreg;
4873 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4874 Error(Loc, "specified hardware register is not supported on this GPU");
4875 return false;
4876 } else if (!isValidHwreg(HwReg.Id)) {
4877 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4878 return false;
4879 } else if (!isValidHwregOffset(Offset)) {
4880 Error(Loc, "invalid bit offset: only 5-bit values are legal");
4881 return false;
4882 } else if (!isValidHwregWidth(Width)) {
4883 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4884 return false;
4886 return true;
4889 OperandMatchResultTy
4890 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4891 using namespace llvm::AMDGPU::Hwreg;
4893 int64_t ImmVal = 0;
4894 SMLoc Loc = getLoc();
4896 // If parsing failed, do not return an error code
4897 // to avoid excessive error messages.
4898 if (trySkipId("hwreg", AsmToken::LParen)) {
4899 OperandInfoTy HwReg(ID_UNKNOWN_);
4900 int64_t Offset = OFFSET_DEFAULT_;
4901 int64_t Width = WIDTH_DEFAULT_;
4902 if (parseHwregBody(HwReg, Offset, Width) &&
4903 validateHwreg(HwReg, Offset, Width, Loc)) {
4904 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4906 } else if (parseExpr(ImmVal)) {
4907 if (ImmVal < 0 || !isUInt<16>(ImmVal))
4908 Error(Loc, "invalid immediate: only 16-bit values are legal");
4911 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4912 return MatchOperand_Success;
4915 bool AMDGPUOperand::isHwreg() const {
4916 return isImmTy(ImmTyHwreg);
4919 //===----------------------------------------------------------------------===//
4920 // sendmsg
4921 //===----------------------------------------------------------------------===//
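// A sendmsg operand is either a raw 16-bit immediate or a macro of the form
// sendmsg(<msg>[, <operation>[, <stream>]]), e.g. (illustrative):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)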
4923 bool
4924 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4925 OperandInfoTy &Op,
4926 OperandInfoTy &Stream) {
4927 using namespace llvm::AMDGPU::SendMsg;
4929 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4930 Msg.IsSymbolic = true;
4931 lex(); // skip message name
4932 } else if (!parseExpr(Msg.Id)) {
4933 return false;
4936 if (trySkipToken(AsmToken::Comma)) {
4937 Op.IsDefined = true;
4938 if (isToken(AsmToken::Identifier) &&
4939 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4940 lex(); // skip operation name
4941 } else if (!parseExpr(Op.Id)) {
4942 return false;
4945 if (trySkipToken(AsmToken::Comma)) {
4946 Stream.IsDefined = true;
4947 if (!parseExpr(Stream.Id))
4948 return false;
4952 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4955 bool
4956 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4957 const OperandInfoTy &Op,
4958 const OperandInfoTy &Stream,
4959 const SMLoc S) {
4960 using namespace llvm::AMDGPU::SendMsg;
4962 // Validation strictness depends on whether the message is specified
4963 // in a symbolic or in a numeric form. In the latter case
4964 // only the encoding possibility is checked.
4965 bool Strict = Msg.IsSymbolic;
4967 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4968 Error(S, "invalid message id");
4969 return false;
4970 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4971 Error(S, Op.IsDefined ?
4972 "message does not support operations" :
4973 "missing message operation");
4974 return false;
4975 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4976 Error(S, "invalid operation id");
4977 return false;
4978 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4979 Error(S, "message operation does not support streams");
4980 return false;
4981 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4982 Error(S, "invalid message stream id");
4983 return false;
4985 return true;
4988 OperandMatchResultTy
4989 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4990 using namespace llvm::AMDGPU::SendMsg;
4992 int64_t ImmVal = 0;
4993 SMLoc Loc = getLoc();
4995 // If parsing failed, do not return an error code
4996 // to avoid excessive error messages.
4997 if (trySkipId("sendmsg", AsmToken::LParen)) {
4998 OperandInfoTy Msg(ID_UNKNOWN_);
4999 OperandInfoTy Op(OP_NONE_);
5000 OperandInfoTy Stream(STREAM_ID_NONE_);
5001 if (parseSendMsgBody(Msg, Op, Stream) &&
5002 validateSendMsg(Msg, Op, Stream, Loc)) {
5003 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5005 } else if (parseExpr(ImmVal)) {
5006 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5007 Error(Loc, "invalid immediate: only 16-bit values are legal");
5010 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5011 return MatchOperand_Success;
5014 bool AMDGPUOperand::isSendMsg() const {
5015 return isImmTy(ImmTySendMsg);
5018 //===----------------------------------------------------------------------===//
5019 // v_interp
5020 //===----------------------------------------------------------------------===//
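// The interpolation slot is one of p10, p20 or p0, and the attribute operand
// has the form attr<N>.<chan>, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x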
5022 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5023 if (getLexer().getKind() != AsmToken::Identifier)
5024 return MatchOperand_NoMatch;
5026 StringRef Str = Parser.getTok().getString();
5027 int Slot = StringSwitch<int>(Str)
5028 .Case("p10", 0)
5029 .Case("p20", 1)
5030 .Case("p0", 2)
5031 .Default(-1);
5033 SMLoc S = Parser.getTok().getLoc();
5034 if (Slot == -1)
5035 return MatchOperand_ParseFail;
5037 Parser.Lex();
5038 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5039 AMDGPUOperand::ImmTyInterpSlot));
5040 return MatchOperand_Success;
5043 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5044 if (getLexer().getKind() != AsmToken::Identifier)
5045 return MatchOperand_NoMatch;
5047 StringRef Str = Parser.getTok().getString();
5048 if (!Str.startswith("attr"))
5049 return MatchOperand_NoMatch;
5051 StringRef Chan = Str.take_back(2);
5052 int AttrChan = StringSwitch<int>(Chan)
5053 .Case(".x", 0)
5054 .Case(".y", 1)
5055 .Case(".z", 2)
5056 .Case(".w", 3)
5057 .Default(-1);
5058 if (AttrChan == -1)
5059 return MatchOperand_ParseFail;
5061 Str = Str.drop_back(2).drop_front(4);
5063 uint8_t Attr;
5064 if (Str.getAsInteger(10, Attr))
5065 return MatchOperand_ParseFail;
5067 SMLoc S = Parser.getTok().getLoc();
5068 Parser.Lex();
5069 if (Attr > 63) {
5070 Error(S, "out of bounds attr");
5071 return MatchOperand_Success;
5074 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5077 AMDGPUOperand::ImmTyInterpAttr));
5078 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5079 AMDGPUOperand::ImmTyAttrChan));
5080 return MatchOperand_Success;
5083 //===----------------------------------------------------------------------===//
5084 // exp
5085 //===----------------------------------------------------------------------===//
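// Recognized export targets include mrt0..mrt7, mrtz, null, pos0..pos3
// (pos4 and prim on GFX10) and param0..param31, e.g. (illustrative):
//   exp mrt0 v0, v1, v2, v3 done vm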
5087 void AMDGPUAsmParser::errorExpTgt() {
5088 Error(Parser.getTok().getLoc(), "invalid exp target");
5091 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5092 uint8_t &Val) {
5093 if (Str == "null") {
5094 Val = 9;
5095 return MatchOperand_Success;
5098 if (Str.startswith("mrt")) {
5099 Str = Str.drop_front(3);
5100 if (Str == "z") { // == mrtz
5101 Val = 8;
5102 return MatchOperand_Success;
5105 if (Str.getAsInteger(10, Val))
5106 return MatchOperand_ParseFail;
5108 if (Val > 7)
5109 errorExpTgt();
5111 return MatchOperand_Success;
5114 if (Str.startswith("pos")) {
5115 Str = Str.drop_front(3);
5116 if (Str.getAsInteger(10, Val))
5117 return MatchOperand_ParseFail;
5119 if (Val > 4 || (Val == 4 && !isGFX10()))
5120 errorExpTgt();
5122 Val += 12;
5123 return MatchOperand_Success;
5126 if (isGFX10() && Str == "prim") {
5127 Val = 20;
5128 return MatchOperand_Success;
5131 if (Str.startswith("param")) {
5132 Str = Str.drop_front(5);
5133 if (Str.getAsInteger(10, Val))
5134 return MatchOperand_ParseFail;
5136 if (Val >= 32)
5137 errorExpTgt();
5139 Val += 32;
5140 return MatchOperand_Success;
5143 if (Str.startswith("invalid_target_")) {
5144 Str = Str.drop_front(15);
5145 if (Str.getAsInteger(10, Val))
5146 return MatchOperand_ParseFail;
5148 errorExpTgt();
5149 return MatchOperand_Success;
5152 return MatchOperand_NoMatch;
5155 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5156 uint8_t Val;
5157 StringRef Str = Parser.getTok().getString();
5159 auto Res = parseExpTgtImpl(Str, Val);
5160 if (Res != MatchOperand_Success)
5161 return Res;
5163 SMLoc S = Parser.getTok().getLoc();
5164 Parser.Lex();
5166 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5167 AMDGPUOperand::ImmTyExpTgt));
5168 return MatchOperand_Success;
5171 //===----------------------------------------------------------------------===//
5172 // parser helpers
5173 //===----------------------------------------------------------------------===//
5175 bool
5176 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5177 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5180 bool
5181 AMDGPUAsmParser::isId(const StringRef Id) const {
5182 return isId(getToken(), Id);
5185 bool
5186 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5187 return getTokenKind() == Kind;
5190 bool
5191 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5192 if (isId(Id)) {
5193 lex();
5194 return true;
5196 return false;
5199 bool
5200 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5201 if (isId(Id) && peekToken().is(Kind)) {
5202 lex();
5203 lex();
5204 return true;
5206 return false;
5209 bool
5210 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5211 if (isToken(Kind)) {
5212 lex();
5213 return true;
5215 return false;
5218 bool
5219 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5220 const StringRef ErrMsg) {
5221 if (!trySkipToken(Kind)) {
5222 Error(getLoc(), ErrMsg);
5223 return false;
5225 return true;
5228 bool
5229 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5230 return !getParser().parseAbsoluteExpression(Imm);
5233 bool
5234 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5235 SMLoc S = getLoc();
5237 const MCExpr *Expr;
5238 if (Parser.parseExpression(Expr))
5239 return false;
5241 int64_t IntVal;
5242 if (Expr->evaluateAsAbsolute(IntVal)) {
5243 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5244 } else {
5245 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5247 return true;
5250 bool
5251 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5252 if (isToken(AsmToken::String)) {
5253 Val = getToken().getStringContents();
5254 lex();
5255 return true;
5256 } else {
5257 Error(getLoc(), ErrMsg);
5258 return false;
5262 AsmToken
5263 AMDGPUAsmParser::getToken() const {
5264 return Parser.getTok();
5267 AsmToken
5268 AMDGPUAsmParser::peekToken() {
5269 return getLexer().peekTok();
5272 void
5273 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5274 auto TokCount = getLexer().peekTokens(Tokens);
5276 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5277 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5280 AsmToken::TokenKind
5281 AMDGPUAsmParser::getTokenKind() const {
5282 return getLexer().getKind();
5285 SMLoc
5286 AMDGPUAsmParser::getLoc() const {
5287 return getToken().getLoc();
5290 StringRef
5291 AMDGPUAsmParser::getTokenStr() const {
5292 return getToken().getString();
5295 void
5296 AMDGPUAsmParser::lex() {
5297 Parser.Lex();
5300 //===----------------------------------------------------------------------===//
5301 // swizzle
5302 //===----------------------------------------------------------------------===//
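// The swizzle operand is either a raw 16-bit offset or one of the macro forms
// handled below, e.g. (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")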
5304 LLVM_READNONE
5305 static unsigned
5306 encodeBitmaskPerm(const unsigned AndMask,
5307 const unsigned OrMask,
5308 const unsigned XorMask) {
5309 using namespace llvm::AMDGPU::Swizzle;
5311 return BITMASK_PERM_ENC |
5312 (AndMask << BITMASK_AND_SHIFT) |
5313 (OrMask << BITMASK_OR_SHIFT) |
5314 (XorMask << BITMASK_XOR_SHIFT);
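// Sketch of the hardware semantics: with this encoding, lane i reads from lane
// ((i & AndMask) | OrMask) ^ XorMask; see the ISA documentation for the
// authoritative description.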
5317 bool
5318 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5319 const unsigned MinVal,
5320 const unsigned MaxVal,
5321 const StringRef ErrMsg) {
5322 for (unsigned i = 0; i < OpNum; ++i) {
5323 if (!skipToken(AsmToken::Comma, "expected a comma")){
5324 return false;
5326 SMLoc ExprLoc = Parser.getTok().getLoc();
5327 if (!parseExpr(Op[i])) {
5328 return false;
5330 if (Op[i] < MinVal || Op[i] > MaxVal) {
5331 Error(ExprLoc, ErrMsg);
5332 return false;
5336 return true;
5339 bool
5340 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5341 using namespace llvm::AMDGPU::Swizzle;
5343 int64_t Lane[LANE_NUM];
5344 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5345 "expected a 2-bit lane id")) {
5346 Imm = QUAD_PERM_ENC;
5347 for (unsigned I = 0; I < LANE_NUM; ++I) {
5348 Imm |= Lane[I] << (LANE_SHIFT * I);
5350 return true;
5352 return false;
5355 bool
5356 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5357 using namespace llvm::AMDGPU::Swizzle;
5359 SMLoc S = Parser.getTok().getLoc();
5360 int64_t GroupSize;
5361 int64_t LaneIdx;
5363 if (!parseSwizzleOperands(1, &GroupSize,
5364 2, 32,
5365 "group size must be in the interval [2,32]")) {
5366 return false;
5368 if (!isPowerOf2_64(GroupSize)) {
5369 Error(S, "group size must be a power of two");
5370 return false;
5372 if (parseSwizzleOperands(1, &LaneIdx,
5373 0, GroupSize - 1,
5374 "lane id must be in the interval [0,group size - 1]")) {
5375 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5376 return true;
5378 return false;
5381 bool
5382 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5383 using namespace llvm::AMDGPU::Swizzle;
5385 SMLoc S = Parser.getTok().getLoc();
5386 int64_t GroupSize;
5388 if (!parseSwizzleOperands(1, &GroupSize,
5389 2, 32, "group size must be in the interval [2,32]")) {
5390 return false;
5392 if (!isPowerOf2_64(GroupSize)) {
5393 Error(S, "group size must be a power of two");
5394 return false;
5397 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5398 return true;
5401 bool
5402 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5403 using namespace llvm::AMDGPU::Swizzle;
5405 SMLoc S = Parser.getTok().getLoc();
5406 int64_t GroupSize;
5408 if (!parseSwizzleOperands(1, &GroupSize,
5409 1, 16, "group size must be in the interval [1,16]")) {
5410 return false;
5412 if (!isPowerOf2_64(GroupSize)) {
5413 Error(S, "group size must be a power of two");
5414 return false;
5417 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5418 return true;
5421 bool
5422 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5423 using namespace llvm::AMDGPU::Swizzle;
5425 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5426 return false;
5429 StringRef Ctl;
5430 SMLoc StrLoc = Parser.getTok().getLoc();
5431 if (!parseString(Ctl)) {
5432 return false;
5434 if (Ctl.size() != BITMASK_WIDTH) {
5435 Error(StrLoc, "expected a 5-character mask");
5436 return false;
5439 unsigned AndMask = 0;
5440 unsigned OrMask = 0;
5441 unsigned XorMask = 0;
5443 for (size_t i = 0; i < Ctl.size(); ++i) {
5444 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5445 switch(Ctl[i]) {
5446 default:
5447 Error(StrLoc, "invalid mask");
5448 return false;
5449 case '0':
5450 break;
5451 case '1':
5452 OrMask |= Mask;
5453 break;
5454 case 'p':
5455 AndMask |= Mask;
5456 break;
5457 case 'i':
5458 AndMask |= Mask;
5459 XorMask |= Mask;
5460 break;
5464 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5465 return true;
5468 bool
5469 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5471 SMLoc OffsetLoc = Parser.getTok().getLoc();
5473 if (!parseExpr(Imm)) {
5474 return false;
5476 if (!isUInt<16>(Imm)) {
5477 Error(OffsetLoc, "expected a 16-bit offset");
5478 return false;
5480 return true;
5483 bool
5484 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5485 using namespace llvm::AMDGPU::Swizzle;
5487 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5489 SMLoc ModeLoc = Parser.getTok().getLoc();
5490 bool Ok = false;
5492 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5493 Ok = parseSwizzleQuadPerm(Imm);
5494 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5495 Ok = parseSwizzleBitmaskPerm(Imm);
5496 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5497 Ok = parseSwizzleBroadcast(Imm);
5498 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5499 Ok = parseSwizzleSwap(Imm);
5500 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5501 Ok = parseSwizzleReverse(Imm);
5502 } else {
5503 Error(ModeLoc, "expected a swizzle mode");
5506 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5509 return false;
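// The swizzle operand accepts either a raw 16-bit value, e.g. "offset:0x8000",
// or a symbolic macro such as "offset:swizzle(SWAP,2)" (illustrative examples;
// the symbolic mode names come from the Swizzle IdSymbolic table).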
5512 OperandMatchResultTy
5513 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5514 SMLoc S = Parser.getTok().getLoc();
5515 int64_t Imm = 0;
5517 if (trySkipId("offset")) {
5519 bool Ok = false;
5520 if (skipToken(AsmToken::Colon, "expected a colon")) {
5521 if (trySkipId("swizzle")) {
5522 Ok = parseSwizzleMacro(Imm);
5523 } else {
5524 Ok = parseSwizzleOffset(Imm);
5528 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5530 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5531 } else {
5532 // Swizzle "offset" operand is optional.
5533 // If it is omitted, try parsing other optional operands.
5534 return parseOptionalOpr(Operands);
5538 bool
5539 AMDGPUOperand::isSwizzle() const {
5540 return isImmTy(ImmTySwizzle);
5543 //===----------------------------------------------------------------------===//
5544 // VGPR Index Mode
5545 //===----------------------------------------------------------------------===//
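// Parses the body of a "gpr_idx(...)" macro and returns the resulting mode
// bitmask, e.g. "gpr_idx(SRC0,DST)" (illustrative; the mode names come from
// the VGPRIndexMode IdSymbolic table).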
5547 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5549 using namespace llvm::AMDGPU::VGPRIndexMode;
5551 if (trySkipToken(AsmToken::RParen)) {
5552 return OFF;
5555 int64_t Imm = 0;
5557 while (true) {
5558 unsigned Mode = 0;
5559 SMLoc S = Parser.getTok().getLoc();
5561 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5562 if (trySkipId(IdSymbolic[ModeId])) {
5563 Mode = 1 << ModeId;
5564 break;
5568 if (Mode == 0) {
5569 Error(S, (Imm == 0)?
5570 "expected a VGPR index mode or a closing parenthesis" :
5571 "expected a VGPR index mode");
5572 break;
5575 if (Imm & Mode) {
5576 Error(S, "duplicate VGPR index mode");
5577 break;
5579 Imm |= Mode;
5581 if (trySkipToken(AsmToken::RParen))
5582 break;
5583 if (!skipToken(AsmToken::Comma,
5584 "expected a comma or a closing parenthesis"))
5585 break;
5588 return Imm;
5591 OperandMatchResultTy
5592 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5594 int64_t Imm = 0;
5595 SMLoc S = Parser.getTok().getLoc();
5597 if (getLexer().getKind() == AsmToken::Identifier &&
5598 Parser.getTok().getString() == "gpr_idx" &&
5599 getLexer().peekTok().is(AsmToken::LParen)) {
5601 Parser.Lex();
5602 Parser.Lex();
5604 // If parse failed, trigger an error but do not return error code
5605 // to avoid excessive error messages.
5606 Imm = parseGPRIdxMacro();
5608 } else {
5609 if (getParser().parseAbsoluteExpression(Imm))
5610 return MatchOperand_NoMatch;
5611 if (Imm < 0 || !isUInt<4>(Imm)) {
5612 Error(S, "invalid immediate: only 4-bit values are legal");
5616 Operands.push_back(
5617 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5618 return MatchOperand_Success;
5621 bool AMDGPUOperand::isGPRIdxMode() const {
5622 return isImmTy(ImmTyGprIdxMode);
5625 //===----------------------------------------------------------------------===//
5626 // sopp branch targets
5627 //===----------------------------------------------------------------------===//
5629 OperandMatchResultTy
5630 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5632 // Make sure we are not parsing something
5633 // that looks like a label or an expression but is not.
5634 // This will improve error messages.
5635 if (isRegister() || isModifier())
5636 return MatchOperand_NoMatch;
5638 if (parseExpr(Operands)) {
5640 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5641 assert(Opr.isImm() || Opr.isExpr());
5642 SMLoc Loc = Opr.getStartLoc();
5644 // Currently we do not support arbitrary expressions as branch targets.
5645 // Only labels and absolute expressions are accepted.
5646 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5647 Error(Loc, "expected an absolute expression or a label");
5648 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5649 Error(Loc, "expected a 16-bit signed jump offset");
5653 return MatchOperand_Success; // avoid excessive error messages
5656 //===----------------------------------------------------------------------===//
5657 // Boolean holding registers
5658 //===----------------------------------------------------------------------===//
5660 OperandMatchResultTy
5661 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5662 return parseReg(Operands);
5665 //===----------------------------------------------------------------------===//
5666 // mubuf
5667 //===----------------------------------------------------------------------===//
5669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5670 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5678 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5681 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5682 const OperandVector &Operands,
5683 bool IsAtomic,
5684 bool IsAtomicReturn,
5685 bool IsLds) {
5686 bool IsLdsOpcode = IsLds;
5687 bool HasLdsModifier = false;
5688 OptionalImmIndexMap OptionalIdx;
5689 assert(IsAtomicReturn ? IsAtomic : true);
5690 unsigned FirstOperandIdx = 1;
5692 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5693 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5695 // Add the register arguments
5696 if (Op.isReg()) {
5697 Op.addRegOperands(Inst, 1);
5698 // Insert a tied src for atomic return dst.
5699 // This cannot be postponed as subsequent calls to
5700 // addImmOperands rely on correct number of MC operands.
5701 if (IsAtomicReturn && i == FirstOperandIdx)
5702 Op.addRegOperands(Inst, 1);
5703 continue;
5706 // Handle the case where soffset is an immediate
5707 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5708 Op.addImmOperands(Inst, 1);
5709 continue;
5712 HasLdsModifier |= Op.isLDS();
5714 // Handle tokens like 'offen' which are sometimes hard-coded into the
5715 // asm string. There are no MCInst operands for these.
5716 if (Op.isToken()) {
5717 continue;
5719 assert(Op.isImm());
5721 // Handle optional arguments
5722 OptionalIdx[Op.getImmTy()] = i;
5725 // This is a workaround for an llvm quirk which may result in an
5726 // incorrect instruction selection. Lds and non-lds versions of
5727 // MUBUF instructions are identical except that lds versions
5728 // have a mandatory 'lds' modifier. However, this modifier follows
5729 // optional modifiers, and the llvm asm matcher regards it as an
5730 // optional one as well. As a result, an lds version of an opcode may
5731 // be selected even if the instruction has no 'lds' modifier.
5732 if (IsLdsOpcode && !HasLdsModifier) {
5733 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5734 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5735 Inst.setOpcode(NoLdsOpcode);
5736 IsLdsOpcode = false;
5740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5741 if (!IsAtomic) { // glc is hard-coded.
5742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5746 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5747 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5750 if (isGFX10())
5751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5754 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5755 OptionalImmIndexMap OptionalIdx;
5757 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5758 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5760 // Add the register arguments
5761 if (Op.isReg()) {
5762 Op.addRegOperands(Inst, 1);
5763 continue;
5766 // Handle the case where soffset is an immediate
5767 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5768 Op.addImmOperands(Inst, 1);
5769 continue;
5772 // Handle tokens like 'offen' which are sometimes hard-coded into the
5773 // asm string. There are no MCInst operands for these.
5774 if (Op.isToken()) {
5775 continue;
5777 assert(Op.isImm());
5779 // Handle optional arguments
5780 OptionalIdx[Op.getImmTy()] = i;
5783 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5784 AMDGPUOperand::ImmTyOffset);
5785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5790 if (isGFX10())
5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5794 //===----------------------------------------------------------------------===//
5795 // mimg
5796 //===----------------------------------------------------------------------===//
5798 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5799 bool IsAtomic) {
5800 unsigned I = 1;
5801 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5802 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5803 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5806 if (IsAtomic) {
5807 // Add src, same as dst
5808 assert(Desc.getNumDefs() == 1);
5809 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5812 OptionalImmIndexMap OptionalIdx;
5814 for (unsigned E = Operands.size(); I != E; ++I) {
5815 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5817 // Add the register arguments
5818 if (Op.isReg()) {
5819 Op.addRegOperands(Inst, 1);
5820 } else if (Op.isImmModifier()) {
5821 OptionalIdx[Op.getImmTy()] = I;
5822 } else if (!Op.isToken()) {
5823 llvm_unreachable("unexpected operand type");
5827 bool IsGFX10 = isGFX10();
5829 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5830 if (IsGFX10)
5831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5833 if (IsGFX10)
5834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5840 if (!IsGFX10)
5841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5845 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5846 cvtMIMG(Inst, Operands, true);
5849 //===----------------------------------------------------------------------===//
5850 // smrd
5851 //===----------------------------------------------------------------------===//
5853 bool AMDGPUOperand::isSMRDOffset8() const {
5854 return isImm() && isUInt<8>(getImm());
5857 bool AMDGPUOperand::isSMRDOffset20() const {
5858 return isImm() && isUInt<20>(getImm());
5861 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5862 // 32-bit literals are only supported on CI and we only want to use them
5863 // when the offset is > 8-bits.
5864 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5867 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5868 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5883 //===----------------------------------------------------------------------===//
5884 // vop3
5885 //===----------------------------------------------------------------------===//
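// omod is a 2-bit output-modifier field: 0 = none, 1 = *2, 2 = *4, 3 = /2.
// The converters below map the "mul:N" and "div:N" assembler forms onto it.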
5887 static bool ConvertOmodMul(int64_t &Mul) {
5888 if (Mul != 1 && Mul != 2 && Mul != 4)
5889 return false;
5891 Mul >>= 1;
5892 return true;
5895 static bool ConvertOmodDiv(int64_t &Div) {
5896 if (Div == 1) {
5897 Div = 0;
5898 return true;
5901 if (Div == 2) {
5902 Div = 3;
5903 return true;
5906 return false;
5909 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5910 if (BoundCtrl == 0) {
5911 BoundCtrl = 1;
5912 return true;
5915 if (BoundCtrl == -1) {
5916 BoundCtrl = 0;
5917 return true;
5920 return false;
5923 // Note: the order in this table matches the order of operands in AsmString.
5924 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5925 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5926 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5927 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5928 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5929 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5930 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5931 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5932 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5933 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5934 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5935 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5936 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5937 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5938 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5939 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5940 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5941 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5942 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5943 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5944 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5945 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5946 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5947 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5948 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5949 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5950 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5951 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5952 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5953 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5954 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5955 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5956 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5957 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5958 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5959 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5960 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5961 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5962 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5963 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5964 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5965 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5966 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5967 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5970 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5971 unsigned size = Operands.size();
5972 assert(size > 0);
5974 OperandMatchResultTy res = parseOptionalOpr(Operands);
5976 // This is a hack to enable hardcoded mandatory operands which follow
5977 // optional operands.
5979 // The current design assumes that all operands after the first optional
5980 // operand are also optional. However, the implementation of some instructions
5981 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5983 // To alleviate this problem, we have to (implicitly) parse extra operands
5984 // to make sure the autogenerated parser of custom operands never hits
5985 // hardcoded mandatory operands.
5987 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5989 // We have parsed the first optional operand.
5990 // Parse as many operands as necessary to skip all mandatory operands.
5992 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5993 if (res != MatchOperand_Success ||
5994 getLexer().is(AsmToken::EndOfStatement)) break;
5995 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
5996 res = parseOptionalOpr(Operands);
6000 return res;
6003 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6004 OperandMatchResultTy res;
6005 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6006 // try to parse any optional operand here
6007 if (Op.IsBit) {
6008 res = parseNamedBit(Op.Name, Operands, Op.Type);
6009 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6010 res = parseOModOperand(Operands);
6011 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6012 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6013 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6014 res = parseSDWASel(Operands, Op.Name, Op.Type);
6015 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6016 res = parseSDWADstUnused(Operands);
6017 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6018 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6019 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6020 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6021 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6022 Op.ConvertResult);
6023 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6024 res = parseDim(Operands);
6025 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6026 res = parseDfmtNfmt(Operands);
6027 } else {
6028 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6030 if (res != MatchOperand_NoMatch) {
6031 return res;
6034 return MatchOperand_NoMatch;
6037 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6038 StringRef Name = Parser.getTok().getString();
6039 if (Name == "mul") {
6040 return parseIntWithPrefix("mul", Operands,
6041 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6044 if (Name == "div") {
6045 return parseIntWithPrefix("div", Operands,
6046 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6049 return MatchOperand_NoMatch;
6052 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6053 cvtVOP3P(Inst, Operands);
6055 int Opc = Inst.getOpcode();
6057 int SrcNum;
6058 const int Ops[] = { AMDGPU::OpName::src0,
6059 AMDGPU::OpName::src1,
6060 AMDGPU::OpName::src2 };
6061 for (SrcNum = 0;
6062 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6063 ++SrcNum);
6064 assert(SrcNum > 0);
6066 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6067 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
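// The op_sel bit past the source bits corresponds to the destination; if it
// is set, record it in src0_modifiers as DST_OP_SEL.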
6069 if ((OpSel & (1 << SrcNum)) != 0) {
6070 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6071 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6072 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6076 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6077 // 1. This operand is an input-modifiers operand
6078 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6079 // 2. This is not the last operand
6080 && Desc.NumOperands > (OpNum + 1)
6081 // 3. The next operand is a register-class operand
6082 && Desc.OpInfo[OpNum + 1].RegClass != -1
6083 // 4. The next register is not tied to any other operand
6084 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6087 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6089 OptionalImmIndexMap OptionalIdx;
6090 unsigned Opc = Inst.getOpcode();
6092 unsigned I = 1;
6093 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6094 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6095 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6098 for (unsigned E = Operands.size(); I != E; ++I) {
6099 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6100 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6101 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6102 } else if (Op.isInterpSlot() ||
6103 Op.isInterpAttr() ||
6104 Op.isAttrChan()) {
6105 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6106 } else if (Op.isImmModifier()) {
6107 OptionalIdx[Op.getImmTy()] = I;
6108 } else {
6109 llvm_unreachable("unhandled operand type");
6113 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6121 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6126 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6127 OptionalImmIndexMap &OptionalIdx) {
6128 unsigned Opc = Inst.getOpcode();
6130 unsigned I = 1;
6131 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6132 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6133 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6136 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6137 // This instruction has src modifiers
6138 for (unsigned E = Operands.size(); I != E; ++I) {
6139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6140 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6141 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6142 } else if (Op.isImmModifier()) {
6143 OptionalIdx[Op.getImmTy()] = I;
6144 } else if (Op.isRegOrImm()) {
6145 Op.addRegOrImmOperands(Inst, 1);
6146 } else {
6147 llvm_unreachable("unhandled operand type");
6150 } else {
6151 // No src modifiers
6152 for (unsigned E = Operands.size(); I != E; ++I) {
6153 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6154 if (Op.isMod()) {
6155 OptionalIdx[Op.getImmTy()] = I;
6156 } else {
6157 Op.addRegOrImmOperands(Inst, 1);
6162 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6170 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6171 // these have a src2 register operand that is tied to the dst operand.
6172 // We don't allow modifiers for this operand in the assembler, so
6173 // src2_modifiers should be 0.
6174 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6175 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6176 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6177 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6178 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6179 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6180 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6181 auto it = Inst.begin();
6182 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6183 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6184 ++it;
6185 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6189 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6190 OptionalImmIndexMap OptionalIdx;
6191 cvtVOP3(Inst, Operands, OptionalIdx);
6194 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6195 const OperandVector &Operands) {
6196 OptionalImmIndexMap OptIdx;
6197 const int Opc = Inst.getOpcode();
6198 const MCInstrDesc &Desc = MII.get(Opc);
6200 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6202 cvtVOP3(Inst, Operands, OptIdx);
6204 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6205 assert(!IsPacked);
6206 Inst.addOperand(Inst.getOperand(0));
6209 // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
6210 // instruction, and then figure out where to actually put the modifiers.
6212 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6214 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6215 if (OpSelHiIdx != -1) {
6216 int DefaultVal = IsPacked ? -1 : 0;
6217 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6218 DefaultVal);
6221 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6222 if (NegLoIdx != -1) {
6223 assert(IsPacked);
6224 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6225 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6228 const int Ops[] = { AMDGPU::OpName::src0,
6229 AMDGPU::OpName::src1,
6230 AMDGPU::OpName::src2 };
6231 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6232 AMDGPU::OpName::src1_modifiers,
6233 AMDGPU::OpName::src2_modifiers };
6235 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6237 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6238 unsigned OpSelHi = 0;
6239 unsigned NegLo = 0;
6240 unsigned NegHi = 0;
6242 if (OpSelHiIdx != -1) {
6243 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6246 if (NegLoIdx != -1) {
6247 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6248 NegLo = Inst.getOperand(NegLoIdx).getImm();
6249 NegHi = Inst.getOperand(NegHiIdx).getImm();
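// Fold the per-source bits of op_sel, op_sel_hi, neg_lo and neg_hi into the
// corresponding srcN_modifiers operands, one bit per source.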
6252 for (int J = 0; J < 3; ++J) {
6253 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6254 if (OpIdx == -1)
6255 break;
6257 uint32_t ModVal = 0;
6259 if ((OpSel & (1 << J)) != 0)
6260 ModVal |= SISrcMods::OP_SEL_0;
6262 if ((OpSelHi & (1 << J)) != 0)
6263 ModVal |= SISrcMods::OP_SEL_1;
6265 if ((NegLo & (1 << J)) != 0)
6266 ModVal |= SISrcMods::NEG;
6268 if ((NegHi & (1 << J)) != 0)
6269 ModVal |= SISrcMods::NEG_HI;
6271 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6273 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6277 //===----------------------------------------------------------------------===//
6278 // dpp
6279 //===----------------------------------------------------------------------===//
6281 bool AMDGPUOperand::isDPP8() const {
6282 return isImmTy(ImmTyDPP8);
6285 bool AMDGPUOperand::isDPPCtrl() const {
6286 using namespace AMDGPU::DPP;
6288 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6289 if (result) {
6290 int64_t Imm = getImm();
6291 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6292 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6293 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6294 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6295 (Imm == DppCtrl::WAVE_SHL1) ||
6296 (Imm == DppCtrl::WAVE_ROL1) ||
6297 (Imm == DppCtrl::WAVE_SHR1) ||
6298 (Imm == DppCtrl::WAVE_ROR1) ||
6299 (Imm == DppCtrl::ROW_MIRROR) ||
6300 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6301 (Imm == DppCtrl::BCAST15) ||
6302 (Imm == DppCtrl::BCAST31) ||
6303 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6304 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6306 return false;
6309 //===----------------------------------------------------------------------===//
6310 // mAI
6311 //===----------------------------------------------------------------------===//
6313 bool AMDGPUOperand::isBLGP() const {
6314 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6317 bool AMDGPUOperand::isCBSZ() const {
6318 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6321 bool AMDGPUOperand::isABID() const {
6322 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6325 bool AMDGPUOperand::isS16Imm() const {
6326 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6329 bool AMDGPUOperand::isU16Imm() const {
6330 return isImm() && isUInt<16>(getImm());
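// Parses the MIMG "dim" operand, e.g. "dim:SQ_RSRC_IMG_2D" or the short form
// "dim:2D" (illustrative examples).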
6333 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6334 if (!isGFX10())
6335 return MatchOperand_NoMatch;
6337 SMLoc S = Parser.getTok().getLoc();
6339 if (getLexer().isNot(AsmToken::Identifier))
6340 return MatchOperand_NoMatch;
6341 if (getLexer().getTok().getString() != "dim")
6342 return MatchOperand_NoMatch;
6344 Parser.Lex();
6345 if (getLexer().isNot(AsmToken::Colon))
6346 return MatchOperand_ParseFail;
6348 Parser.Lex();
6350 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6351 // integer.
6352 std::string Token;
6353 if (getLexer().is(AsmToken::Integer)) {
6354 SMLoc Loc = getLexer().getTok().getEndLoc();
6355 Token = getLexer().getTok().getString();
6356 Parser.Lex();
6357 if (getLexer().getTok().getLoc() != Loc)
6358 return MatchOperand_ParseFail;
6360 if (getLexer().isNot(AsmToken::Identifier))
6361 return MatchOperand_ParseFail;
6362 Token += getLexer().getTok().getString();
6364 StringRef DimId = Token;
6365 if (DimId.startswith("SQ_RSRC_IMG_"))
6366 DimId = DimId.substr(12);
6368 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6369 if (!DimInfo)
6370 return MatchOperand_ParseFail;
6372 Parser.Lex();
6374 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6375 AMDGPUOperand::ImmTyDim));
6376 return MatchOperand_Success;
6379 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6380 SMLoc S = Parser.getTok().getLoc();
6381 StringRef Prefix;
6383 if (getLexer().getKind() == AsmToken::Identifier) {
6384 Prefix = Parser.getTok().getString();
6385 } else {
6386 return MatchOperand_NoMatch;
6389 if (Prefix != "dpp8")
6390 return parseDPPCtrl(Operands);
6391 if (!isGFX10())
6392 return MatchOperand_NoMatch;
6394 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
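// Each selector is 3 bits wide; selector i occupies bits [3*i+2 : 3*i] of the
// encoded value and names the lane (within a group of 8) that lane i reads.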
6396 int64_t Sels[8];
6398 Parser.Lex();
6399 if (getLexer().isNot(AsmToken::Colon))
6400 return MatchOperand_ParseFail;
6402 Parser.Lex();
6403 if (getLexer().isNot(AsmToken::LBrac))
6404 return MatchOperand_ParseFail;
6406 Parser.Lex();
6407 if (getParser().parseAbsoluteExpression(Sels[0]))
6408 return MatchOperand_ParseFail;
6409 if (0 > Sels[0] || 7 < Sels[0])
6410 return MatchOperand_ParseFail;
6412 for (size_t i = 1; i < 8; ++i) {
6413 if (getLexer().isNot(AsmToken::Comma))
6414 return MatchOperand_ParseFail;
6416 Parser.Lex();
6417 if (getParser().parseAbsoluteExpression(Sels[i]))
6418 return MatchOperand_ParseFail;
6419 if (0 > Sels[i] || 7 < Sels[i])
6420 return MatchOperand_ParseFail;
6423 if (getLexer().isNot(AsmToken::RBrac))
6424 return MatchOperand_ParseFail;
6425 Parser.Lex();
6427 unsigned DPP8 = 0;
6428 for (size_t i = 0; i < 8; ++i)
6429 DPP8 |= (Sels[i] << (i * 3));
6431 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6432 return MatchOperand_Success;
6435 OperandMatchResultTy
6436 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6437 using namespace AMDGPU::DPP;
6439 SMLoc S = Parser.getTok().getLoc();
6440 StringRef Prefix;
6441 int64_t Int;
6443 if (getLexer().getKind() == AsmToken::Identifier) {
6444 Prefix = Parser.getTok().getString();
6445 } else {
6446 return MatchOperand_NoMatch;
6449 if (Prefix == "row_mirror") {
6450 Int = DppCtrl::ROW_MIRROR;
6451 Parser.Lex();
6452 } else if (Prefix == "row_half_mirror") {
6453 Int = DppCtrl::ROW_HALF_MIRROR;
6454 Parser.Lex();
6455 } else {
6456 // Check to prevent parseDPPCtrlOps from eating invalid tokens
6457 if (Prefix != "quad_perm"
6458 && Prefix != "row_shl"
6459 && Prefix != "row_shr"
6460 && Prefix != "row_ror"
6461 && Prefix != "wave_shl"
6462 && Prefix != "wave_rol"
6463 && Prefix != "wave_shr"
6464 && Prefix != "wave_ror"
6465 && Prefix != "row_bcast"
6466 && Prefix != "row_share"
6467 && Prefix != "row_xmask") {
6468 return MatchOperand_NoMatch;
6471 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6472 return MatchOperand_NoMatch;
6474 if (!isVI() && !isGFX9() &&
6475 (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6476 Prefix == "wave_rol" || Prefix == "wave_ror" ||
6477 Prefix == "row_bcast"))
6478 return MatchOperand_NoMatch;
6480 Parser.Lex();
6481 if (getLexer().isNot(AsmToken::Colon))
6482 return MatchOperand_ParseFail;
6484 if (Prefix == "quad_perm") {
6485 // quad_perm:[%d,%d,%d,%d]
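// Each selector is 2 bits wide; quad_perm:[a,b,c,d] encodes as
// a | (b << 2) | (c << 4) | (d << 6).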
6486 Parser.Lex();
6487 if (getLexer().isNot(AsmToken::LBrac))
6488 return MatchOperand_ParseFail;
6489 Parser.Lex();
6491 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6492 return MatchOperand_ParseFail;
6494 for (int i = 0; i < 3; ++i) {
6495 if (getLexer().isNot(AsmToken::Comma))
6496 return MatchOperand_ParseFail;
6497 Parser.Lex();
6499 int64_t Temp;
6500 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6501 return MatchOperand_ParseFail;
6502 const int shift = i*2 + 2;
6503 Int += (Temp << shift);
6506 if (getLexer().isNot(AsmToken::RBrac))
6507 return MatchOperand_ParseFail;
6508 Parser.Lex();
6509 } else {
6510 // sel:%d
6511 Parser.Lex();
6512 if (getParser().parseAbsoluteExpression(Int))
6513 return MatchOperand_ParseFail;
6515 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6516 Int |= DppCtrl::ROW_SHL0;
6517 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6518 Int |= DppCtrl::ROW_SHR0;
6519 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6520 Int |= DppCtrl::ROW_ROR0;
6521 } else if (Prefix == "wave_shl" && 1 == Int) {
6522 Int = DppCtrl::WAVE_SHL1;
6523 } else if (Prefix == "wave_rol" && 1 == Int) {
6524 Int = DppCtrl::WAVE_ROL1;
6525 } else if (Prefix == "wave_shr" && 1 == Int) {
6526 Int = DppCtrl::WAVE_SHR1;
6527 } else if (Prefix == "wave_ror" && 1 == Int) {
6528 Int = DppCtrl::WAVE_ROR1;
6529 } else if (Prefix == "row_bcast") {
6530 if (Int == 15) {
6531 Int = DppCtrl::BCAST15;
6532 } else if (Int == 31) {
6533 Int = DppCtrl::BCAST31;
6534 } else {
6535 return MatchOperand_ParseFail;
6537 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6538 Int |= DppCtrl::ROW_SHARE_FIRST;
6539 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6540 Int |= DppCtrl::ROW_XMASK_FIRST;
6541 } else {
6542 return MatchOperand_ParseFail;
6547 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6548 return MatchOperand_Success;
6551 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6552 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6556 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6560 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6564 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6571 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6572 OptionalImmIndexMap OptionalIdx;
6574 unsigned I = 1;
6575 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6576 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6577 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6580 int Fi = 0;
6581 for (unsigned E = Operands.size(); I != E; ++I) {
6582 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6583 MCOI::TIED_TO);
6584 if (TiedTo != -1) {
6585 assert((unsigned)TiedTo < Inst.getNumOperands());
6586 // handle tied old or src2 for MAC instructions
6587 Inst.addOperand(Inst.getOperand(TiedTo));
6589 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6590 // Add the register arguments
6591 if (Op.isReg() && validateVccOperand(Op.getReg())) {
6592 // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
6593 // Skip it.
6594 continue;
6597 if (IsDPP8) {
6598 if (Op.isDPP8()) {
6599 Op.addImmOperands(Inst, 1);
6600 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6601 Op.addRegWithFPInputModsOperands(Inst, 2);
6602 } else if (Op.isFI()) {
6603 Fi = Op.getImm();
6604 } else if (Op.isReg()) {
6605 Op.addRegOperands(Inst, 1);
6606 } else {
6607 llvm_unreachable("Invalid operand type");
6609 } else {
6610 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6611 Op.addRegWithFPInputModsOperands(Inst, 2);
6612 } else if (Op.isDPPCtrl()) {
6613 Op.addImmOperands(Inst, 1);
6614 } else if (Op.isImm()) {
6615 // Handle optional arguments
6616 OptionalIdx[Op.getImmTy()] = I;
6617 } else {
6618 llvm_unreachable("Invalid operand type");
6623 if (IsDPP8) {
6624 using namespace llvm::AMDGPU::DPP;
6625 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6626 } else {
6627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6628 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6630 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6636 //===----------------------------------------------------------------------===//
6637 // sdwa
6638 //===----------------------------------------------------------------------===//
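// Parses SDWA select operands such as "dst_sel:WORD_1" or "src0_sel:BYTE_0"
// (illustrative examples).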
6640 OperandMatchResultTy
6641 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6642 AMDGPUOperand::ImmTy Type) {
6643 using namespace llvm::AMDGPU::SDWA;
6645 SMLoc S = Parser.getTok().getLoc();
6646 StringRef Value;
6647 OperandMatchResultTy res;
6649 res = parseStringWithPrefix(Prefix, Value);
6650 if (res != MatchOperand_Success) {
6651 return res;
6654 int64_t Int;
6655 Int = StringSwitch<int64_t>(Value)
6656 .Case("BYTE_0", SdwaSel::BYTE_0)
6657 .Case("BYTE_1", SdwaSel::BYTE_1)
6658 .Case("BYTE_2", SdwaSel::BYTE_2)
6659 .Case("BYTE_3", SdwaSel::BYTE_3)
6660 .Case("WORD_0", SdwaSel::WORD_0)
6661 .Case("WORD_1", SdwaSel::WORD_1)
6662 .Case("DWORD", SdwaSel::DWORD)
6663 .Default(0xffffffff);
6664 Parser.Lex(); // eat last token
6666 if (Int == 0xffffffff) {
6667 return MatchOperand_ParseFail;
6670 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6671 return MatchOperand_Success;
6674 OperandMatchResultTy
6675 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6676 using namespace llvm::AMDGPU::SDWA;
6678 SMLoc S = Parser.getTok().getLoc();
6679 StringRef Value;
6680 OperandMatchResultTy res;
6682 res = parseStringWithPrefix("dst_unused", Value);
6683 if (res != MatchOperand_Success) {
6684 return res;
6687 int64_t Int;
6688 Int = StringSwitch<int64_t>(Value)
6689 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6690 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6691 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6692 .Default(0xffffffff);
6693 Parser.Lex(); // eat last token
6695 if (Int == 0xffffffff) {
6696 return MatchOperand_ParseFail;
6699 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6700 return MatchOperand_Success;
6703 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6704 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6707 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6711 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6712 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6715 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6716 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6719 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6720 uint64_t BasicInstType, bool skipVcc) {
6721 using namespace llvm::AMDGPU::SDWA;
6723 OptionalImmIndexMap OptionalIdx;
6724 bool skippedVcc = false;
6726 unsigned I = 1;
6727 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6728 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6729 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6732 for (unsigned E = Operands.size(); I != E; ++I) {
6733 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6734 if (skipVcc && !skippedVcc && Op.isReg() &&
6735 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6736 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
6737 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6738 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6739 // Skip VCC only if we didn't skip it on previous iteration.
6740 if (BasicInstType == SIInstrFlags::VOP2 &&
6741 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6742 skippedVcc = true;
6743 continue;
6744 } else if (BasicInstType == SIInstrFlags::VOPC &&
6745 Inst.getNumOperands() == 0) {
6746 skippedVcc = true;
6747 continue;
6750 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6751 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6752 } else if (Op.isImm()) {
6753 // Handle optional arguments
6754 OptionalIdx[Op.getImmTy()] = I;
6755 } else {
6756 llvm_unreachable("Invalid operand type");
6758 skippedVcc = false;
6761 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6762 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6763 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6764 // V_NOP_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
6765 switch (BasicInstType) {
6766 case SIInstrFlags::VOP1:
6767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6768 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6769 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6772 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6774 break;
6776 case SIInstrFlags::VOP2:
6777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6778 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6785 break;
6787 case SIInstrFlags::VOPC:
6788 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6792 break;
6794 default:
6795 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6799 // Special case v_mac_{f16, f32}:
6800 // it has a src2 register operand that is tied to the dst operand.
6801 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6802 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6803 auto it = Inst.begin();
6804 std::advance(
6805 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6806 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6810 //===----------------------------------------------------------------------===//
6811 // mAI
6812 //===----------------------------------------------------------------------===//
6814 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6815 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6826 /// Force static initialization.
6827 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6828 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6829 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6832 #define GET_REGISTER_MATCHER
6833 #define GET_MATCHER_IMPLEMENTATION
6834 #define GET_MNEMONIC_SPELL_CHECKER
6835 #include "AMDGPUGenAsmMatcher.inc"
6837 // This function should be defined after the auto-generated include so that we
6838 // have the MatchClassKind enum defined
6839 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6840 unsigned Kind) {
6841 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6842 // But MatchInstructionImpl() expects to meet a token and fails to validate the
6843 // operand. This method checks if we were given an immediate operand but
6844 // expected to get the corresponding token.
6845 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6846 switch (Kind) {
6847 case MCK_addr64:
6848 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6849 case MCK_gds:
6850 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6851 case MCK_lds:
6852 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6853 case MCK_glc:
6854 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6855 case MCK_idxen:
6856 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6857 case MCK_offen:
6858 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6859 case MCK_SSrcB32:
6860 // When operands have expression values, they will return true for isToken,
6861 // because it is not possible to distinguish between a token and an
6862 // expression at parse time. MatchInstructionImpl() will always try to
6863 // match an operand as a token when isToken returns true, and when the
6864 // name of the expression is not a valid token, the match will fail,
6865 // so we need to handle it here.
6866 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6867 case MCK_SSrcF32:
6868 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6869 case MCK_SoppBrTarget:
6870 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6871 case MCK_VReg32OrOff:
6872 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6873 case MCK_InterpSlot:
6874 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6875 case MCK_Attr:
6876 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6877 case MCK_AttrChan:
6878 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6879 default:
6880 return Match_InvalidOperand;
6884 //===----------------------------------------------------------------------===//
6885 // endpgm
6886 //===----------------------------------------------------------------------===//
6888 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6889 SMLoc S = Parser.getTok().getLoc();
6890 int64_t Imm = 0;
6892 if (!parseExpr(Imm)) {
6893 // The operand is optional, if not present default to 0
6894 Imm = 0;
6897 if (!isUInt<16>(Imm)) {
6898 Error(S, "expected a 16-bit value");
6899 return MatchOperand_ParseFail;
6902 Operands.push_back(
6903 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6904 return MatchOperand_Success;
6907 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }