[AMDGPU] Check for immediate SrcC in mfma in AsmParser
[llvm-core.git] / lib / Target / AMDGPU / AsmParser / AMDGPUAsmParser.cpp
blob bf90e36eec6e8c8186f091131ae2d1364e784d8b
1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
68 namespace {
70 class AMDGPUAsmParser;
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
78 class AMDGPUOperand : public MCParsedAsmOperand {
79 enum KindTy {
80 Token,
81 Immediate,
82 Register,
83 Expression
84 } Kind;
86 SMLoc StartLoc, EndLoc;
87 const AMDGPUAsmParser *AsmParser;
89 public:
90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
93 using Ptr = std::unique_ptr<AMDGPUOperand>;
95 struct Modifiers {
96 bool Abs = false;
97 bool Neg = false;
98 bool Sext = false;
100 bool hasFPModifiers() const { return Abs || Neg; }
101 bool hasIntModifiers() const { return Sext; }
102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
104 int64_t getFPModifiersOperand() const {
105 int64_t Operand = 0;
106 Operand |= Abs ? SISrcMods::ABS : 0u;
107 Operand |= Neg ? SISrcMods::NEG : 0u;
108 return Operand;
111 int64_t getIntModifiersOperand() const {
112 int64_t Operand = 0;
113 Operand |= Sext ? SISrcMods::SEXT : 0u;
114 return Operand;
117 int64_t getModifiersOperand() const {
118 assert(!(hasFPModifiers() && hasIntModifiers())
119 && "fp and int modifiers should not be used simultaneously");
120 if (hasFPModifiers()) {
121 return getFPModifiersOperand();
122 } else if (hasIntModifiers()) {
123 return getIntModifiersOperand();
124 } else {
125 return 0;
129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
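  // ImmTy (below) distinguishes the many named immediate-like operands the
  // parser recognizes (offsets, cache-policy bits, DPP/SDWA controls, MAI
  // fields, etc.); plain numeric immediates use ImmTyNone.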
132 enum ImmTy {
133 ImmTyNone,
134 ImmTyGDS,
135 ImmTyLDS,
136 ImmTyOffen,
137 ImmTyIdxen,
138 ImmTyAddr64,
139 ImmTyOffset,
140 ImmTyInstOffset,
141 ImmTyOffset0,
142 ImmTyOffset1,
143 ImmTyDLC,
144 ImmTyGLC,
145 ImmTySLC,
146 ImmTyTFE,
147 ImmTyD16,
148 ImmTyClampSI,
149 ImmTyOModSI,
150 ImmTyDPP8,
151 ImmTyDppCtrl,
152 ImmTyDppRowMask,
153 ImmTyDppBankMask,
154 ImmTyDppBoundCtrl,
155 ImmTyDppFi,
156 ImmTySdwaDstSel,
157 ImmTySdwaSrc0Sel,
158 ImmTySdwaSrc1Sel,
159 ImmTySdwaDstUnused,
160 ImmTyDMask,
161 ImmTyDim,
162 ImmTyUNorm,
163 ImmTyDA,
164 ImmTyR128A16,
165 ImmTyLWE,
166 ImmTyExpTgt,
167 ImmTyExpCompr,
168 ImmTyExpVM,
169 ImmTyFORMAT,
170 ImmTyHwreg,
171 ImmTyOff,
172 ImmTySendMsg,
173 ImmTyInterpSlot,
174 ImmTyInterpAttr,
175 ImmTyAttrChan,
176 ImmTyOpSel,
177 ImmTyOpSelHi,
178 ImmTyNegLo,
179 ImmTyNegHi,
180 ImmTySwizzle,
181 ImmTyGprIdxMode,
182 ImmTyHigh,
183 ImmTyBLGP,
184 ImmTyCBSZ,
185 ImmTyABID,
186 ImmTyEndpgm,
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 Modifiers Mods;
202 struct RegOp {
203 unsigned RegNo;
204 Modifiers Mods;
207 union {
208 TokOp Tok;
209 ImmOp Imm;
210 RegOp Reg;
211 const MCExpr *Expr;
214 public:
215 bool isToken() const override {
216 if (Kind == Token)
217 return true;
219 // When parsing operands, we can't always tell if something was meant to be
220 // a token, like 'gds', or an expression that references a global variable.
221 // In this case, we assume the string is an expression, and if we need to
 222     // interpret it as a token, then we treat the symbol name as the token.
223 return isSymbolRefExpr();
226 bool isSymbolRefExpr() const {
227 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
230 bool isImm() const override {
231 return Kind == Immediate;
234 bool isInlinableImm(MVT type) const;
235 bool isLiteralImm(MVT type) const;
237 bool isRegKind() const {
238 return Kind == Register;
241 bool isReg() const override {
242 return isRegKind() && !hasModifiers();
245 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
246 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
249 bool isRegOrImmWithInt16InputMods() const {
250 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
253 bool isRegOrImmWithInt32InputMods() const {
254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
257 bool isRegOrImmWithInt64InputMods() const {
258 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
261 bool isRegOrImmWithFP16InputMods() const {
262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
265 bool isRegOrImmWithFP32InputMods() const {
266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
269 bool isRegOrImmWithFP64InputMods() const {
270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
273 bool isVReg() const {
274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
275 isRegClass(AMDGPU::VReg_64RegClassID) ||
276 isRegClass(AMDGPU::VReg_96RegClassID) ||
277 isRegClass(AMDGPU::VReg_128RegClassID) ||
278 isRegClass(AMDGPU::VReg_160RegClassID) ||
279 isRegClass(AMDGPU::VReg_256RegClassID) ||
280 isRegClass(AMDGPU::VReg_512RegClassID) ||
281 isRegClass(AMDGPU::VReg_1024RegClassID);
284 bool isVReg32() const {
285 return isRegClass(AMDGPU::VGPR_32RegClassID);
288 bool isVReg32OrOff() const {
289 return isOff() || isVReg32();
292 bool isSDWAOperand(MVT type) const;
293 bool isSDWAFP16Operand() const;
294 bool isSDWAFP32Operand() const;
295 bool isSDWAInt16Operand() const;
296 bool isSDWAInt32Operand() const;
298 bool isImmTy(ImmTy ImmT) const {
299 return isImm() && Imm.Type == ImmT;
302 bool isImmModifier() const {
303 return isImm() && Imm.Type != ImmTyNone;
306 bool isClampSI() const { return isImmTy(ImmTyClampSI); }
307 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
308 bool isDMask() const { return isImmTy(ImmTyDMask); }
309 bool isDim() const { return isImmTy(ImmTyDim); }
310 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
311 bool isDA() const { return isImmTy(ImmTyDA); }
312 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
313 bool isLWE() const { return isImmTy(ImmTyLWE); }
314 bool isOff() const { return isImmTy(ImmTyOff); }
315 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
316 bool isExpVM() const { return isImmTy(ImmTyExpVM); }
317 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
318 bool isOffen() const { return isImmTy(ImmTyOffen); }
319 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
320 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
321 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
322 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
323 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
325 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
326 bool isGDS() const { return isImmTy(ImmTyGDS); }
327 bool isLDS() const { return isImmTy(ImmTyLDS); }
328 bool isDLC() const { return isImmTy(ImmTyDLC); }
329 bool isGLC() const { return isImmTy(ImmTyGLC); }
330 bool isSLC() const { return isImmTy(ImmTySLC); }
331 bool isTFE() const { return isImmTy(ImmTyTFE); }
332 bool isD16() const { return isImmTy(ImmTyD16); }
333 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
334 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
335 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
336 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
337 bool isFI() const { return isImmTy(ImmTyDppFi); }
338 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
339 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
340 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
341 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
342 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
343 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
344 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
345 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
346 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
347 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
348 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
349 bool isHigh() const { return isImmTy(ImmTyHigh); }
351 bool isMod() const {
352 return isClampSI() || isOModSI();
355 bool isRegOrImm() const {
356 return isReg() || isImm();
359 bool isRegClass(unsigned RCID) const;
361 bool isInlineValue() const;
363 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
364 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
367 bool isSCSrcB16() const {
368 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
371 bool isSCSrcV2B16() const {
372 return isSCSrcB16();
375 bool isSCSrcB32() const {
376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
379 bool isSCSrcB64() const {
380 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
383 bool isBoolReg() const;
385 bool isSCSrcF16() const {
386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
389 bool isSCSrcV2F16() const {
390 return isSCSrcF16();
393 bool isSCSrcF32() const {
394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
397 bool isSCSrcF64() const {
398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
401 bool isSSrcB32() const {
402 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
405 bool isSSrcB16() const {
406 return isSCSrcB16() || isLiteralImm(MVT::i16);
409 bool isSSrcV2B16() const {
410 llvm_unreachable("cannot happen");
411 return isSSrcB16();
414 bool isSSrcB64() const {
415 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
416 // See isVSrc64().
417 return isSCSrcB64() || isLiteralImm(MVT::i64);
420 bool isSSrcF32() const {
421 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
424 bool isSSrcF64() const {
425 return isSCSrcB64() || isLiteralImm(MVT::f64);
428 bool isSSrcF16() const {
429 return isSCSrcB16() || isLiteralImm(MVT::f16);
432 bool isSSrcV2F16() const {
433 llvm_unreachable("cannot happen");
434 return isSSrcF16();
437 bool isSSrcOrLdsB32() const {
438 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
439 isLiteralImm(MVT::i32) || isExpr();
442 bool isVCSrcB32() const {
443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
446 bool isVCSrcB64() const {
447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
450 bool isVCSrcB16() const {
451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
454 bool isVCSrcV2B16() const {
455 return isVCSrcB16();
458 bool isVCSrcF32() const {
459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
462 bool isVCSrcF64() const {
463 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
466 bool isVCSrcF16() const {
467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
470 bool isVCSrcV2F16() const {
471 return isVCSrcF16();
474 bool isVSrcB32() const {
475 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
478 bool isVSrcB64() const {
479 return isVCSrcF64() || isLiteralImm(MVT::i64);
482 bool isVSrcB16() const {
483 return isVCSrcF16() || isLiteralImm(MVT::i16);
486 bool isVSrcV2B16() const {
487 return isVSrcB16() || isLiteralImm(MVT::v2i16);
490 bool isVSrcF32() const {
491 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
494 bool isVSrcF64() const {
495 return isVCSrcF64() || isLiteralImm(MVT::f64);
498 bool isVSrcF16() const {
499 return isVCSrcF16() || isLiteralImm(MVT::f16);
502 bool isVSrcV2F16() const {
503 return isVSrcF16() || isLiteralImm(MVT::v2f16);
506 bool isVISrcB32() const {
507 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
510 bool isVISrcB16() const {
511 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
514 bool isVISrcV2B16() const {
515 return isVISrcB16();
518 bool isVISrcF32() const {
519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
522 bool isVISrcF16() const {
523 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
526 bool isVISrcV2F16() const {
527 return isVISrcF16() || isVISrcB32();
530 bool isAISrcB32() const {
531 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
534 bool isAISrcB16() const {
535 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
538 bool isAISrcV2B16() const {
539 return isAISrcB16();
542 bool isAISrcF32() const {
543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
546 bool isAISrcF16() const {
547 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
550 bool isAISrcV2F16() const {
551 return isAISrcF16() || isAISrcB32();
554 bool isAISrc_128B32() const {
555 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
558 bool isAISrc_128B16() const {
559 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
562 bool isAISrc_128V2B16() const {
563 return isAISrc_128B16();
566 bool isAISrc_128F32() const {
567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
570 bool isAISrc_128F16() const {
571 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
574 bool isAISrc_128V2F16() const {
575 return isAISrc_128F16() || isAISrc_128B32();
578 bool isAISrc_512B32() const {
579 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
582 bool isAISrc_512B16() const {
583 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
586 bool isAISrc_512V2B16() const {
587 return isAISrc_512B16();
590 bool isAISrc_512F32() const {
591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
594 bool isAISrc_512F16() const {
595 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
598 bool isAISrc_512V2F16() const {
599 return isAISrc_512F16() || isAISrc_512B32();
602 bool isAISrc_1024B32() const {
603 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
606 bool isAISrc_1024B16() const {
607 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
610 bool isAISrc_1024V2B16() const {
611 return isAISrc_1024B16();
614 bool isAISrc_1024F32() const {
615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
618 bool isAISrc_1024F16() const {
619 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
622 bool isAISrc_1024V2F16() const {
623 return isAISrc_1024F16() || isAISrc_1024B32();
626 bool isKImmFP32() const {
627 return isLiteralImm(MVT::f32);
630 bool isKImmFP16() const {
631 return isLiteralImm(MVT::f16);
634 bool isMem() const override {
635 return false;
638 bool isExpr() const {
639 return Kind == Expression;
642 bool isSoppBrTarget() const {
643 return isExpr() || isImm();
646 bool isSWaitCnt() const;
647 bool isHwreg() const;
648 bool isSendMsg() const;
649 bool isSwizzle() const;
650 bool isSMRDOffset8() const;
651 bool isSMRDOffset20() const;
652 bool isSMRDLiteralOffset() const;
653 bool isDPP8() const;
654 bool isDPPCtrl() const;
655 bool isBLGP() const;
656 bool isCBSZ() const;
657 bool isABID() const;
658 bool isGPRIdxMode() const;
659 bool isS16Imm() const;
660 bool isU16Imm() const;
661 bool isEndpgm() const;
663 StringRef getExpressionAsToken() const {
664 assert(isExpr());
665 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
666 return S->getSymbol().getName();
669 StringRef getToken() const {
670 assert(isToken());
672 if (Kind == Expression)
673 return getExpressionAsToken();
675 return StringRef(Tok.Data, Tok.Length);
678 int64_t getImm() const {
679 assert(isImm());
680 return Imm.Val;
683 ImmTy getImmTy() const {
684 assert(isImm());
685 return Imm.Type;
688 unsigned getReg() const override {
689 assert(isRegKind());
690 return Reg.RegNo;
693 SMLoc getStartLoc() const override {
694 return StartLoc;
697 SMLoc getEndLoc() const override {
698 return EndLoc;
701 SMRange getLocRange() const {
702 return SMRange(StartLoc, EndLoc);
705 Modifiers getModifiers() const {
706 assert(isRegKind() || isImmTy(ImmTyNone));
707 return isRegKind() ? Reg.Mods : Imm.Mods;
710 void setModifiers(Modifiers Mods) {
711 assert(isRegKind() || isImmTy(ImmTyNone));
712 if (isRegKind())
713 Reg.Mods = Mods;
714 else
715 Imm.Mods = Mods;
718 bool hasModifiers() const {
719 return getModifiers().hasModifiers();
722 bool hasFPModifiers() const {
723 return getModifiers().hasFPModifiers();
726 bool hasIntModifiers() const {
727 return getModifiers().hasIntModifiers();
730 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
732 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
734 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
736 template <unsigned Bitwidth>
737 void addKImmFPOperands(MCInst &Inst, unsigned N) const;
739 void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
740 addKImmFPOperands<16>(Inst, N);
743 void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
744 addKImmFPOperands<32>(Inst, N);
747 void addRegOperands(MCInst &Inst, unsigned N) const;
749 void addBoolRegOperands(MCInst &Inst, unsigned N) const {
750 addRegOperands(Inst, N);
753 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
754 if (isRegKind())
755 addRegOperands(Inst, N);
756 else if (isExpr())
757 Inst.addOperand(MCOperand::createExpr(Expr));
758 else
759 addImmOperands(Inst, N);
762 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
763 Modifiers Mods = getModifiers();
764 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
765 if (isRegKind()) {
766 addRegOperands(Inst, N);
767 } else {
768 addImmOperands(Inst, N, false);
772 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
773 assert(!hasIntModifiers());
774 addRegOrImmWithInputModsOperands(Inst, N);
777 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
778 assert(!hasFPModifiers());
779 addRegOrImmWithInputModsOperands(Inst, N);
782 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
783 Modifiers Mods = getModifiers();
784 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
785 assert(isRegKind());
786 addRegOperands(Inst, N);
789 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
790 assert(!hasIntModifiers());
791 addRegWithInputModsOperands(Inst, N);
794 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
795 assert(!hasFPModifiers());
796 addRegWithInputModsOperands(Inst, N);
799 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
800 if (isImm())
801 addImmOperands(Inst, N);
802 else {
803 assert(isExpr());
804 Inst.addOperand(MCOperand::createExpr(Expr));
808 static void printImmTy(raw_ostream& OS, ImmTy Type) {
809 switch (Type) {
810 case ImmTyNone: OS << "None"; break;
811 case ImmTyGDS: OS << "GDS"; break;
812 case ImmTyLDS: OS << "LDS"; break;
813 case ImmTyOffen: OS << "Offen"; break;
814 case ImmTyIdxen: OS << "Idxen"; break;
815 case ImmTyAddr64: OS << "Addr64"; break;
816 case ImmTyOffset: OS << "Offset"; break;
817 case ImmTyInstOffset: OS << "InstOffset"; break;
818 case ImmTyOffset0: OS << "Offset0"; break;
819 case ImmTyOffset1: OS << "Offset1"; break;
820 case ImmTyDLC: OS << "DLC"; break;
821 case ImmTyGLC: OS << "GLC"; break;
822 case ImmTySLC: OS << "SLC"; break;
823 case ImmTyTFE: OS << "TFE"; break;
824 case ImmTyD16: OS << "D16"; break;
825 case ImmTyFORMAT: OS << "FORMAT"; break;
826 case ImmTyClampSI: OS << "ClampSI"; break;
827 case ImmTyOModSI: OS << "OModSI"; break;
828 case ImmTyDPP8: OS << "DPP8"; break;
829 case ImmTyDppCtrl: OS << "DppCtrl"; break;
830 case ImmTyDppRowMask: OS << "DppRowMask"; break;
831 case ImmTyDppBankMask: OS << "DppBankMask"; break;
832 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
833 case ImmTyDppFi: OS << "FI"; break;
834 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
835 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
836 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
837 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
838 case ImmTyDMask: OS << "DMask"; break;
839 case ImmTyDim: OS << "Dim"; break;
840 case ImmTyUNorm: OS << "UNorm"; break;
841 case ImmTyDA: OS << "DA"; break;
842 case ImmTyR128A16: OS << "R128A16"; break;
843 case ImmTyLWE: OS << "LWE"; break;
844 case ImmTyOff: OS << "Off"; break;
845 case ImmTyExpTgt: OS << "ExpTgt"; break;
846 case ImmTyExpCompr: OS << "ExpCompr"; break;
847 case ImmTyExpVM: OS << "ExpVM"; break;
848 case ImmTyHwreg: OS << "Hwreg"; break;
849 case ImmTySendMsg: OS << "SendMsg"; break;
850 case ImmTyInterpSlot: OS << "InterpSlot"; break;
851 case ImmTyInterpAttr: OS << "InterpAttr"; break;
852 case ImmTyAttrChan: OS << "AttrChan"; break;
853 case ImmTyOpSel: OS << "OpSel"; break;
854 case ImmTyOpSelHi: OS << "OpSelHi"; break;
855 case ImmTyNegLo: OS << "NegLo"; break;
856 case ImmTyNegHi: OS << "NegHi"; break;
857 case ImmTySwizzle: OS << "Swizzle"; break;
858 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
859 case ImmTyHigh: OS << "High"; break;
860 case ImmTyBLGP: OS << "BLGP"; break;
861 case ImmTyCBSZ: OS << "CBSZ"; break;
862 case ImmTyABID: OS << "ABID"; break;
863 case ImmTyEndpgm: OS << "Endpgm"; break;
867 void print(raw_ostream &OS) const override {
868 switch (Kind) {
869 case Register:
870 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
871 break;
872 case Immediate:
873 OS << '<' << getImm();
874 if (getImmTy() != ImmTyNone) {
875 OS << " type: "; printImmTy(OS, getImmTy());
877 OS << " mods: " << Imm.Mods << '>';
878 break;
879 case Token:
880 OS << '\'' << getToken() << '\'';
881 break;
882 case Expression:
883 OS << "<expr " << *Expr << '>';
884 break;
888 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
889 int64_t Val, SMLoc Loc,
890 ImmTy Type = ImmTyNone,
891 bool IsFPImm = false) {
892 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
893 Op->Imm.Val = Val;
894 Op->Imm.IsFPImm = IsFPImm;
895 Op->Imm.Type = Type;
896 Op->Imm.Mods = Modifiers();
897 Op->StartLoc = Loc;
898 Op->EndLoc = Loc;
899 return Op;
902 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
903 StringRef Str, SMLoc Loc,
904 bool HasExplicitEncodingSize = true) {
905 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
906 Res->Tok.Data = Str.data();
907 Res->Tok.Length = Str.size();
908 Res->StartLoc = Loc;
909 Res->EndLoc = Loc;
910 return Res;
913 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
914 unsigned RegNo, SMLoc S,
915 SMLoc E) {
916 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
917 Op->Reg.RegNo = RegNo;
918 Op->Reg.Mods = Modifiers();
919 Op->StartLoc = S;
920 Op->EndLoc = E;
921 return Op;
924 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
925 const class MCExpr *Expr, SMLoc S) {
926 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
927 Op->Expr = Expr;
928 Op->StartLoc = S;
929 Op->EndLoc = S;
930 return Op;
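  // Typical use elsewhere in this file (a sketch, not a new API): the parse*
  // routines build operands through these factories, e.g.
  //   Operands.push_back(
  //       AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyOffset));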
934 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
935 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
936 return OS;
939 //===----------------------------------------------------------------------===//
940 // AsmParser
941 //===----------------------------------------------------------------------===//
943 // Holds info related to the current kernel, e.g. count of SGPRs used.
944 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
945 // .amdgpu_hsa_kernel or at EOF.
946 class KernelScopeInfo {
947 int SgprIndexUnusedMin = -1;
948 int VgprIndexUnusedMin = -1;
949 MCContext *Ctx = nullptr;
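  // usesSgprAt/usesVgprAt raise the first-unused-index watermark and publish
  // it through the .kernel.sgpr_count / .kernel.vgpr_count symbols.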
951 void usesSgprAt(int i) {
952 if (i >= SgprIndexUnusedMin) {
953 SgprIndexUnusedMin = ++i;
954 if (Ctx) {
955 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
956 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
961 void usesVgprAt(int i) {
962 if (i >= VgprIndexUnusedMin) {
963 VgprIndexUnusedMin = ++i;
964 if (Ctx) {
965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
966 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
971 public:
972 KernelScopeInfo() = default;
974 void initialize(MCContext &Context) {
975 Ctx = &Context;
976 usesSgprAt(SgprIndexUnusedMin = -1);
977 usesVgprAt(VgprIndexUnusedMin = -1);
980 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
981 switch (RegKind) {
982 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
983 case IS_AGPR: // fall through
984 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
985 default: break;
990 class AMDGPUAsmParser : public MCTargetAsmParser {
991 MCAsmParser &Parser;
993 // Number of extra operands parsed after the first optional operand.
994 // This may be necessary to skip hardcoded mandatory operands.
995 static const unsigned MAX_OPR_LOOKAHEAD = 8;
997 unsigned ForcedEncodingSize = 0;
998 bool ForcedDPP = false;
999 bool ForcedSDWA = false;
1000 KernelScopeInfo KernelScope;
1002 /// @name Auto-generated Match Functions
1003 /// {
1005 #define GET_ASSEMBLER_HEADER
1006 #include "AMDGPUGenAsmMatcher.inc"
1008 /// }
1010 private:
1011 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1012 bool OutOfRangeError(SMRange Range);
1013 /// Calculate VGPR/SGPR blocks required for given target, reserved
1014 /// registers, and user-specified NextFreeXGPR values.
1016 /// \param Features [in] Target features, used for bug corrections.
1017 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1018 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1019 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1020 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1021 /// descriptor field, if valid.
1022 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1023 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1024 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1025 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1026 /// \param VGPRBlocks [out] Result VGPR block count.
1027 /// \param SGPRBlocks [out] Result SGPR block count.
1028 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1029 bool FlatScrUsed, bool XNACKUsed,
1030 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1031 SMRange VGPRRange, unsigned NextFreeSGPR,
1032 SMRange SGPRRange, unsigned &VGPRBlocks,
1033 unsigned &SGPRBlocks);
1034 bool ParseDirectiveAMDGCNTarget();
1035 bool ParseDirectiveAMDHSAKernel();
1036 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1037 bool ParseDirectiveHSACodeObjectVersion();
1038 bool ParseDirectiveHSACodeObjectISA();
1039 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1040 bool ParseDirectiveAMDKernelCodeT();
1041 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1042 bool ParseDirectiveAMDGPUHsaKernel();
1044 bool ParseDirectiveISAVersion();
1045 bool ParseDirectiveHSAMetadata();
1046 bool ParseDirectivePALMetadataBegin();
1047 bool ParseDirectivePALMetadata();
1048 bool ParseDirectiveAMDGPULDS();
1050 /// Common code to parse out a block of text (typically YAML) between start and
1051 /// end directives.
1052 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1053 const char *AssemblerDirectiveEnd,
1054 std::string &CollectString);
1056 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1057 RegisterKind RegKind, unsigned Reg1,
1058 unsigned RegNum);
1059 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1060 unsigned& RegNum, unsigned& RegWidth,
1061 unsigned *DwordRegIndex);
1062 bool isRegister();
1063 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1064 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1065 void initializeGprCountSymbol(RegisterKind RegKind);
1066 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1067 unsigned RegWidth);
1068 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1069 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1070 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1071 bool IsGdsHardcoded);
1073 public:
1074 enum AMDGPUMatchResultTy {
1075 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1077 enum OperandMode {
1078 OperandMode_Default,
1079 OperandMode_NSA,
1082 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1084 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1085 const MCInstrInfo &MII,
1086 const MCTargetOptions &Options)
1087 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1088 MCAsmParserExtension::Initialize(Parser);
1090 if (getFeatureBits().none()) {
1091 // Set default features.
1092 copySTI().ToggleFeature("southern-islands");
1095 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1098 // TODO: make those pre-defined variables read-only.
1099   // Currently there is no suitable machinery in the core llvm-mc for this.
1100 // MCSymbol::isRedefinable is intended for another purpose, and
1101 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1103 MCContext &Ctx = getContext();
1104 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1105 MCSymbol *Sym =
1106 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1107 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1108 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1109 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1110 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1111 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1112 } else {
1113 MCSymbol *Sym =
1114 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1115 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1116 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1117 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1118 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1119 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1121 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1122 initializeGprCountSymbol(IS_VGPR);
1123 initializeGprCountSymbol(IS_SGPR);
1124 } else
1125 KernelScope.initialize(getContext());
1129 bool hasXNACK() const {
1130 return AMDGPU::hasXNACK(getSTI());
1133 bool hasMIMG_R128() const {
1134 return AMDGPU::hasMIMG_R128(getSTI());
1137 bool hasPackedD16() const {
1138 return AMDGPU::hasPackedD16(getSTI());
1141 bool isSI() const {
1142 return AMDGPU::isSI(getSTI());
1145 bool isCI() const {
1146 return AMDGPU::isCI(getSTI());
1149 bool isVI() const {
1150 return AMDGPU::isVI(getSTI());
1153 bool isGFX9() const {
1154 return AMDGPU::isGFX9(getSTI());
1157 bool isGFX10() const {
1158 return AMDGPU::isGFX10(getSTI());
1161 bool hasInv2PiInlineImm() const {
1162 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1165 bool hasFlatOffsets() const {
1166 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1169 bool hasSGPR102_SGPR103() const {
1170 return !isVI() && !isGFX9();
1173 bool hasSGPR104_SGPR105() const {
1174 return isGFX10();
1177 bool hasIntClamp() const {
1178 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1181 AMDGPUTargetStreamer &getTargetStreamer() {
1182 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1183 return static_cast<AMDGPUTargetStreamer &>(TS);
1186 const MCRegisterInfo *getMRI() const {
1187 // We need this const_cast because for some reason getContext() is not const
1188 // in MCAsmParser.
1189 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1192 const MCInstrInfo *getMII() const {
1193 return &MII;
1196 const FeatureBitset &getFeatureBits() const {
1197 return getSTI().getFeatureBits();
1200 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1201 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1202 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1204 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1205 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1206 bool isForcedDPP() const { return ForcedDPP; }
1207 bool isForcedSDWA() const { return ForcedSDWA; }
1208 ArrayRef<unsigned> getMatchedVariants() const;
1210 std::unique_ptr<AMDGPUOperand> parseRegister();
1211 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1212 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1213 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1214 unsigned Kind) override;
1215 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1216 OperandVector &Operands, MCStreamer &Out,
1217 uint64_t &ErrorInfo,
1218 bool MatchingInlineAsm) override;
1219 bool ParseDirective(AsmToken DirectiveID) override;
1220 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1221 OperandMode Mode = OperandMode_Default);
1222 StringRef parseMnemonicSuffix(StringRef Name);
1223 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1224 SMLoc NameLoc, OperandVector &Operands) override;
1225 //bool ProcessInstruction(MCInst &Inst);
1227 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1229 OperandMatchResultTy
1230 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1231 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1232 bool (*ConvertResult)(int64_t &) = nullptr);
1234 OperandMatchResultTy
1235 parseOperandArrayWithPrefix(const char *Prefix,
1236 OperandVector &Operands,
1237 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1238 bool (*ConvertResult)(int64_t&) = nullptr);
1240 OperandMatchResultTy
1241 parseNamedBit(const char *Name, OperandVector &Operands,
1242 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1243 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1244 StringRef &Value);
1246 bool isModifier();
1247 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1248 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1249 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1250 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1251 bool parseSP3NegModifier();
1252 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1253 OperandMatchResultTy parseReg(OperandVector &Operands);
1254 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1255 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1256 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1257 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1258 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1259 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1260 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1262 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1263 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1264 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1265 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1267 bool parseCnt(int64_t &IntVal);
1268 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1269 OperandMatchResultTy parseHwreg(OperandVector &Operands);
1271 private:
1272 struct OperandInfoTy {
1273 int64_t Id;
1274 bool IsSymbolic = false;
1275 bool IsDefined = false;
1277 OperandInfoTy(int64_t Id_) : Id(Id_) {}
1280 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1281 bool validateSendMsg(const OperandInfoTy &Msg,
1282 const OperandInfoTy &Op,
1283 const OperandInfoTy &Stream,
1284 const SMLoc Loc);
1286 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1287 bool validateHwreg(const OperandInfoTy &HwReg,
1288 const int64_t Offset,
1289 const int64_t Width,
1290 const SMLoc Loc);
1292 void errorExpTgt();
1293 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1294 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1296 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1297 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1298 bool validateSOPLiteral(const MCInst &Inst) const;
1299 bool validateConstantBusLimitations(const MCInst &Inst);
1300 bool validateEarlyClobberLimitations(const MCInst &Inst);
1301 bool validateIntClampSupported(const MCInst &Inst);
1302 bool validateMIMGAtomicDMask(const MCInst &Inst);
1303 bool validateMIMGGatherDMask(const MCInst &Inst);
1304 bool validateMIMGDataSize(const MCInst &Inst);
1305 bool validateMIMGAddrSize(const MCInst &Inst);
1306 bool validateMIMGD16(const MCInst &Inst);
1307 bool validateMIMGDim(const MCInst &Inst);
1308 bool validateLdsDirect(const MCInst &Inst);
1309 bool validateOpSel(const MCInst &Inst);
1310 bool validateVccOperand(unsigned Reg) const;
1311 bool validateVOP3Literal(const MCInst &Inst) const;
1312 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1313 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1314 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1316 bool isId(const StringRef Id) const;
1317 bool isId(const AsmToken &Token, const StringRef Id) const;
1318 bool isToken(const AsmToken::TokenKind Kind) const;
1319 bool trySkipId(const StringRef Id);
1320 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1321 bool trySkipToken(const AsmToken::TokenKind Kind);
1322 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1323 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1324 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1325 AsmToken::TokenKind getTokenKind() const;
1326 bool parseExpr(int64_t &Imm);
1327 bool parseExpr(OperandVector &Operands);
1328 StringRef getTokenStr() const;
1329 AsmToken peekToken();
1330 AsmToken getToken() const;
1331 SMLoc getLoc() const;
1332 void lex();
1334 public:
1335 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1336 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1338 OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1339 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1340 OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1341 OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1342 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1343 OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1345 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1346 const unsigned MinVal,
1347 const unsigned MaxVal,
1348 const StringRef ErrMsg);
1349 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1350 bool parseSwizzleOffset(int64_t &Imm);
1351 bool parseSwizzleMacro(int64_t &Imm);
1352 bool parseSwizzleQuadPerm(int64_t &Imm);
1353 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1354 bool parseSwizzleBroadcast(int64_t &Imm);
1355 bool parseSwizzleSwap(int64_t &Imm);
1356 bool parseSwizzleReverse(int64_t &Imm);
1358 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1359 int64_t parseGPRIdxMacro();
1361 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1362 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1363 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1364 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1365 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1367 AMDGPUOperand::Ptr defaultDLC() const;
1368 AMDGPUOperand::Ptr defaultGLC() const;
1369 AMDGPUOperand::Ptr defaultSLC() const;
1371 AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1372 AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1373 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1374 AMDGPUOperand::Ptr defaultFlatOffset() const;
1376 OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1378 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1379 OptionalImmIndexMap &OptionalIdx);
1380 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1381 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1382 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1384 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1386 void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1387 bool IsAtomic = false);
1388 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1390 OperandMatchResultTy parseDim(OperandVector &Operands);
1391 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1392 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1393 AMDGPUOperand::Ptr defaultRowMask() const;
1394 AMDGPUOperand::Ptr defaultBankMask() const;
1395 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1396 AMDGPUOperand::Ptr defaultFI() const;
1397 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1398 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1400 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1401 AMDGPUOperand::ImmTy Type);
1402 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1403 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1404 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1405 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1406 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1407 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1408 uint64_t BasicInstType, bool skipVcc = false);
1410 AMDGPUOperand::Ptr defaultBLGP() const;
1411 AMDGPUOperand::Ptr defaultCBSZ() const;
1412 AMDGPUOperand::Ptr defaultABID() const;
1414 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1415 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1418 struct OptionalOperand {
1419 const char *Name;
1420 AMDGPUOperand::ImmTy Type;
1421 bool IsBit;
1422 bool (*ConvertResult)(int64_t&);
1425 } // end anonymous namespace
1427 // May be called with an integer type of equivalent bitwidth.
1428 static const fltSemantics *getFltSemantics(unsigned Size) {
1429 switch (Size) {
1430 case 4:
1431 return &APFloat::IEEEsingle();
1432 case 8:
1433 return &APFloat::IEEEdouble();
1434 case 2:
1435 return &APFloat::IEEEhalf();
1436 default:
1437 llvm_unreachable("unsupported fp type");
1441 static const fltSemantics *getFltSemantics(MVT VT) {
1442 return getFltSemantics(VT.getSizeInBits() / 8);
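// Pick the fltSemantics implied by an operand's OperandType (taken from the
// instruction description); used when converting literal tokens below.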
1445 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1446 switch (OperandType) {
1447 case AMDGPU::OPERAND_REG_IMM_INT32:
1448 case AMDGPU::OPERAND_REG_IMM_FP32:
1449 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1450 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1451 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1452 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1453 return &APFloat::IEEEsingle();
1454 case AMDGPU::OPERAND_REG_IMM_INT64:
1455 case AMDGPU::OPERAND_REG_IMM_FP64:
1456 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1457 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1458 return &APFloat::IEEEdouble();
1459 case AMDGPU::OPERAND_REG_IMM_INT16:
1460 case AMDGPU::OPERAND_REG_IMM_FP16:
1461 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1462 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1463 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1464 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1465 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1466 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1467 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1468 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1469 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1470 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1471 return &APFloat::IEEEhalf();
1472 default:
1473 llvm_unreachable("unsupported fp type");
1477 //===----------------------------------------------------------------------===//
1478 // Operand
1479 //===----------------------------------------------------------------------===//
1481 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1482 bool Lost;
1484   // Convert the literal to the target operand's floating-point semantics
1485 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1486 APFloat::rmNearestTiesToEven,
1487 &Lost);
1488   // We allow precision loss but not overflow or underflow
1489 if (Status != APFloat::opOK &&
1490 Lost &&
1491 ((Status & APFloat::opOverflow) != 0 ||
1492 (Status & APFloat::opUnderflow) != 0)) {
1493 return false;
1496 return true;
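// A value can be truncated to Size bits without losing information iff it
// fits as either an unsigned or a signed Size-bit integer.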
1499 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1500 return isUIntN(Size, Val) || isIntN(Size, Val);
1503 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1505 // This is a hack to enable named inline values like
1506 // shared_base with both 32-bit and 64-bit operands.
1507 // Note that these values are defined as
1508 // 32-bit operands only.
1509 if (isInlineValue()) {
1510 return true;
1513 if (!isImmTy(ImmTyNone)) {
1514 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1515 return false;
1517 // TODO: We should avoid using host float here. It would be better to
1518 // check the float bit values which is what a few other places do.
1519 // We've had bot failures before due to weird NaN support on mips hosts.
1521 APInt Literal(64, Imm.Val);
1523 if (Imm.IsFPImm) { // We got fp literal token
1524 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1525 return AMDGPU::isInlinableLiteral64(Imm.Val,
1526 AsmParser->hasInv2PiInlineImm());
1529 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1530 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1531 return false;
1533 if (type.getScalarSizeInBits() == 16) {
1534 return AMDGPU::isInlinableLiteral16(
1535 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1536 AsmParser->hasInv2PiInlineImm());
1539 // Check if single precision literal is inlinable
1540 return AMDGPU::isInlinableLiteral32(
1541 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1542 AsmParser->hasInv2PiInlineImm());
1545 // We got int literal token.
1546 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1547 return AMDGPU::isInlinableLiteral64(Imm.Val,
1548 AsmParser->hasInv2PiInlineImm());
1551 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1552 return false;
1555 if (type.getScalarSizeInBits() == 16) {
1556 return AMDGPU::isInlinableLiteral16(
1557 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1558 AsmParser->hasInv2PiInlineImm());
1561 return AMDGPU::isInlinableLiteral32(
1562 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1563 AsmParser->hasInv2PiInlineImm());
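// For reference (a sketch of the inline-constant rules; see AMDGPUBaseInfo):
// integers -16..64 and +-0.5, +-1.0, +-2.0, +-4.0 (plus 1/(2*pi) on targets
// with FeatureInv2PiInlineImm) encode as inline constants.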
1566 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1567 // Check that this immediate can be added as literal
1568 if (!isImmTy(ImmTyNone)) {
1569 return false;
1572 if (!Imm.IsFPImm) {
1573 // We got int literal token.
1575 if (type == MVT::f64 && hasFPModifiers()) {
1576 // Cannot apply fp modifiers to int literals preserving the same semantics
1577 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1578 // disable these cases.
1579 return false;
1582 unsigned Size = type.getSizeInBits();
1583 if (Size == 64)
1584 Size = 32;
1586 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1587 // types.
1588 return isSafeTruncation(Imm.Val, Size);
1591 // We got fp literal token
1592 if (type == MVT::f64) { // Expected 64-bit fp operand
1593     // The low 32 bits of the literal would be set to zeroes, but we accept such literals
1594 return true;
1597 if (type == MVT::i64) { // Expected 64-bit int operand
1598 // We don't allow fp literals in 64-bit integer instructions. It is
1599 // unclear how we should encode them.
1600 return false;
1603 // We allow fp literals with f16x2 operands assuming that the specified
1604 // literal goes into the lower half and the upper half is zero. We also
1605   // require that the literal can be losslessly converted to f16.
1606 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1607 (type == MVT::v2i16)? MVT::i16 : type;
1609 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1610 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1613 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1614 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1617 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1618 if (AsmParser->isVI())
1619 return isVReg32();
1620 else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1621 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1622 else
1623 return false;
1626 bool AMDGPUOperand::isSDWAFP16Operand() const {
1627 return isSDWAOperand(MVT::f16);
1630 bool AMDGPUOperand::isSDWAFP32Operand() const {
1631 return isSDWAOperand(MVT::f32);
1634 bool AMDGPUOperand::isSDWAInt16Operand() const {
1635 return isSDWAOperand(MVT::i16);
1638 bool AMDGPUOperand::isSDWAInt32Operand() const {
1639 return isSDWAOperand(MVT::i32);
1642 bool AMDGPUOperand::isBoolReg() const {
1643 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1644 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1647 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1649 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1650 assert(Size == 2 || Size == 4 || Size == 8);
1652 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1654 if (Imm.Mods.Abs) {
1655 Val &= ~FpSignMask;
1657 if (Imm.Mods.Neg) {
1658 Val ^= FpSignMask;
1661 return Val;
1664 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1665 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1666 Inst.getNumOperands())) {
1667 addLiteralImmOperand(Inst, Imm.Val,
1668 ApplyModifiers &
1669 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1670 } else {
1671 assert(!isImmTy(ImmTyNone) || !hasModifiers());
1672 Inst.addOperand(MCOperand::createImm(Imm.Val));
1676 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1677 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1678 auto OpNum = Inst.getNumOperands();
1679 // Check that this operand accepts literals
1680 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1682 if (ApplyModifiers) {
1683 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1684 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1685 Val = applyInputFPModifiers(Val, Size);
1688 APInt Literal(64, Val);
1689 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1691 if (Imm.IsFPImm) { // We got fp literal token
1692 switch (OpTy) {
1693 case AMDGPU::OPERAND_REG_IMM_INT64:
1694 case AMDGPU::OPERAND_REG_IMM_FP64:
1695 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1696 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1697 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1698 AsmParser->hasInv2PiInlineImm())) {
1699 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1700 return;
1703 // Non-inlineable
1704 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1705 // For fp operands we check if low 32 bits are zeros
1706 if (Literal.getLoBits(32) != 0) {
1707 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1708 "Can't encode literal as exact 64-bit floating-point operand. "
1709 "Low 32-bits will be set to zero");
1712 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1713 return;
1716 // We don't allow fp literals in 64-bit integer instructions. It is
1717 // unclear how we should encode them. This case should be checked earlier
1718 // in predicate methods (isLiteralImm())
1719 llvm_unreachable("fp literal in 64-bit integer instruction.");
1721 case AMDGPU::OPERAND_REG_IMM_INT32:
1722 case AMDGPU::OPERAND_REG_IMM_FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1727 case AMDGPU::OPERAND_REG_IMM_INT16:
1728 case AMDGPU::OPERAND_REG_IMM_FP16:
1729 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1730 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1731 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1732 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1733 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1734 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1735 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1737 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1738 case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1739 bool lost;
1740 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1741     // Convert the literal to the operand type's floating-point semantics
1742 FPLiteral.convert(*getOpFltSemantics(OpTy),
1743 APFloat::rmNearestTiesToEven, &lost);
1744     // We allow precision loss but not overflow or underflow. This should be
1745 // checked earlier in isLiteralImm()
1747 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1748 Inst.addOperand(MCOperand::createImm(ImmVal));
1749 return;
1751 default:
1752 llvm_unreachable("invalid operand size");
1755 return;
1758 // We got int literal token.
1759 // Only sign extend inline immediates.
1760 switch (OpTy) {
1761 case AMDGPU::OPERAND_REG_IMM_INT32:
1762 case AMDGPU::OPERAND_REG_IMM_FP32:
1763 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1764 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1765 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1766 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1767 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1768 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1769 if (isSafeTruncation(Val, 32) &&
1770 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1771 AsmParser->hasInv2PiInlineImm())) {
1772 Inst.addOperand(MCOperand::createImm(Val));
1773 return;
1776 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1777 return;
1779 case AMDGPU::OPERAND_REG_IMM_INT64:
1780 case AMDGPU::OPERAND_REG_IMM_FP64:
1781 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1782 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1783 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1784 Inst.addOperand(MCOperand::createImm(Val));
1785 return;
1788 Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1789 return;
1791 case AMDGPU::OPERAND_REG_IMM_INT16:
1792 case AMDGPU::OPERAND_REG_IMM_FP16:
1793 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1794 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1797 if (isSafeTruncation(Val, 16) &&
1798 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1799 AsmParser->hasInv2PiInlineImm())) {
1800 Inst.addOperand(MCOperand::createImm(Val));
1801 return;
1804 Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1805 return;
1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1808 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1809 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1811 assert(isSafeTruncation(Val, 16));
1812 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1813 AsmParser->hasInv2PiInlineImm()));
1815 Inst.addOperand(MCOperand::createImm(Val));
1816 return;
1818 default:
1819 llvm_unreachable("invalid operand size");
1823 template <unsigned Bitwidth>
1824 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1825 APInt Literal(64, Imm.Val);
1827 if (!Imm.IsFPImm) {
1828 // We got int literal token.
1829 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1830 return;
1833 bool Lost;
1834 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1835 FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1836 APFloat::rmNearestTiesToEven, &Lost);
1837 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1840 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1841 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1844 static bool isInlineValue(unsigned Reg) {
1845 switch (Reg) {
1846 case AMDGPU::SRC_SHARED_BASE:
1847 case AMDGPU::SRC_SHARED_LIMIT:
1848 case AMDGPU::SRC_PRIVATE_BASE:
1849 case AMDGPU::SRC_PRIVATE_LIMIT:
1850 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1851 return true;
1852 case AMDGPU::SRC_VCCZ:
1853 case AMDGPU::SRC_EXECZ:
1854 case AMDGPU::SRC_SCC:
1855 return true;
1856 default:
1857 return false;
1861 bool AMDGPUOperand::isInlineValue() const {
1862 return isRegKind() && ::isInlineValue(getReg());
1865 //===----------------------------------------------------------------------===//
1866 // AsmParser
1867 //===----------------------------------------------------------------------===//
1869 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1870 if (Is == IS_VGPR) {
1871 switch (RegWidth) {
1872 default: return -1;
1873 case 1: return AMDGPU::VGPR_32RegClassID;
1874 case 2: return AMDGPU::VReg_64RegClassID;
1875 case 3: return AMDGPU::VReg_96RegClassID;
1876 case 4: return AMDGPU::VReg_128RegClassID;
1877 case 5: return AMDGPU::VReg_160RegClassID;
1878 case 8: return AMDGPU::VReg_256RegClassID;
1879 case 16: return AMDGPU::VReg_512RegClassID;
1880 case 32: return AMDGPU::VReg_1024RegClassID;
1882 } else if (Is == IS_TTMP) {
1883 switch (RegWidth) {
1884 default: return -1;
1885 case 1: return AMDGPU::TTMP_32RegClassID;
1886 case 2: return AMDGPU::TTMP_64RegClassID;
1887 case 4: return AMDGPU::TTMP_128RegClassID;
1888 case 8: return AMDGPU::TTMP_256RegClassID;
1889 case 16: return AMDGPU::TTMP_512RegClassID;
1891 } else if (Is == IS_SGPR) {
1892 switch (RegWidth) {
1893 default: return -1;
1894 case 1: return AMDGPU::SGPR_32RegClassID;
1895 case 2: return AMDGPU::SGPR_64RegClassID;
1896 case 4: return AMDGPU::SGPR_128RegClassID;
1897 case 8: return AMDGPU::SGPR_256RegClassID;
1898 case 16: return AMDGPU::SGPR_512RegClassID;
1900 } else if (Is == IS_AGPR) {
1901 switch (RegWidth) {
1902 default: return -1;
1903 case 1: return AMDGPU::AGPR_32RegClassID;
1904 case 2: return AMDGPU::AReg_64RegClassID;
1905 case 4: return AMDGPU::AReg_128RegClassID;
1906 case 16: return AMDGPU::AReg_512RegClassID;
1907 case 32: return AMDGPU::AReg_1024RegClassID;
1910 return -1;
1913 static unsigned getSpecialRegForName(StringRef RegName) {
1914 return StringSwitch<unsigned>(RegName)
1915 .Case("exec", AMDGPU::EXEC)
1916 .Case("vcc", AMDGPU::VCC)
1917 .Case("flat_scratch", AMDGPU::FLAT_SCR)
1918 .Case("xnack_mask", AMDGPU::XNACK_MASK)
1919 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1920 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1921 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1922 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1923 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1924 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1925 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1926 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1927 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1928 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1929 .Case("lds_direct", AMDGPU::LDS_DIRECT)
1930 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1931 .Case("m0", AMDGPU::M0)
1932 .Case("vccz", AMDGPU::SRC_VCCZ)
1933 .Case("src_vccz", AMDGPU::SRC_VCCZ)
1934 .Case("execz", AMDGPU::SRC_EXECZ)
1935 .Case("src_execz", AMDGPU::SRC_EXECZ)
1936 .Case("scc", AMDGPU::SRC_SCC)
1937 .Case("src_scc", AMDGPU::SRC_SCC)
1938 .Case("tba", AMDGPU::TBA)
1939 .Case("tma", AMDGPU::TMA)
1940 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1941 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1942 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1943 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1944 .Case("vcc_lo", AMDGPU::VCC_LO)
1945 .Case("vcc_hi", AMDGPU::VCC_HI)
1946 .Case("exec_lo", AMDGPU::EXEC_LO)
1947 .Case("exec_hi", AMDGPU::EXEC_HI)
1948 .Case("tma_lo", AMDGPU::TMA_LO)
1949 .Case("tma_hi", AMDGPU::TMA_HI)
1950 .Case("tba_lo", AMDGPU::TBA_LO)
1951 .Case("tba_hi", AMDGPU::TBA_HI)
1952 .Case("null", AMDGPU::SGPR_NULL)
1953 .Default(0);
1956 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1957 SMLoc &EndLoc) {
1958 auto R = parseRegister();
1959 if (!R) return true;
1960 assert(R->isReg());
1961 RegNo = R->getReg();
1962 StartLoc = R->getStartLoc();
1963 EndLoc = R->getEndLoc();
1964 return false;
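// Extend the register span being parsed with the next register Reg1.
// Known lo/hi pairs of special registers (e.g. vcc_lo, vcc_hi) merge into
// their 64-bit counterparts; for VGPR/SGPR/AGPR/TTMP spans, Reg1 must be
// the next consecutive register, which grows RegWidth by one.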
1967 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1968 RegisterKind RegKind, unsigned Reg1,
1969 unsigned RegNum) {
1970 switch (RegKind) {
1971 case IS_SPECIAL:
1972 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1973 Reg = AMDGPU::EXEC;
1974 RegWidth = 2;
1975 return true;
1977 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
1978 Reg = AMDGPU::FLAT_SCR;
1979 RegWidth = 2;
1980 return true;
1982 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
1983 Reg = AMDGPU::XNACK_MASK;
1984 RegWidth = 2;
1985 return true;
1987 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
1988 Reg = AMDGPU::VCC;
1989 RegWidth = 2;
1990 return true;
1992 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
1993 Reg = AMDGPU::TBA;
1994 RegWidth = 2;
1995 return true;
1997 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
1998 Reg = AMDGPU::TMA;
1999 RegWidth = 2;
2000 return true;
2002 return false;
2003 case IS_VGPR:
2004 case IS_SGPR:
2005 case IS_AGPR:
2006 case IS_TTMP:
2007 if (Reg1 != Reg + RegWidth) {
2008 return false;
2010 RegWidth++;
2011 return true;
2012 default:
2013 llvm_unreachable("unexpected register kind");
2017 static const StringRef Registers[] = {
2018 { "v" },
2019 { "s" },
2020 { "ttmp" },
2021 { "acc" },
2022 { "a" },
2025 bool
2026 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2027 const AsmToken &NextToken) const {
2029 // A list of consecutive registers: [s0,s1,s2,s3]
2030 if (Token.is(AsmToken::LBrac))
2031 return true;
2033 if (!Token.is(AsmToken::Identifier))
2034 return false;
2036 // A single register like s0 or a range of registers like s[0:1]
2038 StringRef RegName = Token.getString();
2040 for (StringRef Reg : Registers) {
2041 if (RegName.startswith(Reg)) {
2042 if (Reg.size() < RegName.size()) {
2043 unsigned RegNum;
2044 // A single register with an index: rXX
2045 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
2046 return true;
2047 } else {
2048 // A range of registers: r[XX:YY].
2049 if (NextToken.is(AsmToken::LBrac))
2050 return true;
2055 return getSpecialRegForName(RegName);
2058 bool
2059 AMDGPUAsmParser::isRegister()
2061 return isRegister(getToken(), peekToken());
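// Parse a register operand into RegKind/Reg/RegNum/RegWidth. Examples of
// the syntax handled below:
//   special names:    vcc, exec, flat_scratch, m0
//   single registers: v0, s7, a3, ttmp2
//   register ranges:  v[0:3], s[4:11] (the ":YY" part is optional)
//   register lists:   [s0,s1,s2,s3]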
2064 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2065 unsigned &RegNum, unsigned &RegWidth,
2066 unsigned *DwordRegIndex) {
2067 if (DwordRegIndex) { *DwordRegIndex = 0; }
2068 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2069 if (getLexer().is(AsmToken::Identifier)) {
2070 StringRef RegName = Parser.getTok().getString();
2071 if ((Reg = getSpecialRegForName(RegName))) {
2072 Parser.Lex();
2073 RegKind = IS_SPECIAL;
2074 } else {
2075 unsigned RegNumIndex = 0;
2076 if (RegName[0] == 'v') {
2077 RegNumIndex = 1;
2078 RegKind = IS_VGPR;
2079 } else if (RegName[0] == 's') {
2080 RegNumIndex = 1;
2081 RegKind = IS_SGPR;
2082 } else if (RegName[0] == 'a') {
2083 RegNumIndex = RegName.startswith("acc") ? 3 : 1;
2084 RegKind = IS_AGPR;
2085 } else if (RegName.startswith("ttmp")) {
2086 RegNumIndex = strlen("ttmp");
2087 RegKind = IS_TTMP;
2088 } else {
2089 return false;
2091 if (RegName.size() > RegNumIndex) {
2092 // Single 32-bit register: vXX.
2093 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
2094 return false;
2095 Parser.Lex();
2096 RegWidth = 1;
2097 } else {
2098 // Range of registers: v[XX:YY]. ":YY" is optional.
2099 Parser.Lex();
2100 int64_t RegLo, RegHi;
2101 if (getLexer().isNot(AsmToken::LBrac))
2102 return false;
2103 Parser.Lex();
2105 if (getParser().parseAbsoluteExpression(RegLo))
2106 return false;
2108 const bool isRBrace = getLexer().is(AsmToken::RBrac);
2109 if (!isRBrace && getLexer().isNot(AsmToken::Colon))
2110 return false;
2111 Parser.Lex();
2113 if (isRBrace) {
2114 RegHi = RegLo;
2115 } else {
2116 if (getParser().parseAbsoluteExpression(RegHi))
2117 return false;
2119 if (getLexer().isNot(AsmToken::RBrac))
2120 return false;
2121 Parser.Lex();
2123 RegNum = (unsigned) RegLo;
2124 RegWidth = (RegHi - RegLo) + 1;
2127 } else if (getLexer().is(AsmToken::LBrac)) {
2128 // List of consecutive registers: [s0,s1,s2,s3]
2129 Parser.Lex();
2130 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
2131 return false;
2132 if (RegWidth != 1)
2133 return false;
2134 RegisterKind RegKind1;
2135 unsigned Reg1, RegNum1, RegWidth1;
2136 do {
2137 if (getLexer().is(AsmToken::Comma)) {
2138 Parser.Lex();
2139 } else if (getLexer().is(AsmToken::RBrac)) {
2140 Parser.Lex();
2141 break;
2142 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
2143 if (RegWidth1 != 1) {
2144 return false;
2146 if (RegKind1 != RegKind) {
2147 return false;
2149 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
2150 return false;
2152 } else {
2153 return false;
2155 } while (true);
2156 } else {
2157 return false;
2159 switch (RegKind) {
2160 case IS_SPECIAL:
2161 RegNum = 0;
2162 RegWidth = 1;
2163 break;
2164 case IS_VGPR:
2165 case IS_SGPR:
2166 case IS_AGPR:
2167 case IS_TTMP:
2169 unsigned Size = 1;
2170 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2171 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
2172 Size = std::min(RegWidth, 4u);
2174 if (RegNum % Size != 0)
2175 return false;
2176 if (DwordRegIndex) { *DwordRegIndex = RegNum; }
2177 RegNum = RegNum / Size;
2178 int RCID = getRegClass(RegKind, RegWidth);
2179 if (RCID == -1)
2180 return false;
2181 const MCRegisterClass RC = TRI->getRegClass(RCID);
2182 if (RegNum >= RC.getNumRegs())
2183 return false;
2184 Reg = RC.getRegister(RegNum);
2185 break;
2188 default:
2189 llvm_unreachable("unexpected register kind");
2192 if (!subtargetHasRegister(*TRI, Reg))
2193 return false;
2194 return true;
2197 Optional<StringRef>
2198 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2199 switch (RegKind) {
2200 case IS_VGPR:
2201 return StringRef(".amdgcn.next_free_vgpr");
2202 case IS_SGPR:
2203 return StringRef(".amdgcn.next_free_sgpr");
2204 default:
2205 return None;
2209 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2210 auto SymbolName = getGprCountSymbolName(RegKind);
2211 assert(SymbolName && "initializing invalid register kind");
2212 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2213 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
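// Bump the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the
// highest dword register index referenced so far; e.g. a use of s[4:7]
// raises .amdgcn.next_free_sgpr to at least 8.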
2216 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2217 unsigned DwordRegIndex,
2218 unsigned RegWidth) {
2219 // Symbols are only defined for GCN targets
2220 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2221 return true;
2223 auto SymbolName = getGprCountSymbolName(RegKind);
2224 if (!SymbolName)
2225 return true;
2226 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2228 int64_t NewMax = DwordRegIndex + RegWidth - 1;
2229 int64_t OldCount;
2231 if (!Sym->isVariable())
2232 return !Error(getParser().getTok().getLoc(),
2233 ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2234 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2235 return !Error(
2236 getParser().getTok().getLoc(),
2237 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2239 if (OldCount <= NewMax)
2240 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2242 return true;
2245 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2246 const auto &Tok = Parser.getTok();
2247 SMLoc StartLoc = Tok.getLoc();
2248 SMLoc EndLoc = Tok.getEndLoc();
2249 RegisterKind RegKind;
2250 unsigned Reg, RegNum, RegWidth, DwordRegIndex;
2252 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
2253 //FIXME: improve error messages (bug 41303).
2254 Error(StartLoc, "not a valid operand.");
2255 return nullptr;
2257 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2258 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
2259 return nullptr;
2260 } else
2261 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
2262 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2265 OperandMatchResultTy
2266 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2267 // TODO: add syntactic sugar for 1/(2*PI)
2269 assert(!isRegister());
2270 assert(!isModifier());
2272 const auto& Tok = getToken();
2273 const auto& NextTok = peekToken();
2274 bool IsReal = Tok.is(AsmToken::Real);
2275 SMLoc S = getLoc();
2276 bool Negate = false;
2278 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2279 lex();
2280 IsReal = true;
2281 Negate = true;
2284 if (IsReal) {
2285 // Floating-point expressions are not supported.
2286 // Only floating-point literals with an optional
2287 // sign are accepted.
2289 StringRef Num = getTokenStr();
2290 lex();
2292 APFloat RealVal(APFloat::IEEEdouble());
2293 auto roundMode = APFloat::rmNearestTiesToEven;
2294 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
2295 return MatchOperand_ParseFail;
2297 if (Negate)
2298 RealVal.changeSign();
2300 Operands.push_back(
2301 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2302 AMDGPUOperand::ImmTyNone, true));
2304 return MatchOperand_Success;
2306 } else {
2307 int64_t IntVal;
2308 const MCExpr *Expr;
2309 SMLoc S = getLoc();
2311 if (HasSP3AbsModifier) {
2312 // This is a workaround for handling expressions
2313 // as arguments of SP3 'abs' modifier, for example:
2314 // |1.0|
2315 // |-1|
2316 // |1+x|
2317 // This syntax is not compatible with syntax of standard
2318 // MC expressions (due to the trailing '|').
2319 SMLoc EndLoc;
2320 if (getParser().parsePrimaryExpr(Expr, EndLoc))
2321 return MatchOperand_ParseFail;
2322 } else {
2323 if (Parser.parseExpression(Expr))
2324 return MatchOperand_ParseFail;
2327 if (Expr->evaluateAsAbsolute(IntVal)) {
2328 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2329 } else {
2330 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2333 return MatchOperand_Success;
2336 return MatchOperand_NoMatch;
2339 OperandMatchResultTy
2340 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2341 if (!isRegister())
2342 return MatchOperand_NoMatch;
2344 if (auto R = parseRegister()) {
2345 assert(R->isReg());
2346 Operands.push_back(std::move(R));
2347 return MatchOperand_Success;
2349 return MatchOperand_ParseFail;
2352 OperandMatchResultTy
2353 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2354 auto res = parseReg(Operands);
2355 if (res != MatchOperand_NoMatch) {
2356 return res;
2357 } else if (isModifier()) {
2358 return MatchOperand_NoMatch;
2359 } else {
2360 return parseImm(Operands, HasSP3AbsMod);
2364 bool
2365 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2366 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2367 const auto &str = Token.getString();
2368 return str == "abs" || str == "neg" || str == "sext";
2370 return false;
2373 bool
2374 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2375 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2378 bool
2379 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2380 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2383 bool
2384 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2385 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2388 // Check if this is an operand modifier or an opcode modifier
2389 // which may look like an expression but is not. We should
2390 // avoid parsing these modifiers as expressions. Currently
2391 // recognized sequences are:
2392 // |...|
2393 // abs(...)
2394 // neg(...)
2395 // sext(...)
2396 // -reg
2397 // -|...|
2398 // -abs(...)
2399 // name:...
2400 // Note that simple opcode modifiers like 'gds' may be parsed as
2401 // expressions; this is a special case. See getExpressionAsToken.
2403 bool
2404 AMDGPUAsmParser::isModifier() {
2406 AsmToken Tok = getToken();
2407 AsmToken NextToken[2];
2408 peekTokens(NextToken);
2410 return isOperandModifier(Tok, NextToken[0]) ||
2411 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2412 isOpcodeModifierWithVal(Tok, NextToken[0]);
2415 // Check if the current token is an SP3 'neg' modifier.
2416 // Currently this modifier is allowed in the following contexts:
2418 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2419 // 2. Before an 'abs' modifier: -abs(...)
2420 // 3. Before an SP3 'abs' modifier: -|...|
2422 // In all other cases "-" is handled as a part
2423 // of an expression that follows the sign.
2425 // Note: When "-" is followed by an integer literal,
2426 // it is interpreted as integer negation rather than
2427 // a floating-point NEG modifier applied to the literal.
2428 // Besides being counter-intuitive, such use of the floating-point
2429 // NEG modifier would have resulted in different meanings
2430 // of integer literals used with VOP1/2/C and VOP3,
2431 // for example:
2432 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2433 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2434 // Negative fp literals with a preceding "-" are
2435 // handled likewise, for uniformity.
2437 bool
2438 AMDGPUAsmParser::parseSP3NegModifier() {
2440 AsmToken NextToken[2];
2441 peekTokens(NextToken);
2443 if (isToken(AsmToken::Minus) &&
2444 (isRegister(NextToken[0], NextToken[1]) ||
2445 NextToken[0].is(AsmToken::Pipe) ||
2446 isId(NextToken[0], "abs"))) {
2447 lex();
2448 return true;
2451 return false;
2454 OperandMatchResultTy
2455 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2456 bool AllowImm) {
2457 bool Neg, SP3Neg;
2458 bool Abs, SP3Abs;
2459 SMLoc Loc;
2461 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2462 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2463 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2464 return MatchOperand_ParseFail;
2467 SP3Neg = parseSP3NegModifier();
2469 Loc = getLoc();
2470 Neg = trySkipId("neg");
2471 if (Neg && SP3Neg) {
2472 Error(Loc, "expected register or immediate");
2473 return MatchOperand_ParseFail;
2475 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2476 return MatchOperand_ParseFail;
2478 Abs = trySkipId("abs");
2479 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2480 return MatchOperand_ParseFail;
2482 Loc = getLoc();
2483 SP3Abs = trySkipToken(AsmToken::Pipe);
2484 if (Abs && SP3Abs) {
2485 Error(Loc, "expected register or immediate");
2486 return MatchOperand_ParseFail;
2489 OperandMatchResultTy Res;
2490 if (AllowImm) {
2491 Res = parseRegOrImm(Operands, SP3Abs);
2492 } else {
2493 Res = parseReg(Operands);
2495 if (Res != MatchOperand_Success) {
2496 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2499 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2500 return MatchOperand_ParseFail;
2501 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2502 return MatchOperand_ParseFail;
2503 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2504 return MatchOperand_ParseFail;
2506 AMDGPUOperand::Modifiers Mods;
2507 Mods.Abs = Abs || SP3Abs;
2508 Mods.Neg = Neg || SP3Neg;
2510 if (Mods.hasFPModifiers()) {
2511 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2512 if (Op.isExpr()) {
2513 Error(Op.getStartLoc(), "expected an absolute expression");
2514 return MatchOperand_ParseFail;
2516 Op.setModifiers(Mods);
2518 return MatchOperand_Success;
2521 OperandMatchResultTy
2522 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2523 bool AllowImm) {
2524 bool Sext = trySkipId("sext");
2525 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2526 return MatchOperand_ParseFail;
2528 OperandMatchResultTy Res;
2529 if (AllowImm) {
2530 Res = parseRegOrImm(Operands);
2531 } else {
2532 Res = parseReg(Operands);
2534 if (Res != MatchOperand_Success) {
2535 return Sext? MatchOperand_ParseFail : Res;
2538 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2539 return MatchOperand_ParseFail;
2541 AMDGPUOperand::Modifiers Mods;
2542 Mods.Sext = Sext;
2544 if (Mods.hasIntModifiers()) {
2545 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2546 if (Op.isExpr()) {
2547 Error(Op.getStartLoc(), "expected an absolute expression");
2548 return MatchOperand_ParseFail;
2550 Op.setModifiers(Mods);
2553 return MatchOperand_Success;
2556 OperandMatchResultTy
2557 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2558 return parseRegOrImmWithFPInputMods(Operands, false);
2561 OperandMatchResultTy
2562 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2563 return parseRegOrImmWithIntInputMods(Operands, false);
2566 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2567 auto Loc = getLoc();
2568 if (trySkipId("off")) {
2569 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2570 AMDGPUOperand::ImmTyOff, false));
2571 return MatchOperand_Success;
2574 if (!isRegister())
2575 return MatchOperand_NoMatch;
2577 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2578 if (Reg) {
2579 Operands.push_back(std::move(Reg));
2580 return MatchOperand_Success;
2583 return MatchOperand_ParseFail;
2587 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2588 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2590 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2591 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2592 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2593 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2594 return Match_InvalidOperand;
2596 if ((TSFlags & SIInstrFlags::VOP3) &&
2597 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2598 getForcedEncodingSize() != 64)
2599 return Match_PreferE32;
2601 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2602 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2603 // v_mac_f32/16 allow only dst_sel == DWORD;
2604 auto OpNum =
2605 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2606 const auto &Op = Inst.getOperand(OpNum);
2607 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2608 return Match_InvalidOperand;
2612 return Match_Success;
2615 // What asm variants we should check
2616 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2617 if (getForcedEncodingSize() == 32) {
2618 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2619 return makeArrayRef(Variants);
2622 if (isForcedVOP3()) {
2623 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2624 return makeArrayRef(Variants);
2627 if (isForcedSDWA()) {
2628 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2629 AMDGPUAsmVariants::SDWA9};
2630 return makeArrayRef(Variants);
2633 if (isForcedDPP()) {
2634 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2635 return makeArrayRef(Variants);
2638 static const unsigned Variants[] = {
2639 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2640 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2643 return makeArrayRef(Variants);
2646 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2647 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2648 const unsigned Num = Desc.getNumImplicitUses();
2649 for (unsigned i = 0; i < Num; ++i) {
2650 unsigned Reg = Desc.ImplicitUses[i];
2651 switch (Reg) {
2652 case AMDGPU::FLAT_SCR:
2653 case AMDGPU::VCC:
2654 case AMDGPU::VCC_LO:
2655 case AMDGPU::VCC_HI:
2656 case AMDGPU::M0:
2657 case AMDGPU::SGPR_NULL:
2658 return Reg;
2659 default:
2660 break;
2663 return AMDGPU::NoRegister;
2666 // NB: This code is correct only when used to check constant
2667 // bus limitations because GFX7 supports no f16 inline constants.
2668 // Note that there are no cases when a GFX7 opcode violates
2669 // constant bus limitations due to the use of an f16 constant.
2670 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2671 unsigned OpIdx) const {
2672 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2674 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2675 return false;
2678 const MCOperand &MO = Inst.getOperand(OpIdx);
2680 int64_t Val = MO.getImm();
2681 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2683 switch (OpSize) { // expected operand size
2684 case 8:
2685 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2686 case 4:
2687 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2688 case 2: {
2689 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2690 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2691 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2692 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2693 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2694 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2695 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2696 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2697 } else {
2698 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2701 default:
2702 llvm_unreachable("invalid operand size");
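// An operand occupies the constant bus if it is a literal (an immediate
// that cannot be encoded as an inline constant), an SGPR, or an expression.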
2706 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2707 const MCOperand &MO = Inst.getOperand(OpIdx);
2708 if (MO.isImm()) {
2709 return !isInlineConstant(Inst, OpIdx);
2711 return !MO.isReg() ||
2712 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2715 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2716 const unsigned Opcode = Inst.getOpcode();
2717 const MCInstrDesc &Desc = MII.get(Opcode);
2718 unsigned ConstantBusUseCount = 0;
2719 unsigned NumLiterals = 0;
2720 unsigned LiteralSize;
2722 if (Desc.TSFlags &
2723 (SIInstrFlags::VOPC |
2724 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2725 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2726 SIInstrFlags::SDWA)) {
2727 // Check special imm operands (used by madmk, etc)
2728 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2729 ++ConstantBusUseCount;
2732 SmallDenseSet<unsigned> SGPRsUsed;
2733 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2734 if (SGPRUsed != AMDGPU::NoRegister) {
2735 SGPRsUsed.insert(SGPRUsed);
2736 ++ConstantBusUseCount;
2739 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2740 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2743 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2745 for (int OpIdx : OpIndices) {
2746 if (OpIdx == -1) break;
2748 const MCOperand &MO = Inst.getOperand(OpIdx);
2749 if (usesConstantBus(Inst, OpIdx)) {
2750 if (MO.isReg()) {
2751 const unsigned Reg = mc2PseudoReg(MO.getReg());
2752 // Pairs of registers with a partial intersection like these:
2753 // s0, s[0:1]
2754 // flat_scratch_lo, flat_scratch
2755 // flat_scratch_lo, flat_scratch_hi
2756 // are theoretically valid but they are disabled anyway.
2757 // Note that this code mimics SIInstrInfo::verifyInstruction
2758 if (!SGPRsUsed.count(Reg)) {
2759 SGPRsUsed.insert(Reg);
2760 ++ConstantBusUseCount;
2762 } else { // Expression or a literal
2764 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2765 continue; // special operand like VINTERP attr_chan
2767 // An instruction may use only one literal.
2768 // This has been validated in a previous step.
2769 // See validateVOP3Literal.
2770 // This literal may be used in more than one operand.
2771 // If all these operands are of the same size,
2772 // this literal counts as one scalar value.
2773 // Otherwise it counts as 2 scalar values.
2774 // See "GFX10 Shader Programming", section 3.6.2.3.
2776 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2777 if (Size < 4) Size = 4;
2779 if (NumLiterals == 0) {
2780 NumLiterals = 1;
2781 LiteralSize = Size;
2782 } else if (LiteralSize != Size) {
2783 NumLiterals = 2;
2789 ConstantBusUseCount += NumLiterals;
2791 if (isGFX10())
2792 return ConstantBusUseCount <= 2;
2794 return ConstantBusUseCount <= 1;
2797 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2798 const unsigned Opcode = Inst.getOpcode();
2799 const MCInstrDesc &Desc = MII.get(Opcode);
2801 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2802 if (DstIdx == -1 ||
2803 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2804 return true;
2807 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2809 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2810 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2811 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2813 assert(DstIdx != -1);
2814 const MCOperand &Dst = Inst.getOperand(DstIdx);
2815 assert(Dst.isReg());
2816 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2818 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2820 for (int SrcIdx : SrcIndices) {
2821 if (SrcIdx == -1) break;
2822 const MCOperand &Src = Inst.getOperand(SrcIdx);
2823 if (Src.isReg()) {
2824 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2825 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2826 return false;
2831 return true;
2834 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2836 const unsigned Opc = Inst.getOpcode();
2837 const MCInstrDesc &Desc = MII.get(Opc);
2839 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2840 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2841 assert(ClampIdx != -1);
2842 return Inst.getOperand(ClampIdx).getImm() == 0;
2845 return true;
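// Check that the MIMG vdata register size matches the number of enabled
// dmask channels (or 4 for gather4), halved (rounding up) for packed d16
// data and extended by one dword when tfe is set. For example, dmask=0x7
// with tfe requires a 4-dword vdata register.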
2848 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2850 const unsigned Opc = Inst.getOpcode();
2851 const MCInstrDesc &Desc = MII.get(Opc);
2853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2854 return true;
2856 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2857 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2858 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2860 assert(VDataIdx != -1);
2861 assert(DMaskIdx != -1);
2862 assert(TFEIdx != -1);
2864 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2865 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2866 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2867 if (DMask == 0)
2868 DMask = 1;
2870 unsigned DataSize =
2871 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2872 if (hasPackedD16()) {
2873 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2874 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2875 DataSize = (DataSize + 1) / 2;
2878 return (VDataSize / 4) == DataSize + TFESize;
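// GFX10 only: check that the size of the image address operand matches what
// the dim, gradient, coordinate and lod/clamp/mip arguments require. For
// non-NSA encodings the required size is rounded up to 8 or 16 dwords.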
2881 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2882 const unsigned Opc = Inst.getOpcode();
2883 const MCInstrDesc &Desc = MII.get(Opc);
2885 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2886 return true;
2888 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2889 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2890 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2891 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2892 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2893 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2895 assert(VAddr0Idx != -1);
2896 assert(SrsrcIdx != -1);
2897 assert(DimIdx != -1);
2898 assert(SrsrcIdx > VAddr0Idx);
2900 unsigned Dim = Inst.getOperand(DimIdx).getImm();
2901 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2902 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2903 unsigned VAddrSize =
2904 IsNSA ? SrsrcIdx - VAddr0Idx
2905 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2907 unsigned AddrSize = BaseOpcode->NumExtraArgs +
2908 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
2909 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
2910 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
2911 if (!IsNSA) {
2912 if (AddrSize > 8)
2913 AddrSize = 16;
2914 else if (AddrSize > 4)
2915 AddrSize = 8;
2918 return VAddrSize == AddrSize;
2921 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2923 const unsigned Opc = Inst.getOpcode();
2924 const MCInstrDesc &Desc = MII.get(Opc);
2926 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2927 return true;
2928 if (!Desc.mayLoad() || !Desc.mayStore())
2929 return true; // Not atomic
2931 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2932 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2934 // This is an incomplete check because image_atomic_cmpswap
2935 // may only use 0x3 and 0xf while other atomic operations
2936 // may use 0x1 and 0x3. However these limitations are
2937 // verified when we check that dmask matches dst size.
2938 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
2941 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2943 const unsigned Opc = Inst.getOpcode();
2944 const MCInstrDesc &Desc = MII.get(Opc);
2946 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2947 return true;
2949 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2950 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2952 // GATHER4 instructions use dmask in a different fashion compared to
2953 // other MIMG instructions. The only useful DMASK values are
2954 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2955 // (red,red,red,red) etc.) The ISA document doesn't mention
2956 // this.
2957 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2960 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2962 const unsigned Opc = Inst.getOpcode();
2963 const MCInstrDesc &Desc = MII.get(Opc);
2965 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2966 return true;
2968 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2969 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2970 if (isCI() || isSI())
2971 return false;
2974 return true;
2977 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
2978 const unsigned Opc = Inst.getOpcode();
2979 const MCInstrDesc &Desc = MII.get(Opc);
2981 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2982 return true;
2984 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2985 if (DimIdx < 0)
2986 return true;
2988 long Imm = Inst.getOperand(DimIdx).getImm();
2989 if (Imm < 0 || Imm >= 8)
2990 return false;
2992 return true;
2995 static bool IsRevOpcode(const unsigned Opcode)
2997 switch (Opcode) {
2998 case AMDGPU::V_SUBREV_F32_e32:
2999 case AMDGPU::V_SUBREV_F32_e64:
3000 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3001 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3002 case AMDGPU::V_SUBREV_F32_e32_vi:
3003 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3004 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3005 case AMDGPU::V_SUBREV_F32_e64_vi:
3007 case AMDGPU::V_SUBREV_I32_e32:
3008 case AMDGPU::V_SUBREV_I32_e64:
3009 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3010 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3012 case AMDGPU::V_SUBBREV_U32_e32:
3013 case AMDGPU::V_SUBBREV_U32_e64:
3014 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3015 case AMDGPU::V_SUBBREV_U32_e32_vi:
3016 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3017 case AMDGPU::V_SUBBREV_U32_e64_vi:
3019 case AMDGPU::V_SUBREV_U32_e32:
3020 case AMDGPU::V_SUBREV_U32_e64:
3021 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3022 case AMDGPU::V_SUBREV_U32_e32_vi:
3023 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3024 case AMDGPU::V_SUBREV_U32_e64_vi:
3026 case AMDGPU::V_SUBREV_F16_e32:
3027 case AMDGPU::V_SUBREV_F16_e64:
3028 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3029 case AMDGPU::V_SUBREV_F16_e32_vi:
3030 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3031 case AMDGPU::V_SUBREV_F16_e64_vi:
3033 case AMDGPU::V_SUBREV_U16_e32:
3034 case AMDGPU::V_SUBREV_U16_e64:
3035 case AMDGPU::V_SUBREV_U16_e32_vi:
3036 case AMDGPU::V_SUBREV_U16_e64_vi:
3038 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3039 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3040 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3042 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3043 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3045 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3046 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3048 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3049 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3051 case AMDGPU::V_LSHRREV_B32_e32:
3052 case AMDGPU::V_LSHRREV_B32_e64:
3053 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3054 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3055 case AMDGPU::V_LSHRREV_B32_e32_vi:
3056 case AMDGPU::V_LSHRREV_B32_e64_vi:
3057 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3058 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3060 case AMDGPU::V_ASHRREV_I32_e32:
3061 case AMDGPU::V_ASHRREV_I32_e64:
3062 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3063 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3064 case AMDGPU::V_ASHRREV_I32_e32_vi:
3065 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3066 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3067 case AMDGPU::V_ASHRREV_I32_e64_vi:
3069 case AMDGPU::V_LSHLREV_B32_e32:
3070 case AMDGPU::V_LSHLREV_B32_e64:
3071 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3072 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3073 case AMDGPU::V_LSHLREV_B32_e32_vi:
3074 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3075 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3076 case AMDGPU::V_LSHLREV_B32_e64_vi:
3078 case AMDGPU::V_LSHLREV_B16_e32:
3079 case AMDGPU::V_LSHLREV_B16_e64:
3080 case AMDGPU::V_LSHLREV_B16_e32_vi:
3081 case AMDGPU::V_LSHLREV_B16_e64_vi:
3082 case AMDGPU::V_LSHLREV_B16_gfx10:
3084 case AMDGPU::V_LSHRREV_B16_e32:
3085 case AMDGPU::V_LSHRREV_B16_e64:
3086 case AMDGPU::V_LSHRREV_B16_e32_vi:
3087 case AMDGPU::V_LSHRREV_B16_e64_vi:
3088 case AMDGPU::V_LSHRREV_B16_gfx10:
3090 case AMDGPU::V_ASHRREV_I16_e32:
3091 case AMDGPU::V_ASHRREV_I16_e64:
3092 case AMDGPU::V_ASHRREV_I16_e32_vi:
3093 case AMDGPU::V_ASHRREV_I16_e64_vi:
3094 case AMDGPU::V_ASHRREV_I16_gfx10:
3096 case AMDGPU::V_LSHLREV_B64:
3097 case AMDGPU::V_LSHLREV_B64_gfx10:
3098 case AMDGPU::V_LSHLREV_B64_vi:
3100 case AMDGPU::V_LSHRREV_B64:
3101 case AMDGPU::V_LSHRREV_B64_gfx10:
3102 case AMDGPU::V_LSHRREV_B64_vi:
3104 case AMDGPU::V_ASHRREV_I64:
3105 case AMDGPU::V_ASHRREV_I64_gfx10:
3106 case AMDGPU::V_ASHRREV_I64_vi:
3108 case AMDGPU::V_PK_LSHLREV_B16:
3109 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3110 case AMDGPU::V_PK_LSHLREV_B16_vi:
3112 case AMDGPU::V_PK_LSHRREV_B16:
3113 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3114 case AMDGPU::V_PK_LSHRREV_B16_vi:
3115 case AMDGPU::V_PK_ASHRREV_I16:
3116 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3117 case AMDGPU::V_PK_ASHRREV_I16_vi:
3118 return true;
3119 default:
3120 return false;
3124 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3126 using namespace SIInstrFlags;
3127 const unsigned Opcode = Inst.getOpcode();
3128 const MCInstrDesc &Desc = MII.get(Opcode);
3130 // The lds_direct register is defined so that it can be used only
3131 // with 9-bit source operands. Ignore encodings that do not accept these.
3132 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3133 return true;
3135 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3136 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3137 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3139 const int SrcIndices[] = { Src1Idx, Src2Idx };
3141 // lds_direct cannot be specified as either src1 or src2.
3142 for (int SrcIdx : SrcIndices) {
3143 if (SrcIdx == -1) break;
3144 const MCOperand &Src = Inst.getOperand(SrcIdx);
3145 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3146 return false;
3150 if (Src0Idx == -1)
3151 return true;
3153 const MCOperand &Src = Inst.getOperand(Src0Idx);
3154 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3155 return true;
3157 // lds_direct is specified as src0. Check additional limitations.
3158 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3161 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3162 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3163 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3164 if (Op.isFlatOffset())
3165 return Op.getStartLoc();
3167 return getLoc();
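// Validate the offset modifier of FLAT instructions. The offset field is
// 13 bits wide on GFX9 and 12 bits on GFX10: signed for global/scratch
// addressing, and unsigned (one bit narrower) for plain FLAT addressing.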
3170 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3171 const OperandVector &Operands) {
3172 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3173 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3174 return true;
3176 auto Opcode = Inst.getOpcode();
3177 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3178 assert(OpNum != -1);
3180 const auto &Op = Inst.getOperand(OpNum);
3181 if (!hasFlatOffsets() && Op.getImm() != 0) {
3182 Error(getFlatOffsetLoc(Operands),
3183 "flat offset modifier is not supported on this GPU");
3184 return false;
3187 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3188 // For FLAT segment the offset must be positive;
3189 // MSB is ignored and forced to zero.
3190 unsigned OffsetSize = isGFX9() ? 13 : 12;
3191 if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3192 if (!isIntN(OffsetSize, Op.getImm())) {
3193 Error(getFlatOffsetLoc(Operands),
3194 isGFX9() ? "expected a 13-bit signed offset" :
3195 "expected a 12-bit signed offset");
3196 return false;
3198 } else {
3199 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3200 Error(getFlatOffsetLoc(Operands),
3201 isGFX9() ? "expected a 12-bit unsigned offset" :
3202 "expected an 11-bit unsigned offset");
3203 return false;
3207 return true;
3210 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3211 unsigned Opcode = Inst.getOpcode();
3212 const MCInstrDesc &Desc = MII.get(Opcode);
3213 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3214 return true;
3216 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3217 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3219 const int OpIndices[] = { Src0Idx, Src1Idx };
3221 unsigned NumLiterals = 0;
3222 uint32_t LiteralValue;
3224 for (int OpIdx : OpIndices) {
3225 if (OpIdx == -1) break;
3227 const MCOperand &MO = Inst.getOperand(OpIdx);
3228 if (MO.isImm() &&
3229 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3230 AMDGPU::isSISrcOperand(Desc, OpIdx) &&
3231 !isInlineConstant(Inst, OpIdx)) {
3232 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3233 if (NumLiterals == 0 || LiteralValue != Value) {
3234 LiteralValue = Value;
3235 ++NumLiterals;
3240 return NumLiterals <= 1;
3243 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3244 const unsigned Opc = Inst.getOpcode();
3245 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3246 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3247 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3248 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3250 if (OpSel & ~3)
3251 return false;
3253 return true;
3256 // Check if VCC register matches wavefront size
3257 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3258 auto FB = getFeatureBits();
3259 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3260 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3263 // VOP3 literal is only allowed in GFX10+ and only one can be used
3264 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3265 unsigned Opcode = Inst.getOpcode();
3266 const MCInstrDesc &Desc = MII.get(Opcode);
3267 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3268 return true;
3270 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3271 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3272 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3274 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3276 unsigned NumLiterals = 0;
3277 uint32_t LiteralValue;
3279 for (int OpIdx : OpIndices) {
3280 if (OpIdx == -1) break;
3282 const MCOperand &MO = Inst.getOperand(OpIdx);
3283 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
3284 continue;
3286 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3287 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3288 return false;
3290 if (!isInlineConstant(Inst, OpIdx)) {
3291 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3292 if (NumLiterals == 0 || LiteralValue != Value) {
3293 LiteralValue = Value;
3294 ++NumLiterals;
3299 return !NumLiterals ||
3300 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3303 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3304 const SMLoc &IDLoc,
3305 const OperandVector &Operands) {
3306 if (!validateLdsDirect(Inst)) {
3307 Error(IDLoc,
3308 "invalid use of lds_direct");
3309 return false;
3311 if (!validateSOPLiteral(Inst)) {
3312 Error(IDLoc,
3313 "only one literal operand is allowed");
3314 return false;
3316 if (!validateVOP3Literal(Inst)) {
3317 Error(IDLoc,
3318 "invalid literal operand");
3319 return false;
3321 if (!validateConstantBusLimitations(Inst)) {
3322 Error(IDLoc,
3323 "invalid operand (violates constant bus restrictions)");
3324 return false;
3326 if (!validateEarlyClobberLimitations(Inst)) {
3327 Error(IDLoc,
3328 "destination must be different than all sources");
3329 return false;
3331 if (!validateIntClampSupported(Inst)) {
3332 Error(IDLoc,
3333 "integer clamping is not supported on this GPU");
3334 return false;
3336 if (!validateOpSel(Inst)) {
3337 Error(IDLoc,
3338 "invalid op_sel operand");
3339 return false;
3341 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3342 if (!validateMIMGD16(Inst)) {
3343 Error(IDLoc,
3344 "d16 modifier is not supported on this GPU");
3345 return false;
3347 if (!validateMIMGDim(Inst)) {
3348 Error(IDLoc, "dim modifier is required on this GPU");
3349 return false;
3351 if (!validateMIMGDataSize(Inst)) {
3352 Error(IDLoc,
3353 "image data size does not match dmask and tfe");
3354 return false;
3356 if (!validateMIMGAddrSize(Inst)) {
3357 Error(IDLoc,
3358 "image address size does not match dim and a16");
3359 return false;
3361 if (!validateMIMGAtomicDMask(Inst)) {
3362 Error(IDLoc,
3363 "invalid atomic image dmask");
3364 return false;
3366 if (!validateMIMGGatherDMask(Inst)) {
3367 Error(IDLoc,
3368 "invalid image_gather dmask: only one bit must be set");
3369 return false;
3371 if (!validateFlatOffset(Inst, Operands)) {
3372 return false;
3375 return true;
3378 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3379 const FeatureBitset &FBS,
3380 unsigned VariantID = 0);
3382 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3383 OperandVector &Operands,
3384 MCStreamer &Out,
3385 uint64_t &ErrorInfo,
3386 bool MatchingInlineAsm) {
3387 MCInst Inst;
3388 unsigned Result = Match_Success;
3389 for (auto Variant : getMatchedVariants()) {
3390 uint64_t EI;
3391 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3392 Variant);
3393 // We order match statuses from least to most specific and use the most
3394 // specific status as the result:
3395 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3396 if ((R == Match_Success) ||
3397 (R == Match_PreferE32) ||
3398 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3399 (R == Match_InvalidOperand && Result != Match_MissingFeature
3400 && Result != Match_PreferE32) ||
3401 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3402 && Result != Match_MissingFeature
3403 && Result != Match_PreferE32)) {
3404 Result = R;
3405 ErrorInfo = EI;
3407 if (R == Match_Success)
3408 break;
3411 switch (Result) {
3412 default: break;
3413 case Match_Success:
3414 if (!validateInstruction(Inst, IDLoc, Operands)) {
3415 return true;
3417 Inst.setLoc(IDLoc);
3418 Out.EmitInstruction(Inst, getSTI());
3419 return false;
3421 case Match_MissingFeature:
3422 return Error(IDLoc, "instruction not supported on this GPU");
3424 case Match_MnemonicFail: {
3425 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3426 std::string Suggestion = AMDGPUMnemonicSpellCheck(
3427 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3428 return Error(IDLoc, "invalid instruction" + Suggestion,
3429 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3432 case Match_InvalidOperand: {
3433 SMLoc ErrorLoc = IDLoc;
3434 if (ErrorInfo != ~0ULL) {
3435 if (ErrorInfo >= Operands.size()) {
3436 return Error(IDLoc, "too few operands for instruction");
3438 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3439 if (ErrorLoc == SMLoc())
3440 ErrorLoc = IDLoc;
3442 return Error(ErrorLoc, "invalid operand for instruction");
3445 case Match_PreferE32:
3446 return Error(IDLoc, "internal error: instruction without _e64 suffix "
3447 "should be encoded as e32");
3449 llvm_unreachable("Implement any new match types added!");
3452 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3453 int64_t Tmp = -1;
3454 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3455 return true;
3457 if (getParser().parseAbsoluteExpression(Tmp)) {
3458 return true;
3460 Ret = static_cast<uint32_t>(Tmp);
3461 return false;
3464 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3465 uint32_t &Minor) {
3466 if (ParseAsAbsoluteExpression(Major))
3467 return TokError("invalid major version");
3469 if (getLexer().isNot(AsmToken::Comma))
3470 return TokError("minor version number required, comma expected");
3471 Lex();
3473 if (ParseAsAbsoluteExpression(Minor))
3474 return TokError("invalid minor version");
3476 return false;
3479 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3480 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3481 return TokError("directive only supported for amdgcn architecture");
3483 std::string Target;
3485 SMLoc TargetStart = getTok().getLoc();
3486 if (getParser().parseEscapedString(Target))
3487 return true;
3488 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3490 std::string ExpectedTarget;
3491 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3492 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3494 if (Target != ExpectedTargetOS.str())
3495 return getParser().Error(TargetRange.Start, "target must match options",
3496 TargetRange);
3498 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3499 return false;
3502 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3503 return getParser().Error(Range.Start, "value out of range", Range);
3506 bool AMDGPUAsmParser::calculateGPRBlocks(
3507 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3508 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3509 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3510 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3511 // TODO(scott.linder): These calculations are duplicated from
3512 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3513 IsaVersion Version = getIsaVersion(getSTI().getCPU());
3515 unsigned NumVGPRs = NextFreeVGPR;
3516 unsigned NumSGPRs = NextFreeSGPR;
3518 if (Version.Major >= 10)
3519 NumSGPRs = 0;
3520 else {
3521 unsigned MaxAddressableNumSGPRs =
3522 IsaInfo::getAddressableNumSGPRs(&getSTI());
3524 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3525 NumSGPRs > MaxAddressableNumSGPRs)
3526 return OutOfRangeError(SGPRRange);
3528 NumSGPRs +=
3529 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3531 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3532 NumSGPRs > MaxAddressableNumSGPRs)
3533 return OutOfRangeError(SGPRRange);
3535 if (Features.test(FeatureSGPRInitBug))
3536 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3539 VGPRBlocks =
3540 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3541 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3543 return false;
3546 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3547 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3548 return TokError("directive only supported for amdgcn architecture");
3550 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3551 return TokError("directive only supported for amdhsa OS");
3553 StringRef KernelName;
3554 if (getParser().parseIdentifier(KernelName))
3555 return true;
3557 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3559 StringSet<> Seen;
3561 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3563 SMRange VGPRRange;
3564 uint64_t NextFreeVGPR = 0;
3565 SMRange SGPRRange;
3566 uint64_t NextFreeSGPR = 0;
3567 unsigned UserSGPRCount = 0;
3568 bool ReserveVCC = true;
3569 bool ReserveFlatScr = true;
3570 bool ReserveXNACK = hasXNACK();
3571 Optional<bool> EnableWavefrontSize32;
3573 while (true) {
3574 while (getLexer().is(AsmToken::EndOfStatement))
3575 Lex();
3577 if (getLexer().isNot(AsmToken::Identifier))
3578 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3580 StringRef ID = getTok().getIdentifier();
3581 SMRange IDRange = getTok().getLocRange();
3582 Lex();
3584 if (ID == ".end_amdhsa_kernel")
3585 break;
3587 if (Seen.find(ID) != Seen.end())
3588 return TokError(".amdhsa_ directives cannot be repeated");
3589 Seen.insert(ID);
3591 SMLoc ValStart = getTok().getLoc();
3592 int64_t IVal;
3593 if (getParser().parseAbsoluteExpression(IVal))
3594 return true;
3595 SMLoc ValEnd = getTok().getLoc();
3596 SMRange ValRange = SMRange(ValStart, ValEnd);
3598 if (IVal < 0)
3599 return OutOfRangeError(ValRange);
3601 uint64_t Val = IVal;
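// Store VALUE into the ENTRY bitfield of FIELD, reporting an out-of-range
// error if VALUE does not fit in the field's width.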
3603 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
3604 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
3605 return OutOfRangeError(RANGE); \
3606 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3608 if (ID == ".amdhsa_group_segment_fixed_size") {
3609 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3610 return OutOfRangeError(ValRange);
3611 KD.group_segment_fixed_size = Val;
3612 } else if (ID == ".amdhsa_private_segment_fixed_size") {
3613 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3614 return OutOfRangeError(ValRange);
3615 KD.private_segment_fixed_size = Val;
3616 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3617 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3618 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3619 Val, ValRange);
3620 UserSGPRCount += 4;
3621 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3622 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3623 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3624 ValRange);
3625 UserSGPRCount += 2;
3626 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3627 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3629 ValRange);
3630 UserSGPRCount += 2;
3631 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3632 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3633 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3634 Val, ValRange);
3635 UserSGPRCount += 2;
3636 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3637 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3638 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3639 ValRange);
3640 UserSGPRCount += 2;
3641 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3642 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3643 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3644 ValRange);
3645 UserSGPRCount += 2;
3646 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3647 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3649 Val, ValRange);
3650 UserSGPRCount += 1;
3651 } else if (ID == ".amdhsa_wavefront_size32") {
3652 if (IVersion.Major < 10)
3653 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3654 IDRange);
3655 EnableWavefrontSize32 = Val;
3656 PARSE_BITS_ENTRY(KD.kernel_code_properties,
3657 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3658 Val, ValRange);
3659 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3660 PARSE_BITS_ENTRY(
3661 KD.compute_pgm_rsrc2,
3662 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3663 ValRange);
3664 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3665 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3666 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3667 ValRange);
3668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3671 ValRange);
3672 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3675 ValRange);
3676 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3678 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3679 ValRange);
3680 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3681 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3682 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3683 ValRange);
3684 } else if (ID == ".amdhsa_next_free_vgpr") {
3685 VGPRRange = ValRange;
3686 NextFreeVGPR = Val;
3687 } else if (ID == ".amdhsa_next_free_sgpr") {
3688 SGPRRange = ValRange;
3689 NextFreeSGPR = Val;
3690 } else if (ID == ".amdhsa_reserve_vcc") {
3691 if (!isUInt<1>(Val))
3692 return OutOfRangeError(ValRange);
3693 ReserveVCC = Val;
3694 } else if (ID == ".amdhsa_reserve_flat_scratch") {
3695 if (IVersion.Major < 7)
3696 return getParser().Error(IDRange.Start, "directive requires gfx7+",
3697 IDRange);
3698 if (!isUInt<1>(Val))
3699 return OutOfRangeError(ValRange);
3700 ReserveFlatScr = Val;
3701 } else if (ID == ".amdhsa_reserve_xnack_mask") {
3702 if (IVersion.Major < 8)
3703 return getParser().Error(IDRange.Start, "directive requires gfx8+",
3704 IDRange);
3705 if (!isUInt<1>(Val))
3706 return OutOfRangeError(ValRange);
3707 ReserveXNACK = Val;
3708 } else if (ID == ".amdhsa_float_round_mode_32") {
3709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3710 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3711 } else if (ID == ".amdhsa_float_round_mode_16_64") {
3712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3713 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3714 } else if (ID == ".amdhsa_float_denorm_mode_32") {
3715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3716 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3717 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3719 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3720 ValRange);
3721 } else if (ID == ".amdhsa_dx10_clamp") {
3722 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3723 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3724 } else if (ID == ".amdhsa_ieee_mode") {
3725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3726 Val, ValRange);
3727 } else if (ID == ".amdhsa_fp16_overflow") {
3728 if (IVersion.Major < 9)
3729 return getParser().Error(IDRange.Start, "directive requires gfx9+",
3730 IDRange);
3731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3732 ValRange);
3733 } else if (ID == ".amdhsa_workgroup_processor_mode") {
3734 if (IVersion.Major < 10)
3735 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3736 IDRange);
3737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3738 ValRange);
3739 } else if (ID == ".amdhsa_memory_ordered") {
3740 if (IVersion.Major < 10)
3741 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3742 IDRange);
3743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3744 ValRange);
3745 } else if (ID == ".amdhsa_forward_progress") {
3746 if (IVersion.Major < 10)
3747 return getParser().Error(IDRange.Start, "directive requires gfx10+",
3748 IDRange);
3749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3750 ValRange);
3751 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3752 PARSE_BITS_ENTRY(
3753 KD.compute_pgm_rsrc2,
3754 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3755 ValRange);
3756 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3759 Val, ValRange);
3760 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3761 PARSE_BITS_ENTRY(
3762 KD.compute_pgm_rsrc2,
3763 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3764 ValRange);
3765 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3768 Val, ValRange);
3769 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3772 Val, ValRange);
3773 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3776 Val, ValRange);
3777 } else if (ID == ".amdhsa_exception_int_div_zero") {
3778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3780 Val, ValRange);
3781 } else {
3782 return getParser().Error(IDRange.Start,
3783 "unknown .amdhsa_kernel directive", IDRange);
3786 #undef PARSE_BITS_ENTRY
3789 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3790 return TokError(".amdhsa_next_free_vgpr directive is required");
3792 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3793 return TokError(".amdhsa_next_free_sgpr directive is required");
3795 unsigned VGPRBlocks;
3796 unsigned SGPRBlocks;
3797 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3798 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3799 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3800 SGPRBlocks))
3801 return true;
3803 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3804 VGPRBlocks))
3805 return OutOfRangeError(VGPRRange);
3806 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3807 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3809 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3810 SGPRBlocks))
3811 return OutOfRangeError(SGPRRange);
3812 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3813 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3814 SGPRBlocks);
3816 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3817 return TokError("too many user SGPRs enabled");
3818 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3819 UserSGPRCount);
3821 getTargetStreamer().EmitAmdhsaKernelDescriptor(
3822 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3823 ReserveFlatScr, ReserveXNACK);
3824 return false;
3827 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3828 uint32_t Major;
3829 uint32_t Minor;
3831 if (ParseDirectiveMajorMinor(Major, Minor))
3832 return true;
3834 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3835 return false;
3838 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3839 uint32_t Major;
3840 uint32_t Minor;
3841 uint32_t Stepping;
3842 StringRef VendorName;
3843 StringRef ArchName;
3845 // If this directive has no arguments, then use the ISA version for the
3846 // targeted GPU.
3847 if (getLexer().is(AsmToken::EndOfStatement)) {
3848 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3849 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3850 ISA.Stepping,
3851 "AMD", "AMDGPU");
3852 return false;
3855 if (ParseDirectiveMajorMinor(Major, Minor))
3856 return true;
3858 if (getLexer().isNot(AsmToken::Comma))
3859 return TokError("stepping version number required, comma expected");
3860 Lex();
3862 if (ParseAsAbsoluteExpression(Stepping))
3863 return TokError("invalid stepping version");
3865 if (getLexer().isNot(AsmToken::Comma))
3866 return TokError("vendor name required, comma expected");
3867 Lex();
3869 if (getLexer().isNot(AsmToken::String))
3870 return TokError("invalid vendor name");
3872 VendorName = getLexer().getTok().getStringContents();
3873 Lex();
3875 if (getLexer().isNot(AsmToken::Comma))
3876 return TokError("arch name required, comma expected");
3877 Lex();
3879 if (getLexer().isNot(AsmToken::String))
3880 return TokError("invalid arch name");
3882 ArchName = getLexer().getTok().getStringContents();
3883 Lex();
3885 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
3886 VendorName, ArchName);
3887 return false;
3890 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
3891 amd_kernel_code_t &Header) {
3892 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
3893 // assembly for backwards compatibility.
3894 if (ID == "max_scratch_backing_memory_byte_size") {
3895 Parser.eatToEndOfStatement();
3896 return false;
3899 SmallString<40> ErrStr;
3900 raw_svector_ostream Err(ErrStr);
3901 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
3902 return TokError(Err.str());
3904 Lex();
3906 if (ID == "enable_wavefront_size32") {
3907 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
3908 if (!isGFX10())
3909 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
3910 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3911 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
3912 } else {
3913 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3914 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
3918 if (ID == "wavefront_size") {
3919 if (Header.wavefront_size == 5) {
3920 if (!isGFX10())
3921 return TokError("wavefront_size=5 is only allowed on GFX10+");
3922 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
3923 return TokError("wavefront_size=5 requires +WavefrontSize32");
3924 } else if (Header.wavefront_size == 6) {
3925 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
3926 return TokError("wavefront_size=6 requires +WavefrontSize64");
3930 if (ID == "enable_wgp_mode") {
3931 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
3932 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
3935 if (ID == "enable_mem_ordered") {
3936 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
3937 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
3940 if (ID == "enable_fwd_progress") {
3941 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
3942 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
3945 return false;
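// Illustrative sketch of the legacy block parsed by ParseDirectiveAMDKernelCodeT
// below (the field shown and its value are hypothetical):
//   .amd_kernel_code_t
//     wavefront_size = 6
//   .end_amd_kernel_code_t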
3948 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3949 amd_kernel_code_t Header;
3950 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3952 while (true) {
3953 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3954 // will set the current token to EndOfStatement.
3955 while(getLexer().is(AsmToken::EndOfStatement))
3956 Lex();
3958 if (getLexer().isNot(AsmToken::Identifier))
3959 return TokError("expected value identifier or .end_amd_kernel_code_t");
3961 StringRef ID = getLexer().getTok().getIdentifier();
3962 Lex();
3964 if (ID == ".end_amd_kernel_code_t")
3965 break;
3967 if (ParseAMDKernelCodeTValue(ID, Header))
3968 return true;
3971 getTargetStreamer().EmitAMDKernelCodeT(Header);
3973 return false;
3976 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3977 if (getLexer().isNot(AsmToken::Identifier))
3978 return TokError("expected symbol name");
3980 StringRef KernelName = Parser.getTok().getString();
3982 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3983 ELF::STT_AMDGPU_HSA_KERNEL);
3984 Lex();
3985 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3986 KernelScope.initialize(getContext());
3987 return false;
3990 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3991 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3992 return Error(getParser().getTok().getLoc(),
3993 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3994 "architectures");
3997 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3999 std::string ISAVersionStringFromSTI;
4000 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4001 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4003 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4004 return Error(getParser().getTok().getLoc(),
4005 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4006 "arguments specified through the command line");
4009 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4010 Lex();
4012 return false;
4015 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4016 const char *AssemblerDirectiveBegin;
4017 const char *AssemblerDirectiveEnd;
4018 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4019 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4020 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4021 HSAMD::V3::AssemblerDirectiveEnd)
4022 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4023 HSAMD::AssemblerDirectiveEnd);
4025 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4026 return Error(getParser().getTok().getLoc(),
4027 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4028 "not available on non-amdhsa OSes")).str());
4031 std::string HSAMetadataString;
4032 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4033 HSAMetadataString))
4034 return true;
4036 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4037 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4038 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4039 } else {
4040 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4041 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4044 return false;
4047 /// Common code to parse out a block of text (typically YAML) between start and
4048 /// end directives.
4049 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4050 const char *AssemblerDirectiveEnd,
4051 std::string &CollectString) {
4053 raw_string_ostream CollectStream(CollectString);
4055 getLexer().setSkipSpace(false);
4057 bool FoundEnd = false;
4058 while (!getLexer().is(AsmToken::Eof)) {
4059 while (getLexer().is(AsmToken::Space)) {
4060 CollectStream << getLexer().getTok().getString();
4061 Lex();
4064 if (getLexer().is(AsmToken::Identifier)) {
4065 StringRef ID = getLexer().getTok().getIdentifier();
4066 if (ID == AssemblerDirectiveEnd) {
4067 Lex();
4068 FoundEnd = true;
4069 break;
4073 CollectStream << Parser.parseStringToEndOfStatement()
4074 << getContext().getAsmInfo()->getSeparatorString();
4076 Parser.eatToEndOfStatement();
4079 getLexer().setSkipSpace(true);
4081 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4082 return TokError(Twine("expected directive ") +
4083 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4086 CollectStream.flush();
4087 return false;
4090 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4091 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4092 std::string String;
4093 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4094 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4095 return true;
4097 auto PALMetadata = getTargetStreamer().getPALMetadata();
4098 if (!PALMetadata->setFromString(String))
4099 return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4100 return false;
4103 /// Parse the assembler directive for old linear-format PAL metadata.
4104 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4105 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4106 return Error(getParser().getTok().getLoc(),
4107 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4108 "not available on non-amdpal OSes")).str());
4111 auto PALMetadata = getTargetStreamer().getPALMetadata();
4112 PALMetadata->setLegacy();
4113 for (;;) {
4114 uint32_t Key, Value;
4115 if (ParseAsAbsoluteExpression(Key)) {
4116 return TokError(Twine("invalid value in ") +
4117 Twine(PALMD::AssemblerDirective));
4119 if (getLexer().isNot(AsmToken::Comma)) {
4120 return TokError(Twine("expected an even number of values in ") +
4121 Twine(PALMD::AssemblerDirective));
4123 Lex();
4124 if (ParseAsAbsoluteExpression(Value)) {
4125 return TokError(Twine("invalid value in ") +
4126 Twine(PALMD::AssemblerDirective));
4128 PALMetadata->setRegister(Key, Value);
4129 if (getLexer().isNot(AsmToken::Comma))
4130 break;
4131 Lex();
4133 return false;
4136 /// ParseDirectiveAMDGPULDS
4137 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4138 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4139 if (getParser().checkForValidSection())
4140 return true;
4142 StringRef Name;
4143 SMLoc NameLoc = getLexer().getLoc();
4144 if (getParser().parseIdentifier(Name))
4145 return TokError("expected identifier in directive");
4147 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4148 if (parseToken(AsmToken::Comma, "expected ','"))
4149 return true;
4151 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4153 int64_t Size;
4154 SMLoc SizeLoc = getLexer().getLoc();
4155 if (getParser().parseAbsoluteExpression(Size))
4156 return true;
4157 if (Size < 0)
4158 return Error(SizeLoc, "size must be non-negative");
4159 if (Size > LocalMemorySize)
4160 return Error(SizeLoc, "size is too large");
4162 int64_t Align = 4;
4163 if (getLexer().is(AsmToken::Comma)) {
4164 Lex();
4165 SMLoc AlignLoc = getLexer().getLoc();
4166 if (getParser().parseAbsoluteExpression(Align))
4167 return true;
4168 if (Align < 0 || !isPowerOf2_64(Align))
4169 return Error(AlignLoc, "alignment must be a power of two");
4171 // Alignment larger than the size of LDS is possible in theory, as long
4172 // as the linker manages to place the symbol at address 0, but we do want
4173 // to make sure the alignment fits nicely into a 32-bit integer.
4174 if (Align >= 1u << 31)
4175 return Error(AlignLoc, "alignment is too large");
4178 if (parseToken(AsmToken::EndOfStatement,
4179 "unexpected token in '.amdgpu_lds' directive"))
4180 return true;
4182 Symbol->redefineIfPossible();
4183 if (!Symbol->isUndefined())
4184 return Error(NameLoc, "invalid symbol redefinition");
4186 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4187 return false;
4190 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4191 StringRef IDVal = DirectiveID.getString();
4193 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4194 if (IDVal == ".amdgcn_target")
4195 return ParseDirectiveAMDGCNTarget();
4197 if (IDVal == ".amdhsa_kernel")
4198 return ParseDirectiveAMDHSAKernel();
4200 // TODO: Restructure/combine with PAL metadata directive.
4201 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4202 return ParseDirectiveHSAMetadata();
4203 } else {
4204 if (IDVal == ".hsa_code_object_version")
4205 return ParseDirectiveHSACodeObjectVersion();
4207 if (IDVal == ".hsa_code_object_isa")
4208 return ParseDirectiveHSACodeObjectISA();
4210 if (IDVal == ".amd_kernel_code_t")
4211 return ParseDirectiveAMDKernelCodeT();
4213 if (IDVal == ".amdgpu_hsa_kernel")
4214 return ParseDirectiveAMDGPUHsaKernel();
4216 if (IDVal == ".amd_amdgpu_isa")
4217 return ParseDirectiveISAVersion();
4219 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4220 return ParseDirectiveHSAMetadata();
4223 if (IDVal == ".amdgpu_lds")
4224 return ParseDirectiveAMDGPULDS();
4226 if (IDVal == PALMD::AssemblerDirectiveBegin)
4227 return ParseDirectivePALMetadataBegin();
4229 if (IDVal == PALMD::AssemblerDirective)
4230 return ParseDirectivePALMetadata();
4232 return true;
4235 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4236 unsigned RegNo) const {
4238 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4239 R.isValid(); ++R) {
4240 if (*R == RegNo)
4241 return isGFX9() || isGFX10();
4245 // GFX10 has 2 more SGPRs, 104 and 105.
4245 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4246 R.isValid(); ++R) {
4247 if (*R == RegNo)
4248 return hasSGPR104_SGPR105();
4251 switch (RegNo) {
4252 case AMDGPU::SRC_SHARED_BASE:
4253 case AMDGPU::SRC_SHARED_LIMIT:
4254 case AMDGPU::SRC_PRIVATE_BASE:
4255 case AMDGPU::SRC_PRIVATE_LIMIT:
4256 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4257 return !isCI() && !isSI() && !isVI();
4258 case AMDGPU::TBA:
4259 case AMDGPU::TBA_LO:
4260 case AMDGPU::TBA_HI:
4261 case AMDGPU::TMA:
4262 case AMDGPU::TMA_LO:
4263 case AMDGPU::TMA_HI:
4264 return !isGFX9() && !isGFX10();
4265 case AMDGPU::XNACK_MASK:
4266 case AMDGPU::XNACK_MASK_LO:
4267 case AMDGPU::XNACK_MASK_HI:
4268 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4269 case AMDGPU::SGPR_NULL:
4270 return isGFX10();
4271 default:
4272 break;
4275 if (isCI())
4276 return true;
4278 if (isSI() || isGFX10()) {
4279 // No flat_scr on SI.
4280 // On GFX10 flat scratch is not a valid register operand and can only be
4281 // accessed with s_setreg/s_getreg.
4282 switch (RegNo) {
4283 case AMDGPU::FLAT_SCR:
4284 case AMDGPU::FLAT_SCR_LO:
4285 case AMDGPU::FLAT_SCR_HI:
4286 return false;
4287 default:
4288 return true;
4292 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4293 // SI/CI have.
4294 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4295 R.isValid(); ++R) {
4296 if (*R == RegNo)
4297 return hasSGPR102_SGPR103();
4300 return true;
4303 OperandMatchResultTy
4304 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4305 OperandMode Mode) {
4306 // Try to parse with a custom parser
4307 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4309 // If we successfully parsed the operand or if there was an error parsing,
4310 // we are done.
4312 // If we are parsing after we reach EndOfStatement then this means we
4313 // are appending default values to the Operands list. This is only done
4314 // by the custom parser, so we shouldn't continue on to the generic parsing.
4315 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4316 getLexer().is(AsmToken::EndOfStatement))
4317 return ResTy;
4319 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4320 unsigned Prefix = Operands.size();
4321 SMLoc LBraceLoc = getTok().getLoc();
4322 Parser.Lex(); // eat the '['
4324 for (;;) {
4325 ResTy = parseReg(Operands);
4326 if (ResTy != MatchOperand_Success)
4327 return ResTy;
4329 if (getLexer().is(AsmToken::RBrac))
4330 break;
4332 if (getLexer().isNot(AsmToken::Comma))
4333 return MatchOperand_ParseFail;
4334 Parser.Lex();
4337 if (Operands.size() - Prefix > 1) {
4338 Operands.insert(Operands.begin() + Prefix,
4339 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4340 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4341 getTok().getLoc()));
4344 Parser.Lex(); // eat the ']'
4345 return MatchOperand_Success;
4348 return parseRegOrImm(Operands);
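// Mnemonic suffixes force a particular encoding; for example (illustrative),
// "v_add_f32_e64" selects the 64-bit VOP3 form and "v_mov_b32_sdwa" selects
// the SDWA form. The suffix is stripped before matching.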
4351 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4352 // Clear any forced encodings from the previous instruction.
4353 setForcedEncodingSize(0);
4354 setForcedDPP(false);
4355 setForcedSDWA(false);
4357 if (Name.endswith("_e64")) {
4358 setForcedEncodingSize(64);
4359 return Name.substr(0, Name.size() - 4);
4360 } else if (Name.endswith("_e32")) {
4361 setForcedEncodingSize(32);
4362 return Name.substr(0, Name.size() - 4);
4363 } else if (Name.endswith("_dpp")) {
4364 setForcedDPP(true);
4365 return Name.substr(0, Name.size() - 4);
4366 } else if (Name.endswith("_sdwa")) {
4367 setForcedSDWA(true);
4368 return Name.substr(0, Name.size() - 5);
4370 return Name;
4373 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4374 StringRef Name,
4375 SMLoc NameLoc, OperandVector &Operands) {
4376 // Add the instruction mnemonic
4377 Name = parseMnemonicSuffix(Name);
4378 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4380 bool IsMIMG = Name.startswith("image_");
4382 while (!getLexer().is(AsmToken::EndOfStatement)) {
4383 OperandMode Mode = OperandMode_Default;
4384 if (IsMIMG && isGFX10() && Operands.size() == 2)
4385 Mode = OperandMode_NSA;
4386 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4388 // Eat the comma or space if there is one.
4389 if (getLexer().is(AsmToken::Comma))
4390 Parser.Lex();
4392 switch (Res) {
4393 case MatchOperand_Success: break;
4394 case MatchOperand_ParseFail:
4395 // FIXME: use real operand location rather than the current location.
4396 Error(getLexer().getLoc(), "failed parsing operand.");
4397 while (!getLexer().is(AsmToken::EndOfStatement)) {
4398 Parser.Lex();
4400 return true;
4401 case MatchOperand_NoMatch:
4402 // FIXME: use real operand location rather than the current location.
4403 Error(getLexer().getLoc(), "not a valid operand.");
4404 while (!getLexer().is(AsmToken::EndOfStatement)) {
4405 Parser.Lex();
4407 return true;
4411 return false;
4414 //===----------------------------------------------------------------------===//
4415 // Utility functions
4416 //===----------------------------------------------------------------------===//
4418 OperandMatchResultTy
4419 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4421 if (!trySkipId(Prefix, AsmToken::Colon))
4422 return MatchOperand_NoMatch;
4424 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4427 OperandMatchResultTy
4428 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4429 AMDGPUOperand::ImmTy ImmTy,
4430 bool (*ConvertResult)(int64_t&)) {
4431 SMLoc S = getLoc();
4432 int64_t Value = 0;
4434 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4435 if (Res != MatchOperand_Success)
4436 return Res;
4438 if (ConvertResult && !ConvertResult(Value)) {
4439 Error(S, "invalid " + StringRef(Prefix) + " value.");
4442 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4443 return MatchOperand_Success;
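// Parses a prefixed bit array of up to four 0/1 elements, e.g. (illustrative)
// "op_sel:[0,0,1,1]", and packs the bits into a single immediate.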
4446 OperandMatchResultTy
4447 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4448 OperandVector &Operands,
4449 AMDGPUOperand::ImmTy ImmTy,
4450 bool (*ConvertResult)(int64_t&)) {
4451 SMLoc S = getLoc();
4452 if (!trySkipId(Prefix, AsmToken::Colon))
4453 return MatchOperand_NoMatch;
4455 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4456 return MatchOperand_ParseFail;
4458 unsigned Val = 0;
4459 const unsigned MaxSize = 4;
4461 // FIXME: How to verify the number of elements matches the number of src
4462 // operands?
4463 for (int I = 0; ; ++I) {
4464 int64_t Op;
4465 SMLoc Loc = getLoc();
4466 if (!parseExpr(Op))
4467 return MatchOperand_ParseFail;
4469 if (Op != 0 && Op != 1) {
4470 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4471 return MatchOperand_ParseFail;
4474 Val |= (Op << I);
4476 if (trySkipToken(AsmToken::RBrac))
4477 break;
4479 if (I + 1 == MaxSize) {
4480 Error(getLoc(), "expected a closing square bracket");
4481 return MatchOperand_ParseFail;
4484 if (!skipToken(AsmToken::Comma, "expected a comma"))
4485 return MatchOperand_ParseFail;
4488 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4489 return MatchOperand_Success;
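// Named bits are written as the bare name to set the bit and as "no" + name to
// clear it, e.g. (illustrative) "glc" vs. "noglc"; omitting the token leaves
// the default value of 0.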
4492 OperandMatchResultTy
4493 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4494 AMDGPUOperand::ImmTy ImmTy) {
4495 int64_t Bit = 0;
4496 SMLoc S = Parser.getTok().getLoc();
4498 // We are at the end of the statement, and this is a default argument, so
4499 // use a default value.
4500 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4501 switch(getLexer().getKind()) {
4502 case AsmToken::Identifier: {
4503 StringRef Tok = Parser.getTok().getString();
4504 if (Tok == Name) {
4505 if (Tok == "r128" && isGFX9())
4506 Error(S, "r128 modifier is not supported on this GPU");
4507 if (Tok == "a16" && !isGFX9() && !isGFX10())
4508 Error(S, "a16 modifier is not supported on this GPU");
4509 Bit = 1;
4510 Parser.Lex();
4511 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4512 Bit = 0;
4513 Parser.Lex();
4514 } else {
4515 return MatchOperand_NoMatch;
4517 break;
4519 default:
4520 return MatchOperand_NoMatch;
4524 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4525 return MatchOperand_ParseFail;
4527 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4528 return MatchOperand_Success;
4531 static void addOptionalImmOperand(
4532 MCInst& Inst, const OperandVector& Operands,
4533 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4534 AMDGPUOperand::ImmTy ImmT,
4535 int64_t Default = 0) {
4536 auto i = OptionalIdx.find(ImmT);
4537 if (i != OptionalIdx.end()) {
4538 unsigned Idx = i->second;
4539 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4540 } else {
4541 Inst.addOperand(MCOperand::createImm(Default));
4545 OperandMatchResultTy
4546 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4547 if (getLexer().isNot(AsmToken::Identifier)) {
4548 return MatchOperand_NoMatch;
4550 StringRef Tok = Parser.getTok().getString();
4551 if (Tok != Prefix) {
4552 return MatchOperand_NoMatch;
4555 Parser.Lex();
4556 if (getLexer().isNot(AsmToken::Colon)) {
4557 return MatchOperand_ParseFail;
4560 Parser.Lex();
4561 if (getLexer().isNot(AsmToken::Identifier)) {
4562 return MatchOperand_ParseFail;
4565 Value = Parser.getTok().getString();
4566 return MatchOperand_Success;
4569 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4570 // values to live in a joint format operand in the MCInst encoding.
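// For example (illustrative), "dfmt:14, nfmt:2" and "nfmt:2, dfmt:14" both
// produce the same joint immediate, Dfmt | (Nfmt << 4).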
4571 OperandMatchResultTy
4572 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4573 SMLoc S = Parser.getTok().getLoc();
4574 int64_t Dfmt = 0, Nfmt = 0;
4575 // dfmt and nfmt can appear in either order, and each is optional.
4576 bool GotDfmt = false, GotNfmt = false;
4577 while (!GotDfmt || !GotNfmt) {
4578 if (!GotDfmt) {
4579 auto Res = parseIntWithPrefix("dfmt", Dfmt);
4580 if (Res != MatchOperand_NoMatch) {
4581 if (Res != MatchOperand_Success)
4582 return Res;
4583 if (Dfmt >= 16) {
4584 Error(Parser.getTok().getLoc(), "out of range dfmt");
4585 return MatchOperand_ParseFail;
4587 GotDfmt = true;
4588 Parser.Lex();
4589 continue;
4592 if (!GotNfmt) {
4593 auto Res = parseIntWithPrefix("nfmt", Nfmt);
4594 if (Res != MatchOperand_NoMatch) {
4595 if (Res != MatchOperand_Success)
4596 return Res;
4597 if (Nfmt >= 8) {
4598 Error(Parser.getTok().getLoc(), "out of range nfmt");
4599 return MatchOperand_ParseFail;
4601 GotNfmt = true;
4602 Parser.Lex();
4603 continue;
4606 break;
4608 if (!GotDfmt && !GotNfmt)
4609 return MatchOperand_NoMatch;
4610 auto Format = Dfmt | Nfmt << 4;
4611 Operands.push_back(
4612 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4613 return MatchOperand_Success;
4616 //===----------------------------------------------------------------------===//
4617 // ds
4618 //===----------------------------------------------------------------------===//
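// The converters below build DS instructions such as (illustrative):
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8 gds
// where offset0/offset1/gds are the optional operands handled here.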
4620 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4621 const OperandVector &Operands) {
4622 OptionalImmIndexMap OptionalIdx;
4624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4627 // Add the register arguments
4628 if (Op.isReg()) {
4629 Op.addRegOperands(Inst, 1);
4630 continue;
4633 // Handle optional arguments
4634 OptionalIdx[Op.getImmTy()] = i;
4637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4639 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4641 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4644 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4645 bool IsGdsHardcoded) {
4646 OptionalImmIndexMap OptionalIdx;
4648 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4651 // Add the register arguments
4652 if (Op.isReg()) {
4653 Op.addRegOperands(Inst, 1);
4654 continue;
4657 if (Op.isToken() && Op.getToken() == "gds") {
4658 IsGdsHardcoded = true;
4659 continue;
4662 // Handle optional arguments
4663 OptionalIdx[Op.getImmTy()] = i;
4666 AMDGPUOperand::ImmTy OffsetType =
4667 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4668 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4669 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4670 AMDGPUOperand::ImmTyOffset;
4672 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4674 if (!IsGdsHardcoded) {
4675 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4677 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4680 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4681 OptionalImmIndexMap OptionalIdx;
4683 unsigned OperandIdx[4];
4684 unsigned EnMask = 0;
4685 int SrcIdx = 0;
4687 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4688 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4690 // Add the register arguments
4691 if (Op.isReg()) {
4692 assert(SrcIdx < 4);
4693 OperandIdx[SrcIdx] = Inst.size();
4694 Op.addRegOperands(Inst, 1);
4695 ++SrcIdx;
4696 continue;
4699 if (Op.isOff()) {
4700 assert(SrcIdx < 4);
4701 OperandIdx[SrcIdx] = Inst.size();
4702 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4703 ++SrcIdx;
4704 continue;
4707 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4708 Op.addImmOperands(Inst, 1);
4709 continue;
4712 if (Op.isToken() && Op.getToken() == "done")
4713 continue;
4715 // Handle optional arguments
4716 OptionalIdx[Op.getImmTy()] = i;
4719 assert(SrcIdx == 4);
4721 bool Compr = false;
4722 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4723 Compr = true;
4724 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4725 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4726 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4729 for (auto i = 0; i < SrcIdx; ++i) {
4730 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4731 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4738 Inst.addOperand(MCOperand::createImm(EnMask));
4741 //===----------------------------------------------------------------------===//
4742 // s_waitcnt
4743 //===----------------------------------------------------------------------===//
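// The s_waitcnt operand may be a raw immediate or a list of named counters,
// e.g. (illustrative): s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
// The "_sat" suffix clamps an over-large count instead of reporting an error.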
4745 static bool
4746 encodeCnt(
4747 const AMDGPU::IsaVersion ISA,
4748 int64_t &IntVal,
4749 int64_t CntVal,
4750 bool Saturate,
4751 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4752 unsigned (*decode)(const IsaVersion &Version, unsigned))
4754 bool Failed = false;
4756 IntVal = encode(ISA, IntVal, CntVal);
4757 if (CntVal != decode(ISA, IntVal)) {
4758 if (Saturate) {
4759 IntVal = encode(ISA, IntVal, -1);
4760 } else {
4761 Failed = true;
4764 return Failed;
4767 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4769 SMLoc CntLoc = getLoc();
4770 StringRef CntName = getTokenStr();
4772 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4773 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4774 return false;
4776 int64_t CntVal;
4777 SMLoc ValLoc = getLoc();
4778 if (!parseExpr(CntVal))
4779 return false;
4781 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4783 bool Failed = true;
4784 bool Sat = CntName.endswith("_sat");
4786 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4787 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4788 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4789 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4790 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4791 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4792 } else {
4793 Error(CntLoc, "invalid counter name " + CntName);
4794 return false;
4797 if (Failed) {
4798 Error(ValLoc, "too large value for " + CntName);
4799 return false;
4802 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4803 return false;
4805 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4806 if (isToken(AsmToken::EndOfStatement)) {
4807 Error(getLoc(), "expected a counter name");
4808 return false;
4812 return true;
4815 OperandMatchResultTy
4816 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4817 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4818 int64_t Waitcnt = getWaitcntBitMask(ISA);
4819 SMLoc S = getLoc();
4821 // If the parse failed, do not return an error code
4822 // to avoid excessive error messages.
4823 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4824 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4825 } else {
4826 parseExpr(Waitcnt);
4829 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4830 return MatchOperand_Success;
4833 bool
4834 AMDGPUOperand::isSWaitCnt() const {
4835 return isImm();
4838 //===----------------------------------------------------------------------===//
4839 // hwreg
4840 //===----------------------------------------------------------------------===//
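// Accepted operand forms (illustrative):
//   hwreg(HW_REG_MODE)        - symbolic register name, default offset/width
//   hwreg(1, 0, 32)           - numeric id, bit offset, bit width
//   257                       - raw 16-bit immediate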
4842 bool
4843 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4844 int64_t &Offset,
4845 int64_t &Width) {
4846 using namespace llvm::AMDGPU::Hwreg;
4848 // The register may be specified by name or using a numeric code
4849 if (isToken(AsmToken::Identifier) &&
4850 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4851 HwReg.IsSymbolic = true;
4852 lex(); // skip message name
4853 } else if (!parseExpr(HwReg.Id)) {
4854 return false;
4857 if (trySkipToken(AsmToken::RParen))
4858 return true;
4860 // parse optional params
4861 return
4862 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
4863 parseExpr(Offset) &&
4864 skipToken(AsmToken::Comma, "expected a comma") &&
4865 parseExpr(Width) &&
4866 skipToken(AsmToken::RParen, "expected a closing parenthesis");
4869 bool
4870 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
4871 const int64_t Offset,
4872 const int64_t Width,
4873 const SMLoc Loc) {
4875 using namespace llvm::AMDGPU::Hwreg;
4877 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
4878 Error(Loc, "specified hardware register is not supported on this GPU");
4879 return false;
4880 } else if (!isValidHwreg(HwReg.Id)) {
4881 Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
4882 return false;
4883 } else if (!isValidHwregOffset(Offset)) {
4884 Error(Loc, "invalid bit offset: only 5-bit values are legal");
4885 return false;
4886 } else if (!isValidHwregWidth(Width)) {
4887 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
4888 return false;
4890 return true;
4893 OperandMatchResultTy
4894 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
4895 using namespace llvm::AMDGPU::Hwreg;
4897 int64_t ImmVal = 0;
4898 SMLoc Loc = getLoc();
4900 // If the parse failed, do not return an error code
4901 // to avoid excessive error messages.
4902 if (trySkipId("hwreg", AsmToken::LParen)) {
4903 OperandInfoTy HwReg(ID_UNKNOWN_);
4904 int64_t Offset = OFFSET_DEFAULT_;
4905 int64_t Width = WIDTH_DEFAULT_;
4906 if (parseHwregBody(HwReg, Offset, Width) &&
4907 validateHwreg(HwReg, Offset, Width, Loc)) {
4908 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
4910 } else if (parseExpr(ImmVal)) {
4911 if (ImmVal < 0 || !isUInt<16>(ImmVal))
4912 Error(Loc, "invalid immediate: only 16-bit values are legal");
4915 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
4916 return MatchOperand_Success;
4919 bool AMDGPUOperand::isHwreg() const {
4920 return isImmTy(ImmTyHwreg);
4923 //===----------------------------------------------------------------------===//
4924 // sendmsg
4925 //===----------------------------------------------------------------------===//
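// Accepted operand forms (illustrative):
//   sendmsg(MSG_INTERRUPT)
//   sendmsg(MSG_GS, GS_OP_EMIT, 0)   - message, operation, stream id
//   3                                - raw 16-bit immediate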
4927 bool
4928 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
4929 OperandInfoTy &Op,
4930 OperandInfoTy &Stream) {
4931 using namespace llvm::AMDGPU::SendMsg;
4933 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
4934 Msg.IsSymbolic = true;
4935 lex(); // skip message name
4936 } else if (!parseExpr(Msg.Id)) {
4937 return false;
4940 if (trySkipToken(AsmToken::Comma)) {
4941 Op.IsDefined = true;
4942 if (isToken(AsmToken::Identifier) &&
4943 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
4944 lex(); // skip operation name
4945 } else if (!parseExpr(Op.Id)) {
4946 return false;
4949 if (trySkipToken(AsmToken::Comma)) {
4950 Stream.IsDefined = true;
4951 if (!parseExpr(Stream.Id))
4952 return false;
4956 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
4959 bool
4960 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
4961 const OperandInfoTy &Op,
4962 const OperandInfoTy &Stream,
4963 const SMLoc S) {
4964 using namespace llvm::AMDGPU::SendMsg;
4966 // Validation strictness depends on whether the message is specified
4967 // in a symbolic or in a numeric form. In the latter case
4968 // only the encoding possibility is checked.
4969 bool Strict = Msg.IsSymbolic;
4971 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
4972 Error(S, "invalid message id");
4973 return false;
4974 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
4975 Error(S, Op.IsDefined ?
4976 "message does not support operations" :
4977 "missing message operation");
4978 return false;
4979 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
4980 Error(S, "invalid operation id");
4981 return false;
4982 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
4983 Error(S, "message operation does not support streams");
4984 return false;
4985 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
4986 Error(S, "invalid message stream id");
4987 return false;
4989 return true;
4992 OperandMatchResultTy
4993 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4994 using namespace llvm::AMDGPU::SendMsg;
4996 int64_t ImmVal = 0;
4997 SMLoc Loc = getLoc();
4999 // If the parse failed, do not return an error code
5000 // to avoid excessive error messages.
5001 if (trySkipId("sendmsg", AsmToken::LParen)) {
5002 OperandInfoTy Msg(ID_UNKNOWN_);
5003 OperandInfoTy Op(OP_NONE_);
5004 OperandInfoTy Stream(STREAM_ID_NONE_);
5005 if (parseSendMsgBody(Msg, Op, Stream) &&
5006 validateSendMsg(Msg, Op, Stream, Loc)) {
5007 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5009 } else if (parseExpr(ImmVal)) {
5010 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5011 Error(Loc, "invalid immediate: only 16-bit values are legal");
5014 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5015 return MatchOperand_Success;
5018 bool AMDGPUOperand::isSendMsg() const {
5019 return isImmTy(ImmTySendMsg);
5022 //===----------------------------------------------------------------------===//
5023 // v_interp
5024 //===----------------------------------------------------------------------===//
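// Interpolation operands parsed here, e.g. (illustrative):
//   v_interp_p1_f32 v0, v1, attr0.x
// Slot names map p10/p20/p0 to 0/1/2; channels .x/.y/.z/.w map to 0-3.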
5026 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5027 if (getLexer().getKind() != AsmToken::Identifier)
5028 return MatchOperand_NoMatch;
5030 StringRef Str = Parser.getTok().getString();
5031 int Slot = StringSwitch<int>(Str)
5032 .Case("p10", 0)
5033 .Case("p20", 1)
5034 .Case("p0", 2)
5035 .Default(-1);
5037 SMLoc S = Parser.getTok().getLoc();
5038 if (Slot == -1)
5039 return MatchOperand_ParseFail;
5041 Parser.Lex();
5042 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5043 AMDGPUOperand::ImmTyInterpSlot));
5044 return MatchOperand_Success;
5047 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5048 if (getLexer().getKind() != AsmToken::Identifier)
5049 return MatchOperand_NoMatch;
5051 StringRef Str = Parser.getTok().getString();
5052 if (!Str.startswith("attr"))
5053 return MatchOperand_NoMatch;
5055 StringRef Chan = Str.take_back(2);
5056 int AttrChan = StringSwitch<int>(Chan)
5057 .Case(".x", 0)
5058 .Case(".y", 1)
5059 .Case(".z", 2)
5060 .Case(".w", 3)
5061 .Default(-1);
5062 if (AttrChan == -1)
5063 return MatchOperand_ParseFail;
5065 Str = Str.drop_back(2).drop_front(4);
5067 uint8_t Attr;
5068 if (Str.getAsInteger(10, Attr))
5069 return MatchOperand_ParseFail;
5071 SMLoc S = Parser.getTok().getLoc();
5072 Parser.Lex();
5073 if (Attr > 63) {
5074 Error(S, "out of bounds attr");
5075 return MatchOperand_Success;
5078 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5080 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5081 AMDGPUOperand::ImmTyInterpAttr));
5082 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5083 AMDGPUOperand::ImmTyAttrChan));
5084 return MatchOperand_Success;
5087 //===----------------------------------------------------------------------===//
5088 // exp
5089 //===----------------------------------------------------------------------===//
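// Export target names are translated to hardware target indices below, e.g.
// mrt0-mrt7 -> 0-7, mrtz -> 8, null -> 9, pos0-pos4 -> 12-16 (pos4 on GFX10
// only), prim -> 20 (GFX10 only), param0-param31 -> 32-63.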
5091 void AMDGPUAsmParser::errorExpTgt() {
5092 Error(Parser.getTok().getLoc(), "invalid exp target");
5095 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5096 uint8_t &Val) {
5097 if (Str == "null") {
5098 Val = 9;
5099 return MatchOperand_Success;
5102 if (Str.startswith("mrt")) {
5103 Str = Str.drop_front(3);
5104 if (Str == "z") { // == mrtz
5105 Val = 8;
5106 return MatchOperand_Success;
5109 if (Str.getAsInteger(10, Val))
5110 return MatchOperand_ParseFail;
5112 if (Val > 7)
5113 errorExpTgt();
5115 return MatchOperand_Success;
5118 if (Str.startswith("pos")) {
5119 Str = Str.drop_front(3);
5120 if (Str.getAsInteger(10, Val))
5121 return MatchOperand_ParseFail;
5123 if (Val > 4 || (Val == 4 && !isGFX10()))
5124 errorExpTgt();
5126 Val += 12;
5127 return MatchOperand_Success;
5130 if (isGFX10() && Str == "prim") {
5131 Val = 20;
5132 return MatchOperand_Success;
5135 if (Str.startswith("param")) {
5136 Str = Str.drop_front(5);
5137 if (Str.getAsInteger(10, Val))
5138 return MatchOperand_ParseFail;
5140 if (Val >= 32)
5141 errorExpTgt();
5143 Val += 32;
5144 return MatchOperand_Success;
5147 if (Str.startswith("invalid_target_")) {
5148 Str = Str.drop_front(15);
5149 if (Str.getAsInteger(10, Val))
5150 return MatchOperand_ParseFail;
5152 errorExpTgt();
5153 return MatchOperand_Success;
5156 return MatchOperand_NoMatch;
5159 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5160 uint8_t Val;
5161 StringRef Str = Parser.getTok().getString();
5163 auto Res = parseExpTgtImpl(Str, Val);
5164 if (Res != MatchOperand_Success)
5165 return Res;
5167 SMLoc S = Parser.getTok().getLoc();
5168 Parser.Lex();
5170 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5171 AMDGPUOperand::ImmTyExpTgt));
5172 return MatchOperand_Success;
5175 //===----------------------------------------------------------------------===//
5176 // parser helpers
5177 //===----------------------------------------------------------------------===//
5179 bool
5180 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5181 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5184 bool
5185 AMDGPUAsmParser::isId(const StringRef Id) const {
5186 return isId(getToken(), Id);
5189 bool
5190 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5191 return getTokenKind() == Kind;
5194 bool
5195 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5196 if (isId(Id)) {
5197 lex();
5198 return true;
5200 return false;
5203 bool
5204 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5205 if (isId(Id) && peekToken().is(Kind)) {
5206 lex();
5207 lex();
5208 return true;
5210 return false;
5213 bool
5214 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5215 if (isToken(Kind)) {
5216 lex();
5217 return true;
5219 return false;
5222 bool
5223 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5224 const StringRef ErrMsg) {
5225 if (!trySkipToken(Kind)) {
5226 Error(getLoc(), ErrMsg);
5227 return false;
5229 return true;
5232 bool
5233 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5234 return !getParser().parseAbsoluteExpression(Imm);
5237 bool
5238 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5239 SMLoc S = getLoc();
5241 const MCExpr *Expr;
5242 if (Parser.parseExpression(Expr))
5243 return false;
5245 int64_t IntVal;
5246 if (Expr->evaluateAsAbsolute(IntVal)) {
5247 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5248 } else {
5249 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5251 return true;
5254 bool
5255 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5256 if (isToken(AsmToken::String)) {
5257 Val = getToken().getStringContents();
5258 lex();
5259 return true;
5260 } else {
5261 Error(getLoc(), ErrMsg);
5262 return false;
5266 AsmToken
5267 AMDGPUAsmParser::getToken() const {
5268 return Parser.getTok();
5271 AsmToken
5272 AMDGPUAsmParser::peekToken() {
5273 return getLexer().peekTok();
5276 void
5277 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5278 auto TokCount = getLexer().peekTokens(Tokens);
5280 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5281 Tokens[Idx] = AsmToken(AsmToken::Error, "");
5284 AsmToken::TokenKind
5285 AMDGPUAsmParser::getTokenKind() const {
5286 return getLexer().getKind();
5289 SMLoc
5290 AMDGPUAsmParser::getLoc() const {
5291 return getToken().getLoc();
5294 StringRef
5295 AMDGPUAsmParser::getTokenStr() const {
5296 return getToken().getString();
5299 void
5300 AMDGPUAsmParser::lex() {
5301 Parser.Lex();
5304 //===----------------------------------------------------------------------===//
5305 // swizzle
5306 //===----------------------------------------------------------------------===//
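// The swizzle "offset" operand is either a raw 16-bit value or a macro, e.g.
// (illustrative):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:0x8001
// Modes other than QUAD_PERM are encoded through encodeBitmaskPerm below.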
5308 LLVM_READNONE
5309 static unsigned
5310 encodeBitmaskPerm(const unsigned AndMask,
5311 const unsigned OrMask,
5312 const unsigned XorMask) {
5313 using namespace llvm::AMDGPU::Swizzle;
5315 return BITMASK_PERM_ENC |
5316 (AndMask << BITMASK_AND_SHIFT) |
5317 (OrMask << BITMASK_OR_SHIFT) |
5318 (XorMask << BITMASK_XOR_SHIFT);
5321 bool
5322 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5323 const unsigned MinVal,
5324 const unsigned MaxVal,
5325 const StringRef ErrMsg) {
5326 for (unsigned i = 0; i < OpNum; ++i) {
5327 if (!skipToken(AsmToken::Comma, "expected a comma")){
5328 return false;
5330 SMLoc ExprLoc = Parser.getTok().getLoc();
5331 if (!parseExpr(Op[i])) {
5332 return false;
5334 if (Op[i] < MinVal || Op[i] > MaxVal) {
5335 Error(ExprLoc, ErrMsg);
5336 return false;
5340 return true;
5343 bool
5344 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5345 using namespace llvm::AMDGPU::Swizzle;
5347 int64_t Lane[LANE_NUM];
5348 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5349 "expected a 2-bit lane id")) {
5350 Imm = QUAD_PERM_ENC;
5351 for (unsigned I = 0; I < LANE_NUM; ++I) {
5352 Imm |= Lane[I] << (LANE_SHIFT * I);
5354 return true;
5356 return false;
5359 bool
5360 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5361 using namespace llvm::AMDGPU::Swizzle;
5363 SMLoc S = Parser.getTok().getLoc();
5364 int64_t GroupSize;
5365 int64_t LaneIdx;
5367 if (!parseSwizzleOperands(1, &GroupSize,
5368 2, 32,
5369 "group size must be in the interval [2,32]")) {
5370 return false;
5372 if (!isPowerOf2_64(GroupSize)) {
5373 Error(S, "group size must be a power of two");
5374 return false;
5376 if (parseSwizzleOperands(1, &LaneIdx,
5377 0, GroupSize - 1,
5378 "lane id must be in the interval [0,group size - 1]")) {
5379 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5380 return true;
5382 return false;
5385 bool
5386 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5387 using namespace llvm::AMDGPU::Swizzle;
5389 SMLoc S = Parser.getTok().getLoc();
5390 int64_t GroupSize;
5392 if (!parseSwizzleOperands(1, &GroupSize,
5393 2, 32, "group size must be in the interval [2,32]")) {
5394 return false;
5396 if (!isPowerOf2_64(GroupSize)) {
5397 Error(S, "group size must be a power of two");
5398 return false;
5401 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5402 return true;
5405 bool
5406 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5407 using namespace llvm::AMDGPU::Swizzle;
5409 SMLoc S = Parser.getTok().getLoc();
5410 int64_t GroupSize;
5412 if (!parseSwizzleOperands(1, &GroupSize,
5413 1, 16, "group size must be in the interval [1,16]")) {
5414 return false;
5416 if (!isPowerOf2_64(GroupSize)) {
5417 Error(S, "group size must be a power of two");
5418 return false;
5421 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5422 return true;
5425 bool
5426 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5427 using namespace llvm::AMDGPU::Swizzle;
5429 if (!skipToken(AsmToken::Comma, "expected a comma")) {
5430 return false;
5433 StringRef Ctl;
5434 SMLoc StrLoc = Parser.getTok().getLoc();
5435 if (!parseString(Ctl)) {
5436 return false;
5438 if (Ctl.size() != BITMASK_WIDTH) {
5439 Error(StrLoc, "expected a 5-character mask");
5440 return false;
5443 unsigned AndMask = 0;
5444 unsigned OrMask = 0;
5445 unsigned XorMask = 0;
5447 for (size_t i = 0; i < Ctl.size(); ++i) {
5448 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5449 switch(Ctl[i]) {
5450 default:
5451 Error(StrLoc, "invalid mask");
5452 return false;
5453 case '0':
5454 break;
5455 case '1':
5456 OrMask |= Mask;
5457 break;
5458 case 'p':
5459 AndMask |= Mask;
5460 break;
5461 case 'i':
5462 AndMask |= Mask;
5463 XorMask |= Mask;
5464 break;
5468 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5469 return true;
5472 bool
5473 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5475 SMLoc OffsetLoc = Parser.getTok().getLoc();
5477 if (!parseExpr(Imm)) {
5478 return false;
5480 if (!isUInt<16>(Imm)) {
5481 Error(OffsetLoc, "expected a 16-bit offset");
5482 return false;
5484 return true;
5487 bool
5488 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5489 using namespace llvm::AMDGPU::Swizzle;
5491 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5493 SMLoc ModeLoc = Parser.getTok().getLoc();
5494 bool Ok = false;
5496 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5497 Ok = parseSwizzleQuadPerm(Imm);
5498 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5499 Ok = parseSwizzleBitmaskPerm(Imm);
5500 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5501 Ok = parseSwizzleBroadcast(Imm);
5502 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5503 Ok = parseSwizzleSwap(Imm);
5504 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5505 Ok = parseSwizzleReverse(Imm);
5506 } else {
5507 Error(ModeLoc, "expected a swizzle mode");
5510 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5513 return false;
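// A rough summary of the swizzle offset syntax handled below (illustrative):
//   offset:0xFFFF                          - raw 16-bit swizzle offset
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)  - 2-bit lane selects within groups of 4
//   offset:swizzle(BITMASK_PERM, "01pi0")  - per-bit and/or/xor of the lane id
//   offset:swizzle(BROADCAST, 2, 1)        - broadcast lane 1 within groups of 2
//   offset:swizzle(SWAP, 2)                - swap neighbouring groups of 2 lanes
//   offset:swizzle(REVERSE, 4)             - reverse lanes within groups of 4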
5516 OperandMatchResultTy
5517 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5518 SMLoc S = Parser.getTok().getLoc();
5519 int64_t Imm = 0;
5521 if (trySkipId("offset")) {
5523 bool Ok = false;
5524 if (skipToken(AsmToken::Colon, "expected a colon")) {
5525 if (trySkipId("swizzle")) {
5526 Ok = parseSwizzleMacro(Imm);
5527 } else {
5528 Ok = parseSwizzleOffset(Imm);
5532 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5534 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5535 } else {
5536 // Swizzle "offset" operand is optional.
5537 // If it is omitted, try parsing other optional operands.
5538 return parseOptionalOpr(Operands);
5542 bool
5543 AMDGPUOperand::isSwizzle() const {
5544 return isImmTy(ImmTySwizzle);
5547 //===----------------------------------------------------------------------===//
5548 // VGPR Index Mode
5549 //===----------------------------------------------------------------------===//
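// Forms accepted by parseGPRIdxMode below (illustrative): either a symbolic
// list such as gpr_idx(SRC0,SRC1,SRC2,DST), or a plain 4-bit immediate giving
// the same mode mask directly.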
5551 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5553 using namespace llvm::AMDGPU::VGPRIndexMode;
5555 if (trySkipToken(AsmToken::RParen)) {
5556 return OFF;
5559 int64_t Imm = 0;
5561 while (true) {
5562 unsigned Mode = 0;
5563 SMLoc S = Parser.getTok().getLoc();
5565 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5566 if (trySkipId(IdSymbolic[ModeId])) {
5567 Mode = 1 << ModeId;
5568 break;
5572 if (Mode == 0) {
5573 Error(S, (Imm == 0)?
5574 "expected a VGPR index mode or a closing parenthesis" :
5575 "expected a VGPR index mode");
5576 break;
5579 if (Imm & Mode) {
5580 Error(S, "duplicate VGPR index mode");
5581 break;
5583 Imm |= Mode;
5585 if (trySkipToken(AsmToken::RParen))
5586 break;
5587 if (!skipToken(AsmToken::Comma,
5588 "expected a comma or a closing parenthesis"))
5589 break;
5592 return Imm;
5595 OperandMatchResultTy
5596 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5598 int64_t Imm = 0;
5599 SMLoc S = Parser.getTok().getLoc();
5601 if (getLexer().getKind() == AsmToken::Identifier &&
5602 Parser.getTok().getString() == "gpr_idx" &&
5603 getLexer().peekTok().is(AsmToken::LParen)) {
5605 Parser.Lex();
5606 Parser.Lex();
5608 // If parsing failed, trigger an error but do not return an error code,
5609 // to avoid excessive error messages.
5610 Imm = parseGPRIdxMacro();
5612 } else {
5613 if (getParser().parseAbsoluteExpression(Imm))
5614 return MatchOperand_NoMatch;
5615 if (Imm < 0 || !isUInt<4>(Imm)) {
5616 Error(S, "invalid immediate: only 4-bit values are legal");
5620 Operands.push_back(
5621 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5622 return MatchOperand_Success;
5625 bool AMDGPUOperand::isGPRIdxMode() const {
5626 return isImmTy(ImmTyGprIdxMode);
5629 //===----------------------------------------------------------------------===//
5630 // sopp branch targets
5631 //===----------------------------------------------------------------------===//
5633 OperandMatchResultTy
5634 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5636 // Make sure we are not parsing something
5637 // that looks like a label or an expression but is not.
5638 // This will improve error messages.
5639 if (isRegister() || isModifier())
5640 return MatchOperand_NoMatch;
5642 if (parseExpr(Operands)) {
5644 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5645 assert(Opr.isImm() || Opr.isExpr());
5646 SMLoc Loc = Opr.getStartLoc();
5648 // Currently we do not support arbitrary expressions as branch targets.
5649 // Only labels and absolute expressions are accepted.
5650 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5651 Error(Loc, "expected an absolute expression or a label");
5652 } else if (Opr.isImm() && !Opr.isS16Imm()) {
5653 Error(Loc, "expected a 16-bit signed jump offset");
5657 return MatchOperand_Success; // avoid excessive error messages
5660 //===----------------------------------------------------------------------===//
5661 // Boolean holding registers
5662 //===----------------------------------------------------------------------===//
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5666 return parseReg(Operands);
5669 //===----------------------------------------------------------------------===//
5670 // mubuf
5671 //===----------------------------------------------------------------------===//
5673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5677 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5678 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5681 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5682 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5685 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5686 const OperandVector &Operands,
5687 bool IsAtomic,
5688 bool IsAtomicReturn,
5689 bool IsLds) {
5690 bool IsLdsOpcode = IsLds;
5691 bool HasLdsModifier = false;
5692 OptionalImmIndexMap OptionalIdx;
5693 assert(IsAtomicReturn ? IsAtomic : true);
5694 unsigned FirstOperandIdx = 1;
5696 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5697 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5699 // Add the register arguments
5700 if (Op.isReg()) {
5701 Op.addRegOperands(Inst, 1);
5702 // Insert a tied src for atomic return dst.
5703 // This cannot be postponed as subsequent calls to
5704 // addImmOperands rely on the correct number of MC operands.
5705 if (IsAtomicReturn && i == FirstOperandIdx)
5706 Op.addRegOperands(Inst, 1);
5707 continue;
5710 // Handle the case where soffset is an immediate
5711 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5712 Op.addImmOperands(Inst, 1);
5713 continue;
5716 HasLdsModifier |= Op.isLDS();
5718 // Handle tokens like 'offen' which are sometimes hard-coded into the
5719 // asm string. There are no MCInst operands for these.
5720 if (Op.isToken()) {
5721 continue;
5723 assert(Op.isImm());
5725 // Handle optional arguments
5726 OptionalIdx[Op.getImmTy()] = i;
5729 // This is a workaround for an llvm quirk which may result in an
5730 // incorrect instruction selection. Lds and non-lds versions of
5731 // MUBUF instructions are identical except that lds versions
5732 // have a mandatory 'lds' modifier. However, this modifier follows
5733 // optional modifiers, and the llvm asm matcher regards this 'lds'
5734 // modifier as an optional one. As a result, an lds version
5735 // of the opcode may be selected even if it has no 'lds' modifier.
5736 if (IsLdsOpcode && !HasLdsModifier) {
5737 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5738 if (NoLdsOpcode != -1) { // Got lds version - correct it.
5739 Inst.setOpcode(NoLdsOpcode);
5740 IsLdsOpcode = false;
5744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5745 if (!IsAtomic) { // glc is hard-coded.
5746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5748 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5750 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5751 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5754 if (isGFX10())
5755 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5758 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5759 OptionalImmIndexMap OptionalIdx;
5761 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5762 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5764 // Add the register arguments
5765 if (Op.isReg()) {
5766 Op.addRegOperands(Inst, 1);
5767 continue;
5770 // Handle the case where soffset is an immediate
5771 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5772 Op.addImmOperands(Inst, 1);
5773 continue;
5776 // Handle tokens like 'offen' which are sometimes hard-coded into the
5777 // asm string. There are no MCInst operands for these.
5778 if (Op.isToken()) {
5779 continue;
5781 assert(Op.isImm());
5783 // Handle optional arguments
5784 OptionalIdx[Op.getImmTy()] = i;
5787 addOptionalImmOperand(Inst, Operands, OptionalIdx,
5788 AMDGPUOperand::ImmTyOffset);
5789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5790 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5794 if (isGFX10())
5795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5798 //===----------------------------------------------------------------------===//
5799 // mimg
5800 //===----------------------------------------------------------------------===//
5802 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5803 bool IsAtomic) {
5804 unsigned I = 1;
5805 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5806 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5807 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5810 if (IsAtomic) {
5811 // Add src, same as dst
5812 assert(Desc.getNumDefs() == 1);
5813 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5816 OptionalImmIndexMap OptionalIdx;
5818 for (unsigned E = Operands.size(); I != E; ++I) {
5819 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5821 // Add the register arguments
5822 if (Op.isReg()) {
5823 Op.addRegOperands(Inst, 1);
5824 } else if (Op.isImmModifier()) {
5825 OptionalIdx[Op.getImmTy()] = I;
5826 } else if (!Op.isToken()) {
5827 llvm_unreachable("unexpected operand type");
5831 bool IsGFX10 = isGFX10();
5833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5834 if (IsGFX10)
5835 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5837 if (IsGFX10)
5838 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5844 if (!IsGFX10)
5845 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5846 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5849 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5850 cvtMIMG(Inst, Operands, true);
5853 //===----------------------------------------------------------------------===//
5854 // smrd
5855 //===----------------------------------------------------------------------===//
5857 bool AMDGPUOperand::isSMRDOffset8() const {
5858 return isImm() && isUInt<8>(getImm());
5861 bool AMDGPUOperand::isSMRDOffset20() const {
5862 return isImm() && isUInt<20>(getImm());
5865 bool AMDGPUOperand::isSMRDLiteralOffset() const {
5866 // 32-bit literals are only supported on CI and we only want to use them
5867 // when the offset does not fit in 8 bits.
5868 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
5871 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
5872 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
5884 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
5887 //===----------------------------------------------------------------------===//
5888 // vop3
5889 //===----------------------------------------------------------------------===//
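// The hardware omod field encodes an output modifier: 0 = none, 1 = *2,
// 2 = *4, 3 = /2. The helpers below map the values written after 'mul:' and
// 'div:' in the assembly onto that encoding (e.g. mul:4 -> 2, div:2 -> 3).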
5891 static bool ConvertOmodMul(int64_t &Mul) {
5892 if (Mul != 1 && Mul != 2 && Mul != 4)
5893 return false;
5895 Mul >>= 1;
5896 return true;
5899 static bool ConvertOmodDiv(int64_t &Div) {
5900 if (Div == 1) {
5901 Div = 0;
5902 return true;
5905 if (Div == 2) {
5906 Div = 3;
5907 return true;
5910 return false;
5913 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
5914 if (BoundCtrl == 0) {
5915 BoundCtrl = 1;
5916 return true;
5919 if (BoundCtrl == -1) {
5920 BoundCtrl = 0;
5921 return true;
5924 return false;
5927 // Note: the order in this table matches the order of operands in AsmString.
5928 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
5929 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
5930 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
5931 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
5932 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
5933 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
5934 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
5935 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
5936 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
5937 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
5938 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
5939 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
5940 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
5941 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
5942 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
5943 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5944 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
5945 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
5946 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
5947 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
5948 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
5949 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5950 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
5951 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
5952 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
5953 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
5954 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
5955 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
5956 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
5957 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
5958 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
5959 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
5960 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
5961 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
5962 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
5963 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
5964 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
5965 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
5966 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
5967 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
5968 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
5969 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
5970 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
5971 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
5974 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
5975 unsigned size = Operands.size();
5976 assert(size > 0);
5978 OperandMatchResultTy res = parseOptionalOpr(Operands);
5980 // This is a hack to enable hardcoded mandatory operands which follow
5981 // optional operands.
5983 // The current design assumes that all operands after the first optional operand
5984 // are also optional. However, the implementation of some instructions violates
5985 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
5987 // To alleviate this problem, we have to (implicitly) parse extra operands to
5988 // make sure the autogenerated parser of custom operands never hits a hardcoded
5989 // mandatory operand.
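// (An illustrative case: returning flat/global atomics such as
//  "flat_atomic_swap v0, v[1:2], v2 glc", where the trailing 'glc' is part of
//  the asm string rather than a parsed optional operand.)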
5991 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
5993 // We have parsed the first optional operand.
5994 // Parse as many operands as necessary to skip all mandatory operands.
5996 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
5997 if (res != MatchOperand_Success ||
5998 getLexer().is(AsmToken::EndOfStatement)) break;
5999 if (getLexer().is(AsmToken::Comma)) Parser.Lex();
6000 res = parseOptionalOpr(Operands);
6004 return res;
6007 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6008 OperandMatchResultTy res;
6009 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6010 // try to parse any optional operand here
6011 if (Op.IsBit) {
6012 res = parseNamedBit(Op.Name, Operands, Op.Type);
6013 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6014 res = parseOModOperand(Operands);
6015 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6016 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6017 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6018 res = parseSDWASel(Operands, Op.Name, Op.Type);
6019 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6020 res = parseSDWADstUnused(Operands);
6021 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6022 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6023 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6024 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6025 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6026 Op.ConvertResult);
6027 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6028 res = parseDim(Operands);
6029 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6030 res = parseDfmtNfmt(Operands);
6031 } else {
6032 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6034 if (res != MatchOperand_NoMatch) {
6035 return res;
6038 return MatchOperand_NoMatch;
6041 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6042 StringRef Name = Parser.getTok().getString();
6043 if (Name == "mul") {
6044 return parseIntWithPrefix("mul", Operands,
6045 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6048 if (Name == "div") {
6049 return parseIntWithPrefix("div", Operands,
6050 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6053 return MatchOperand_NoMatch;
6056 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6057 cvtVOP3P(Inst, Operands);
6059 int Opc = Inst.getOpcode();
6061 int SrcNum;
6062 const int Ops[] = { AMDGPU::OpName::src0,
6063 AMDGPU::OpName::src1,
6064 AMDGPU::OpName::src2 };
6065 for (SrcNum = 0;
6066 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6067 ++SrcNum);
6068 assert(SrcNum > 0);
6070 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6071 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
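// The op_sel bit for the destination is bit SrcNum of the op_sel operand;
// it has no modifiers operand of its own, so it is carried in src0_modifiers
// as SISrcMods::DST_OP_SEL.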
6073 if ((OpSel & (1 << SrcNum)) != 0) {
6074 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6075 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6076 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6080 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6081 // 1. This operand is an input-modifiers operand
6082 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6083 // 2. This is not the last operand
6084 && Desc.NumOperands > (OpNum + 1)
6085 // 3. The next operand has a register class
6086 && Desc.OpInfo[OpNum + 1].RegClass != -1
6087 // 4. The next operand is not tied to any other operand
6088 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6091 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6093 OptionalImmIndexMap OptionalIdx;
6094 unsigned Opc = Inst.getOpcode();
6096 unsigned I = 1;
6097 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6098 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6099 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6102 for (unsigned E = Operands.size(); I != E; ++I) {
6103 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6104 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6105 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6106 } else if (Op.isInterpSlot() ||
6107 Op.isInterpAttr() ||
6108 Op.isAttrChan()) {
6109 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6110 } else if (Op.isImmModifier()) {
6111 OptionalIdx[Op.getImmTy()] = I;
6112 } else {
6113 llvm_unreachable("unhandled operand type");
6117 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6121 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6125 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6130 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6131 OptionalImmIndexMap &OptionalIdx) {
6132 unsigned Opc = Inst.getOpcode();
6134 unsigned I = 1;
6135 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6136 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6137 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6140 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6141 // This instruction has src modifiers
6142 for (unsigned E = Operands.size(); I != E; ++I) {
6143 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6144 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6145 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6146 } else if (Op.isImmModifier()) {
6147 OptionalIdx[Op.getImmTy()] = I;
6148 } else if (Op.isRegOrImm()) {
6149 Op.addRegOrImmOperands(Inst, 1);
6150 } else {
6151 llvm_unreachable("unhandled operand type");
6154 } else {
6155 // No src modifiers
6156 for (unsigned E = Operands.size(); I != E; ++I) {
6157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6158 if (Op.isMod()) {
6159 OptionalIdx[Op.getImmTy()] = I;
6160 } else {
6161 Op.addRegOrImmOperands(Inst, 1);
6166 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6170 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6174 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6175 // they have a src2 register operand that is tied to the dst operand.
6176 // We don't allow modifiers for this operand in the assembler, so
6177 // src2_modifiers should be 0.
6178 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6179 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6180 Opc == AMDGPU::V_MAC_F32_e64_vi ||
6181 Opc == AMDGPU::V_MAC_F16_e64_vi ||
6182 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6183 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6184 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6185 auto it = Inst.begin();
6186 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6187 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6188 ++it;
6189 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6193 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6194 OptionalImmIndexMap OptionalIdx;
6195 cvtVOP3(Inst, Operands, OptionalIdx);
6198 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6199 const OperandVector &Operands) {
6200 OptionalImmIndexMap OptIdx;
6201 const int Opc = Inst.getOpcode();
6202 const MCInstrDesc &Desc = MII.get(Opc);
6204 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6206 cvtVOP3(Inst, Operands, OptIdx);
6208 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6209 assert(!IsPacked);
6210 Inst.addOperand(Inst.getOperand(0));
6213 // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6214 // instruction, and then figure out where the modifiers actually belong.
6216 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6218 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6219 if (OpSelHiIdx != -1) {
6220 int DefaultVal = IsPacked ? -1 : 0;
6221 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6222 DefaultVal);
6225 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6226 if (NegLoIdx != -1) {
6227 assert(IsPacked);
6228 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6229 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6232 const int Ops[] = { AMDGPU::OpName::src0,
6233 AMDGPU::OpName::src1,
6234 AMDGPU::OpName::src2 };
6235 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6236 AMDGPU::OpName::src1_modifiers,
6237 AMDGPU::OpName::src2_modifiers };
6239 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6241 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6242 unsigned OpSelHi = 0;
6243 unsigned NegLo = 0;
6244 unsigned NegHi = 0;
6246 if (OpSelHiIdx != -1) {
6247 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6250 if (NegLoIdx != -1) {
6251 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6252 NegLo = Inst.getOperand(NegLoIdx).getImm();
6253 NegHi = Inst.getOperand(NegHiIdx).getImm();
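// Distribute bit J of the packed op_sel/op_sel_hi/neg_lo/neg_hi values into
// the per-source modifiers operand of source J.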
6256 for (int J = 0; J < 3; ++J) {
6257 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6258 if (OpIdx == -1)
6259 break;
6261 uint32_t ModVal = 0;
6263 if ((OpSel & (1 << J)) != 0)
6264 ModVal |= SISrcMods::OP_SEL_0;
6266 if ((OpSelHi & (1 << J)) != 0)
6267 ModVal |= SISrcMods::OP_SEL_1;
6269 if ((NegLo & (1 << J)) != 0)
6270 ModVal |= SISrcMods::NEG;
6272 if ((NegHi & (1 << J)) != 0)
6273 ModVal |= SISrcMods::NEG_HI;
6275 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6277 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6281 //===----------------------------------------------------------------------===//
6282 // dpp
6283 //===----------------------------------------------------------------------===//
6285 bool AMDGPUOperand::isDPP8() const {
6286 return isImmTy(ImmTyDPP8);
6289 bool AMDGPUOperand::isDPPCtrl() const {
6290 using namespace AMDGPU::DPP;
6292 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6293 if (result) {
6294 int64_t Imm = getImm();
6295 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6296 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6297 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6298 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6299 (Imm == DppCtrl::WAVE_SHL1) ||
6300 (Imm == DppCtrl::WAVE_ROL1) ||
6301 (Imm == DppCtrl::WAVE_SHR1) ||
6302 (Imm == DppCtrl::WAVE_ROR1) ||
6303 (Imm == DppCtrl::ROW_MIRROR) ||
6304 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6305 (Imm == DppCtrl::BCAST15) ||
6306 (Imm == DppCtrl::BCAST31) ||
6307 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6308 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6310 return false;
6313 //===----------------------------------------------------------------------===//
6314 // mAI
6315 //===----------------------------------------------------------------------===//
6317 bool AMDGPUOperand::isBLGP() const {
6318 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6321 bool AMDGPUOperand::isCBSZ() const {
6322 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6325 bool AMDGPUOperand::isABID() const {
6326 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6329 bool AMDGPUOperand::isS16Imm() const {
6330 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6333 bool AMDGPUOperand::isU16Imm() const {
6334 return isImm() && isUInt<16>(getImm());
6337 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6338 if (!isGFX10())
6339 return MatchOperand_NoMatch;
6341 SMLoc S = Parser.getTok().getLoc();
6343 if (getLexer().isNot(AsmToken::Identifier))
6344 return MatchOperand_NoMatch;
6345 if (getLexer().getTok().getString() != "dim")
6346 return MatchOperand_NoMatch;
6348 Parser.Lex();
6349 if (getLexer().isNot(AsmToken::Colon))
6350 return MatchOperand_ParseFail;
6352 Parser.Lex();
6354 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6355 // integer.
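// E.g. "dim:2D" lexes as the integer 2 followed by the identifier D, and the
// two pieces are glued back together below. The "SQ_RSRC_IMG_" spelling
// (e.g. "dim:SQ_RSRC_IMG_2D_ARRAY") is also accepted and has its prefix
// stripped.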
6356 std::string Token;
6357 if (getLexer().is(AsmToken::Integer)) {
6358 SMLoc Loc = getLexer().getTok().getEndLoc();
6359 Token = getLexer().getTok().getString();
6360 Parser.Lex();
6361 if (getLexer().getTok().getLoc() != Loc)
6362 return MatchOperand_ParseFail;
6364 if (getLexer().isNot(AsmToken::Identifier))
6365 return MatchOperand_ParseFail;
6366 Token += getLexer().getTok().getString();
6368 StringRef DimId = Token;
6369 if (DimId.startswith("SQ_RSRC_IMG_"))
6370 DimId = DimId.substr(12);
6372 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6373 if (!DimInfo)
6374 return MatchOperand_ParseFail;
6376 Parser.Lex();
6378 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6379 AMDGPUOperand::ImmTyDim));
6380 return MatchOperand_Success;
6383 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6384 SMLoc S = Parser.getTok().getLoc();
6385 StringRef Prefix;
6387 if (getLexer().getKind() == AsmToken::Identifier) {
6388 Prefix = Parser.getTok().getString();
6389 } else {
6390 return MatchOperand_NoMatch;
6393 if (Prefix != "dpp8")
6394 return parseDPPCtrl(Operands);
6395 if (!isGFX10())
6396 return MatchOperand_NoMatch;
6398 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
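// Each selector is packed into 3 bits, selector i at bits [3*i+2 : 3*i] of
// the immediate, so the identity dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688.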
6400 int64_t Sels[8];
6402 Parser.Lex();
6403 if (getLexer().isNot(AsmToken::Colon))
6404 return MatchOperand_ParseFail;
6406 Parser.Lex();
6407 if (getLexer().isNot(AsmToken::LBrac))
6408 return MatchOperand_ParseFail;
6410 Parser.Lex();
6411 if (getParser().parseAbsoluteExpression(Sels[0]))
6412 return MatchOperand_ParseFail;
6413 if (0 > Sels[0] || 7 < Sels[0])
6414 return MatchOperand_ParseFail;
6416 for (size_t i = 1; i < 8; ++i) {
6417 if (getLexer().isNot(AsmToken::Comma))
6418 return MatchOperand_ParseFail;
6420 Parser.Lex();
6421 if (getParser().parseAbsoluteExpression(Sels[i]))
6422 return MatchOperand_ParseFail;
6423 if (0 > Sels[i] || 7 < Sels[i])
6424 return MatchOperand_ParseFail;
6427 if (getLexer().isNot(AsmToken::RBrac))
6428 return MatchOperand_ParseFail;
6429 Parser.Lex();
6431 unsigned DPP8 = 0;
6432 for (size_t i = 0; i < 8; ++i)
6433 DPP8 |= (Sels[i] << (i * 3));
6435 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6436 return MatchOperand_Success;
6439 OperandMatchResultTy
6440 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6441 using namespace AMDGPU::DPP;
6443 SMLoc S = Parser.getTok().getLoc();
6444 StringRef Prefix;
6445 int64_t Int;
6447 if (getLexer().getKind() == AsmToken::Identifier) {
6448 Prefix = Parser.getTok().getString();
6449 } else {
6450 return MatchOperand_NoMatch;
6453 if (Prefix == "row_mirror") {
6454 Int = DppCtrl::ROW_MIRROR;
6455 Parser.Lex();
6456 } else if (Prefix == "row_half_mirror") {
6457 Int = DppCtrl::ROW_HALF_MIRROR;
6458 Parser.Lex();
6459 } else {
6460 // Check to prevent parseDPPCtrlOps from eating invalid tokens
6461 if (Prefix != "quad_perm"
6462 && Prefix != "row_shl"
6463 && Prefix != "row_shr"
6464 && Prefix != "row_ror"
6465 && Prefix != "wave_shl"
6466 && Prefix != "wave_rol"
6467 && Prefix != "wave_shr"
6468 && Prefix != "wave_ror"
6469 && Prefix != "row_bcast"
6470 && Prefix != "row_share"
6471 && Prefix != "row_xmask") {
6472 return MatchOperand_NoMatch;
6475 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6476 return MatchOperand_NoMatch;
6478 if (!isVI() && !isGFX9() &&
6479 (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6480 Prefix == "wave_rol" || Prefix == "wave_ror" ||
6481 Prefix == "row_bcast"))
6482 return MatchOperand_NoMatch;
6484 Parser.Lex();
6485 if (getLexer().isNot(AsmToken::Colon))
6486 return MatchOperand_ParseFail;
6488 if (Prefix == "quad_perm") {
6489 // quad_perm:[%d,%d,%d,%d]
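// Each lane select is packed into 2 bits, select i at bits [2*i+1 : 2*i],
// so the identity quad_perm:[0,1,2,3] encodes as 0xE4.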
6490 Parser.Lex();
6491 if (getLexer().isNot(AsmToken::LBrac))
6492 return MatchOperand_ParseFail;
6493 Parser.Lex();
6495 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6496 return MatchOperand_ParseFail;
6498 for (int i = 0; i < 3; ++i) {
6499 if (getLexer().isNot(AsmToken::Comma))
6500 return MatchOperand_ParseFail;
6501 Parser.Lex();
6503 int64_t Temp;
6504 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6505 return MatchOperand_ParseFail;
6506 const int shift = i*2 + 2;
6507 Int += (Temp << shift);
6510 if (getLexer().isNot(AsmToken::RBrac))
6511 return MatchOperand_ParseFail;
6512 Parser.Lex();
6513 } else {
6514 // sel:%d
6515 Parser.Lex();
6516 if (getParser().parseAbsoluteExpression(Int))
6517 return MatchOperand_ParseFail;
6519 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6520 Int |= DppCtrl::ROW_SHL0;
6521 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6522 Int |= DppCtrl::ROW_SHR0;
6523 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6524 Int |= DppCtrl::ROW_ROR0;
6525 } else if (Prefix == "wave_shl" && 1 == Int) {
6526 Int = DppCtrl::WAVE_SHL1;
6527 } else if (Prefix == "wave_rol" && 1 == Int) {
6528 Int = DppCtrl::WAVE_ROL1;
6529 } else if (Prefix == "wave_shr" && 1 == Int) {
6530 Int = DppCtrl::WAVE_SHR1;
6531 } else if (Prefix == "wave_ror" && 1 == Int) {
6532 Int = DppCtrl::WAVE_ROR1;
6533 } else if (Prefix == "row_bcast") {
6534 if (Int == 15) {
6535 Int = DppCtrl::BCAST15;
6536 } else if (Int == 31) {
6537 Int = DppCtrl::BCAST31;
6538 } else {
6539 return MatchOperand_ParseFail;
6541 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6542 Int |= DppCtrl::ROW_SHARE_FIRST;
6543 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6544 Int |= DppCtrl::ROW_XMASK_FIRST;
6545 } else {
6546 return MatchOperand_ParseFail;
6551 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6552 return MatchOperand_Success;
6555 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6556 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6559 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6560 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6563 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6564 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6567 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6568 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6571 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6572 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6575 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6576 OptionalImmIndexMap OptionalIdx;
6578 unsigned I = 1;
6579 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6580 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6581 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6584 int Fi = 0;
6585 for (unsigned E = Operands.size(); I != E; ++I) {
6586 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6587 MCOI::TIED_TO);
6588 if (TiedTo != -1) {
6589 assert((unsigned)TiedTo < Inst.getNumOperands());
6590 // Handle the tied 'old' or src2 operand for MAC instructions.
6591 Inst.addOperand(Inst.getOperand(TiedTo));
6593 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6594 // Add the register arguments
6595 if (Op.isReg() && validateVccOperand(Op.getReg())) {
6596 // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
6597 // Skip it.
6598 continue;
6601 if (IsDPP8) {
6602 if (Op.isDPP8()) {
6603 Op.addImmOperands(Inst, 1);
6604 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6605 Op.addRegWithFPInputModsOperands(Inst, 2);
6606 } else if (Op.isFI()) {
6607 Fi = Op.getImm();
6608 } else if (Op.isReg()) {
6609 Op.addRegOperands(Inst, 1);
6610 } else {
6611 llvm_unreachable("Invalid operand type");
6613 } else {
6614 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6615 Op.addRegWithFPInputModsOperands(Inst, 2);
6616 } else if (Op.isDPPCtrl()) {
6617 Op.addImmOperands(Inst, 1);
6618 } else if (Op.isImm()) {
6619 // Handle optional arguments
6620 OptionalIdx[Op.getImmTy()] = I;
6621 } else {
6622 llvm_unreachable("Invalid operand type");
6627 if (IsDPP8) {
6628 using namespace llvm::AMDGPU::DPP;
6629 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6630 } else {
6631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6632 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6633 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6634 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6640 //===----------------------------------------------------------------------===//
6641 // sdwa
6642 //===----------------------------------------------------------------------===//
6644 OperandMatchResultTy
6645 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6646 AMDGPUOperand::ImmTy Type) {
6647 using namespace llvm::AMDGPU::SDWA;
6649 SMLoc S = Parser.getTok().getLoc();
6650 StringRef Value;
6651 OperandMatchResultTy res;
6653 res = parseStringWithPrefix(Prefix, Value);
6654 if (res != MatchOperand_Success) {
6655 return res;
6658 int64_t Int;
6659 Int = StringSwitch<int64_t>(Value)
6660 .Case("BYTE_0", SdwaSel::BYTE_0)
6661 .Case("BYTE_1", SdwaSel::BYTE_1)
6662 .Case("BYTE_2", SdwaSel::BYTE_2)
6663 .Case("BYTE_3", SdwaSel::BYTE_3)
6664 .Case("WORD_0", SdwaSel::WORD_0)
6665 .Case("WORD_1", SdwaSel::WORD_1)
6666 .Case("DWORD", SdwaSel::DWORD)
6667 .Default(0xffffffff);
6668 Parser.Lex(); // eat last token
6670 if (Int == 0xffffffff) {
6671 return MatchOperand_ParseFail;
6674 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6675 return MatchOperand_Success;
6678 OperandMatchResultTy
6679 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6680 using namespace llvm::AMDGPU::SDWA;
6682 SMLoc S = Parser.getTok().getLoc();
6683 StringRef Value;
6684 OperandMatchResultTy res;
6686 res = parseStringWithPrefix("dst_unused", Value);
6687 if (res != MatchOperand_Success) {
6688 return res;
6691 int64_t Int;
6692 Int = StringSwitch<int64_t>(Value)
6693 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6694 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6695 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6696 .Default(0xffffffff);
6697 Parser.Lex(); // eat last token
6699 if (Int == 0xffffffff) {
6700 return MatchOperand_ParseFail;
6703 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6704 return MatchOperand_Success;
6707 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6708 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6711 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6712 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6715 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6716 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
6719 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6720 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6723 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6724 uint64_t BasicInstType, bool skipVcc) {
6725 using namespace llvm::AMDGPU::SDWA;
6727 OptionalImmIndexMap OptionalIdx;
6728 bool skippedVcc = false;
6730 unsigned I = 1;
6731 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6732 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6733 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6736 for (unsigned E = Operands.size(); I != E; ++I) {
6737 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6738 if (skipVcc && !skippedVcc && Op.isReg() &&
6739 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6740 // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
6741 // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6742 // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
6743 // Skip VCC only if we did not skip it on the previous iteration.
6744 if (BasicInstType == SIInstrFlags::VOP2 &&
6745 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
6746 skippedVcc = true;
6747 continue;
6748 } else if (BasicInstType == SIInstrFlags::VOPC &&
6749 Inst.getNumOperands() == 0) {
6750 skippedVcc = true;
6751 continue;
6754 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6755 Op.addRegOrImmWithInputModsOperands(Inst, 2);
6756 } else if (Op.isImm()) {
6757 // Handle optional arguments
6758 OptionalIdx[Op.getImmTy()] = I;
6759 } else {
6760 llvm_unreachable("Invalid operand type");
6762 skippedVcc = false;
6765 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6766 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6767 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6768 // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
6769 switch (BasicInstType) {
6770 case SIInstrFlags::VOP1:
6771 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6772 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6778 break;
6780 case SIInstrFlags::VOP2:
6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6782 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6789 break;
6791 case SIInstrFlags::VOPC:
6792 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6793 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6796 break;
6798 default:
6799 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6803 // Special case for v_mac_{f16, f32}:
6804 // it has a src2 register operand that is tied to the dst operand.
6805 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6806 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6807 auto it = Inst.begin();
6808 std::advance(
6809 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6810 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6814 //===----------------------------------------------------------------------===//
6815 // mAI
6816 //===----------------------------------------------------------------------===//
6818 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6819 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6822 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6823 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6826 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6827 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6830 /// Force static initialization.
6831 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6832 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6833 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6836 #define GET_REGISTER_MATCHER
6837 #define GET_MATCHER_IMPLEMENTATION
6838 #define GET_MNEMONIC_SPELL_CHECKER
6839 #include "AMDGPUGenAsmMatcher.inc"
6841 // This function should be defined after the auto-generated include so that
6842 // the MatchClassKind enum is defined.
6843 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6844 unsigned Kind) {
6845 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6846 // But MatchInstructionImpl() expects to see a token and fails to validate the
6847 // operand. This method checks whether we were given an immediate operand but
6848 // are expected to provide the corresponding token.
6849 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6850 switch (Kind) {
6851 case MCK_addr64:
6852 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6853 case MCK_gds:
6854 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6855 case MCK_lds:
6856 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6857 case MCK_glc:
6858 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6859 case MCK_idxen:
6860 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6861 case MCK_offen:
6862 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6863 case MCK_SSrcB32:
6864 // When operands have expression values, they will return true for isToken,
6865 // because it is not possible to distinguish between a token and an
6866 // expression at parse time. MatchInstructionImpl() will always try to
6867 // match an operand as a token when isToken returns true, and when the
6868 // name of the expression is not a valid token, the match will fail,
6869 // so we need to handle it here.
6870 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6871 case MCK_SSrcF32:
6872 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6873 case MCK_SoppBrTarget:
6874 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6875 case MCK_VReg32OrOff:
6876 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6877 case MCK_InterpSlot:
6878 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
6879 case MCK_Attr:
6880 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
6881 case MCK_AttrChan:
6882 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
6883 default:
6884 return Match_InvalidOperand;
6888 //===----------------------------------------------------------------------===//
6889 // endpgm
6890 //===----------------------------------------------------------------------===//
6892 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
6893 SMLoc S = Parser.getTok().getLoc();
6894 int64_t Imm = 0;
6896 if (!parseExpr(Imm)) {
6897 // The operand is optional, if not present default to 0
6898 Imm = 0;
6901 if (!isUInt<16>(Imm)) {
6902 Error(S, "expected a 16-bit value");
6903 return MatchOperand_ParseFail;
6906 Operands.push_back(
6907 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
6908 return MatchOperand_Success;
6911 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }