[Codegen] Alter the default promotion for saturating adds and subs
[llvm-complete.git] / lib / Target / AMDGPU / SIInstrInfo.h
bloba1a3962c4130097f3e7d76a2d71f0c2e634a5dd5
1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
17 #include "AMDGPUInstrInfo.h"
18 #include "SIDefines.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/Support/Compiler.h"
30 #include <cassert>
31 #include <cstdint>
33 #define GET_INSTRINFO_HEADER
34 #include "AMDGPUGenInstrInfo.inc"
36 namespace llvm {
38 class APInt;
39 class MachineDominatorTree;
40 class MachineRegisterInfo;
41 class RegScavenger;
42 class GCNSubtarget;
43 class TargetRegisterClass;
45 class SIInstrInfo final : public AMDGPUGenInstrInfo {
46 private:
47 const SIRegisterInfo RI;
48 const GCNSubtarget &ST;
50 // The inverse predicate should have the negative value.
51 enum BranchPredicate {
52 INVALID_BR = 0,
53 SCC_TRUE = 1,
54 SCC_FALSE = -1,
55 VCCNZ = 2,
56 VCCZ = -2,
57 EXECNZ = -3,
58 EXECZ = 3
61 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
63 static unsigned getBranchOpcode(BranchPredicate Cond);
64 static BranchPredicate getBranchPredicate(unsigned Opcode);
66 public:
67 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
68 MachineRegisterInfo &MRI,
69 MachineOperand &SuperReg,
70 const TargetRegisterClass *SuperRC,
71 unsigned SubIdx,
72 const TargetRegisterClass *SubRC) const;
73 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
74 MachineRegisterInfo &MRI,
75 MachineOperand &SuperReg,
76 const TargetRegisterClass *SuperRC,
77 unsigned SubIdx,
78 const TargetRegisterClass *SubRC) const;
79 private:
80 void swapOperands(MachineInstr &Inst) const;
82 bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
83 MachineDominatorTree *MDT = nullptr) const;
85 void lowerScalarAbs(SetVectorType &Worklist,
86 MachineInstr &Inst) const;
88 void lowerScalarXnor(SetVectorType &Worklist,
89 MachineInstr &Inst) const;
91 void splitScalarNotBinop(SetVectorType &Worklist,
92 MachineInstr &Inst,
93 unsigned Opcode) const;
95 void splitScalarBinOpN2(SetVectorType &Worklist,
96 MachineInstr &Inst,
97 unsigned Opcode) const;
99 void splitScalar64BitUnaryOp(SetVectorType &Worklist,
100 MachineInstr &Inst, unsigned Opcode) const;
102 void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
103 MachineDominatorTree *MDT = nullptr) const;
105 void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
106 unsigned Opcode,
107 MachineDominatorTree *MDT = nullptr) const;
109 void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
110 MachineDominatorTree *MDT = nullptr) const;
112 void splitScalar64BitBCNT(SetVectorType &Worklist,
113 MachineInstr &Inst) const;
114 void splitScalar64BitBFE(SetVectorType &Worklist,
115 MachineInstr &Inst) const;
116 void movePackToVALU(SetVectorType &Worklist,
117 MachineRegisterInfo &MRI,
118 MachineInstr &Inst) const;
120 void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
121 SetVectorType &Worklist) const;
123 void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
124 MachineInstr &SCCDefInst,
125 SetVectorType &Worklist) const;
127 const TargetRegisterClass *
128 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
130 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
131 const MachineInstr &MIb) const;
133 unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
135 protected:
136 bool swapSourceModifiers(MachineInstr &MI,
137 MachineOperand &Src0, unsigned Src0OpName,
138 MachineOperand &Src1, unsigned Src1OpName) const;
140 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
141 unsigned OpIdx0,
142 unsigned OpIdx1) const override;
144 public:
145 enum TargetOperandFlags {
146 MO_MASK = 0xf,
148 MO_NONE = 0,
149 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
150 MO_GOTPCREL = 1,
151 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
152 MO_GOTPCREL32 = 2,
153 MO_GOTPCREL32_LO = 2,
154 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
155 MO_GOTPCREL32_HI = 3,
156 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
157 MO_REL32 = 4,
158 MO_REL32_LO = 4,
159 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
160 MO_REL32_HI = 5,
162 MO_LONG_BRANCH_FORWARD = 6,
163 MO_LONG_BRANCH_BACKWARD = 7,
165 MO_ABS32_LO = 8,
166 MO_ABS32_HI = 9,
169 explicit SIInstrInfo(const GCNSubtarget &ST);
171 const SIRegisterInfo &getRegisterInfo() const {
172 return RI;
175 bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
176 AliasAnalysis *AA) const override;
178 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
179 int64_t &Offset1,
180 int64_t &Offset2) const override;
182 bool getMemOperandWithOffset(const MachineInstr &LdSt,
183 const MachineOperand *&BaseOp,
184 int64_t &Offset,
185 const TargetRegisterInfo *TRI) const final;
187 bool shouldClusterMemOps(const MachineOperand &BaseOp1,
188 const MachineOperand &BaseOp2,
189 unsigned NumLoads) const override;
191 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
192 int64_t Offset1, unsigned NumLoads) const override;
194 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
195 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
196 bool KillSrc) const override;
198 unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
199 RegScavenger *RS, unsigned TmpReg,
200 unsigned Offset, unsigned Size) const;
202 void materializeImmediate(MachineBasicBlock &MBB,
203 MachineBasicBlock::iterator MI,
204 const DebugLoc &DL,
205 unsigned DestReg,
206 int64_t Value) const;
208 const TargetRegisterClass *getPreferredSelectRegClass(
209 unsigned Size) const;
211 unsigned insertNE(MachineBasicBlock *MBB,
212 MachineBasicBlock::iterator I, const DebugLoc &DL,
213 unsigned SrcReg, int Value) const;
215 unsigned insertEQ(MachineBasicBlock *MBB,
216 MachineBasicBlock::iterator I, const DebugLoc &DL,
217 unsigned SrcReg, int Value) const;
219 void storeRegToStackSlot(MachineBasicBlock &MBB,
220 MachineBasicBlock::iterator MI, unsigned SrcReg,
221 bool isKill, int FrameIndex,
222 const TargetRegisterClass *RC,
223 const TargetRegisterInfo *TRI) const override;
225 void loadRegFromStackSlot(MachineBasicBlock &MBB,
226 MachineBasicBlock::iterator MI, unsigned DestReg,
227 int FrameIndex, const TargetRegisterClass *RC,
228 const TargetRegisterInfo *TRI) const override;
230 bool expandPostRAPseudo(MachineInstr &MI) const override;
232 // Returns an opcode that can be used to move a value to a \p DstRC
233 // register. If there is no hardware instruction that can store to \p
234 // DstRC, then AMDGPU::COPY is returned.
235 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
237 LLVM_READONLY
238 int commuteOpcode(unsigned Opc) const;
240 LLVM_READONLY
241 inline int commuteOpcode(const MachineInstr &MI) const {
242 return commuteOpcode(MI.getOpcode());
245 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
246 unsigned &SrcOpIdx2) const override;
248 bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
249 unsigned & SrcOpIdx1) const;
251 bool isBranchOffsetInRange(unsigned BranchOpc,
252 int64_t BrOffset) const override;
254 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
256 unsigned insertIndirectBranch(MachineBasicBlock &MBB,
257 MachineBasicBlock &NewDestBB,
258 const DebugLoc &DL,
259 int64_t BrOffset,
260 RegScavenger *RS = nullptr) const override;
262 bool analyzeBranchImpl(MachineBasicBlock &MBB,
263 MachineBasicBlock::iterator I,
264 MachineBasicBlock *&TBB,
265 MachineBasicBlock *&FBB,
266 SmallVectorImpl<MachineOperand> &Cond,
267 bool AllowModify) const;
269 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
270 MachineBasicBlock *&FBB,
271 SmallVectorImpl<MachineOperand> &Cond,
272 bool AllowModify = false) const override;
274 unsigned removeBranch(MachineBasicBlock &MBB,
275 int *BytesRemoved = nullptr) const override;
277 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
278 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
279 const DebugLoc &DL,
280 int *BytesAdded = nullptr) const override;
282 bool reverseBranchCondition(
283 SmallVectorImpl<MachineOperand> &Cond) const override;
285 bool canInsertSelect(const MachineBasicBlock &MBB,
286 ArrayRef<MachineOperand> Cond,
287 unsigned TrueReg, unsigned FalseReg,
288 int &CondCycles,
289 int &TrueCycles, int &FalseCycles) const override;
291 void insertSelect(MachineBasicBlock &MBB,
292 MachineBasicBlock::iterator I, const DebugLoc &DL,
293 unsigned DstReg, ArrayRef<MachineOperand> Cond,
294 unsigned TrueReg, unsigned FalseReg) const override;
296 void insertVectorSelect(MachineBasicBlock &MBB,
297 MachineBasicBlock::iterator I, const DebugLoc &DL,
298 unsigned DstReg, ArrayRef<MachineOperand> Cond,
299 unsigned TrueReg, unsigned FalseReg) const;
301 unsigned getAddressSpaceForPseudoSourceKind(
302 unsigned Kind) const override;
304 bool
305 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
306 const MachineInstr &MIb) const override;
308 bool isFoldableCopy(const MachineInstr &MI) const;
310 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
311 MachineRegisterInfo *MRI) const final;
313 unsigned getMachineCSELookAheadLimit() const override { return 500; }
315 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
316 MachineInstr &MI,
317 LiveVariables *LV) const override;
319 bool isSchedulingBoundary(const MachineInstr &MI,
320 const MachineBasicBlock *MBB,
321 const MachineFunction &MF) const override;
323 static bool isSALU(const MachineInstr &MI) {
324 return MI.getDesc().TSFlags & SIInstrFlags::SALU;
327 bool isSALU(uint16_t Opcode) const {
328 return get(Opcode).TSFlags & SIInstrFlags::SALU;
331 static bool isVALU(const MachineInstr &MI) {
332 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
335 bool isVALU(uint16_t Opcode) const {
336 return get(Opcode).TSFlags & SIInstrFlags::VALU;
339 static bool isVMEM(const MachineInstr &MI) {
340 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
343 bool isVMEM(uint16_t Opcode) const {
344 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
347 static bool isSOP1(const MachineInstr &MI) {
348 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
351 bool isSOP1(uint16_t Opcode) const {
352 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
355 static bool isSOP2(const MachineInstr &MI) {
356 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
359 bool isSOP2(uint16_t Opcode) const {
360 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
363 static bool isSOPC(const MachineInstr &MI) {
364 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
367 bool isSOPC(uint16_t Opcode) const {
368 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
371 static bool isSOPK(const MachineInstr &MI) {
372 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
375 bool isSOPK(uint16_t Opcode) const {
376 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
379 static bool isSOPP(const MachineInstr &MI) {
380 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
383 bool isSOPP(uint16_t Opcode) const {
384 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
387 static bool isPacked(const MachineInstr &MI) {
388 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
391 bool isPacked(uint16_t Opcode) const {
392 return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
395 static bool isVOP1(const MachineInstr &MI) {
396 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
399 bool isVOP1(uint16_t Opcode) const {
400 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
403 static bool isVOP2(const MachineInstr &MI) {
404 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
407 bool isVOP2(uint16_t Opcode) const {
408 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
411 static bool isVOP3(const MachineInstr &MI) {
412 return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
415 bool isVOP3(uint16_t Opcode) const {
416 return get(Opcode).TSFlags & SIInstrFlags::VOP3;
419 static bool isSDWA(const MachineInstr &MI) {
420 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
423 bool isSDWA(uint16_t Opcode) const {
424 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
427 static bool isVOPC(const MachineInstr &MI) {
428 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
431 bool isVOPC(uint16_t Opcode) const {
432 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
435 static bool isMUBUF(const MachineInstr &MI) {
436 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
439 bool isMUBUF(uint16_t Opcode) const {
440 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
443 static bool isMTBUF(const MachineInstr &MI) {
444 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
447 bool isMTBUF(uint16_t Opcode) const {
448 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
451 static bool isSMRD(const MachineInstr &MI) {
452 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
455 bool isSMRD(uint16_t Opcode) const {
456 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
459 bool isBufferSMRD(const MachineInstr &MI) const;
461 static bool isDS(const MachineInstr &MI) {
462 return MI.getDesc().TSFlags & SIInstrFlags::DS;
465 bool isDS(uint16_t Opcode) const {
466 return get(Opcode).TSFlags & SIInstrFlags::DS;
469 bool isAlwaysGDS(uint16_t Opcode) const;
471 static bool isMIMG(const MachineInstr &MI) {
472 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
475 bool isMIMG(uint16_t Opcode) const {
476 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
479 static bool isGather4(const MachineInstr &MI) {
480 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
483 bool isGather4(uint16_t Opcode) const {
484 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
487 static bool isFLAT(const MachineInstr &MI) {
488 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
491 // Is a FLAT encoded instruction which accesses a specific segment,
492 // i.e. global_* or scratch_*.
493 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
494 auto Flags = MI.getDesc().TSFlags;
495 return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
498 // FIXME: Make this more precise
499 static bool isFLATScratch(const MachineInstr &MI) {
500 return isSegmentSpecificFLAT(MI);
503 // Any FLAT encoded instruction, including global_* and scratch_*.
504 bool isFLAT(uint16_t Opcode) const {
505 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
508 static bool isEXP(const MachineInstr &MI) {
509 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
512 bool isEXP(uint16_t Opcode) const {
513 return get(Opcode).TSFlags & SIInstrFlags::EXP;
516 static bool isWQM(const MachineInstr &MI) {
517 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
520 bool isWQM(uint16_t Opcode) const {
521 return get(Opcode).TSFlags & SIInstrFlags::WQM;
524 static bool isDisableWQM(const MachineInstr &MI) {
525 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
528 bool isDisableWQM(uint16_t Opcode) const {
529 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
532 static bool isVGPRSpill(const MachineInstr &MI) {
533 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
536 bool isVGPRSpill(uint16_t Opcode) const {
537 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
540 static bool isSGPRSpill(const MachineInstr &MI) {
541 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
544 bool isSGPRSpill(uint16_t Opcode) const {
545 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
548 static bool isDPP(const MachineInstr &MI) {
549 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
552 bool isDPP(uint16_t Opcode) const {
553 return get(Opcode).TSFlags & SIInstrFlags::DPP;
556 static bool isVOP3P(const MachineInstr &MI) {
557 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
560 bool isVOP3P(uint16_t Opcode) const {
561 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
564 static bool isVINTRP(const MachineInstr &MI) {
565 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
568 bool isVINTRP(uint16_t Opcode) const {
569 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
572 static bool isMAI(const MachineInstr &MI) {
573 return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
576 bool isMAI(uint16_t Opcode) const {
577 return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
580 static bool isDOT(const MachineInstr &MI) {
581 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
584 bool isDOT(uint16_t Opcode) const {
585 return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
588 static bool isScalarUnit(const MachineInstr &MI) {
589 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
592 static bool usesVM_CNT(const MachineInstr &MI) {
593 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
596 static bool usesLGKM_CNT(const MachineInstr &MI) {
597 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
600 static bool sopkIsZext(const MachineInstr &MI) {
601 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
604 bool sopkIsZext(uint16_t Opcode) const {
605 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
608 /// \returns true if this is an s_store_dword* instruction. This is more
/// specific than isSMEM && mayStore.
610 static bool isScalarStore(const MachineInstr &MI) {
611 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
614 bool isScalarStore(uint16_t Opcode) const {
615 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
618 static bool isFixedSize(const MachineInstr &MI) {
619 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
622 bool isFixedSize(uint16_t Opcode) const {
623 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
626 static bool hasFPClamp(const MachineInstr &MI) {
627 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
630 bool hasFPClamp(uint16_t Opcode) const {
631 return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
634 static bool hasIntClamp(const MachineInstr &MI) {
635 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
638 uint64_t getClampMask(const MachineInstr &MI) const {
639 const uint64_t ClampFlags = SIInstrFlags::FPClamp |
640 SIInstrFlags::IntClamp |
641 SIInstrFlags::ClampLo |
642 SIInstrFlags::ClampHi;
643 return MI.getDesc().TSFlags & ClampFlags;
646 static bool usesFPDPRounding(const MachineInstr &MI) {
647 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
650 bool usesFPDPRounding(uint16_t Opcode) const {
651 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
654 static bool isFPAtomic(const MachineInstr &MI) {
655 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
658 bool isFPAtomic(uint16_t Opcode) const {
659 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
662 bool isVGPRCopy(const MachineInstr &MI) const {
663 assert(MI.isCopy());
664 unsigned Dest = MI.getOperand(0).getReg();
665 const MachineFunction &MF = *MI.getParent()->getParent();
666 const MachineRegisterInfo &MRI = MF.getRegInfo();
667 return !RI.isSGPRReg(MRI, Dest);
670 bool hasVGPRUses(const MachineInstr &MI) const {
671 const MachineFunction &MF = *MI.getParent()->getParent();
672 const MachineRegisterInfo &MRI = MF.getRegInfo();
673 return llvm::any_of(MI.explicit_uses(),
674 [&MRI, this](const MachineOperand &MO) {
675 return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
678 /// Whether we must prevent this instruction from executing with EXEC = 0.
679 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
681 /// Returns true if the instruction could potentially depend on the value of
682 /// exec. If false, exec dependencies may safely be ignored.
683 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
685 bool isInlineConstant(const APInt &Imm) const;
687 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
689 bool isInlineConstant(const MachineOperand &MO,
690 const MCOperandInfo &OpInfo) const {
691 return isInlineConstant(MO, OpInfo.OperandType);
694 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
695 /// be an inline immediate.
696 bool isInlineConstant(const MachineInstr &MI,
697 const MachineOperand &UseMO,
698 const MachineOperand &DefMO) const {
699 assert(UseMO.getParent() == &MI);
700 int OpIdx = MI.getOperandNo(&UseMO);
701 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
702 return false;
705 return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
708 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
709 /// immediate.
710 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
711 const MachineOperand &MO = MI.getOperand(OpIdx);
712 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
715 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
716 const MachineOperand &MO) const {
717 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
718 return false;
720 if (MI.isCopy()) {
721 unsigned Size = getOpSize(MI, OpIdx);
722 assert(Size == 8 || Size == 4);
724 uint8_t OpType = (Size == 8) ?
725 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
726 return isInlineConstant(MO, OpType);
729 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
732 bool isInlineConstant(const MachineOperand &MO) const {
733 const MachineInstr *Parent = MO.getParent();
734 return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
737 bool isLiteralConstant(const MachineOperand &MO,
738 const MCOperandInfo &OpInfo) const {
739 return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
742 bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
743 const MachineOperand &MO = MI.getOperand(OpIdx);
744 return MO.isImm() && !isInlineConstant(MI, OpIdx);
747 // Returns true if this operand could potentially require a 32-bit literal
748 // operand, but not necessarily. A FrameIndex for example could resolve to an
749 // inline immediate value that will not require an additional 4-bytes; this
750 // assumes that it will.
751 bool isLiteralConstantLike(const MachineOperand &MO,
752 const MCOperandInfo &OpInfo) const;
754 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
755 const MachineOperand &MO) const;
757 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
758 /// This function will return false if you pass it a 32-bit instruction.
759 bool hasVALU32BitEncoding(unsigned Opcode) const;
761 /// Returns true if this operand uses the constant bus.
762 bool usesConstantBus(const MachineRegisterInfo &MRI,
763 const MachineOperand &MO,
764 const MCOperandInfo &OpInfo) const;
766 /// Return true if this instruction has any modifiers.
767 /// e.g. src[012]_mod, omod, clamp.
768 bool hasModifiers(unsigned Opcode) const;
770 bool hasModifiersSet(const MachineInstr &MI,
771 unsigned OpName) const;
772 bool hasAnyModifiersSet(const MachineInstr &MI) const;
774 bool canShrink(const MachineInstr &MI,
775 const MachineRegisterInfo &MRI) const;
777 MachineInstr *buildShrunkInst(MachineInstr &MI,
778 unsigned NewOpcode) const;
780 bool verifyInstruction(const MachineInstr &MI,
781 StringRef &ErrInfo) const override;
783 unsigned getVALUOp(const MachineInstr &MI) const;
785 /// Return the correct register class for \p OpNo. For target-specific
786 /// instructions, this will return the register class that has been defined
787 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return
788 /// the register class of its machine operand.
/// to infer the correct register class based on the other operands.
790 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
791 unsigned OpNo) const;
793 /// Return the size in bytes of the operand OpNo on the given
794 // instruction opcode.
795 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
796 const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
798 if (OpInfo.RegClass == -1) {
799 // If this is an immediate operand, this must be a 32-bit literal.
800 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
801 return 4;
804 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
807 /// This form should usually be preferred since it handles operands
808 /// with unknown register classes.
809 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
810 const MachineOperand &MO = MI.getOperand(OpNo);
811 if (MO.isReg()) {
812 if (unsigned SubReg = MO.getSubReg()) {
813 assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
814 MI.getParent()->getParent()->getRegInfo().
815 getRegClass(MO.getReg()), SubReg)) >= 32 &&
816 "Sub-dword subregs are not supported");
817 return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
820 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
823 /// Legalize the \p OpIndex operand of this instruction by inserting
824 /// a MOV. For example:
825 /// ADD_I32_e32 VGPR0, 15
826 /// to
827 /// MOV VGPR1, 15
828 /// ADD_I32_e32 VGPR0, VGPR1
830 /// If the operand being legalized is a register, then a COPY will be used
831 /// instead of MOV.
832 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
834 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
835 /// for \p MI.
836 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
837 const MachineOperand *MO = nullptr) const;
839 /// Check if \p MO would be a valid operand for the given operand
840 /// definition \p OpInfo. Note this does not attempt to validate constant bus
841 /// restrictions (e.g. literal constant usage).
842 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
843 const MCOperandInfo &OpInfo,
844 const MachineOperand &MO) const;
846 /// Check if \p MO (a register operand) is a legal register for the
847 /// given operand description.
848 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
849 const MCOperandInfo &OpInfo,
850 const MachineOperand &MO) const;
852 /// Legalize operands in \p MI by either commuting it or inserting a
853 /// copy of src1.
854 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
856 /// Fix operands in \p MI to satisfy constant bus requirements.
857 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
859 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
/// be used when it is known that the value in SrcReg is the same across all
861 /// threads in the wave.
862 /// \returns The SGPR register that \p SrcReg was copied to.
863 unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
864 MachineRegisterInfo &MRI) const;
866 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
868 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
869 MachineBasicBlock::iterator I,
870 const TargetRegisterClass *DstRC,
871 MachineOperand &Op, MachineRegisterInfo &MRI,
872 const DebugLoc &DL) const;
874 /// Legalize all operands in this instruction. This function may create new
875 /// instructions and control-flow around \p MI. If present, \p MDT is
876 /// updated.
877 void legalizeOperands(MachineInstr &MI,
878 MachineDominatorTree *MDT = nullptr) const;
880 /// Replace this instruction's opcode with the equivalent VALU
881 /// opcode. This function will also move the users of \p MI to the
882 /// VALU if necessary. If present, \p MDT is updated.
883 void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
885 void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
886 int Count) const;
888 void insertNoop(MachineBasicBlock &MBB,
889 MachineBasicBlock::iterator MI) const override;
891 void insertReturn(MachineBasicBlock &MBB) const;
892 /// Return the number of wait states that result from executing this
893 /// instruction.
894 static unsigned getNumWaitStates(const MachineInstr &MI);
896 /// Returns the operand named \p Op. If \p MI does not have an
897 /// operand named \c Op, this function returns nullptr.
898 LLVM_READONLY
899 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
901 LLVM_READONLY
902 const MachineOperand *getNamedOperand(const MachineInstr &MI,
903 unsigned OpName) const {
904 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
907 /// Get required immediate operand
908 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
909 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
910 return MI.getOperand(Idx).getImm();
913 uint64_t getDefaultRsrcDataFormat() const;
914 uint64_t getScratchRsrcWords23() const;
916 bool isLowLatencyInstruction(const MachineInstr &MI) const;
917 bool isHighLatencyInstruction(const MachineInstr &MI) const;
919 /// Return the descriptor of the target-specific machine instruction
920 /// that corresponds to the specified pseudo or native opcode.
921 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
922 return get(pseudoToMCOpcode(Opcode));
925 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
926 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
928 unsigned isLoadFromStackSlot(const MachineInstr &MI,
929 int &FrameIndex) const override;
930 unsigned isStoreToStackSlot(const MachineInstr &MI,
931 int &FrameIndex) const override;
933 unsigned getInstBundleSize(const MachineInstr &MI) const;
934 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
936 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
938 bool isNonUniformBranchInstr(MachineInstr &Instr) const;
940 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
941 MachineBasicBlock *IfEnd) const;
943 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
944 MachineBasicBlock *LoopEnd) const;
946 std::pair<unsigned, unsigned>
947 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
949 ArrayRef<std::pair<int, const char *>>
950 getSerializableTargetIndices() const override;
952 ArrayRef<std::pair<unsigned, const char *>>
953 getSerializableDirectMachineOperandTargetFlags() const override;
955 ScheduleHazardRecognizer *
956 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
957 const ScheduleDAG *DAG) const override;
959 ScheduleHazardRecognizer *
960 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
962 bool isBasicBlockPrologue(const MachineInstr &MI) const override;
964 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
965 MachineBasicBlock::iterator InsPt,
966 const DebugLoc &DL, Register Src,
967 Register Dst) const override;
969 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
970 MachineBasicBlock::iterator InsPt,
971 const DebugLoc &DL, Register Src,
972 Register SrcSubReg,
973 Register Dst) const override;
  /// \returns true if the subtarget runs with 32-wide waves (wave32) rather
  /// than wave64.
  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    unsigned DestReg) const;

  /// Overload taking a register scavenger. NOTE(review): presumably used
  /// post-RA, where the unused carry destination must be scavenged rather
  /// than created virtual — confirm against the implementation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  /// \returns true if \p Opcode is one of the KILL terminator opcodes.
  static bool isKillTerminator(unsigned Opcode);

  /// \returns the instruction description of the terminator form of the
  /// KILL pseudo \p Opcode.
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
  /// \returns true if \p Imm fits in the 12-bit unsigned immediate offset
  /// field of a MUBUF instruction.
  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction. If \p Signed, this is for an instruction that
  /// interprets the offset as signed.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         bool Signed) const;
  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// TargetInstrInfo hook: \returns the register class constraint of operand
  /// \p OpNum of \p TID, or nullptr if \p OpNum is out of range.
  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override {
    if (OpNum >= TID.getNumOperands())
      return nullptr;
    // Resolve through the SI register info (RI) rather than the generic
    // \p TRI passed in.
    return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
  }

  /// NOTE(review): defined in SIInstrInfo.cpp; presumably normalizes \p MI's
  /// implicit operand list (e.g. wave-size dependent registers) — confirm.
  void fixImplicitOperands(MachineInstr &MI) const;
1022 /// \brief Returns true if a reg:subreg pair P has a TRC class
1023 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1024 const TargetRegisterClass &TRC,
1025 MachineRegisterInfo &MRI) {
1026 auto *RC = MRI.getRegClass(P.Reg);
1027 if (!P.SubReg)
1028 return RC == &TRC;
1029 auto *TRI = MRI.getTargetRegisterInfo();
1030 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1033 /// \brief Create RegSubRegPair from a register MachineOperand
1034 inline
1035 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1036 assert(O.isReg());
1037 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy-like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
namespace AMDGPU {

// TableGen-generated opcode-mapping accessors (see AMDGPUGenInstrInfo.inc).
// NOTE(review): each presumably returns -1 when \p Opcode has no counterpart
// in the relevant table — confirm against the generated .inc file.

/// VOP3 (64-bit encoding) counterpart of \p Opcode.
LLVM_READONLY
int getVOPe64(uint16_t Opcode);

/// VOP1/VOP2 (32-bit encoding) counterpart of \p Opcode.
LLVM_READONLY
int getVOPe32(uint16_t Opcode);

/// SDWA counterpart of \p Opcode.
LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

/// 32-bit DPP counterpart of \p Opcode.
LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

/// Basic (non-SDWA) opcode corresponding to an SDWA \p Opcode.
LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

/// Commuted ("_rev") counterpart of \p Opcode.
LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

/// Original opcode for a commuted ("_rev") \p Opcode.
LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

/// Addr64 counterpart of \p Opcode.
LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

/// MUBUF counterpart of \p Opcode without the LDS bit.
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);

/// Atomic opcode variant that returns the pre-op value.
LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);

/// Atomic opcode variant that does not return the pre-op value.
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

/// SOPK counterpart of \p Opcode.
LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// Global memory instruction variant using an SGPR base address (SADDR).
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// V_CMPX variant of \p Opcode without the SDST operand.
LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

// Fields of a manually constructed buffer resource descriptor.
// NOTE(review): bit positions are relative to the 128-bit descriptor;
// confirm against the GCN ISA documentation.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
// Each field occupies 4 bytes, hence the stride between consecutive values.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1141 } // end namespace llvm
1143 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H