//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {

class MachineDominatorTree;
class MachineRegisterInfo;
class TargetRegisterClass;
class ScheduleHazardRecognizer;
/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;
/// Utility to store machine instructions worklist.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    auto iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    auto iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// Deferred instructions are specific MachineInstrs
  /// that will be added by the insert method.
  SetVector<MachineInstr *> DeferredList;
};
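// Illustrative sketch (assumption, not from the original header): the worklist
// is typically drained front-to-back by the VALU-lowering loop, e.g.
//   while (!Worklist.empty()) {
//     MachineInstr &Inst = *Worklist.top();
//     Worklist.erase_top();
//     TII->moveToVALUImpl(Worklist, MDT, Inst);
//   }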
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;
  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;
  void swapOperands(MachineInstr &Inst) const;
  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// value from one register to another register return destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;
public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };
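  // Illustrative sketch (assumption, not from the original header): these
  // flags travel on global-address machine operands, e.g. when lowering a
  // GOT-relative access:
  //   MIB.addGlobalAddress(GV, /*Offset=*/0, SIInstrInfo::MO_GOTPCREL32_LO);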
  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }
  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1,
                               unsigned NumLoads) const override;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                             unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;
  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;
  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;
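  // Illustrative expansion (sketch; register names hypothetical): pre-RA, a
  // 64-bit DPP move is split into per-half 32-bit DPP moves that are then
  // recombined:
  //   %dst:vreg_64 = V_MOV_B64_DPP_PSEUDO %src:vreg_64, ...
  // becomes, roughly,
  //   %lo:vgpr_32 = V_MOV_B32_dpp %src.sub0, ...
  //   %hi:vgpr_32 = V_MOV_B32_dpp %src.sub1, ...
  //   %dst:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1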
  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;

  int commuteOpcode(unsigned Opc) const;

  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }
  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
      SmallVectorImpl<MachineOperand> &Cond) const override;
  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;
  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }
  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }
  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }
  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }

  bool isLDSDMA(uint16_t Opcode) {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }
  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }
  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }
  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }
  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR are a special case of
  // SGPRs spilling to VGPRs: they are SGPR spills, but issued by VALU
  // instructions. They need an explicit check, since classifying them only by
  // the Spill bit and the instruction type they came from misclassifies them.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }
  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }
  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }
  // Most SOPK instructions treat the immediate as signed 16-bit; however,
  // some use it as unsigned.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32;
  }
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }
  // Check to see if an opcode is a barrier start. Pre-gfx12 this is just
  // S_BARRIER, but after the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT
  // we want to check for the barrier start (S_BARRIER_SIGNAL*).
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::DS_GWS_INIT ||
           Opcode == AMDGPU::DS_GWS_BARRIER;
  }
  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }
  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }
  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
  }
  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);
  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4-bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }
  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }
  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }
  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;
  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;
  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands
  /// to infer the correct register class.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;
  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }
  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }
  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;
  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;
  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy when possible.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. This function can only
  /// be used when it is known that the value in SrcReg is the same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;
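  // Illustrative use (sketch; names hypothetical): hoist a wave-uniform VGPR
  // value into an SGPR before feeding it to a scalar-only operand:
  //   Register SBase = TII->readlaneVGPRToSGPR(VBase, UseMI, MRI);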
  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;
  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstrs
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;
  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;
  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode, where
  /// s_trap is interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;
  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \c Op, this function returns nullptr.
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }
  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
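  // Illustrative use (sketch, not from the original header): named-operand
  // lookup avoids hard-coding operand positions, e.g.
  //   const MachineOperand *SAddr =
  //       TII->getNamedOperand(MI, AMDGPU::OpName::saddr);
  //   int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);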
  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;
  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }
  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;
  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;
  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;
  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;
  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;
  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;
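  // Illustrative use (sketch): the returned builder still needs its two
  // source operands, e.g.
  //   TII->getAddNoCarry(MBB, I, DL, DestReg)
  //       .addReg(Src0)
  //       .addReg(Src1);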
  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;
  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction. If \p Signed, this is for an instruction that
  /// interprets the offset as signed.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
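  // Illustrative use (sketch): fold as much of a constant offset as the
  // encoding allows and keep the remainder for address computation, e.g.
  //   auto [ImmField, Remainder] =
  //       TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
  //                            SIInstrFlags::FlatGlobal);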
  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;
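  // Illustrative use (sketch): callers typically treat -1 as "no real
  // encoding on this subtarget", e.g.
  //   int MCOp = TII->pseudoToMCOpcode(Opcode);
  //   if (MCOp == -1)
  //     return; // cannot be encoded for this subtarget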
  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;
  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
      const override;

  void fixImplicitOperands(MachineInstr &MI) const;
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;
  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;
  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }
  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce operand's \p OpName even alignment if required by target.
  // This is used if an operand is a 32 bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}
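// Illustrative use (sketch): check whether a reg:subreg pair refers to a
// 32-bit VGPR, e.g.
//   if (isOfRegClass(P, AMDGPU::VGPR_32RegClass, MRI)) { /* ... */ }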
/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);
/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
namespace AMDGPU {

  int getVOPe64(uint16_t Opcode);

  int getVOPe32(uint16_t Opcode);

  int getSDWAOp(uint16_t Opcode);

  int getDPPOp32(uint16_t Opcode);

  int getDPPOp64(uint16_t Opcode);

  int getBasicFromSDWAOp(uint16_t Opcode);

  int getCommuteRev(uint16_t Opcode);

  int getCommuteOrig(uint16_t Opcode);

  int getAddr64Inst(uint16_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  int getIfAddr64Inst(uint16_t Opcode);

  int getSOPKOp(uint16_t Opcode);

  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
  /// of a VADDR form.
  int getGlobalSaddrOp(uint16_t Opcode);

  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
  /// of a SADDR form.
  int getGlobalVaddrOp(uint16_t Opcode);

  int getVCMPXNoSDstOp(uint16_t Opcode);

  /// \returns ST form with only immediate offset of a FLAT Scratch instruction
  /// given an \p Opcode of an SS (SADDR) form.
  int getFlatScratchInstSTfromSS(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SVS (SADDR + VADDR) form.
  int getFlatScratchInstSVfromSVS(uint16_t Opcode);

  /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SV (VADDR) form.
  int getFlatScratchInstSSfromSV(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SS (SADDR) form.
  int getFlatScratchInstSVfromSS(uint16_t Opcode);

  /// \returns earlyclobber version of a MAC MFMA if it exists.
  int getMFMAEarlyClobberOp(uint16_t Opcode);

  /// \returns v_cmpx version of a v_cmp instruction.
  int getVCMPXOpFromVCMP(uint16_t Opcode);
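  // Illustrative use (sketch): these tablegen-generated mappings return -1
  // when no counterpart exists, e.g.
  //   int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
  //   if (Op32 != -1) { /* a 32-bit encoding is available */ }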
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU
namespace AMDGPU {
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H