1 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Vector, Reduction, and Cube instructions need to fill the entire instruction
11 /// group to work correctly. This pass expands these individual instructions
12 /// into several instructions that will completely fill the instruction group.
14 //===----------------------------------------------------------------------===//
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "R600Defines.h"
19 #include "R600Subtarget.h"
23 #define DEBUG_TYPE "r600-expand-special-instrs"
// Machine-function pass performing the expansion described in the file
// header: individual Vector, Reduction, and Cube instructions are expanded
// into several instructions that fill the instruction group.
27 class R600ExpandSpecialInstrsPass
: public MachineFunctionPass
{
// Instruction info used throughout the expansion. Starts out null and is
// assigned from the function's R600Subtarget in runOnMachineFunction.
29 const R600InstrInfo
*TII
= nullptr;
// Copies the immediate value of one named operand from OldMI onto the
// operand of the same name on NewMI.
31 void SetFlagInNewMI(MachineInstr
*NewMI
, const MachineInstr
*OldMI
,
// Constructor: hands the pass identifier ID to the MachineFunctionPass base.
37 R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID
) {}
// Per-function entry point of the pass; the definition is out of line.
39 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Returns the fixed display name of this pass.
41 StringRef
getPassName() const override
{
42 return "R600 Expand special instructions pass";
46 } // end anonymous namespace
// LLVM pass-initialization macros for this pass, keyed by DEBUG_TYPE
// ("r600-expand-special-instrs").
// NOTE(review): the BEGIN and END macros are given different name strings
// ("R600 Expand Special Instrs" vs "R600ExpandSpecialInstrs") - confirm which
// one is intended.
48 INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
49 "R600 Expand Special Instrs", false, false)
50 INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
51 "R600ExpandSpecialInstrs", false, false)
// Definition of the pass identifier that the constructor passes to
// MachineFunctionPass.
53 char R600ExpandSpecialInstrsPass::ID
= 0;
// Reference to that same identifier, made available in namespace llvm.
55 char &llvm::R600ExpandSpecialInstrsPassID
= R600ExpandSpecialInstrsPass::ID
;
// Factory in namespace llvm: returns a newly allocated
// R600ExpandSpecialInstrsPass through a FunctionPass pointer.
57 FunctionPass
*llvm::createR600ExpandSpecialInstrsPass() {
58 return new R600ExpandSpecialInstrsPass();
// Copies the immediate operand named by Op from OldMI to NewMI.
//
//   NewMI - instruction whose operand is overwritten.
//   OldMI - instruction the value is read from.
//   Op    - operand name (callers pass R600::OpName values such as clamp or
//           src0_neg); it is resolved through TII for both instructions.
61 void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr
*NewMI
,
62 const MachineInstr
*OldMI
, unsigned Op
) {
// Position of the operand named Op within OldMI.
63 int OpIdx
= TII
->getOperandIdx(*OldMI
, Op
);
// Immediate value currently stored in that operand of OldMI.
65 uint64_t Val
= OldMI
->getOperand(OpIdx
).getImm();
// Write the same value into the operand named Op on NewMI.
66 TII
->setImmOperand(*NewMI
, Op
, Val
);
70 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction
&MF
) {
71 const R600Subtarget
&ST
= MF
.getSubtarget
<R600Subtarget
>();
72 TII
= ST
.getInstrInfo();
74 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
76 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
78 MachineBasicBlock
&MBB
= *BB
;
79 MachineBasicBlock::iterator I
= MBB
.begin();
80 while (I
!= MBB
.end()) {
81 MachineInstr
&MI
= *I
;
84 // Expand LDS_*_RET instructions
85 if (TII
->isLDSRetInstr(MI
.getOpcode())) {
86 int DstIdx
= TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::dst
);
88 MachineOperand
&DstOp
= MI
.getOperand(DstIdx
);
89 MachineInstr
*Mov
= TII
->buildMovInstr(&MBB
, I
,
90 DstOp
.getReg(), R600::OQAP
);
91 DstOp
.setReg(R600::OQAP
);
92 int LDSPredSelIdx
= TII
->getOperandIdx(MI
.getOpcode(),
93 R600::OpName::pred_sel
);
94 int MovPredSelIdx
= TII
->getOperandIdx(Mov
->getOpcode(),
95 R600::OpName::pred_sel
);
96 // Copy the pred_sel bit
97 Mov
->getOperand(MovPredSelIdx
).setReg(
98 MI
.getOperand(LDSPredSelIdx
).getReg());
101 switch (MI
.getOpcode()) {
103 // Expand PRED_X to one of the PRED_SET instructions.
105 uint64_t Flags
= MI
.getOperand(3).getImm();
106 // The native opcode used by PRED_X is stored as an immediate in the
// third operand.
108 MachineInstr
*PredSet
= TII
->buildDefaultInstruction(MBB
, I
,
109 MI
.getOperand(2).getImm(), // opcode
110 MI
.getOperand(0).getReg(), // dst
111 MI
.getOperand(1).getReg(), // src0
113 TII
->addFlag(*PredSet
, 0, MO_FLAG_MASK
);
114 if (Flags
& MO_FLAG_PUSH
) {
115 TII
->setImmOperand(*PredSet
, R600::OpName::update_exec_mask
, 1);
117 TII
->setImmOperand(*PredSet
, R600::OpName::update_pred
, 1);
119 MI
.eraseFromParent();
124 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
126 Register DstReg
= MI
.getOperand(0).getReg();
127 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
129 for (unsigned Chan
= 0; Chan
< 4; ++Chan
) {
130 bool Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
132 R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
134 TII
->buildSlotOfVectorInstruction(MBB
, &MI
, Chan
, SubDstReg
);
136 BMI
->bundleWithPred();
139 TII
->addFlag(*BMI
, 0, MO_FLAG_MASK
);
142 TII
->addFlag(*BMI
, 0, MO_FLAG_NOT_LAST
);
143 unsigned Opcode
= BMI
->getOpcode();
144 // While not strictly necessary from hw point of view, we force
145 // all src operands of a dot4 inst to belong to the same slot.
147 BMI
->getOperand(TII
->getOperandIdx(Opcode
, R600::OpName::src0
))
150 BMI
->getOperand(TII
->getOperandIdx(Opcode
, R600::OpName::src1
))
154 if ((TRI
.getEncodingValue(Src0
) & 0xff) < 127 &&
155 (TRI
.getEncodingValue(Src1
) & 0xff) < 127)
156 assert(TRI
.getHWRegChan(Src0
) == TRI
.getHWRegChan(Src1
));
158 MI
.eraseFromParent();
163 bool IsReduction
= TII
->isReductionOp(MI
.getOpcode());
164 bool IsVector
= TII
->isVector(MI
);
165 bool IsCube
= TII
->isCubeOp(MI
.getOpcode());
166 if (!IsReduction
&& !IsVector
&& !IsCube
) {
170 // Expand the instruction
172 // Reduction instructions:
173 // T0_X = DP4 T1_XYZW, T2_XYZW
175 // T0_X = DP4 T1_X, T2_X
176 // T0_Y (write masked) = DP4 T1_Y, T2_Y
177 // T0_Z (write masked) = DP4 T1_Z, T2_Z
178 // T0_W (write masked) = DP4 T1_W, T2_W
180 // Vector instructions:
181 // T0_X = MULLO_INT T1_X, T2_X
183 // T0_X = MULLO_INT T1_X, T2_X
184 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
185 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
186 // T0_W (write masked) = MULLO_INT T1_X, T2_X
188 // Cube instructions:
189 // T0_XYZW = CUBE T1_XYZW
191 // T0_X = CUBE T1_Z, T1_Y
192 // T0_Y = CUBE T1_Z, T1_X
193 // T0_Z = CUBE T1_X, T1_Z
194 // T0_W = CUBE T1_Y, T1_Z
195 for (unsigned Chan
= 0; Chan
< 4; Chan
++) {
197 MI
.getOperand(TII
->getOperandIdx(MI
, R600::OpName::dst
)).getReg();
199 MI
.getOperand(TII
->getOperandIdx(MI
, R600::OpName::src0
)).getReg();
202 // Determine the correct source registers
204 int Src1Idx
= TII
->getOperandIdx(MI
, R600::OpName::src1
);
206 Src1
= MI
.getOperand(Src1Idx
).getReg();
210 unsigned SubRegIndex
= R600RegisterInfo::getSubRegFromChannel(Chan
);
211 Src0
= TRI
.getSubReg(Src0
, SubRegIndex
);
212 Src1
= TRI
.getSubReg(Src1
, SubRegIndex
);
214 static const int CubeSrcSwz
[] = {2, 2, 0, 1};
215 unsigned SubRegIndex0
= R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz
[Chan
]);
216 unsigned SubRegIndex1
= R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz
[3 - Chan
]);
217 Src1
= TRI
.getSubReg(Src0
, SubRegIndex1
);
218 Src0
= TRI
.getSubReg(Src0
, SubRegIndex0
);
221 // Determine the correct destination registers;
225 unsigned SubRegIndex
= R600RegisterInfo::getSubRegFromChannel(Chan
);
226 DstReg
= TRI
.getSubReg(DstReg
, SubRegIndex
);
228 // Mask the write if the original instruction does not write to
229 // the current Channel.
230 Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
231 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
232 DstReg
= R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
235 // Set the IsLast bit
236 NotLast
= (Chan
!= 3 );
238 // Add the new instruction
239 unsigned Opcode
= MI
.getOpcode();
241 case R600::CUBE_r600_pseudo
:
242 Opcode
= R600::CUBE_r600_real
;
244 case R600::CUBE_eg_pseudo
:
245 Opcode
= R600::CUBE_eg_real
;
251 MachineInstr
*NewMI
=
252 TII
->buildDefaultInstruction(MBB
, I
, Opcode
, DstReg
, Src0
, Src1
);
255 NewMI
->bundleWithPred();
257 TII
->addFlag(*NewMI
, 0, MO_FLAG_MASK
);
260 TII
->addFlag(*NewMI
, 0, MO_FLAG_NOT_LAST
);
262 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::clamp
);
263 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::literal
);
264 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_abs
);
265 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_abs
);
266 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_neg
);
267 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_neg
);
269 MI
.eraseFromParent();