1 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Vector, Reduction, and Cube instructions need to fill the entire instruction
11 /// group to work correctly. This pass expands these individual instructions
12 /// into several instructions that will completely fill the instruction group.
14 //===----------------------------------------------------------------------===//
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600RegisterInfo.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/Pass.h"
35 #define DEBUG_TYPE "r600-expand-special-instrs"
// Machine-function pass whose work is done in runOnMachineFunction(), defined
// later in this file.
// NOTE(review): this listing is missing lines (the embedded source line numbers
// skip values): access specifiers, the `ID` member used by the constructor, the
// last parameter of SetFlagInNewMI, and closing braces are not visible here.
// Confirm against the complete file before editing.
39 class R600ExpandSpecialInstrsPass
: public MachineFunctionPass
{
// Target instruction info; starts as nullptr and is assigned from the
// subtarget at the top of runOnMachineFunction().
41 const R600InstrInfo
*TII
= nullptr;
// Helper that copies one immediate operand from OldMI to NewMI (see the
// out-of-class definition; its remaining parameter is cut off in this listing).
43 void SetFlagInNewMI(MachineInstr
*NewMI
, const MachineInstr
*OldMI
,
// Default constructor: forwards ID to the MachineFunctionPass base.
49 R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID
) {}
// Pass entry point, invoked with the machine function to process.
51 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Returns the fixed, human-readable name of this pass.
53 StringRef
getPassName() const override
{
54 return "R600 Expand special instructions pass";
58 } // end anonymous namespace
// Pass-initialization macro pair for R600ExpandSpecialInstrsPass. Both macros
// are keyed by DEBUG_TYPE ("r600-expand-special-instrs") and pass false/false
// for the two trailing flags.
// NOTE(review): the BEGIN and END macros are given different name strings
// ("R600 Expand Special Instrs" vs "R600ExpandSpecialInstrs"); confirm which
// one is actually used and whether they should match.
60 INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
61 "R600 Expand Special Instrs", false, false)
62 INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
63 "R600ExpandSpecialInstrs", false, false)
// Storage for the pass's static ID member, initialized to 0. The constructor
// hands this object to the MachineFunctionPass base.
65 char R600ExpandSpecialInstrsPass::ID
= 0;
// Reference in namespace llvm bound to the same ID object, so the pass can be
// referred to by ID without naming the class.
67 char &llvm::R600ExpandSpecialInstrsPassID
= R600ExpandSpecialInstrsPass::ID
;
// Factory function: returns a pointer to a newly allocated
// R600ExpandSpecialInstrsPass as a FunctionPass*.
// The object is created with a plain `new`; who deletes it is not visible in
// this file (presumably the pass manager that receives it — TODO confirm).
69 FunctionPass
*llvm::createR600ExpandSpecialInstrsPass() {
70 return new R600ExpandSpecialInstrsPass();
// Copies one immediate operand from OldMI to NewMI.
//
//   NewMI  instruction that receives the value (via TII->setImmOperand).
//   OldMI  instruction the value is read from.
//   Op     operand-name id; callers in this file pass R600::OpName::* values
//          (clamp, literal, src0_abs, src1_abs, src0_neg, src1_neg).
//
// NOTE(review): as shown, OpIdx is used without any validity check. The
// embedded line numbers skip from 75 to 77 and again after 78, so a guard
// around the read/write may simply be missing from this listing — confirm
// against the complete file.
73 void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr
*NewMI
,
74 const MachineInstr
*OldMI
, unsigned Op
) {
// Position of the named operand within OldMI.
75 int OpIdx
= TII
->getOperandIdx(*OldMI
, Op
);
// Read the immediate from the old instruction...
77 uint64_t Val
= OldMI
->getOperand(OpIdx
).getImm();
// ...and store it into the operand with the same name on the new instruction.
78 TII
->setImmOperand(*NewMI
, Op
, Val
);
// Entry point of the pass for one machine function. The visible code walks
// every instruction of every basic block and handles four situations:
//   1. LDS_*_RET instructions: a MOV is built and the instruction's dst
//      operand is redirected to R600::OQAP.
//   2. PRED_X: replaced by an instruction built from the opcode stored in the
//      PRED_X operands.
//   3. A dot4-style instruction: one slot instruction is built per channel.
//   4. Reduction / vector / cube instructions: one instruction is built per
//      channel with per-channel source and destination registers.
// NOTE(review): the embedded source line numbers in this listing skip values,
// so some lines (the loop increment, case labels, else branches, several
// declarations, closing braces and the return statement) are not visible.
// Confirm the control flow against the complete file before changing code.
82 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction
&MF
) {
// Look up the R600 subtarget and cache its instruction info in the TII member.
83 const R600Subtarget
&ST
= MF
.getSubtarget
<R600Subtarget
>();
84 TII
= ST
.getInstrInfo();
// Register info used for encoding values, HW channels and sub-registers below.
86 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
// Outer loop over the basic blocks of MF (the rest of the loop header is not
// visible in this listing).
88 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
90 MachineBasicBlock
&MBB
= *BB
;
// Inner loop over the instructions of the current block.
91 MachineBasicBlock::iterator I
= MBB
.begin();
92 while (I
!= MBB
.end()) {
93 MachineInstr
&MI
= *I
;
96 // Expand LDS_*_RET instructions
// A MOV is built at I from the original destination register and R600::OQAP,
// then the LDS instruction's own dst operand is rewritten to R600::OQAP.
// (Presumably the MOV copies OQAP into the original destination; the argument
// order of buildMovInstr is not visible here — TODO confirm.)
97 if (TII
->isLDSRetInstr(MI
.getOpcode())) {
98 int DstIdx
= TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::dst
);
100 MachineOperand
&DstOp
= MI
.getOperand(DstIdx
);
101 MachineInstr
*Mov
= TII
->buildMovInstr(&MBB
, I
,
102 DstOp
.getReg(), R600::OQAP
);
103 DstOp
.setReg(R600::OQAP
);
// Locate the pred_sel operand on both the LDS instruction and the new MOV.
104 int LDSPredSelIdx
= TII
->getOperandIdx(MI
.getOpcode(),
105 R600::OpName::pred_sel
);
106 int MovPredSelIdx
= TII
->getOperandIdx(Mov
->getOpcode(),
107 R600::OpName::pred_sel
);
108 // Copy the pred_sel bit
109 Mov
->getOperand(MovPredSelIdx
).setReg(
110 MI
.getOperand(LDSPredSelIdx
).getReg());
// Remaining expansions are selected by opcode (case labels are not visible in
// this listing).
113 switch (MI
.getOpcode()) {
115 // Expand PRED_X to one of the PRED_SET instructions.
// Operand 3 holds the flag bits tested against MO_FLAG_PUSH below.
117 uint64_t Flags
= MI
.getOperand(3).getImm();
118 // The native opcode used by PRED_X is stored as an immediate in the
// third operand (operand index 2, passed as the opcode argument below).
120 MachineInstr
*PredSet
= TII
->buildDefaultInstruction(MBB
, I
,
121 MI
.getOperand(2).getImm(), // opcode
122 MI
.getOperand(0).getReg(), // dst
123 MI
.getOperand(1).getReg(), // src0
// Add MO_FLAG_MASK to the new instruction (the 0 argument is presumably the
// operand index the flag applies to — TODO confirm).
125 TII
->addFlag(*PredSet
, 0, MO_FLAG_MASK
);
// With MO_FLAG_PUSH set, update_exec_mask is set to 1. The update_pred write
// that follows is presumably the alternative branch; the line between the two
// calls is not visible in this listing — TODO confirm.
126 if (Flags
& MO_FLAG_PUSH
) {
127 TII
->setImmOperand(*PredSet
, R600::OpName::update_exec_mask
, 1);
129 TII
->setImmOperand(*PredSet
, R600::OpName::update_pred
, 1);
// Remove the PRED_X instruction now that its replacement has been built.
131 MI
.eraseFromParent();
// dot4-style expansion (see the comment further down that names "dot4"; the
// case label itself is not visible in this listing).
// NOTE(review): this TRI has the same type and initializer as the
// function-scope TRI declared near the top and shadows it; the inner
// declaration looks redundant.
136 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
// Destination register of the original instruction and its encoding value
// masked with HW_REG_MASK.
138 Register DstReg
= MI
.getOperand(0).getReg();
139 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
// Build one slot instruction per channel (0..3).
141 for (unsigned Chan
= 0; Chan
< 4; ++Chan
) {
// Mask is true for every channel other than the HW channel of DstReg.
142 bool Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
// Register number (DstBase * 4) + Chan within R600_TReg32RegClass; used as
// SubDstReg below (the declaration line is not visible in this listing).
144 R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
146 TII
->buildSlotOfVectorInstruction(MBB
, &MI
, Chan
, SubDstReg
);
// Bundling and the two flags below are presumably conditional (on the channel
// index, Mask, and "not the last channel" respectively); the guarding lines
// are not visible in this listing — TODO confirm.
148 BMI
->bundleWithPred();
151 TII
->addFlag(*BMI
, 0, MO_FLAG_MASK
);
154 TII
->addFlag(*BMI
, 0, MO_FLAG_NOT_LAST
);
155 unsigned Opcode
= BMI
->getOpcode();
156 // While not strictly necessary from hw point of view, we force
157 // all src operands of a dot4 inst to belong to the same slot.
// The src0 / src1 operands of the slot instruction are looked up here; the
// lines that bind them to Src0 / Src1 are not visible in this listing.
159 BMI
->getOperand(TII
->getOperandIdx(Opcode
, R600::OpName::src0
))
162 BMI
->getOperand(TII
->getOperandIdx(Opcode
, R600::OpName::src1
))
// Only when the low byte of both encoding values is below 127: check that both
// sources sit in the same HW channel.
// NOTE(review): the body of this `if` is only an assert, so in builds where
// assert is compiled out the statement becomes an empty `if`.
166 if ((TRI
.getEncodingValue(Src0
) & 0xff) < 127 &&
167 (TRI
.getEncodingValue(Src1
) & 0xff) < 127)
168 assert(TRI
.getHWRegChan(Src0
) == TRI
.getHWRegChan(Src1
));
// Remove the original dot4-style instruction.
170 MI
.eraseFromParent();
// Classify MI for the generic per-channel expansion below.
175 bool IsReduction
= TII
->isReductionOp(MI
.getOpcode());
176 bool IsVector
= TII
->isVector(MI
);
177 bool IsCube
= TII
->isCubeOp(MI
.getOpcode());
// Instructions that are none of the three kinds take this branch (its body is
// not visible in this listing; presumably they are left untouched).
178 if (!IsReduction
&& !IsVector
&& !IsCube
) {
182 // Expand the instruction
184 // Reduction instructions:
185 // T0_X = DP4 T1_XYZW, T2_XYZW
187 // T0_X = DP4 T1_X, T2_X
188 // T0_Y (write masked) = DP4 T1_Y, T2_Y
189 // T0_Z (write masked) = DP4 T1_Z, T2_Z
190 // T0_W (write masked) = DP4 T1_W, T2_W
192 // Vector instructions:
193 // T0_X = MULLO_INT T1_X, T2_X
195 // T0_X = MULLO_INT T1_X, T2_X
196 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
197 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
198 // T0_W (write masked) = MULLO_INT T1_X, T2_X
200 // Cube instructions:
201 // T0_XYZW = CUBE T1_XYZW
203 // T0_X = CUBE T1_Z, T1_Y
204 // T0_Y = CUBE T1_Z, T1_X
205 // T0_Z = CUBE T1_X, T1_Z
206 // T0_W = CUBE T1_Y, T1_Z
// Emit one instruction per channel (0..3).
207 for (unsigned Chan
= 0; Chan
< 4; Chan
++) {
// dst and src0 registers of the original instruction (the declarations that
// receive these values are not visible in this listing).
209 MI
.getOperand(TII
->getOperandIdx(MI
, R600::OpName::dst
)).getReg();
211 MI
.getOperand(TII
->getOperandIdx(MI
, R600::OpName::src0
)).getReg();
214 // Determine the correct source registers
// src1 is read from the original instruction through its operand index (a
// check on Src1Idx may be among the lines missing from this listing).
216 int Src1Idx
= TII
->getOperandIdx(MI
, R600::OpName::src1
);
218 Src1
= MI
.getOperand(Src1Idx
).getReg();
// Per-channel sub-registers of both sources. This matches the "Reduction
// instructions" pattern in the comment above; the guarding condition is not
// visible in this listing.
222 unsigned SubRegIndex
= AMDGPURegisterInfo::getSubRegFromChannel(Chan
);
223 Src0
= TRI
.getSubReg(Src0
, SubRegIndex
);
224 Src1
= TRI
.getSubReg(Src1
, SubRegIndex
);
// Cube swizzle: both new sources are sub-registers of the original Src0,
// selected by CubeSrcSwz[Chan] (src0) and CubeSrcSwz[3 - Chan] (src1).
// Src1 is computed first because the following statement overwrites Src0.
226 static const int CubeSrcSwz
[] = {2, 2, 0, 1};
227 unsigned SubRegIndex0
= AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz
[Chan
]);
228 unsigned SubRegIndex1
= AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz
[3 - Chan
]);
229 Src1
= TRI
.getSubReg(Src0
, SubRegIndex1
);
230 Src0
= TRI
.getSubReg(Src0
, SubRegIndex0
);
233 // Determine the correct destination registers;
// Two destination computations follow: the Chan-th sub-register of DstReg,
// and a masked write to register (DstBase * 4) + Chan of R600_TReg32RegClass.
// Which instruction kinds take which path is decided on lines not visible in
// this listing — TODO confirm.
237 unsigned SubRegIndex
= AMDGPURegisterInfo::getSubRegFromChannel(Chan
);
238 DstReg
= TRI
.getSubReg(DstReg
, SubRegIndex
);
240 // Mask the write if the original instruction does not write to
241 // the current Channel.
242 Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
243 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
244 DstReg
= R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
247 // Set the IsLast bit
// NotLast is true for channels 0..2 and false for channel 3.
248 NotLast
= (Chan
!= 3 );
250 // Add the new instruction
// Start from the original opcode; the two CUBE pseudo opcodes are mapped to
// their real counterparts.
251 unsigned Opcode
= MI
.getOpcode();
253 case R600::CUBE_r600_pseudo
:
254 Opcode
= R600::CUBE_r600_real
;
256 case R600::CUBE_eg_pseudo
:
257 Opcode
= R600::CUBE_eg_real
;
// Build the per-channel instruction at I with the registers chosen above.
263 MachineInstr
*NewMI
=
264 TII
->buildDefaultInstruction(MBB
, I
, Opcode
, DstReg
, Src0
, Src1
);
// Bundling and the two flags below are presumably conditional (on the channel
// index, Mask and NotLast respectively); the guarding lines are not visible in
// this listing — TODO confirm.
267 NewMI
->bundleWithPred();
269 TII
->addFlag(*NewMI
, 0, MO_FLAG_MASK
);
272 TII
->addFlag(*NewMI
, 0, MO_FLAG_NOT_LAST
);
// Carry over clamp, literal and the abs/neg modifiers of both sources from the
// original instruction to the new one.
274 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::clamp
);
275 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::literal
);
276 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_abs
);
277 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_abs
);
278 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_neg
);
279 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_neg
);
// Remove the original instruction.
281 MI
.eraseFromParent();