1 //===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Vector, Reduction, and Cube instructions need to fill the entire instruction
12 /// group to work correctly. This pass expands these individual instructions
13 /// into several instructions that will completely fill the instruction group.
15 //===----------------------------------------------------------------------===//
18 #include "AMDGPUSubtarget.h"
19 #include "R600Defines.h"
20 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/Pass.h"
36 #define DEBUG_TYPE "r600-expand-special-instrs"
40 class R600ExpandSpecialInstrsPass
: public MachineFunctionPass
{
42 const R600InstrInfo
*TII
= nullptr;
44 void SetFlagInNewMI(MachineInstr
*NewMI
, const MachineInstr
*OldMI
,
50 R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID
) {}
52 bool runOnMachineFunction(MachineFunction
&MF
) override
;
54 StringRef
getPassName() const override
{
55 return "R600 Expand special instructions pass";
59 } // end anonymous namespace
61 INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
62 "R600 Expand Special Instrs", false, false)
63 INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass
, DEBUG_TYPE
,
64 "R600ExpandSpecialInstrs", false, false)
66 char R600ExpandSpecialInstrsPass::ID
= 0;
68 char &llvm::R600ExpandSpecialInstrsPassID
= R600ExpandSpecialInstrsPass::ID
;
70 FunctionPass
*llvm::createR600ExpandSpecialInstrsPass() {
71 return new R600ExpandSpecialInstrsPass();
74 void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr
*NewMI
,
75 const MachineInstr
*OldMI
, unsigned Op
) {
76 int OpIdx
= TII
->getOperandIdx(*OldMI
, Op
);
78 uint64_t Val
= OldMI
->getOperand(OpIdx
).getImm();
79 TII
->setImmOperand(*NewMI
, Op
, Val
);
83 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction
&MF
) {
84 const R600Subtarget
&ST
= MF
.getSubtarget
<R600Subtarget
>();
85 TII
= ST
.getInstrInfo();
87 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
89 for (MachineFunction::iterator BB
= MF
.begin(), BB_E
= MF
.end();
91 MachineBasicBlock
&MBB
= *BB
;
92 MachineBasicBlock::iterator I
= MBB
.begin();
93 while (I
!= MBB
.end()) {
94 MachineInstr
&MI
= *I
;
97 // Expand LDS_*_RET instructions
98 if (TII
->isLDSRetInstr(MI
.getOpcode())) {
99 int DstIdx
= TII
->getOperandIdx(MI
.getOpcode(), R600::OpName::dst
);
100 assert(DstIdx
!= -1);
101 MachineOperand
&DstOp
= MI
.getOperand(DstIdx
);
102 MachineInstr
*Mov
= TII
->buildMovInstr(&MBB
, I
,
103 DstOp
.getReg(), R600::OQAP
);
104 DstOp
.setReg(R600::OQAP
);
105 int LDSPredSelIdx
= TII
->getOperandIdx(MI
.getOpcode(),
106 R600::OpName::pred_sel
);
107 int MovPredSelIdx
= TII
->getOperandIdx(Mov
->getOpcode(),
108 R600::OpName::pred_sel
);
109 // Copy the pred_sel bit
110 Mov
->getOperand(MovPredSelIdx
).setReg(
111 MI
.getOperand(LDSPredSelIdx
).getReg());
114 switch (MI
.getOpcode()) {
116 // Expand PRED_X to one of the PRED_SET instructions.
118 uint64_t Flags
= MI
.getOperand(3).getImm();
119 // The native opcode used by PRED_X is stored as an immediate in the third operand (operand index 2).
121 MachineInstr
*PredSet
= TII
->buildDefaultInstruction(MBB
, I
,
122 MI
.getOperand(2).getImm(), // opcode
123 MI
.getOperand(0).getReg(), // dst
124 MI
.getOperand(1).getReg(), // src0
126 TII
->addFlag(*PredSet
, 0, MO_FLAG_MASK
);
127 if (Flags
& MO_FLAG_PUSH
) {
128 TII
->setImmOperand(*PredSet
, R600::OpName::update_exec_mask
, 1);
130 TII
->setImmOperand(*PredSet
, R600::OpName::update_pred
, 1);
132 MI
.eraseFromParent();
137 const R600RegisterInfo
&TRI
= TII
->getRegisterInfo();
139 unsigned DstReg
= MI
.getOperand(0).getReg();
140 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
142 for (unsigned Chan
= 0; Chan
< 4; ++Chan
) {
143 bool Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
145 R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
147 TII
->buildSlotOfVectorInstruction(MBB
, &MI
, Chan
, SubDstReg
);
149 BMI
->bundleWithPred();
152 TII
->addFlag(*BMI
, 0, MO_FLAG_MASK
);
155 TII
->addFlag(*BMI
, 0, MO_FLAG_NOT_LAST
);
156 unsigned Opcode
= BMI
->getOpcode();
157 // While not strictly necessary from hw point of view, we force
158 // all src operands of a dot4 inst to belong to the same slot.
159 unsigned Src0
= BMI
->getOperand(
160 TII
->getOperandIdx(Opcode
, R600::OpName::src0
))
162 unsigned Src1
= BMI
->getOperand(
163 TII
->getOperandIdx(Opcode
, R600::OpName::src1
))
167 if ((TRI
.getEncodingValue(Src0
) & 0xff) < 127 &&
168 (TRI
.getEncodingValue(Src1
) & 0xff) < 127)
169 assert(TRI
.getHWRegChan(Src0
) == TRI
.getHWRegChan(Src1
));
171 MI
.eraseFromParent();
176 bool IsReduction
= TII
->isReductionOp(MI
.getOpcode());
177 bool IsVector
= TII
->isVector(MI
);
178 bool IsCube
= TII
->isCubeOp(MI
.getOpcode());
179 if (!IsReduction
&& !IsVector
&& !IsCube
) {
183 // Expand the instruction
185 // Reduction instructions:
186 // T0_X = DP4 T1_XYZW, T2_XYZW
188 // T0_X = DP4 T1_X, T2_X
189 // T0_Y (write masked) = DP4 T1_Y, T2_Y
190 // T0_Z (write masked) = DP4 T1_Z, T2_Z
191 // T0_W (write masked) = DP4 T1_W, T2_W
193 // Vector instructions:
194 // T0_X = MULLO_INT T1_X, T2_X
196 // T0_X = MULLO_INT T1_X, T2_X
197 // T0_Y (write masked) = MULLO_INT T1_X, T2_X
198 // T0_Z (write masked) = MULLO_INT T1_X, T2_X
199 // T0_W (write masked) = MULLO_INT T1_X, T2_X
201 // Cube instructions:
202 // T0_XYZW = CUBE T1_XYZW
204 // T0_X = CUBE T1_Z, T1_Y
205 // T0_Y = CUBE T1_Z, T1_X
206 // T0_Z = CUBE T1_X, T1_Z
207 // T0_W = CUBE T1_Y, T1_Z
208 for (unsigned Chan
= 0; Chan
< 4; Chan
++) {
209 unsigned DstReg
= MI
.getOperand(
210 TII
->getOperandIdx(MI
, R600::OpName::dst
)).getReg();
211 unsigned Src0
= MI
.getOperand(
212 TII
->getOperandIdx(MI
, R600::OpName::src0
)).getReg();
215 // Determine the correct source registers
217 int Src1Idx
= TII
->getOperandIdx(MI
, R600::OpName::src1
);
219 Src1
= MI
.getOperand(Src1Idx
).getReg();
223 unsigned SubRegIndex
= AMDGPURegisterInfo::getSubRegFromChannel(Chan
);
224 Src0
= TRI
.getSubReg(Src0
, SubRegIndex
);
225 Src1
= TRI
.getSubReg(Src1
, SubRegIndex
);
227 static const int CubeSrcSwz
[] = {2, 2, 0, 1};
228 unsigned SubRegIndex0
= AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz
[Chan
]);
229 unsigned SubRegIndex1
= AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz
[3 - Chan
]);
230 Src1
= TRI
.getSubReg(Src0
, SubRegIndex1
);
231 Src0
= TRI
.getSubReg(Src0
, SubRegIndex0
);
234 // Determine the correct destination registers.
238 unsigned SubRegIndex
= AMDGPURegisterInfo::getSubRegFromChannel(Chan
);
239 DstReg
= TRI
.getSubReg(DstReg
, SubRegIndex
);
241 // Mask the write if the original instruction does not write to
242 // the current Channel.
243 Mask
= (Chan
!= TRI
.getHWRegChan(DstReg
));
244 unsigned DstBase
= TRI
.getEncodingValue(DstReg
) & HW_REG_MASK
;
245 DstReg
= R600::R600_TReg32RegClass
.getRegister((DstBase
* 4) + Chan
);
248 // Compute the NotLast bit: every channel except channel 3 is not the last slot.
249 NotLast
= (Chan
!= 3 );
251 // Add the new instruction
252 unsigned Opcode
= MI
.getOpcode();
254 case R600::CUBE_r600_pseudo
:
255 Opcode
= R600::CUBE_r600_real
;
257 case R600::CUBE_eg_pseudo
:
258 Opcode
= R600::CUBE_eg_real
;
264 MachineInstr
*NewMI
=
265 TII
->buildDefaultInstruction(MBB
, I
, Opcode
, DstReg
, Src0
, Src1
);
268 NewMI
->bundleWithPred();
270 TII
->addFlag(*NewMI
, 0, MO_FLAG_MASK
);
273 TII
->addFlag(*NewMI
, 0, MO_FLAG_NOT_LAST
);
275 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::clamp
);
276 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::literal
);
277 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_abs
);
278 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_abs
);
279 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src0_neg
);
280 SetFlagInNewMI(NewMI
, &MI
, R600::OpName::src1_neg
);
282 MI
.eraseFromParent();