1 //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This pass inserts changes to the Mode register settings as required.
10 /// Note that currently it only deals with the Double Precision Floating Point
11 /// rounding mode setting, but is intended to be generic enough to be easily
14 //===----------------------------------------------------------------------===//
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/ADT/Statistic.h"
22 #define DEBUG_TYPE "si-mode-register"
24 STATISTIC(NumSetregInserted
, "Number of setreg of mode register inserted.");
29 // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
34 Status() : Mask(0), Mode(0){};
36 Status(unsigned NewMask
, unsigned NewMode
) : Mask(NewMask
), Mode(NewMode
) {
40 // merge two status values such that only values that don't conflict are
42 Status
merge(const Status
&S
) const {
43 return Status((Mask
| S
.Mask
), ((Mode
& ~S
.Mask
) | (S
.Mode
& S
.Mask
)));
46 // merge an unknown value by using the unknown value's mask to remove bits
48 Status
mergeUnknown(unsigned newMask
) {
49 return Status(Mask
& ~newMask
, Mode
& ~newMask
);
52 // intersect two Status values to produce a mode and mask that is a subset
54 Status
intersect(const Status
&S
) const {
55 unsigned NewMask
= (Mask
& S
.Mask
) & (Mode
^ ~S
.Mode
);
56 unsigned NewMode
= (Mode
& NewMask
);
57 return Status(NewMask
, NewMode
);
60 // produce the delta required to change the Mode to the required Mode
61 Status
delta(const Status
&S
) const {
62 return Status((S
.Mask
& (Mode
^ S
.Mode
)) | (~Mask
& S
.Mask
), S
.Mode
);
65 bool operator==(const Status
&S
) const {
66 return (Mask
== S
.Mask
) && (Mode
== S
.Mode
);
69 bool operator!=(const Status
&S
) const { return !(*this == S
); }
71 bool isCompatible(Status
&S
) {
72 return ((Mask
& S
.Mask
) == S
.Mask
) && ((Mode
& S
.Mask
) == S
.Mode
);
75 bool isCombinable(Status
&S
) { return !(Mask
& S
.Mask
) || isCompatible(S
); }
80 // The Status that represents the mode register settings required by the
81 // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
84 // The Status that represents the net changes to the Mode register made by
85 // this block, Calculated in Phase 1.
88 // The Status that represents the mode register settings on exit from this
89 // block. Calculated in Phase 2.
92 // The Status that represents the intersection of exit Mode register settings
93 // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
96 // In Phase 1 we record the first instruction that has a mode requirement,
97 // which is used in Phase 3 if we need to insert a mode change.
98 MachineInstr
*FirstInsertionPoint
;
100 // A flag to indicate whether an Exit value has been set (we can't tell by
101 // examining the Exit value itself as all values may be valid results).
104 BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
109 class SIModeRegister
: public MachineFunctionPass
{
113 std::vector
<std::unique_ptr
<BlockData
>> BlockInfo
;
114 std::queue
<MachineBasicBlock
*> Phase2List
;
116 // The default mode register setting currently only caters for the floating
117 // point double precision rounding mode.
118 // We currently assume the default rounding mode is Round to Nearest
119 // NOTE: this should come from a per function rounding mode setting once such
121 unsigned DefaultMode
= FP_ROUND_ROUND_TO_NEAREST
;
122 Status DefaultStatus
=
123 Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode
));
125 bool Changed
= false;
128 SIModeRegister() : MachineFunctionPass(ID
) {}
130 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Records this pass's analysis usage in AU: marks the CFG as preserved and
// then lets the MachineFunctionPass base class add its own entries.
132 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
133 AU
.setPreservesCFG();
134 MachineFunctionPass::getAnalysisUsage(AU
);
137 void processBlockPhase1(MachineBasicBlock
&MBB
, const SIInstrInfo
*TII
);
139 void processBlockPhase2(MachineBasicBlock
&MBB
, const SIInstrInfo
*TII
);
141 void processBlockPhase3(MachineBasicBlock
&MBB
, const SIInstrInfo
*TII
);
143 Status
getInstructionMode(MachineInstr
&MI
, const SIInstrInfo
*TII
);
145 void insertSetreg(MachineBasicBlock
&MBB
, MachineInstr
*I
,
146 const SIInstrInfo
*TII
, Status InstrMode
);
148 } // End anonymous namespace.
// Register the pass, passing DEBUG_TYPE as its name argument and the string
// below as its description.
150 INITIALIZE_PASS(SIModeRegister
, DEBUG_TYPE
,
151 "Insert required mode register values", false, false)
// Definition of the pass's static ID member.
153 char SIModeRegister::ID
= 0;
// Reference to that ID, defined in the llvm namespace.
155 char &llvm::SIModeRegisterID
= SIModeRegister::ID
;
157 FunctionPass
*llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
159 // Determine the Mode register setting required for this instruction.
160 // Instructions which don't use the Mode register return a null Status.
161 // Note this currently only deals with instructions that use the floating point
162 // double precision setting.
// MI  - the instruction whose mode requirement is being queried.
// TII - instruction info used for the usesFPDPRounding() query.
163 Status
SIModeRegister::getInstructionMode(MachineInstr
&MI
,
164 const SIInstrInfo
*TII
) {
// Only instructions that TII reports as using FP DP rounding are examined.
165 if (TII
->usesFPDPRounding(MI
)) {
// The three f16 interpolation opcodes listed here share one dedicated status.
166 switch (MI
.getOpcode()) {
167 case AMDGPU::V_INTERP_P1LL_F16
:
168 case AMDGPU::V_INTERP_P1LV_F16
:
169 case AMDGPU::V_INTERP_P2_F16
:
170 // f16 interpolation instructions need double precision round to zero
171 return Status(FP_ROUND_MODE_DP(3),
172 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO
));
// Hands back the pass-wide DefaultStatus member.
174 return DefaultStatus
;
180 // Insert a setreg instruction to update the Mode register.
181 // It is possible (though unlikely) for an instruction to require a change to
182 // the value of disjoint parts of the Mode register when we don't know the
183 // value of the intervening bits. In that case we need to use more than one
184 // setreg instruction.
// MBB, MI   - block and instruction handed to BuildMI as the insertion position.
// TII       - supplies the S_SETREG_IMM32_B32 instruction descriptor.
// InstrMode - taken by value; its Mask is consumed run by run in the loop.
185 void SIModeRegister::insertSetreg(MachineBasicBlock
&MBB
, MachineInstr
*MI
,
186 const SIInstrInfo
*TII
, Status InstrMode
) {
// Each iteration handles the lowest contiguous run of set bits still left in
// InstrMode.Mask; the loop ends once the mask is empty.
187 while (InstrMode
.Mask
) {
// Offset: bit position at which that run starts.
188 unsigned Offset
= countTrailingZeros
<unsigned>(InstrMode
.Mask
);
// Width: number of consecutive set bits in the run.
189 unsigned Width
= countTrailingOnes
<unsigned>(InstrMode
.Mask
>> Offset
);
// Value: the Mode bits belonging to the run, shifted down to bit 0.
// NOTE(review): (1 << Width) shifts an int; if Width could ever reach the
// full word size the shift would be undefined - confirm the mask is never
// that wide.
190 unsigned Value
= (InstrMode
.Mode
>> Offset
) & ((1 << Width
) - 1);
191 BuildMI(MBB
, MI
, 0, TII
->get(AMDGPU::S_SETREG_IMM32_B32
))
// Pack width-1, offset and the MODE register id into one immediate operand.
193 .addImm(((Width
- 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_
) |
194 (Offset
<< AMDGPU::Hwreg::OFFSET_SHIFT_
) |
195 (AMDGPU::Hwreg::ID_MODE
<< AMDGPU::Hwreg::ID_SHIFT_
));
// Clear the bits just handled so the loop moves on to the next run, if any.
198 InstrMode
.Mask
&= ~(((1 << Width
) - 1) << Offset
);
202 // In Phase 1 we iterate through the instructions of the block and for each
203 // instruction we get its mode usage. If the instruction uses the Mode register
205 // - update the Change status, which tracks the changes to the Mode register
206 // made by this block
207 // - if this instruction's requirements are compatible with the current setting
208 // of the Mode register we merge the modes
209 // - if it isn't compatible and an InsertionPoint isn't set, then we set the
210 // InsertionPoint to the current instruction, and we remember the current
212 // - if it isn't compatible and InsertionPoint is set we insert a setreg before
213 // that instruction (unless this instruction forms part of the block's
214 // entry requirements in which case the insertion is deferred until Phase 3
215 // when predecessor exit values are known), and move the insertion point to
217 // - if this is a setreg instruction we treat it as an incompatible instruction.
218 // This is sub-optimal but avoids some nasty corner cases, and is expected to
219 // occur very rarely.
220 // - on exit we have set the Require, Change, and initial Exit modes.
221 void SIModeRegister::processBlockPhase1(MachineBasicBlock
&MBB
,
222 const SIInstrInfo
*TII
) {
223 auto NewInfo
= std::make_unique
<BlockData
>();
224 MachineInstr
*InsertionPoint
= nullptr;
225 // RequirePending is used to indicate whether we are collecting the initial
226 // requirements for the block, and need to defer the first InsertionPoint to
227 // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
228 // we discover an explicit setreg that means this block doesn't have any
229 // initial requirements.
230 bool RequirePending
= true;
232 for (MachineInstr
&MI
: MBB
) {
233 Status InstrMode
= getInstructionMode(MI
, TII
);
234 if (MI
.getOpcode() == AMDGPU::S_SETREG_B32
||
235 MI
.getOpcode() == AMDGPU::S_SETREG_B32_mode
||
236 MI
.getOpcode() == AMDGPU::S_SETREG_IMM32_B32
||
237 MI
.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode
) {
238 // We preserve any explicit mode register setreg instruction we encounter,
239 // as we assume it has been inserted by a higher authority (this is
240 // likely to be a very rare occurrence).
241 unsigned Dst
= TII
->getNamedOperand(MI
, AMDGPU::OpName::simm16
)->getImm();
242 if (((Dst
& AMDGPU::Hwreg::ID_MASK_
) >> AMDGPU::Hwreg::ID_SHIFT_
) !=
243 AMDGPU::Hwreg::ID_MODE
)
246 unsigned Width
= ((Dst
& AMDGPU::Hwreg::WIDTH_M1_MASK_
) >>
247 AMDGPU::Hwreg::WIDTH_M1_SHIFT_
) +
250 (Dst
& AMDGPU::Hwreg::OFFSET_MASK_
) >> AMDGPU::Hwreg::OFFSET_SHIFT_
;
251 unsigned Mask
= ((1 << Width
) - 1) << Offset
;
253 // If an InsertionPoint is set we will insert a setreg there.
254 if (InsertionPoint
) {
255 insertSetreg(MBB
, InsertionPoint
, TII
, IPChange
.delta(NewInfo
->Change
));
256 InsertionPoint
= nullptr;
258 // If this is an immediate then we know the value being set, but if it is
259 // not an immediate then we treat the modified bits of the mode register
261 if (MI
.getOpcode() == AMDGPU::S_SETREG_IMM32_B32
||
262 MI
.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode
) {
263 unsigned Val
= TII
->getNamedOperand(MI
, AMDGPU::OpName::imm
)->getImm();
264 unsigned Mode
= (Val
<< Offset
) & Mask
;
265 Status Setreg
= Status(Mask
, Mode
);
266 // If we haven't already set the initial requirements for the block we
267 // don't need to as the requirements start from this explicit setreg.
268 RequirePending
= false;
269 NewInfo
->Change
= NewInfo
->Change
.merge(Setreg
);
271 NewInfo
->Change
= NewInfo
->Change
.mergeUnknown(Mask
);
273 } else if (!NewInfo
->Change
.isCompatible(InstrMode
)) {
274 // This instruction uses the Mode register and its requirements aren't
275 // compatible with the current mode.
276 if (InsertionPoint
) {
277 // If the required mode change cannot be included in the current
278 // InsertionPoint changes, we need a setreg and start a new
280 if (!IPChange
.delta(NewInfo
->Change
).isCombinable(InstrMode
)) {
281 if (RequirePending
) {
282 // This is the first insertionPoint in the block so we will defer
283 // the insertion of the setreg to Phase 3 where we know whether or
284 // not it is actually needed.
285 NewInfo
->FirstInsertionPoint
= InsertionPoint
;
286 NewInfo
->Require
= NewInfo
->Change
;
287 RequirePending
= false;
289 insertSetreg(MBB
, InsertionPoint
, TII
,
290 IPChange
.delta(NewInfo
->Change
));
291 IPChange
= NewInfo
->Change
;
293 // Set the new InsertionPoint
294 InsertionPoint
= &MI
;
296 NewInfo
->Change
= NewInfo
->Change
.merge(InstrMode
);
298 // No InsertionPoint is currently set - this is either the first in
299 // the block or we have previously seen an explicit setreg.
300 InsertionPoint
= &MI
;
301 IPChange
= NewInfo
->Change
;
302 NewInfo
->Change
= NewInfo
->Change
.merge(InstrMode
);
306 if (RequirePending
) {
307 // If we haven't yet set the initial requirements for the block we set them
309 NewInfo
->FirstInsertionPoint
= InsertionPoint
;
310 NewInfo
->Require
= NewInfo
->Change
;
311 } else if (InsertionPoint
) {
312 // We need to insert a setreg at the InsertionPoint
313 insertSetreg(MBB
, InsertionPoint
, TII
, IPChange
.delta(NewInfo
->Change
));
315 NewInfo
->Exit
= NewInfo
->Change
;
316 BlockInfo
[MBB
.getNumber()] = std::move(NewInfo
);
319 // In Phase 2 we revisit each block and calculate the common Mode register
320 // value provided by all predecessor blocks. If the Exit value for the block
321 // is changed, then we add the successor blocks to the worklist so that the
322 // exit value is propagated.
323 void SIModeRegister::processBlockPhase2(MachineBasicBlock
&MBB
,
324 const SIInstrInfo
*TII
) {
325 bool RevisitRequired
= false;
326 bool ExitSet
= false;
327 unsigned ThisBlock
= MBB
.getNumber();
328 if (MBB
.pred_empty()) {
329 // There are no predecessors, so use the default starting status.
330 BlockInfo
[ThisBlock
]->Pred
= DefaultStatus
;
333 // Build a status that is common to all the predecessors by intersecting
334 // all the predecessor exit status values.
335 // Mask bits (which represent the Mode bits with a known value) can only be
336 // added by explicit SETREG instructions or the initial default value -
337 // the intersection process may remove Mask bits.
338 // If we find a predecessor that has not yet had an exit value determined
339 // (this can happen for example if a block is its own predecessor) we defer
340 // use of that value as the Mask will be all zero, and we will revisit this
341 // block again later (unless the only predecessor without an exit value is
343 MachineBasicBlock::pred_iterator P
= MBB
.pred_begin(), E
= MBB
.pred_end();
344 MachineBasicBlock
&PB
= *(*P
);
345 unsigned PredBlock
= PB
.getNumber();
346 if ((ThisBlock
== PredBlock
) && (std::next(P
) == E
)) {
347 BlockInfo
[ThisBlock
]->Pred
= DefaultStatus
;
349 } else if (BlockInfo
[PredBlock
]->ExitSet
) {
350 BlockInfo
[ThisBlock
]->Pred
= BlockInfo
[PredBlock
]->Exit
;
352 } else if (PredBlock
!= ThisBlock
)
353 RevisitRequired
= true;
355 for (P
= std::next(P
); P
!= E
; P
= std::next(P
)) {
356 MachineBasicBlock
*Pred
= *P
;
357 unsigned PredBlock
= Pred
->getNumber();
358 if (BlockInfo
[PredBlock
]->ExitSet
) {
359 if (BlockInfo
[ThisBlock
]->ExitSet
) {
360 BlockInfo
[ThisBlock
]->Pred
=
361 BlockInfo
[ThisBlock
]->Pred
.intersect(BlockInfo
[PredBlock
]->Exit
);
363 BlockInfo
[ThisBlock
]->Pred
= BlockInfo
[PredBlock
]->Exit
;
366 } else if (PredBlock
!= ThisBlock
)
367 RevisitRequired
= true;
371 BlockInfo
[ThisBlock
]->Pred
.merge(BlockInfo
[ThisBlock
]->Change
);
372 if (BlockInfo
[ThisBlock
]->Exit
!= TmpStatus
) {
373 BlockInfo
[ThisBlock
]->Exit
= TmpStatus
;
374 // Add the successors to the work list so we can propagate the changed exit
376 for (MachineBasicBlock::succ_iterator S
= MBB
.succ_begin(),
378 S
!= E
; S
= std::next(S
)) {
379 MachineBasicBlock
&B
= *(*S
);
383 BlockInfo
[ThisBlock
]->ExitSet
= ExitSet
;
385 Phase2List
.push(&MBB
);
388 // In Phase 3 we revisit each block and if it has an insertion point defined we
389 // check whether the predecessor mode meets the block's entry requirements. If
390 // not we insert an appropriate setreg instruction to modify the Mode register.
// MBB - the block being finalised.
// TII - forwarded to insertSetreg.
391 void SIModeRegister::processBlockPhase3(MachineBasicBlock
&MBB
,
392 const SIInstrInfo
*TII
) {
// BlockInfo is indexed by the block's number.
393 unsigned ThisBlock
= MBB
.getNumber();
// Work is only needed when the block's Pred status is not compatible with its
// Require status.
394 if (!BlockInfo
[ThisBlock
]->Pred
.isCompatible(BlockInfo
[ThisBlock
]->Require
)) {
// The change needed to get from the Pred status to the Require status.
396 BlockInfo
[ThisBlock
]->Pred
.delta(BlockInfo
[ThisBlock
]->Require
);
// Insert at the recorded first insertion point when there is one.
397 if (BlockInfo
[ThisBlock
]->FirstInsertionPoint
)
398 insertSetreg(MBB
, BlockInfo
[ThisBlock
]->FirstInsertionPoint
, TII
, Delta
);
// Insertion at the first instruction of the block.
// NOTE(review): instr_front() presumably requires a non-empty block - confirm
// that holds on this path.
400 insertSetreg(MBB
, &MBB
.instr_front(), TII
, Delta
);
404 bool SIModeRegister::runOnMachineFunction(MachineFunction
&MF
) {
405 BlockInfo
.resize(MF
.getNumBlockIDs());
406 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
407 const SIInstrInfo
*TII
= ST
.getInstrInfo();
409 // Processing is performed in a number of phases
411 // Phase 1 - determine the initial mode required by each block, and add setreg
412 // instructions for intra block requirements.
413 for (MachineBasicBlock
&BB
: MF
)
414 processBlockPhase1(BB
, TII
);
416 // Phase 2 - determine the exit mode from each block. We add all blocks to the
417 // list here, but will also add any that need to be revisited during Phase 2
419 for (MachineBasicBlock
&BB
: MF
)
420 Phase2List
.push(&BB
);
421 while (!Phase2List
.empty()) {
422 processBlockPhase2(*Phase2List
.front(), TII
);
426 // Phase 3 - add an initial setreg to each block where the required entry mode
427 // is not satisfied by the exit mode of all its predecessors.
428 for (MachineBasicBlock
&BB
: MF
)
429 processBlockPhase3(BB
, TII
);