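// NOTE(review): the next three lines look like page-header text from the web
// repository viewer this file was captured from; they are not part of the
// source and are kept only as comments.
// [InstCombine] Signed saturation patterns
// [llvm-complete.git] / lib / Target / AMDGPU / GCNDPPCombine.cpp
// blob 98678873e37cb3f039d6ca9c9ded15f8024a3d46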
//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules :
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
//
// Otherwise cancel.
//
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>
#include <cstdint>
#include <limits>
58 using namespace llvm;
60 #define DEBUG_TYPE "gcn-dpp-combine"
62 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
64 namespace {
66 class GCNDPPCombine : public MachineFunctionPass {
67 MachineRegisterInfo *MRI;
68 const SIInstrInfo *TII;
70 using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
72 MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
74 MachineInstr *createDPPInst(MachineInstr &OrigMI,
75 MachineInstr &MovMI,
76 RegSubRegPair CombOldVGPR,
77 MachineOperand *OldOpnd,
78 bool CombBCZ) const;
80 MachineInstr *createDPPInst(MachineInstr &OrigMI,
81 MachineInstr &MovMI,
82 RegSubRegPair CombOldVGPR,
83 bool CombBCZ) const;
85 bool hasNoImmOrEqual(MachineInstr &MI,
86 unsigned OpndName,
87 int64_t Value,
88 int64_t Mask = -1) const;
90 bool combineDPPMov(MachineInstr &MI) const;
92 public:
93 static char ID;
95 GCNDPPCombine() : MachineFunctionPass(ID) {
96 initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
99 bool runOnMachineFunction(MachineFunction &MF) override;
101 StringRef getPassName() const override { return "GCN DPP Combine"; }
103 void getAnalysisUsage(AnalysisUsage &AU) const override {
104 AU.setPreservesCFG();
105 MachineFunctionPass::getAnalysisUsage(AU);
109 } // end anonymous namespace
111 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
113 char GCNDPPCombine::ID = 0;
115 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
117 FunctionPass *llvm::createGCNDPPCombinePass() {
118 return new GCNDPPCombine();
121 static int getDPPOp(unsigned Op) {
122 auto DPP32 = AMDGPU::getDPPOp32(Op);
123 if (DPP32 != -1)
124 return DPP32;
126 auto E32 = AMDGPU::getVOPe32(Op);
127 return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
130 // tracks the register operand definition and returns:
131 // 1. immediate operand used to initialize the register if found
132 // 2. nullptr if the register operand is undef
133 // 3. the operand itself otherwise
134 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
135 auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
136 if (!Def)
137 return nullptr;
139 switch(Def->getOpcode()) {
140 default: break;
141 case AMDGPU::IMPLICIT_DEF:
142 return nullptr;
143 case AMDGPU::COPY:
144 case AMDGPU::V_MOV_B32_e32: {
145 auto &Op1 = Def->getOperand(1);
146 if (Op1.isImm())
147 return &Op1;
148 break;
151 return &OldOpnd;
154 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
155 MachineInstr &MovMI,
156 RegSubRegPair CombOldVGPR,
157 bool CombBCZ) const {
158 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
160 auto OrigOp = OrigMI.getOpcode();
161 auto DPPOp = getDPPOp(OrigOp);
162 if (DPPOp == -1) {
163 LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
164 return nullptr;
167 auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
168 OrigMI.getDebugLoc(), TII->get(DPPOp));
169 bool Fail = false;
170 do {
171 auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
172 assert(Dst);
173 DPPInst.add(*Dst);
174 int NumOperands = 1;
176 const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
177 if (OldIdx != -1) {
178 assert(OldIdx == NumOperands);
179 assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
180 auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
181 DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
182 CombOldVGPR.SubReg);
183 ++NumOperands;
184 } else {
185 // TODO: this discards MAC/FMA instructions for now, let's add it later
186 LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
187 " TBD\n");
188 Fail = true;
189 break;
192 if (auto *Mod0 = TII->getNamedOperand(OrigMI,
193 AMDGPU::OpName::src0_modifiers)) {
194 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
195 AMDGPU::OpName::src0_modifiers));
196 assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
197 DPPInst.addImm(Mod0->getImm());
198 ++NumOperands;
199 } else if (AMDGPU::getNamedOperandIdx(DPPOp,
200 AMDGPU::OpName::src0_modifiers) != -1) {
201 DPPInst.addImm(0);
202 ++NumOperands;
204 auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
205 assert(Src0);
206 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
207 LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
208 Fail = true;
209 break;
211 DPPInst.add(*Src0);
212 DPPInst->getOperand(NumOperands).setIsKill(false);
213 ++NumOperands;
215 if (auto *Mod1 = TII->getNamedOperand(OrigMI,
216 AMDGPU::OpName::src1_modifiers)) {
217 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
218 AMDGPU::OpName::src1_modifiers));
219 assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
220 DPPInst.addImm(Mod1->getImm());
221 ++NumOperands;
222 } else if (AMDGPU::getNamedOperandIdx(DPPOp,
223 AMDGPU::OpName::src1_modifiers) != -1) {
224 DPPInst.addImm(0);
225 ++NumOperands;
227 if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
228 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
229 LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
230 Fail = true;
231 break;
233 DPPInst.add(*Src1);
234 ++NumOperands;
237 if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
238 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
239 LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
240 Fail = true;
241 break;
243 DPPInst.add(*Src2);
246 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
247 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
248 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
249 DPPInst.addImm(CombBCZ ? 1 : 0);
250 } while (false);
252 if (Fail) {
253 DPPInst.getInstr()->eraseFromParent();
254 return nullptr;
256 LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
257 return DPPInst.getInstr();
260 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
261 assert(OldOpnd->isImm());
262 switch (OrigMIOp) {
263 default: break;
264 case AMDGPU::V_ADD_U32_e32:
265 case AMDGPU::V_ADD_U32_e64:
266 case AMDGPU::V_ADD_I32_e32:
267 case AMDGPU::V_ADD_I32_e64:
268 case AMDGPU::V_OR_B32_e32:
269 case AMDGPU::V_OR_B32_e64:
270 case AMDGPU::V_SUBREV_U32_e32:
271 case AMDGPU::V_SUBREV_U32_e64:
272 case AMDGPU::V_SUBREV_I32_e32:
273 case AMDGPU::V_SUBREV_I32_e64:
274 case AMDGPU::V_MAX_U32_e32:
275 case AMDGPU::V_MAX_U32_e64:
276 case AMDGPU::V_XOR_B32_e32:
277 case AMDGPU::V_XOR_B32_e64:
278 if (OldOpnd->getImm() == 0)
279 return true;
280 break;
281 case AMDGPU::V_AND_B32_e32:
282 case AMDGPU::V_AND_B32_e64:
283 case AMDGPU::V_MIN_U32_e32:
284 case AMDGPU::V_MIN_U32_e64:
285 if (static_cast<uint32_t>(OldOpnd->getImm()) ==
286 std::numeric_limits<uint32_t>::max())
287 return true;
288 break;
289 case AMDGPU::V_MIN_I32_e32:
290 case AMDGPU::V_MIN_I32_e64:
291 if (static_cast<int32_t>(OldOpnd->getImm()) ==
292 std::numeric_limits<int32_t>::max())
293 return true;
294 break;
295 case AMDGPU::V_MAX_I32_e32:
296 case AMDGPU::V_MAX_I32_e64:
297 if (static_cast<int32_t>(OldOpnd->getImm()) ==
298 std::numeric_limits<int32_t>::min())
299 return true;
300 break;
301 case AMDGPU::V_MUL_I32_I24_e32:
302 case AMDGPU::V_MUL_I32_I24_e64:
303 case AMDGPU::V_MUL_U32_U24_e32:
304 case AMDGPU::V_MUL_U32_U24_e64:
305 if (OldOpnd->getImm() == 1)
306 return true;
307 break;
309 return false;
312 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
313 MachineInstr &MovMI,
314 RegSubRegPair CombOldVGPR,
315 MachineOperand *OldOpndValue,
316 bool CombBCZ) const {
317 assert(CombOldVGPR.Reg);
318 if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
319 auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
320 if (!Src1 || !Src1->isReg()) {
321 LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
322 return nullptr;
324 if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
325 LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
326 return nullptr;
328 CombOldVGPR = getRegSubRegPair(*Src1);
329 if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
330 LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n");
331 return nullptr;
334 return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
337 // returns true if MI doesn't have OpndName immediate operand or the
338 // operand has Value
339 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
340 int64_t Value, int64_t Mask) const {
341 auto *Imm = TII->getNamedOperand(MI, OpndName);
342 if (!Imm)
343 return true;
345 assert(Imm->isImm());
346 return (Imm->getImm() & Mask) == Value;
349 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
350 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
351 LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
353 auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
354 assert(DstOpnd && DstOpnd->isReg());
355 auto DPPMovReg = DstOpnd->getReg();
356 if (DPPMovReg.isPhysical()) {
357 LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n");
358 return false;
360 if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
361 LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
362 " for all uses\n");
363 return false;
366 auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
367 assert(RowMaskOpnd && RowMaskOpnd->isImm());
368 auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
369 assert(BankMaskOpnd && BankMaskOpnd->isImm());
370 const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
371 BankMaskOpnd->getImm() == 0xF;
373 auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
374 assert(BCZOpnd && BCZOpnd->isImm());
375 bool BoundCtrlZero = BCZOpnd->getImm();
377 auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
378 auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
379 assert(OldOpnd && OldOpnd->isReg());
380 assert(SrcOpnd && SrcOpnd->isReg());
381 if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
382 LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n");
383 return false;
386 auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
387 // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
388 // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
389 // but the third option is used to distinguish undef from non-immediate
390 // to reuse IMPLICIT_DEF instruction later
391 assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
393 bool CombBCZ = false;
395 if (MaskAllLanes && BoundCtrlZero) { // [1]
396 CombBCZ = true;
397 } else {
398 if (!OldOpndValue || !OldOpndValue->isImm()) {
399 LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
400 return false;
403 if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
404 LLVM_DEBUG(dbgs() <<
405 " failed: old reg def and mov should be in the same BB\n");
406 return false;
409 if (OldOpndValue->getImm() == 0) {
410 if (MaskAllLanes) {
411 assert(!BoundCtrlZero); // by check [1]
412 CombBCZ = true;
414 } else if (BoundCtrlZero) {
415 assert(!MaskAllLanes); // by check [1]
416 LLVM_DEBUG(dbgs() <<
417 " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
418 return false;
422 LLVM_DEBUG(dbgs() << " old=";
423 if (!OldOpndValue)
424 dbgs() << "undef";
425 else
426 dbgs() << *OldOpndValue;
427 dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
429 SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
430 DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
431 auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
432 // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
433 if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
434 CombOldVGPR = RegSubRegPair(
435 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
436 auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
437 TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
438 DPPMIs.push_back(UndefInst.getInstr());
441 OrigMIs.push_back(&MovMI);
442 bool Rollback = true;
443 SmallVector<MachineOperand*, 16> Uses;
445 for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
446 Uses.push_back(&Use);
449 while (!Uses.empty()) {
450 MachineOperand *Use = Uses.pop_back_val();
451 Rollback = true;
453 auto &OrigMI = *Use->getParent();
454 LLVM_DEBUG(dbgs() << " try: " << OrigMI);
456 auto OrigOp = OrigMI.getOpcode();
457 if (OrigOp == AMDGPU::REG_SEQUENCE) {
458 Register FwdReg = OrigMI.getOperand(0).getReg();
459 unsigned FwdSubReg = 0;
461 if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
462 LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
463 " for all uses\n");
464 break;
467 unsigned OpNo, E = OrigMI.getNumOperands();
468 for (OpNo = 1; OpNo < E; OpNo += 2) {
469 if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
470 FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
471 break;
475 if (!FwdSubReg)
476 break;
478 for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
479 if (Op.getSubReg() == FwdSubReg)
480 Uses.push_back(&Op);
482 RegSeqWithOpNos[&OrigMI].push_back(OpNo);
483 continue;
486 if (TII->isVOP3(OrigOp)) {
487 if (!TII->hasVALU32BitEncoding(OrigOp)) {
488 LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
489 break;
491 // check if other than abs|neg modifiers are set (opsel for example)
492 const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
493 if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
494 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
495 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
496 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
497 LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
498 break;
500 } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
501 LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
502 break;
505 LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
506 if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
507 if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
508 OldOpndValue, CombBCZ)) {
509 DPPMIs.push_back(DPPInst);
510 Rollback = false;
512 } else if (OrigMI.isCommutable() &&
513 Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
514 auto *BB = OrigMI.getParent();
515 auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
516 BB->insert(OrigMI, NewMI);
517 if (TII->commuteInstruction(*NewMI)) {
518 LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
519 if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
520 OldOpndValue, CombBCZ)) {
521 DPPMIs.push_back(DPPInst);
522 Rollback = false;
524 } else
525 LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
526 NewMI->eraseFromParent();
527 } else
528 LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
529 if (Rollback)
530 break;
531 OrigMIs.push_back(&OrigMI);
534 Rollback |= !Uses.empty();
536 for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
537 MI->eraseFromParent();
539 if (!Rollback) {
540 for (auto &S : RegSeqWithOpNos) {
541 if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
542 S.first->eraseFromParent();
543 continue;
545 while (!S.second.empty())
546 S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
550 return !Rollback;
553 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
554 auto &ST = MF.getSubtarget<GCNSubtarget>();
555 if (!ST.hasDPP() || skipFunction(MF.getFunction()))
556 return false;
558 MRI = &MF.getRegInfo();
559 TII = ST.getInstrInfo();
561 assert(MRI->isSSA() && "Must be run on SSA");
563 bool Changed = false;
564 for (auto &MBB : MF) {
565 for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
566 auto &MI = *I++;
567 if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
568 Changed = true;
569 ++NumDPPMovsCombined;
570 } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
571 auto Split = TII->expandMovDPP64(MI);
572 for (auto M : { Split.first, Split.second }) {
573 if (combineDPPMov(*M))
574 ++NumDPPMovsCombined;
576 Changed = true;
580 return Changed;