[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / llvm / lib / CodeGen / DetectDeadLanes.cpp
blob86e9f3abe010d70f8963b125c9c730b63b2f8f0f
1 //===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Analysis that tracks defined/used subregister lanes across COPY instructions
11 /// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
12 /// INSERT_SUBREG, EXTRACT_SUBREG).
13 /// The information is used to detect dead definitions and the usage of
14 /// (completely) undefined values and mark the operands as such.
15 /// This pass is necessary because the dead/undef status is not obvious anymore
16 /// when subregisters are involved.
17 ///
18 /// Example:
19 /// %0 = some definition
20 /// %1 = IMPLICIT_DEF
21 /// %2 = REG_SEQUENCE %0, sub0, %1, sub1
22 /// %3 = EXTRACT_SUBREG %2, sub1
23 /// = use %3
24 /// The %0 definition is dead and %3 contains an undefined value.
26 //===----------------------------------------------------------------------===//
28 #include "llvm/CodeGen/DetectDeadLanes.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
37 using namespace llvm;
39 #define DEBUG_TYPE "detect-dead-lanes"
41 DeadLaneDetector::DeadLaneDetector(const MachineRegisterInfo *MRI,
42 const TargetRegisterInfo *TRI)
43 : MRI(MRI), TRI(TRI) {
44 unsigned NumVirtRegs = MRI->getNumVirtRegs();
45 VRegInfos = std::unique_ptr<VRegInfo[]>(new VRegInfo[NumVirtRegs]);
46 WorklistMembers.resize(NumVirtRegs);
47 DefinedByCopy.resize(NumVirtRegs);
50 /// Returns true if \p MI will get lowered to a series of COPY instructions.
51 /// We call this a COPY-like instruction.
52 static bool lowersToCopies(const MachineInstr &MI) {
53 // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
54 // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
55 // are not lowered to a COPY.
56 switch (MI.getOpcode()) {
57 case TargetOpcode::COPY:
58 case TargetOpcode::PHI:
59 case TargetOpcode::INSERT_SUBREG:
60 case TargetOpcode::REG_SEQUENCE:
61 case TargetOpcode::EXTRACT_SUBREG:
62 return true;
64 return false;
67 static bool isCrossCopy(const MachineRegisterInfo &MRI,
68 const MachineInstr &MI,
69 const TargetRegisterClass *DstRC,
70 const MachineOperand &MO) {
71 assert(lowersToCopies(MI));
72 Register SrcReg = MO.getReg();
73 const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
74 if (DstRC == SrcRC)
75 return false;
77 unsigned SrcSubIdx = MO.getSubReg();
79 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
80 unsigned DstSubIdx = 0;
81 switch (MI.getOpcode()) {
82 case TargetOpcode::INSERT_SUBREG:
83 if (MO.getOperandNo() == 2)
84 DstSubIdx = MI.getOperand(3).getImm();
85 break;
86 case TargetOpcode::REG_SEQUENCE: {
87 unsigned OpNum = MO.getOperandNo();
88 DstSubIdx = MI.getOperand(OpNum+1).getImm();
89 break;
91 case TargetOpcode::EXTRACT_SUBREG: {
92 unsigned SubReg = MI.getOperand(2).getImm();
93 SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
97 unsigned PreA, PreB; // Unused.
98 if (SrcSubIdx && DstSubIdx)
99 return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
100 PreB);
101 if (SrcSubIdx)
102 return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
103 if (DstSubIdx)
104 return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
105 return !TRI.getCommonSubClass(SrcRC, DstRC);
108 void DeadLaneDetector::addUsedLanesOnOperand(const MachineOperand &MO,
109 LaneBitmask UsedLanes) {
110 if (!MO.readsReg())
111 return;
112 Register MOReg = MO.getReg();
113 if (!MOReg.isVirtual())
114 return;
116 unsigned MOSubReg = MO.getSubReg();
117 if (MOSubReg != 0)
118 UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
119 UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
121 unsigned MORegIdx = Register::virtReg2Index(MOReg);
122 DeadLaneDetector::VRegInfo &MORegInfo = VRegInfos[MORegIdx];
123 LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
124 // Any change at all?
125 if ((UsedLanes & ~PrevUsedLanes).none())
126 return;
128 // Set UsedLanes and remember instruction for further propagation.
129 MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
130 if (DefinedByCopy.test(MORegIdx))
131 PutInWorklist(MORegIdx);
134 void DeadLaneDetector::transferUsedLanesStep(const MachineInstr &MI,
135 LaneBitmask UsedLanes) {
136 for (const MachineOperand &MO : MI.uses()) {
137 if (!MO.isReg() || !MO.getReg().isVirtual())
138 continue;
139 LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
140 addUsedLanesOnOperand(MO, UsedOnMO);
144 LaneBitmask
145 DeadLaneDetector::transferUsedLanes(const MachineInstr &MI,
146 LaneBitmask UsedLanes,
147 const MachineOperand &MO) const {
148 unsigned OpNum = MO.getOperandNo();
149 assert(lowersToCopies(MI) &&
150 DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
152 switch (MI.getOpcode()) {
153 case TargetOpcode::COPY:
154 case TargetOpcode::PHI:
155 return UsedLanes;
156 case TargetOpcode::REG_SEQUENCE: {
157 assert(OpNum % 2 == 1);
158 unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
159 return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
161 case TargetOpcode::INSERT_SUBREG: {
162 unsigned SubIdx = MI.getOperand(3).getImm();
163 LaneBitmask MO2UsedLanes =
164 TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
165 if (OpNum == 2)
166 return MO2UsedLanes;
168 const MachineOperand &Def = MI.getOperand(0);
169 Register DefReg = Def.getReg();
170 const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
171 LaneBitmask MO1UsedLanes;
172 if (RC->CoveredBySubRegs)
173 MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
174 else
175 MO1UsedLanes = RC->LaneMask;
177 assert(OpNum == 1);
178 return MO1UsedLanes;
180 case TargetOpcode::EXTRACT_SUBREG: {
181 assert(OpNum == 1);
182 unsigned SubIdx = MI.getOperand(2).getImm();
183 return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
185 default:
186 llvm_unreachable("function must be called with COPY-like instruction");
190 void DeadLaneDetector::transferDefinedLanesStep(const MachineOperand &Use,
191 LaneBitmask DefinedLanes) {
192 if (!Use.readsReg())
193 return;
194 // Check whether the operand writes a vreg and is part of a COPY-like
195 // instruction.
196 const MachineInstr &MI = *Use.getParent();
197 if (MI.getDesc().getNumDefs() != 1)
198 return;
199 // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
200 // they really need to be modeled differently!
201 if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
202 return;
203 const MachineOperand &Def = *MI.defs().begin();
204 Register DefReg = Def.getReg();
205 if (!DefReg.isVirtual())
206 return;
207 unsigned DefRegIdx = Register::virtReg2Index(DefReg);
208 if (!DefinedByCopy.test(DefRegIdx))
209 return;
211 unsigned OpNum = Use.getOperandNo();
212 DefinedLanes =
213 TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
214 DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
216 VRegInfo &RegInfo = VRegInfos[DefRegIdx];
217 LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
218 // Any change at all?
219 if ((DefinedLanes & ~PrevDefinedLanes).none())
220 return;
222 RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
223 PutInWorklist(DefRegIdx);
226 LaneBitmask DeadLaneDetector::transferDefinedLanes(
227 const MachineOperand &Def, unsigned OpNum, LaneBitmask DefinedLanes) const {
228 const MachineInstr &MI = *Def.getParent();
229 // Translate DefinedLanes if necessary.
230 switch (MI.getOpcode()) {
231 case TargetOpcode::REG_SEQUENCE: {
232 unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
233 DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
234 DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
235 break;
237 case TargetOpcode::INSERT_SUBREG: {
238 unsigned SubIdx = MI.getOperand(3).getImm();
239 if (OpNum == 2) {
240 DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
241 DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
242 } else {
243 assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
244 // Ignore lanes defined by operand 2.
245 DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
247 break;
249 case TargetOpcode::EXTRACT_SUBREG: {
250 unsigned SubIdx = MI.getOperand(2).getImm();
251 assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
252 DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
253 break;
255 case TargetOpcode::COPY:
256 case TargetOpcode::PHI:
257 break;
258 default:
259 llvm_unreachable("function must be called with COPY-like instruction");
262 assert(Def.getSubReg() == 0 &&
263 "Should not have subregister defs in machine SSA phase");
264 DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
265 return DefinedLanes;
268 LaneBitmask DeadLaneDetector::determineInitialDefinedLanes(unsigned Reg) {
269 // Live-In or unused registers have no definition but are considered fully
270 // defined.
271 if (!MRI->hasOneDef(Reg))
272 return LaneBitmask::getAll();
274 const MachineOperand &Def = *MRI->def_begin(Reg);
275 const MachineInstr &DefMI = *Def.getParent();
276 if (lowersToCopies(DefMI)) {
277 // Start optimisatically with no used or defined lanes for copy
278 // instructions. The following dataflow analysis will add more bits.
279 unsigned RegIdx = Register::virtReg2Index(Reg);
280 DefinedByCopy.set(RegIdx);
281 PutInWorklist(RegIdx);
283 if (Def.isDead())
284 return LaneBitmask::getNone();
286 // COPY/PHI can copy across unrelated register classes (example: float/int)
287 // with incompatible subregister structure. Do not include these in the
288 // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
289 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
291 // Determine initially DefinedLanes.
292 LaneBitmask DefinedLanes;
293 for (const MachineOperand &MO : DefMI.uses()) {
294 if (!MO.isReg() || !MO.readsReg())
295 continue;
296 Register MOReg = MO.getReg();
297 if (!MOReg)
298 continue;
300 LaneBitmask MODefinedLanes;
301 if (MOReg.isPhysical()) {
302 MODefinedLanes = LaneBitmask::getAll();
303 } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
304 MODefinedLanes = LaneBitmask::getAll();
305 } else {
306 assert(MOReg.isVirtual());
307 if (MRI->hasOneDef(MOReg)) {
308 const MachineOperand &MODef = *MRI->def_begin(MOReg);
309 const MachineInstr &MODefMI = *MODef.getParent();
310 // Bits from copy-like operations will be added later.
311 if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
312 continue;
314 unsigned MOSubReg = MO.getSubReg();
315 MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
316 MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
317 MOSubReg, MODefinedLanes);
320 unsigned OpNum = MO.getOperandNo();
321 DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
323 return DefinedLanes;
325 if (DefMI.isImplicitDef() || Def.isDead())
326 return LaneBitmask::getNone();
328 assert(Def.getSubReg() == 0 &&
329 "Should not have subregister defs in machine SSA phase");
330 return MRI->getMaxLaneMaskForVReg(Reg);
333 LaneBitmask DeadLaneDetector::determineInitialUsedLanes(unsigned Reg) {
334 LaneBitmask UsedLanes = LaneBitmask::getNone();
335 for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
336 if (!MO.readsReg())
337 continue;
339 const MachineInstr &UseMI = *MO.getParent();
340 if (UseMI.isKill())
341 continue;
343 unsigned SubReg = MO.getSubReg();
344 if (lowersToCopies(UseMI)) {
345 assert(UseMI.getDesc().getNumDefs() == 1);
346 const MachineOperand &Def = *UseMI.defs().begin();
347 Register DefReg = Def.getReg();
348 // The used lanes of COPY-like instruction operands are determined by the
349 // following dataflow analysis.
350 if (DefReg.isVirtual()) {
351 // But ignore copies across incompatible register classes.
352 bool CrossCopy = false;
353 if (lowersToCopies(UseMI)) {
354 const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
355 CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
356 if (CrossCopy)
357 LLVM_DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
360 if (!CrossCopy)
361 continue;
365 // Shortcut: All lanes are used.
366 if (SubReg == 0)
367 return MRI->getMaxLaneMaskForVReg(Reg);
369 UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
371 return UsedLanes;
374 namespace {
376 class DetectDeadLanes : public MachineFunctionPass {
377 public:
378 bool runOnMachineFunction(MachineFunction &MF) override;
380 static char ID;
381 DetectDeadLanes() : MachineFunctionPass(ID) {}
383 StringRef getPassName() const override { return "Detect Dead Lanes"; }
385 void getAnalysisUsage(AnalysisUsage &AU) const override {
386 AU.setPreservesCFG();
387 MachineFunctionPass::getAnalysisUsage(AU);
390 private:
391 /// update the operand status.
392 /// The first return value shows whether MF been changed.
393 /// The second return value indicates we need to call
394 /// DeadLaneDetector::computeSubRegisterLaneBitInfo and this function again
395 /// to propagate changes.
396 std::pair<bool, bool>
397 modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
398 MachineFunction &MF);
400 bool isUndefRegAtInput(const MachineOperand &MO,
401 const DeadLaneDetector::VRegInfo &RegInfo) const;
403 bool isUndefInput(const DeadLaneDetector &DLD, const MachineOperand &MO,
404 bool *CrossCopy) const;
406 const MachineRegisterInfo *MRI = nullptr;
407 const TargetRegisterInfo *TRI = nullptr;
410 } // end anonymous namespace
412 char DetectDeadLanes::ID = 0;
413 char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
415 INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
417 bool DetectDeadLanes::isUndefRegAtInput(
418 const MachineOperand &MO, const DeadLaneDetector::VRegInfo &RegInfo) const {
419 unsigned SubReg = MO.getSubReg();
420 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
421 return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
424 bool DetectDeadLanes::isUndefInput(const DeadLaneDetector &DLD,
425 const MachineOperand &MO,
426 bool *CrossCopy) const {
427 if (!MO.isUse())
428 return false;
429 const MachineInstr &MI = *MO.getParent();
430 if (!lowersToCopies(MI))
431 return false;
432 const MachineOperand &Def = MI.getOperand(0);
433 Register DefReg = Def.getReg();
434 if (!DefReg.isVirtual())
435 return false;
436 unsigned DefRegIdx = Register::virtReg2Index(DefReg);
437 if (!DLD.isDefinedByCopy(DefRegIdx))
438 return false;
440 const DeadLaneDetector::VRegInfo &DefRegInfo = DLD.getVRegInfo(DefRegIdx);
441 LaneBitmask UsedLanes = DLD.transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
442 if (UsedLanes.any())
443 return false;
445 Register MOReg = MO.getReg();
446 if (MOReg.isVirtual()) {
447 const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
448 *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
450 return true;
453 void DeadLaneDetector::computeSubRegisterLaneBitInfo() {
454 // First pass: Populate defs/uses of vregs with initial values
455 unsigned NumVirtRegs = MRI->getNumVirtRegs();
456 for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
457 Register Reg = Register::index2VirtReg(RegIdx);
459 // Determine used/defined lanes and add copy instructions to worklist.
460 VRegInfo &Info = VRegInfos[RegIdx];
461 Info.DefinedLanes = determineInitialDefinedLanes(Reg);
462 Info.UsedLanes = determineInitialUsedLanes(Reg);
465 // Iterate as long as defined lanes/used lanes keep changing.
466 while (!Worklist.empty()) {
467 unsigned RegIdx = Worklist.front();
468 Worklist.pop_front();
469 WorklistMembers.reset(RegIdx);
470 VRegInfo &Info = VRegInfos[RegIdx];
471 Register Reg = Register::index2VirtReg(RegIdx);
473 // Transfer UsedLanes to operands of DefMI (backwards dataflow).
474 MachineOperand &Def = *MRI->def_begin(Reg);
475 const MachineInstr &MI = *Def.getParent();
476 transferUsedLanesStep(MI, Info.UsedLanes);
477 // Transfer DefinedLanes to users of Reg (forward dataflow).
478 for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
479 transferDefinedLanesStep(MO, Info.DefinedLanes);
482 LLVM_DEBUG({
483 dbgs() << "Defined/Used lanes:\n";
484 for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
485 Register Reg = Register::index2VirtReg(RegIdx);
486 const VRegInfo &Info = VRegInfos[RegIdx];
487 dbgs() << printReg(Reg, nullptr)
488 << " Used: " << PrintLaneMask(Info.UsedLanes)
489 << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
491 dbgs() << "\n";
495 std::pair<bool, bool>
496 DetectDeadLanes::modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
497 MachineFunction &MF) {
498 bool Changed = false;
499 bool Again = false;
500 // Mark operands as dead/unused.
501 for (MachineBasicBlock &MBB : MF) {
502 for (MachineInstr &MI : MBB) {
503 for (MachineOperand &MO : MI.operands()) {
504 if (!MO.isReg())
505 continue;
506 Register Reg = MO.getReg();
507 if (!Reg.isVirtual())
508 continue;
509 unsigned RegIdx = Register::virtReg2Index(Reg);
510 const DeadLaneDetector::VRegInfo &RegInfo = DLD.getVRegInfo(RegIdx);
511 if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
512 LLVM_DEBUG(dbgs()
513 << "Marking operand '" << MO << "' as dead in " << MI);
514 MO.setIsDead();
515 Changed = true;
517 if (MO.readsReg()) {
518 bool CrossCopy = false;
519 if (isUndefRegAtInput(MO, RegInfo)) {
520 LLVM_DEBUG(dbgs()
521 << "Marking operand '" << MO << "' as undef in " << MI);
522 MO.setIsUndef();
523 Changed = true;
524 } else if (isUndefInput(DLD, MO, &CrossCopy)) {
525 LLVM_DEBUG(dbgs()
526 << "Marking operand '" << MO << "' as undef in " << MI);
527 MO.setIsUndef();
528 Changed = true;
529 if (CrossCopy)
530 Again = true;
537 return std::make_pair(Changed, Again);
540 bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
541 // Don't bother if we won't track subregister liveness later. This pass is
542 // required for correctness if subregister liveness is enabled because the
543 // register coalescer cannot deal with hidden dead defs. However without
544 // subregister liveness enabled, the expected benefits of this pass are small
545 // so we safe the compile time.
546 MRI = &MF.getRegInfo();
547 if (!MRI->subRegLivenessEnabled()) {
548 LLVM_DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
549 return false;
552 TRI = MRI->getTargetRegisterInfo();
554 DeadLaneDetector DLD(MRI, TRI);
556 bool Changed = false;
557 bool Again;
558 do {
559 DLD.computeSubRegisterLaneBitInfo();
560 bool LocalChanged;
561 std::tie(LocalChanged, Again) = modifySubRegisterOperandStatus(DLD, MF);
562 Changed |= LocalChanged;
563 } while (Again);
565 return Changed;