Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / Target / NVPTX / NVPTXPeephole.cpp
blob0968701737e88d6ef0bae8921d79833e6dd01af5
1 //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // In NVPTX, NVPTXFrameLowering will emit the following instructions at the
10 // beginning of a MachineFunction.
12 // mov %SPL, %depot
13 // cvta.local %SP, %SPL
15 // Because Frame Index is a generic address and alloca can only return generic
16 // pointer, without this pass the instructions producing alloca'ed address will
17 // be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
18 // this address with their .local versions, but this may introduce a lot of
19 // cvta.to.local instructions. Performance can be improved if we avoid casting
20 // address back and forth and directly calculate local address based on %SPL.
21 // This peephole pass optimizes these cases, for example
23 // It will transform the following pattern
24 // %0 = LEA_ADDRi64 %VRFrame64, 4
25 // %1 = cvta_to_local_yes_64 %0
27 // into
28 // %1 = LEA_ADDRi64 %VRFrameLocal64, 4
30 // %VRFrameLocal64 is the virtual register name of %SPL
32 //===----------------------------------------------------------------------===//
34 #include "NVPTX.h"
35 #include "NVPTXRegisterInfo.h"
36 #include "NVPTXSubtarget.h"
37 #include "llvm/CodeGen/MachineFunctionPass.h"
38 #include "llvm/CodeGen/MachineInstrBuilder.h"
39 #include "llvm/CodeGen/MachineRegisterInfo.h"
40 #include "llvm/CodeGen/TargetInstrInfo.h"
41 #include "llvm/CodeGen/TargetRegisterInfo.h"
43 using namespace llvm;
45 #define DEBUG_TYPE "nvptx-peephole"
47 namespace llvm {
48 void initializeNVPTXPeepholePass(PassRegistry &);
51 namespace {
52 struct NVPTXPeephole : public MachineFunctionPass {
53 public:
54 static char ID;
55 NVPTXPeephole() : MachineFunctionPass(ID) {
56 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
59 bool runOnMachineFunction(MachineFunction &MF) override;
61 StringRef getPassName() const override {
62 return "NVPTX optimize redundant cvta.to.local instruction";
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
66 MachineFunctionPass::getAnalysisUsage(AU);
71 char NVPTXPeephole::ID = 0;
73 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
75 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
76 auto &MBB = *Root.getParent();
77 auto &MF = *MBB.getParent();
78 // Check current instruction is cvta.to.local
79 if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
80 Root.getOpcode() != NVPTX::cvta_to_local_yes)
81 return false;
83 auto &Op = Root.getOperand(1);
84 const auto &MRI = MF.getRegInfo();
85 MachineInstr *GenericAddrDef = nullptr;
86 if (Op.isReg() && Op.getReg().isVirtual()) {
87 GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
90 // Check the register operand is uniquely defined by LEA_ADDRi instruction
91 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
92 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
93 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
94 return false;
97 const NVPTXRegisterInfo *NRI =
98 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
100 // Check the LEA_ADDRi operand is Frame index
101 auto &BaseAddrOp = GenericAddrDef->getOperand(1);
102 if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
103 return true;
106 return false;
109 static void CombineCVTAToLocal(MachineInstr &Root) {
110 auto &MBB = *Root.getParent();
111 auto &MF = *MBB.getParent();
112 const auto &MRI = MF.getRegInfo();
113 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
114 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
116 const NVPTXRegisterInfo *NRI =
117 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
119 MachineInstrBuilder MIB =
120 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
121 Root.getOperand(0).getReg())
122 .addReg(NRI->getFrameLocalRegister(MF))
123 .add(Prev.getOperand(2));
125 MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
127 // Check if MRI has only one non dbg use, which is Root
128 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
129 Prev.eraseFromParent();
131 Root.eraseFromParent();
134 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
135 if (skipFunction(MF.getFunction()))
136 return false;
138 bool Changed = false;
139 // Loop over all of the basic blocks.
140 for (auto &MBB : MF) {
141 // Traverse the basic block.
142 auto BlockIter = MBB.begin();
144 while (BlockIter != MBB.end()) {
145 auto &MI = *BlockIter++;
146 if (isCVTAToLocalCombinationCandidate(MI)) {
147 CombineCVTAToLocal(MI);
148 Changed = true;
150 } // Instruction
151 } // Basic Block
153 const NVPTXRegisterInfo *NRI =
154 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
156 // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
157 const auto &MRI = MF.getRegInfo();
158 if (MRI.use_empty(NRI->getFrameRegister(MF))) {
159 if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
160 MI->eraseFromParent();
164 return Changed;
167 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }