//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// In NVPTX, NVPTXFrameLowering will emit the following instruction at the
// beginning of a MachineFunction:
//
//   cvta.local %SP, %SPL
15 // Because Frame Index is a generic address and alloca can only return generic
16 // pointer, without this pass the instructions producing alloca'ed address will
17 // be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
18 // this address with their .local versions, but this may introduce a lot of
19 // cvta.to.local instructions. Performance can be improved if we avoid casting
20 // address back and forth and directly calculate local address based on %SPL.
// This peephole pass optimizes these cases. For example, it will transform
// the following pattern
//    %0 = LEA_ADDRi64 %VRFrame, 4
//    %1 = cvta_to_local_yes_64 %0
// into
//    %1 = LEA_ADDRi64 %VRFrameLocal, 4
// where %VRFrameLocal is the virtual register name of %SPL.
32 //===----------------------------------------------------------------------===//
35 #include "llvm/CodeGen/MachineFunctionPass.h"
36 #include "llvm/CodeGen/MachineInstrBuilder.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
43 #define DEBUG_TYPE "nvptx-peephole"
46 void initializeNVPTXPeepholePass(PassRegistry
&);
50 struct NVPTXPeephole
: public MachineFunctionPass
{
53 NVPTXPeephole() : MachineFunctionPass(ID
) {
54 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
57 bool runOnMachineFunction(MachineFunction
&MF
) override
;
59 StringRef
getPassName() const override
{
60 return "NVPTX optimize redundant cvta.to.local instruction";
63 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
64 MachineFunctionPass::getAnalysisUsage(AU
);
69 char NVPTXPeephole::ID
= 0;
71 INITIALIZE_PASS(NVPTXPeephole
, "nvptx-peephole", "NVPTX Peephole", false, false)
73 static bool isCVTAToLocalCombinationCandidate(MachineInstr
&Root
) {
74 auto &MBB
= *Root
.getParent();
75 auto &MF
= *MBB
.getParent();
76 // Check current instruction is cvta.to.local
77 if (Root
.getOpcode() != NVPTX::cvta_to_local_yes_64
&&
78 Root
.getOpcode() != NVPTX::cvta_to_local_yes
)
81 auto &Op
= Root
.getOperand(1);
82 const auto &MRI
= MF
.getRegInfo();
83 MachineInstr
*GenericAddrDef
= nullptr;
84 if (Op
.isReg() && TargetRegisterInfo::isVirtualRegister(Op
.getReg())) {
85 GenericAddrDef
= MRI
.getUniqueVRegDef(Op
.getReg());
88 // Check the register operand is uniquely defined by LEA_ADDRi instruction
89 if (!GenericAddrDef
|| GenericAddrDef
->getParent() != &MBB
||
90 (GenericAddrDef
->getOpcode() != NVPTX::LEA_ADDRi64
&&
91 GenericAddrDef
->getOpcode() != NVPTX::LEA_ADDRi
)) {
95 // Check the LEA_ADDRi operand is Frame index
96 auto &BaseAddrOp
= GenericAddrDef
->getOperand(1);
97 if (BaseAddrOp
.isReg() && BaseAddrOp
.getReg() == NVPTX::VRFrame
) {
104 static void CombineCVTAToLocal(MachineInstr
&Root
) {
105 auto &MBB
= *Root
.getParent();
106 auto &MF
= *MBB
.getParent();
107 const auto &MRI
= MF
.getRegInfo();
108 const TargetInstrInfo
*TII
= MF
.getSubtarget().getInstrInfo();
109 auto &Prev
= *MRI
.getUniqueVRegDef(Root
.getOperand(1).getReg());
111 MachineInstrBuilder MIB
=
112 BuildMI(MF
, Root
.getDebugLoc(), TII
->get(Prev
.getOpcode()),
113 Root
.getOperand(0).getReg())
114 .addReg(NVPTX::VRFrameLocal
)
115 .add(Prev
.getOperand(2));
117 MBB
.insert((MachineBasicBlock::iterator
)&Root
, MIB
);
119 // Check if MRI has only one non dbg use, which is Root
120 if (MRI
.hasOneNonDBGUse(Prev
.getOperand(0).getReg())) {
121 Prev
.eraseFromParentAndMarkDBGValuesForRemoval();
123 Root
.eraseFromParentAndMarkDBGValuesForRemoval();
126 bool NVPTXPeephole::runOnMachineFunction(MachineFunction
&MF
) {
127 if (skipFunction(MF
.getFunction()))
130 bool Changed
= false;
131 // Loop over all of the basic blocks.
132 for (auto &MBB
: MF
) {
133 // Traverse the basic block.
134 auto BlockIter
= MBB
.begin();
136 while (BlockIter
!= MBB
.end()) {
137 auto &MI
= *BlockIter
++;
138 if (isCVTAToLocalCombinationCandidate(MI
)) {
139 CombineCVTAToLocal(MI
);
145 // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
146 const auto &MRI
= MF
.getRegInfo();
147 if (MRI
.use_empty(NVPTX::VRFrame
)) {
148 if (auto MI
= MRI
.getUniqueVRegDef(NVPTX::VRFrame
)) {
149 MI
->eraseFromParentAndMarkDBGValuesForRemoval();
156 MachineFunctionPass
*llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }