1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass identifies floating point stores that should not be combined into
10 // store pairs. Later we may do the same for floating point loads.
11 // ===---------------------------------------------------------------------===//
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "llvm/CodeGen/MachineFunction.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 #include "llvm/CodeGen/MachineInstr.h"
18 #include "llvm/CodeGen/MachineTraceMetrics.h"
19 #include "llvm/CodeGen/TargetInstrInfo.h"
20 #include "llvm/CodeGen/TargetSchedule.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
// Debug identifier for this file. It is the same text that INITIALIZE_PASS
// below receives as the pass argument string.
// NOTE(review): presumably consumed by LLVM_DEBUG to filter debug output —
// the macro's definition is not in this file; confirm.
26 #define DEBUG_TYPE "aarch64-stp-suppress"
// Human-readable pass name: returned by getPassName() and handed to
// INITIALIZE_PASS.
28 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
/// Machine function pass that looks for floating-point stores which should
/// not be combined into store pairs and flags them via
/// AArch64InstrInfo::suppressLdStPair (see runOnMachineFunction).
31 class AArch64StorePairSuppress
: public MachineFunctionPass
{
// Target instruction info; assigned in runOnMachineFunction from the
// function's AArch64Subtarget.
32 const AArch64InstrInfo
*TII
;
// Target register info; assigned in runOnMachineFunction.
33 const TargetRegisterInfo
*TRI
;
// Register info of the function being processed; assigned in
// runOnMachineFunction.
34 const MachineRegisterInfo
*MRI
;
// Scheduling model, initialised from the subtarget in runOnMachineFunction
// and queried for scheduling-class descriptors in shouldAddSTPToBlock.
35 TargetSchedModel SchedModel
;
// The MachineTraceMetrics analysis result for the current function.
36 MachineTraceMetrics
*Traces
;
// Trace ensemble obtained from Traces with the TS_MinInstrCount strategy;
// assigned in shouldAddSTPToBlock.
37 MachineTraceMetrics::Ensemble
*MinInstr
;
// Constructs the pass with the class's ID and registers it with the global
// PassRegistry.
41 AArch64StorePairSuppress() : MachineFunctionPass(ID
) {
42 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
// Returns the human-readable pass name (STPSUPPRESS_PASS_NAME).
45 StringRef
getPassName() const override
{ return STPSUPPRESS_PASS_NAME
; }
// Pass entry point; defined out of line.
47 bool runOnMachineFunction(MachineFunction
&F
) override
;
// Decides, from trace resource lengths, whether forming an STP in BB is
// acceptable; defined out of line.
50 bool shouldAddSTPToBlock(const MachineBasicBlock
*BB
);
// Opcode test for the floating-point stores this pass considers; defined out
// of line.
52 bool isNarrowFPStore(const MachineInstr
&MI
);
// Declares MachineTraceMetrics as both required and preserved by this pass,
// then defers to the MachineFunctionPass base implementation.
54 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
56 AU
.addRequired
<MachineTraceMetrics
>();
57 AU
.addPreserved
<MachineTraceMetrics
>();
58 MachineFunctionPass::getAnalysisUsage(AU
);
// Out-of-line definition of the pass ID member that the constructor passes to
// the MachineFunctionPass base class.
61 char AArch64StorePairSuppress::ID
= 0;
// Registers the pass under the argument string "aarch64-stp-suppress" with
// STPSUPPRESS_PASS_NAME as its name.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS — confirm against the
// macro's definition.
64 INITIALIZE_PASS(AArch64StorePairSuppress
, "aarch64-stp-suppress",
65 STPSUPPRESS_PASS_NAME
, false, false)
/// Factory for the pass: returns a newly heap-allocated
/// AArch64StorePairSuppress as a FunctionPass pointer.
/// NOTE(review): ownership presumably passes to the caller (the pass manager)
/// — confirm at the call site.
67 FunctionPass
*llvm::createAArch64StorePairSuppressPass() {
68 return new AArch64StorePairSuppress();
71 /// Return true if an STP can be added to this block without increasing the
72 /// critical resource height. STP is good to form in Ld/St limited blocks and
73 /// bad to form in floating-point limited blocks. This is true independent of
74 /// the critical path. If the critical path is longer than the resource height,
75 /// the extra vector ops can limit physreg renaming. Otherwise, it could simply
76 /// oversaturate the vector units.
///
/// \param BB the block whose trace is examined.
77 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock
*BB
) {
// Fetch the trace ensemble built with the minimum-instruction-count strategy.
79 MinInstr
= Traces
->getEnsemble(MachineTraceStrategy::TS_MinInstrCount
);
// Baseline: the resource length of the trace through BB as it stands now.
81 MachineTraceMetrics::Trace BBTrace
= MinInstr
->getTrace(BB
);
82 unsigned ResLength
= BBTrace
.getResourceLength();
84 // Get the machine model's scheduling class for STPDi and STRDui.
85 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
86 unsigned SCIdx
= TII
->get(AArch64::STPDi
).getSchedClass();
87 const MCSchedClassDesc
*PairSCDesc
=
88 SchedModel
.getMCSchedModel()->getSchedClassDesc(SCIdx
);
90 unsigned SCIdx2
= TII
->get(AArch64::STRDui
).getSchedClass();
91 const MCSchedClassDesc
*SingleSCDesc
=
92 SchedModel
.getMCSchedModel()->getSchedClassDesc(SCIdx2
);
94 // If a subtarget does not define resources for STPDi, bail here.
// Both descriptors must be valid and non-variant for the comparison below to
// be attempted.
95 if (PairSCDesc
->isValid() && !PairSCDesc
->isVariant() &&
96 SingleSCDesc
->isValid() && !SingleSCDesc
->isVariant()) {
97 // Compute the new critical resource length after replacing 2 separate
98 // STRDui with one STPDi.
// NOTE(review): the arguments are presumably (extra blocks, added sched
// classes, removed sched classes) — confirm against
// MachineTraceMetrics::Trace::getResourceLength.
99 unsigned ResLenWithSTP
= BBTrace
.getResourceLength(
100 std::nullopt
, PairSCDesc
, {SingleSCDesc
, SingleSCDesc
});
// A larger value means the pair would lengthen the critical resource; the
// before/after lengths are reported in the debug output.
101 if (ResLenWithSTP
> ResLength
) {
102 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB
->getNumber()
103 << " resources " << ResLength
<< " -> " << ResLenWithSTP
111 /// Return true if this is a floating-point store smaller than the V reg. On
112 /// Cyclone, these require a vector shuffle before storing a pair.
113 /// Ideally we would call getMatchingPairOpcode() and have the machine model
114 /// tell us if it's profitable with no cpu knowledge here.
116 /// FIXME: We plan to develop a decent Target abstraction for simple loads and
117 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
///
/// \param MI the instruction whose opcode is tested.
118 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr
&MI
) {
// Classification is by opcode alone; no operands are inspected here.
119 switch (MI
.getOpcode()) {
// Opcodes treated as narrow FP stores: the S and D variants of the STR*ui and
// STUR*i families (going by the opcode names).
122 case AArch64::STRSui
:
123 case AArch64::STRDui
:
124 case AArch64::STURSi
:
125 case AArch64::STURDi
:
/// Pass entry point. After checking the function (skipFunction / optsize),
/// the subtarget's enableStorePairSuppress() setting and the presence of an
/// instruction scheduling model, walks every block looking for narrow FP
/// stores that use the same base register as the previous such store, and
/// calls suppressLdStPair on them when shouldAddSTPToBlock does not approve
/// the block.
///
/// \param MF the machine function to process.
130 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction
&MF
) {
// Guard: functions the pass framework asks to skip, and functions marked
// optsize.
131 if (skipFunction(MF
.getFunction()) || MF
.getFunction().hasOptSize())
// Guard: the subtarget decides whether store-pair suppression is enabled.
134 const AArch64Subtarget
&ST
= MF
.getSubtarget
<AArch64Subtarget
>();
135 if (!ST
.enableStorePairSuppress())
// Cache per-function target info, the scheduling model and the trace-metrics
// analysis in the pass members.
138 TII
= static_cast<const AArch64InstrInfo
*>(ST
.getInstrInfo());
139 TRI
= ST
.getRegisterInfo();
140 MRI
= &MF
.getRegInfo();
141 SchedModel
.init(&ST
);
142 Traces
= &getAnalysis
<MachineTraceMetrics
>();
145 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF
.getName() << '\n');
// Guard: shouldAddSTPToBlock relies on scheduling-class data, so a subtarget
// without an instruction scheduling model cannot be analysed.
147 if (!SchedModel
.hasInstrSchedModel()) {
148 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
152 // Check for a sequence of stores to the same base address. We don't need to
153 // precisely determine whether a store pair can be formed. But we do want to
154 // filter out most situations where we can't form store pairs to avoid
155 // computing trace metrics in those cases.
156 for (auto &MBB
: MF
) {
// Per-block state. SuppressSTP is tested before shouldAddSTPToBlock below, so
// once it is true the trace query is short-circuited. PrevBaseReg holds the
// base register of the last narrow FP store seen (0 initially).
157 bool SuppressSTP
= false;
158 unsigned PrevBaseReg
= 0;
159 for (auto &MI
: MBB
) {
// Only narrow FP stores are of interest.
160 if (!isNarrowFPStore(MI
))
// Ask the target for the store's base operand and offset.
162 const MachineOperand
*BaseOp
;
164 bool OffsetIsScalable
;
165 if (TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, OffsetIsScalable
,
168 Register BaseReg
= BaseOp
->getReg();
// Same base register as the previous narrow FP store: a pairing candidate.
169 if (PrevBaseReg
== BaseReg
) {
170 // If this block can take STPs, skip ahead to the next block.
171 if (!SuppressSTP
&& shouldAddSTPToBlock(MI
.getParent()))
173 // Otherwise, continue unpairing the stores in this block.
174 LLVM_DEBUG(dbgs() << "Unpairing store " << MI
<< "\n");
// Flag the store so that it is not combined into a pair.
176 TII
->suppressLdStPair(MI
);
// Remember this store's base register for the next comparison.
178 PrevBaseReg
= BaseReg
;
183 // This pass just sets some internal MachineMemOperand flags. It can't really
184 // invalidate anything.