1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass identifies floating point stores that should not be combined into
10 // store pairs. Later we may do the same for floating point loads.
11 //===----------------------------------------------------------------------===//
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "llvm/CodeGen/MachineFunction.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 #include "llvm/CodeGen/MachineInstr.h"
18 #include "llvm/CodeGen/MachineTraceMetrics.h"
19 #include "llvm/CodeGen/TargetInstrInfo.h"
20 #include "llvm/CodeGen/TargetSchedule.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
26 #define DEBUG_TYPE "aarch64-stp-suppress"
28 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
// Machine-function pass that flags narrow floating-point stores so they are
// not combined into store pairs. TII, TRI, MRI, SchedModel and Traces are
// (re)assigned at the start of runOnMachineFunction(); MinInstr is assigned by
// shouldAddSTPToBlock().
// NOTE(review): this listing appears to be missing lines from the class body
// (no declaration of the ID member used by the constructor, no access
// specifiers, no closing braces) -- confirm against the original file.
31 class AArch64StorePairSuppress
: public MachineFunctionPass
{
// Target instruction info; used to look up opcode descriptors and to call
// suppressLdStPair().
32 const AArch64InstrInfo
*TII
;
// Register info obtained from the subtarget in runOnMachineFunction().
33 const TargetRegisterInfo
*TRI
;
// Set from MF.getRegInfo(); no other use is visible in this listing.
34 const MachineRegisterInfo
*MRI
;
// Scheduling model, initialised from the subtarget on every run.
35 TargetSchedModel SchedModel
;
// The MachineTraceMetrics analysis result for the current function.
36 MachineTraceMetrics
*Traces
;
// Trace ensemble fetched with the TS_MinInstrCount strategy.
37 MachineTraceMetrics::Ensemble
*MinInstr
;
// Constructor: forwards ID to MachineFunctionPass and calls the pass's
// initialize function with the global PassRegistry.
41 AArch64StorePairSuppress() : MachineFunctionPass(ID
) {
42 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
// Returns the pass name string, STPSUPPRESS_PASS_NAME.
45 StringRef
getPassName() const override
{ return STPSUPPRESS_PASS_NAME
; }
// Pass entry point (defined out of line).
// NOTE(review): the parameter is named F here but MF in the definition.
47 bool runOnMachineFunction(MachineFunction
&F
) override
;
// True if an STP can be added to BB without increasing resource length.
50 bool shouldAddSTPToBlock(const MachineBasicBlock
*BB
);
// True if MI is one of the narrow FP store opcodes this pass cares about.
52 bool isNarrowFPStore(const MachineInstr
&MI
);
// Declares MachineTraceMetrics as both required and preserved, then defers to
// the MachineFunctionPass implementation.
54 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
56 AU
.addRequired
<MachineTraceMetrics
>();
57 AU
.addPreserved
<MachineTraceMetrics
>();
58 MachineFunctionPass::getAnalysisUsage(AU
);
// Out-of-class definition of the pass ID that the constructor hands to
// MachineFunctionPass.
61 char AArch64StorePairSuppress::ID
= 0;
// Registers the pass under the argument string "aarch64-stp-suppress" with
// STPSUPPRESS_PASS_NAME as its description.
// NOTE(review): the trailing `false, false` are presumably the CFG-only and
// is-analysis flags of INITIALIZE_PASS -- confirm against PassSupport.h.
64 INITIALIZE_PASS(AArch64StorePairSuppress
, "aarch64-stp-suppress",
65 STPSUPPRESS_PASS_NAME
, false, false)
/// Factory function: returns a newly allocated AArch64StorePairSuppress as a
/// FunctionPass pointer.
/// NOTE(review): ownership of the returned object is presumably taken by the
/// caller's pass manager -- confirm at the call site.
67 FunctionPass
*llvm::createAArch64StorePairSuppressPass() {
68 return new AArch64StorePairSuppress();
71 /// Return true if an STP can be added to this block without increasing the
72 /// critical resource height. STP is good to form in Ld/St limited blocks and
73 /// bad to form in floating-point limited blocks. This is true independent of the
74 /// critical path. If the critical path is longer than the resource height, the
75 /// extra vector ops can limit physreg renaming. Otherwise, it could simply
76 /// oversaturate the vector units.
///
/// \param BB the block whose trace is queried.
/// NOTE(review): the tail of this function (end of the debug statement and the
/// return statements) is not present in this listing.
77 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock
*BB
) {
// Fetch the trace ensemble for the minimum-instruction-count strategy and
// remember it in the MinInstr member.
79 MinInstr
= Traces
->getEnsemble(MachineTraceStrategy::TS_MinInstrCount
);
// Baseline: resource length of BB's trace with no extra instruction.
81 MachineTraceMetrics::Trace BBTrace
= MinInstr
->getTrace(BB
);
82 unsigned ResLength
= BBTrace
.getResourceLength();
84 // Get the machine model's scheduling class for STPDi.
85 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
86 unsigned SCIdx
= TII
->get(AArch64::STPDi
).getSchedClass();
87 const MCSchedClassDesc
*SCDesc
=
88 SchedModel
.getMCSchedModel()->getSchedClassDesc(SCIdx
);
90 // If a subtarget does not define resources for STPDi, bail here.
// Only a valid, non-variant scheduling class is used for the comparison.
91 if (SCDesc
->isValid() && !SCDesc
->isVariant()) {
// Resource length with SCDesc passed as an additional scheduling class
// (presumably "the trace plus one STP" -- confirm against
// MachineTraceMetrics::Trace::getResourceLength).
92 unsigned ResLenWithSTP
= BBTrace
.getResourceLength(std::nullopt
, SCDesc
);
// A larger value means the extra STP would lengthen the resource height.
93 if (ResLenWithSTP
> ResLength
) {
94 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB
->getNumber()
95 << " resources " << ResLength
<< " -> " << ResLenWithSTP
103 /// Return true if this is a floating-point store smaller than the V reg. On
104 /// cyclone, these require a vector shuffle before storing a pair.
105 /// Ideally we would call getMatchingPairOpcode() and have the machine model
106 /// tell us if it's profitable with no cpu knowledge here.
108 /// FIXME: We plan to develop a decent Target abstraction for simple loads and
109 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
///
/// \param MI the instruction whose opcode is tested.
/// NOTE(review): the bodies of the cases (and any default case) are not present
/// in this listing; the return values are implied only by the comment above.
110 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr
&MI
) {
111 switch (MI
.getOpcode()) {
// Opcodes treated as narrow FP stores. Presumably the scaled (STR*ui) and
// unscaled (STUR*i) forms of S- and D-register stores -- confirm against the
// AArch64 instruction definitions.
114 case AArch64::STRSui
:
115 case AArch64::STRDui
:
116 case AArch64::STURSi
:
117 case AArch64::STURDi
:
/// Pass entry point. After a series of guards (function skipped or optimized
/// for size, subtarget does not enable store-pair suppression, no instruction
/// scheduling model) it walks every block, tracks the base register of the
/// previous narrow FP store, and calls suppressLdStPair() on a store that uses
/// the same base register as its predecessor, subject to shouldAddSTPToBlock().
///
/// \param MF the machine function being processed.
/// NOTE(review): the statements controlled by each guard (returns, continue,
/// break), the declaration of Offset, and the final return are not present in
/// this listing -- confirm against the original file.
122 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction
&MF
) {
// Guard: the function is to be skipped or is optimized for size.
123 if (skipFunction(MF
.getFunction()) || MF
.getFunction().hasOptSize())
126 const AArch64Subtarget
&ST
= MF
.getSubtarget
<AArch64Subtarget
>();
// Guard: the subtarget does not ask for store-pair suppression.
127 if (!ST
.enableStorePairSuppress())
// Refresh the per-function state held in the pass members.
130 TII
= static_cast<const AArch64InstrInfo
*>(ST
.getInstrInfo());
131 TRI
= ST
.getRegisterInfo();
132 MRI
= &MF
.getRegInfo();
133 SchedModel
.init(&ST
);
134 Traces
= &getAnalysis
<MachineTraceMetrics
>();
137 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF
.getName() << '\n');
// Guard: shouldAddSTPToBlock() relies on scheduling-class data from the model.
139 if (!SchedModel
.hasInstrSchedModel()) {
140 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
144 // Check for a sequence of stores to the same base address. We don't need to
145 // precisely determine whether a store pair can be formed. But we do want to
146 // filter out most situations where we can't form store pairs to avoid
147 // computing trace metrics in those cases.
148 for (auto &MBB
: MF
) {
// Per-block state: whether suppression has been decided for this block, and
// the base register of the previous narrow FP store seen.
// NOTE(review): PrevBaseReg is `unsigned` while BaseReg below is `Register`;
// the equality test relies on an implicit conversion.
149 bool SuppressSTP
= false;
150 unsigned PrevBaseReg
= 0;
151 for (auto &MI
: MBB
) {
// Only narrow FP stores are of interest.
152 if (!isNarrowFPStore(MI
))
154 const MachineOperand
*BaseOp
;
156 bool OffsetIsScalable
;
// Ask TII for the base operand and offset of the store.
// NOTE(review): Offset is used here but its declaration is not visible.
157 if (TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, OffsetIsScalable
,
160 Register BaseReg
= BaseOp
->getReg();
// Same base register as the previous narrow FP store: a pairing candidate.
161 if (PrevBaseReg
== BaseReg
) {
162 // If this block can take STPs, skip ahead to the next block.
163 if (!SuppressSTP
&& shouldAddSTPToBlock(MI
.getParent()))
165 // Otherwise, continue unpairing the stores in this block.
166 LLVM_DEBUG(dbgs() << "Unpairing store " << MI
<< "\n");
168 TII
->suppressLdStPair(MI
);
// Remember this store's base register for the next comparison.
170 PrevBaseReg
= BaseReg
;
175 // This pass just sets some internal MachineMemOperand flags. It can't really
176 // invalidate anything.