1 //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass identifies floating point stores that should not be combined into
10 // store pairs. Later we may do the same for floating point loads.
11 // ===---------------------------------------------------------------------===//
13 #include "AArch64InstrInfo.h"
14 #include "llvm/CodeGen/MachineFunction.h"
15 #include "llvm/CodeGen/MachineFunctionPass.h"
16 #include "llvm/CodeGen/MachineInstr.h"
17 #include "llvm/CodeGen/MachineTraceMetrics.h"
18 #include "llvm/CodeGen/TargetInstrInfo.h"
19 #include "llvm/CodeGen/TargetSchedule.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
25 #define DEBUG_TYPE "aarch64-stp-suppress"
27 #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
/// Machine-function pass that flags floating-point stores which should not be
/// combined into store pairs (it calls suppressLdStPair() on them in
/// runOnMachineFunction()).
30 class AArch64StorePairSuppress
: public MachineFunctionPass
{
// Target hooks for the function being processed; TII, TRI and MRI are all
// assigned at the start of runOnMachineFunction().
31 const AArch64InstrInfo
*TII
;
32 const TargetRegisterInfo
*TRI
;
33 const MachineRegisterInfo
*MRI
;
// Scheduling model, initialised from the subtarget in runOnMachineFunction().
34 TargetSchedModel SchedModel
;
// Result of the MachineTraceMetrics analysis requested in getAnalysisUsage().
35 MachineTraceMetrics
*Traces
;
// Trace ensemble; assigned in shouldAddSTPToBlock() from
// Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount).
36 MachineTraceMetrics::Ensemble
*MinInstr
;
// Constructs the pass with its ID and runs the pass-initialisation hook on
// the registry returned by PassRegistry::getPassRegistry().
40 AArch64StorePairSuppress() : MachineFunctionPass(ID
) {
41 initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
// Returns STPSUPPRESS_PASS_NAME as the name of this pass.
44 StringRef
getPassName() const override
{ return STPSUPPRESS_PASS_NAME
; }
// Pass entry point; defined out of line below.
46 bool runOnMachineFunction(MachineFunction
&F
) override
;
// Resource-length check used to decide whether a block can take another STP.
49 bool shouldAddSTPToBlock(const MachineBasicBlock
*BB
);
// Opcode filter selecting the stores this pass looks at.
51 bool isNarrowFPStore(const MachineInstr
&MI
);
// Declares MachineTraceMetrics as both required and preserved, then defers to
// the MachineFunctionPass base implementation for the remaining settings.
53 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
55 AU
.addRequired
<MachineTraceMetrics
>();
56 AU
.addPreserved
<MachineTraceMetrics
>();
57 MachineFunctionPass::getAnalysisUsage(AU
);
// Definition of the pass's static ID member; the constructor hands it to
// MachineFunctionPass.
60 char AArch64StorePairSuppress::ID
= 0;
// Registers the pass with the argument string "aarch64-stp-suppress" (the
// same text as DEBUG_TYPE) and STPSUPPRESS_PASS_NAME as its name.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against llvm/PassSupport.h.
63 INITIALIZE_PASS(AArch64StorePairSuppress
, "aarch64-stp-suppress",
64 STPSUPPRESS_PASS_NAME
, false, false)
/// Factory for the pass: heap-allocates a new AArch64StorePairSuppress and
/// returns it as a FunctionPass pointer.
/// NOTE(review): ownership presumably transfers to the caller (normally a pass
/// manager) -- confirm at the call site.
66 FunctionPass
*llvm::createAArch64StorePairSuppressPass() {
67 return new AArch64StorePairSuppress();
70 /// Return true if an STP can be added to this block without increasing the
71 /// critical resource height. STP is good to form in Ld/St limited blocks and
72 /// bad to form in floating-point limited blocks. This is true independent of the
73 /// critical path. If the critical path is longer than the resource height, the
74 /// extra vector ops can limit physreg renaming. Otherwise, it could simply
75 /// oversaturate the vector units.
76 bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock
*BB
) {
// Fetch the TS_MinInstrCount ensemble from the trace-metrics analysis; the
// result is kept in the MinInstr member.
78 MinInstr
= Traces
->getEnsemble(MachineTraceMetrics::TS_MinInstrCount
);
// Baseline: resource length of the trace through BB as it stands.
80 MachineTraceMetrics::Trace BBTrace
= MinInstr
->getTrace(BB
);
81 unsigned ResLength
= BBTrace
.getResourceLength();
83 // Get the machine model's scheduling class for STPDi.
// NOTE(review): this comment used to name STPQi, but the opcode actually
// queried below is AArch64::STPDi -- confirm which opcode is intended.
84 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
85 unsigned SCIdx
= TII
->get(AArch64::STPDi
).getSchedClass();
86 const MCSchedClassDesc
*SCDesc
=
87 SchedModel
.getMCSchedModel()->getSchedClassDesc(SCIdx
);
89 // If a subtarget does not define resources for STPDi, bail here.
// Only a valid, non-variant scheduling class is used for the comparison.
90 if (SCDesc
->isValid() && !SCDesc
->isVariant()) {
// Presumably the resource length with one extra instruction of SCDesc's
// class added to the trace -- see MachineTraceMetrics::Trace::getResourceLength.
91 unsigned ResLenWithSTP
= BBTrace
.getResourceLength(None
, SCDesc
);
// A larger value means the added STP would raise the resource length.
92 if (ResLenWithSTP
> ResLength
) {
93 LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB
->getNumber()
94 << " resources " << ResLength
<< " -> " << ResLenWithSTP
102 /// Return true if this is a floating-point store smaller than the V reg. On
103 /// Cyclone, these require a vector shuffle before storing a pair.
104 /// Ideally we would call getMatchingPairOpcode() and have the machine model
105 /// tell us if it's profitable with no cpu knowledge here.
107 /// FIXME: We plan to develop a decent Target abstraction for simple loads and
108 /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
109 bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr
&MI
) {
// Classification is purely by opcode.
110 switch (MI
.getOpcode()) {
// NOTE(review): presumably the scaled-immediate (STR*ui) and
// unscaled-immediate (STUR*i) forms of the S- and D-register FP stores --
// confirm against the AArch64 instruction definitions.
113 case AArch64::STRSui
:
114 case AArch64::STRDui
:
115 case AArch64::STURSi
:
116 case AArch64::STURDi
:
/// Pass entry point. Caches the subtarget hooks and the trace-metrics
/// analysis, then walks every basic block looking for narrow FP stores that
/// share a base register with the previously seen one. Such stores are handed
/// to TII->suppressLdStPair() unless shouldAddSTPToBlock() accepts the block.
121 bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction
&MF
) {
// Up-front check: functions rejected by skipFunction() and functions with
// the optsize attribute are special-cased here.
122 if (skipFunction(MF
.getFunction()) || MF
.getFunction().hasOptSize())
// Cache the subtarget-derived helpers in the pass members.
125 const TargetSubtargetInfo
&ST
= MF
.getSubtarget();
126 TII
= static_cast<const AArch64InstrInfo
*>(ST
.getInstrInfo());
127 TRI
= ST
.getRegisterInfo();
128 MRI
= &MF
.getRegInfo();
129 SchedModel
.init(&ST
);
// Analysis result declared as required in getAnalysisUsage().
130 Traces
= &getAnalysis
<MachineTraceMetrics
>();
133 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF
.getName() << '\n');
// shouldAddSTPToBlock() reads scheduling-class data from SchedModel, so the
// case of a subtarget without an instruction scheduling model is handled
// separately here.
135 if (!SchedModel
.hasInstrSchedModel()) {
136 LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
140 // Check for a sequence of stores to the same base address. We don't need to
141 // precisely determine whether a store pair can be formed. But we do want to
142 // filter out most situations where we can't form store pairs to avoid
143 // computing trace metrics in those cases.
144 for (auto &MBB
: MF
) {
// Per-block state: both variables are re-initialised for every block.
145 bool SuppressSTP
= false;
146 unsigned PrevBaseReg
= 0;
147 for (auto &MI
: MBB
) {
// Filter on narrow FP stores.
148 if (!isNarrowFPStore(MI
))
150 const MachineOperand
*BaseOp
;
152 bool OffsetIsScalable
;
// Query the target for MI's base operand and offset.
153 if (TII
->getMemOperandWithOffset(MI
, BaseOp
, Offset
, OffsetIsScalable
,
156 Register BaseReg
= BaseOp
->getReg();
// This store uses the same base register as the previously recorded one.
157 if (PrevBaseReg
== BaseReg
) {
158 // If this block can take STPs, skip ahead to the next block.
// The && short-circuits: while SuppressSTP is true the trace-metrics
// query in shouldAddSTPToBlock() is not evaluated again.
159 if (!SuppressSTP
&& shouldAddSTPToBlock(MI
.getParent()))
161 // Otherwise, continue unpairing the stores in this block.
162 LLVM_DEBUG(dbgs() << "Unpairing store " << MI
<< "\n");
164 TII
->suppressLdStPair(MI
);
// Remember this store's base register for the comparison on the next store.
166 PrevBaseReg
= BaseReg
;
171 // This pass just sets some internal MachineMemOperand flags. It can't really
172 // invalidate anything.