//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// This file implements the LoadStoreOpt optimization pass.
//===----------------------------------------------------------------------===//
12 #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/Analysis/AliasAnalysis.h"
15 #include "llvm/Analysis/MemoryLocation.h"
16 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
17 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/GlobalISel/Utils.h"
21 #include "llvm/CodeGen/LowLevelType.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/Register.h"
29 #include "llvm/CodeGen/TargetLowering.h"
30 #include "llvm/CodeGen/TargetOpcodes.h"
31 #include "llvm/IR/DebugInfoMetadata.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Support/AtomicOrdering.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "loadstore-opt"

using namespace llvm;
using namespace ore;
using namespace MIPatternMatch;
STATISTIC(NumStoresMerged, "Number of stores merged");

const unsigned MaxStoreSizeToForm = 128;

char LoadStoreOpt::ID = 0;
INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                      false, false)
INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
                    false, false)
LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
    : MachineFunctionPass(ID), DoNotRunPass(F) {}

LoadStoreOpt::LoadStoreOpt()
    : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
void LoadStoreOpt::init(MachineFunction &MF) {
  this->MF = &MF;
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  TLI = MF.getSubtarget().getTargetLowering();
  LI = MF.getSubtarget().getLegalizerInfo();
  Builder.setMF(MF);
  IsPreLegalizer = !MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::Legalized);
  InstsToErase.clear();
}
void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();
  AU.setPreservesAll();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
                                                MachineRegisterInfo &MRI) {
  BaseIndexOffset Info;
  Register PtrAddRHS;
  if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
    Info.BaseReg = Ptr;
    Info.IndexReg = Register();
    Info.IsIndexSignExt = false;
    return Info;
  }

  auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
  if (RHSCst)
    Info.Offset = RHSCst->Value.getSExtValue();

  // Just recognize a simple case for now. In future we'll need to match
  // indexing patterns for base + index + constant.
  Info.IndexReg = PtrAddRHS;
  Info.IsIndexSignExt = false;
  return Info;
}
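
// Illustrative example (not in the original source): given
//   %off:_(s64) = G_CONSTANT i64 16
//   %ptr:_(p0) = G_PTR_ADD %base, %off
// getPointerInfo(%ptr, MRI) yields BaseReg = %base, IndexReg = %off and
// Offset = 16; a pointer that is not a G_PTR_ADD comes back as its own base
// register with an offset of zero.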
bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
                                               const MachineInstr &MI2,
                                               bool &IsAlias,
                                               MachineRegisterInfo &MRI) {
  auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
  auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
  if (!LdSt1 || !LdSt2)
    return false;

  BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
  BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);

  if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
    return false;

  int64_t Size1 = LdSt1->getMemSize();
  int64_t Size2 = LdSt2->getMemSize();

  int64_t PtrDiff;
  if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
    PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
    // If the size of memory access is unknown, do not use it to do analysis.
    // One example of unknown size memory access is to load/store scalable
    // vector objects on the stack.
    // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
    // following situations arise:
    if (PtrDiff >= 0 &&
        Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      // [----BasePtr0----]
      //                        [---BasePtr1--]
      // ========PtrDiff========>
      IsAlias = !(Size1 <= PtrDiff);
      return true;
    }
    if (PtrDiff < 0 &&
        Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
      //                     [----BasePtr0----]
      // [---BasePtr1--]
      // =====(-PtrDiff)====>
      IsAlias = !((PtrDiff + Size2) <= 0);
      return true;
    }
    return false;
  }
  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
  // able to calculate their relative offset if at least one arises
  // from an alloca. However, these allocas cannot overlap and we
  // can infer there is no alias.
  auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
  auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
  if (!Base0Def || !Base1Def)
    return false; // Couldn't tell anything.

  if (Base0Def->getOpcode() != Base1Def->getOpcode())
    return false;

  if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
    // If the bases have the same frame index but we couldn't find a
    // constant offset, (indices are different) be conservative.
    if (Base0Def != Base1Def &&
        (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
         !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
      IsAlias = false;
      return true;
    }
  }

  // This implementation is a lot more primitive than the SDAG one for now.
  // FIXME: what about constant pools?
  if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    auto GV0 = Base0Def->getOperand(1).getGlobal();
    auto GV1 = Base1Def->getOperand(1).getGlobal();
    if (GV0 != GV1) {
      IsAlias = false;
      return true;
    }
  }

  // Can't tell anything about aliasing.
  return false;
}
bool GISelAddressing::instMayAlias(const MachineInstr &MI,
                                   const MachineInstr &Other,
                                   MachineRegisterInfo &MRI,
                                   AliasAnalysis *AA) {
  struct MemUseCharacteristics {
    bool IsVolatile;
    bool IsAtomic;
    Register BasePtr;
    int64_t Offset;
    uint64_t NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics =
      [&](const MachineInstr *MI) -> MemUseCharacteristics {
    if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
      Register BaseReg;
      int64_t Offset = 0;
      // No pre/post-inc addressing modes are considered here, unlike in SDAG.
      if (!mi_match(LS->getPointerReg(), MRI,
                    m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
        BaseReg = LS->getPointerReg();
        Offset = 0;
      }

      uint64_t Size = MemoryLocation::getSizeOrUnknown(
          LS->getMMO().getMemoryType().getSizeInBytes());
      return {LS->isVolatile(), LS->isAtomic(), BaseReg,
              Offset /*base offset*/, Size, &LS->getMMO()};
    }
    // FIXME: support recognizing lifetime instructions.
    // Default.
    return {false /*isvolatile*/,
            /*isAtomic*/ false, Register(),
            (int64_t)0 /*offset*/, 0 /*size*/,
            (MachineMemOperand *)nullptr};
  };
  MemUseCharacteristics MUC0 = getCharacteristics(&MI),
                        MUC1 = getCharacteristics(&Other);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  // Be conservative about atomics for the moment
  // TODO: This is way overconservative for unordered atomics (see D66309)
  if (MUC0.IsAtomic && MUC1.IsAtomic)
    return true;

  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias.
  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  // Try to prove that there is aliasing, or that there is no aliasing. Either
  // way, we can return now. If nothing can be proved, proceed with more tests.
  bool IsAlias;
  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // FIXME: port the alignment based alias analysis from SDAG's isAlias().
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  uint64_t Size0 = MUC0.NumBytes;
  uint64_t Size1 = MUC1.NumBytes;
  if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
      Size0 != MemoryLocation::UnknownSize &&
      Size1 != MemoryLocation::UnknownSize) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
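    // Illustrative note (not in the original source): Overlap0 and Overlap1
    // are conservative extents measured from the lower of the two MMO
    // offsets. E.g. SrcValOffset0 = 8, SrcValOffset1 = 4 and Size0 = Size1 = 4
    // give MinOffset = 4, Overlap0 = 8 and Overlap1 = 4, so AA is queried
    // with sizes that cover both accesses relative to their shared base.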
    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                                     MUC0.MMO->getAAInfo()),
                      MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                                     MUC1.MMO->getAAInfo())))
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
/// Returns true if the instruction creates an unavoidable hazard that
/// forces a boundary between store merge candidates.
static bool isInstHardMergeHazard(MachineInstr &MI) {
  return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
}
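
// Illustrative note (not in the original source): typical hazards are
// instructions with unmodeled side effects and volatile or ordered-atomic
// memory references; the caller responds by flushing the current merge
// candidate rather than attempting to merge across the hazard.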
bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
  // Try to merge all the stores in the vector, splitting into separate
  // segments as necessary.
  assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
  LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
  LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
  unsigned AS = PtrTy.getAddressSpace();
  // Ensure the legal store info is computed for this address space.
  initializeStoreMergeTargetInfo(AS);
  const auto &LegalSizes = LegalStoreSizes[AS];

#ifndef NDEBUG
  for (auto *StoreMI : StoresToMerge)
    assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
#endif

  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  bool AnyMerged = false;
  do {
    unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
    unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
    // Compute the biggest store we can generate to handle the number of stores.
    unsigned MergeSizeBits;
    for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
      LLT StoreTy = LLT::scalar(MergeSizeBits);
      EVT StoreEVT =
          getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
      if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
          TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
          (TLI->isTypeLegal(StoreEVT)))
        break; // We can generate a MergeSize bits store.
    }
    if (MergeSizeBits <= OrigTy.getSizeInBits())
      return AnyMerged; // No greater merge.

    unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
    // Perform the actual merging.
    SmallVector<GStore *, 8> SingleMergeStores(
        StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
    AnyMerged |= doSingleStoreMerge(SingleMergeStores);
    StoresToMerge.erase(StoresToMerge.begin(),
                        StoresToMerge.begin() + NumStoresToMerge);
  } while (StoresToMerge.size() > 1);
  return AnyMerged;
}
bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
                                            MachineFunction &MF) const {
  auto Action = LI->getAction(Query).Action;
  // If the instruction is unsupported, it can't be legalized at all.
  if (Action == LegalizeActions::Unsupported)
    return false;
  return IsPreLegalizer || Action == LegalizeActions::Legal;
}
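
// Note (illustrative, not in the original source): before legalization the
// pass optimistically accepts any action other than Unsupported, on the
// assumption that the legalizer can later lower or narrow whatever this pass
// creates; after legalization, only queries that are already Legal pass.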
bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
  assert(Stores.size() > 1);
  // We know that all the stores are consecutive and there are no aliasing
  // operations in the range. However, the values that are being stored may be
  // generated anywhere before each store. To ensure we have the values
  // available, we materialize the wide value and new store at the place of the
  // final store in the merge sequence.
  GStore *FirstStore = Stores[0];
  const unsigned NumStores = Stores.size();
  LLT SmallTy = MRI->getType(FirstStore->getValueReg());
  LLT WideValueTy =
      LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());

  // For each store, compute pairwise merged debug locs.
  DebugLoc MergedLoc = Stores.front()->getDebugLoc();
  for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
    MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
                                              Stores[BIdx]->getDebugLoc());

  Builder.setInstr(*Stores.back());
  Builder.setDebugLoc(MergedLoc);

  // If all of the store values are constants, then create a wide constant
  // directly. Otherwise, we need to generate some instructions to merge the
  // existing values together into a wider type.
  SmallVector<APInt, 8> ConstantVals;
  for (auto *Store : Stores) {
    auto MaybeCst =
        getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
    if (!MaybeCst) {
      ConstantVals.clear();
      break;
    }
    ConstantVals.emplace_back(MaybeCst->Value);
  }

  Register WideReg;
  auto *WideMMO =
      MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
  if (ConstantVals.empty()) {
    // Mimic the SDAG behaviour here and don't try to do anything for unknown
    // values. In future, we should also support the cases of loads and
    // extracted vector elements.
    return false;
  }

  assert(ConstantVals.size() == NumStores);
  // Check if our wide constant is legal.
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
    return false;
  APInt WideConst(WideValueTy.getSizeInBits(), 0);
  for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
    // Insert the smaller constant into the corresponding position in the
    // wider one.
    WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
  }
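  // Illustrative example (not in the original source): merging four s8
  // stores of 0x01, 0x02, 0x03 and 0x04 (lowest address first) yields
  // WideConst = 0x04030201, i.e. the value stored at the lowest address
  // occupies the least significant bits of the wide constant.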
  WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
  auto NewStore =
      Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
  (void)NewStore;
  LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
  NumStoresMerged += Stores.size();

  MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
  MORE.emit([&]() {
    MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
                                FirstStore->getDebugLoc(),
                                FirstStore->getParent());
    R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
      << NV("OrigWidth", SmallTy.getSizeInBytes())
      << " bytes into a single store of "
      << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
    return R;
  });

  for (auto *MI : Stores)
    InstsToErase.insert(MI);
  return true;
}
bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
  if (C.Stores.size() < 2) {
    C.reset();
    return false;
  }

  LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
                    << " stores, starting with " << *C.Stores[0]);
  // We know that the stores in the candidate are adjacent.
  // Now we need to check if any potential aliasing instructions recorded
  // during the search alias with load/stores added to the candidate after.
  // For example, if we have the candidate:
  //   C.Stores = [ST1, ST2, ST3, ST4]
  // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
  // ST2, then we would have recorded it into the PotentialAliases structure
  // with the associated index value of "1". Then we see ST3 and ST4 and add
  // them to the candidate group. We know that LD1 does not alias with ST1 or
  // ST2, since we already did that check. However we don't yet know if it
  // may alias ST3 and ST4, so we perform those checks now.
  SmallVector<GStore *> StoresToMerge;

  auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
    for (auto AliasInfo : reverse(C.PotentialAliases)) {
      MachineInstr *PotentialAliasOp = AliasInfo.first;
      unsigned PreCheckedIdx = AliasInfo.second;
      if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
        // Need to check this alias.
        if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
                                          AA)) {
          LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
                            << " detected\n");
          return true;
        }
      } else {
        // Once our store index is lower than the index associated with the
        // potential alias, we know that we've already checked for this alias
        // and all of the earlier potential aliases too.
        return false;
      }
    }
    return false;
  };
  // Start from the last store in the group, and check if it aliases with any
  // of the potential aliasing operations in the list.
  for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
    auto *CheckStore = C.Stores[StoreIdx];
    if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
      continue;
    StoresToMerge.emplace_back(CheckStore);
  }

  LLVM_DEBUG(dbgs() << StoresToMerge.size()
                    << " stores remaining after alias checks. Merging...\n");

  // Now we've checked for aliasing hazards, merge any stores left.
  C.reset();
  if (StoresToMerge.size() < 2)
    return false;
  return mergeStores(StoresToMerge);
}
bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
                                                 StoreMergeCandidate &C) {
  if (C.Stores.empty())
    return false;
  return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
    return instMayAlias(MI, *OtherMI, *MRI, AA);
  });
}
void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
  PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
}
bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
                                       StoreMergeCandidate &C) {
  // Check if the given store writes to an adjacent address, and other
  // requirements.
  LLT ValueTy = MRI->getType(StoreMI.getValueReg());
  LLT PtrTy = MRI->getType(StoreMI.getPointerReg());

  // Only handle scalars.
  if (!ValueTy.isScalar())
    return false;

  // Don't allow truncating stores for now.
  if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
    return false;

  // Avoid adding volatile or ordered stores to the candidate. We already have
  // a check for this in instMayAlias() but that only gets called later between
  // potential aliasing hazards.
  if (!StoreMI.isSimple())
    return false;

  Register StoreAddr = StoreMI.getPointerReg();
  auto BIO = getPointerInfo(StoreAddr, *MRI);
  Register StoreBase = BIO.BaseReg;
  uint64_t StoreOffCst = BIO.Offset;
  if (C.Stores.empty()) {
    // This is the first store of the candidate.
    // If the offset can't possibly allow for a lower addressed store with the
    // same base, don't bother adding it.
    if (StoreOffCst < ValueTy.getSizeInBytes())
      return false;
    C.BasePtr = StoreBase;
    C.CurrentLowestOffset = StoreOffCst;
    C.Stores.emplace_back(&StoreMI);
    LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
                      << StoreMI);
    return true;
  }

  // Check the store is the same size as the existing ones in the candidate.
  if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
      ValueTy.getSizeInBits())
    return false;

  if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
      PtrTy.getAddressSpace())
    return false;

  // There are other stores in the candidate. Check that the store address
  // writes to the next lowest adjacent address.
  if (C.BasePtr != StoreBase)
    return false;
  if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
      static_cast<uint64_t>(StoreOffCst))
    return false;

  // This writes to an adjacent address. Allow it.
  C.Stores.emplace_back(&StoreMI);
  C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
  LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
  return true;
}
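
// Illustrative walk-through (not in the original source): scanning bottom-up,
// a 4-byte store to base+8 can open a candidate (CurrentLowestOffset = 8);
// a following 4-byte store to base+4 satisfies 8 - 4 == 4 and is added, and
// one to base+0 satisfies 4 - 4 == 0, giving three adjacent stores covering
// [base, base+12).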
bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
  bool Changed = false;
  // Walk through the block bottom-up, looking for merging candidates.
  StoreMergeCandidate Candidate;
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    if (InstsToErase.contains(&MI))
      continue;

    if (auto *StoreMI = dyn_cast<GStore>(&MI)) {
      // We have a G_STORE. Add it to the candidate if it writes to an adjacent
      // address.
      if (!addStoreToCandidate(*StoreMI, Candidate)) {
        // Store wasn't eligible to be added. May need to record it as a
        // potential alias.
        if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
          Changed |= processMergeCandidate(Candidate);
        }
        Candidate.addPotentialAlias(*StoreMI);
      }
      continue;
    }

    // If we don't have any stores yet, this instruction can't pose a problem.
    if (Candidate.Stores.empty())
      continue;

    // We're dealing with some other kind of instruction.
    if (isInstHardMergeHazard(MI)) {
      Changed |= processMergeCandidate(Candidate);
      Candidate.Stores.clear();
      continue;
    }

    if (!MI.mayLoadOrStore())
      continue;

    if (operationAliasesWithCandidate(MI, Candidate)) {
      // We have a potential alias, so process the current candidate if we can
      // and then continue looking for a new candidate.
      Changed |= processMergeCandidate(Candidate);
      continue;
    }

    // Record this instruction as a potential alias for future stores that are
    // added to the candidate.
    Candidate.addPotentialAlias(MI);
  }

  // Process any candidate left after finishing searching the entire block.
  Changed |= processMergeCandidate(Candidate);

  // Erase instructions now that we're no longer iterating over the block.
  for (auto *MI : InstsToErase)
    MI->eraseFromParent();
  InstsToErase.clear();
  return Changed;
}
bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
  bool Changed = false;
  for (auto &BB : MF) {
    Changed |= mergeBlockStores(BB);
  }
  return Changed;
}
void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
  // Query the legalizer info to record what store types are legal.
  // We record this because we don't want to bother trying to merge stores into
  // illegal ones, which would just result in being split again.

  if (LegalStoreSizes.count(AddrSpace)) {
    assert(LegalStoreSizes[AddrSpace].any());
    return; // Already cached sizes for this address space.
  }

  // Need to reserve at least MaxStoreSizeToForm + 1 bits.
  BitVector LegalSizes(MaxStoreSizeToForm * 2);
  const auto &LI = *MF->getSubtarget().getLegalizerInfo();
  const auto &DL = MF->getFunction().getParent()->getDataLayout();
  Type *IntPtrIRTy =
      DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
  LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
  // We assume that we're not going to be generating any stores wider than
  // MaxStoreSizeToForm bits for now.
  for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
    LLT Ty = LLT::scalar(Size);
    SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
        {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
    SmallVector<LLT> StoreTys({Ty, PtrTy});
    LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
    LegalizeActionStep ActionStep = LI.getAction(Q);
    if (ActionStep.Action == LegalizeActions::Legal)
      LegalSizes.set(Size);
  }
  assert(LegalSizes.any() && "Expected some store sizes to be legal!");
  LegalStoreSizes[AddrSpace] = LegalSizes;
}
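
// Note (illustrative, not in the original source): LegalSizes is indexed by
// store size in bits, so a set bit at index 32 means an s32 G_STORE is Legal
// in this address space; mergeStores() later tests LegalSizes[MergeSizeBits]
// before trying to form a wider store.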
bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running this pass.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;

  LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
                    << '\n');

  init(MF);
  bool Changed = false;
  Changed |= mergeFunctionStores(MF);

  LegalStoreSizes.clear();
  return Changed;
}