1 //===- HexagonStoreWidening.cpp -------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // Replace sequences of "narrow" stores to adjacent memory locations with
9 // fewer "wide" stores that have the same effect.
10 // For example, replace:
11 // S4_storeirb_io %100, 0, 0 ; store-immediate-byte
12 // S4_storeirb_io %100, 1, 0 ; store-immediate-byte
13 // with
14 // S4_storeirh_io %100, 0, 0 ; store-immediate-halfword
15 // The above is the general idea. The actual cases handled by the code
16 // may be a bit more complex.
17 // The purpose of this pass is to reduce the number of outstanding stores,
18 // or as one could say, "reduce store queue pressure". Also, wide stores
19 // mean fewer stores, and since there are only two memory instructions allowed
20 // per packet, it also means fewer packets, and ultimately fewer cycles.
21 //===---------------------------------------------------------------------===//
23 #include "HexagonInstrInfo.h"
24 #include "HexagonRegisterInfo.h"
25 #include "HexagonSubtarget.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/MemoryLocation.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/CodeGen/MachineInstr.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineMemOperand.h"
35 #include "llvm/CodeGen/MachineOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/IR/DebugLoc.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/MC/MCInstrDesc.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Support/raw_ostream.h"
51 #define DEBUG_TYPE "hexagon-widen-stores"
57 FunctionPass
*createHexagonStoreWidening();
58 void initializeHexagonStoreWideningPass(PassRegistry
&);
60 } // end namespace llvm
64 struct HexagonStoreWidening
: public MachineFunctionPass
{
65 const HexagonInstrInfo
*TII
;
66 const HexagonRegisterInfo
*TRI
;
67 const MachineRegisterInfo
*MRI
;
74 HexagonStoreWidening() : MachineFunctionPass(ID
) {
75 initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
78 bool runOnMachineFunction(MachineFunction
&MF
) override
;
80 StringRef
getPassName() const override
{ return "Hexagon Store Widening"; }
82 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
83 AU
.addRequired
<AAResultsWrapperPass
>();
84 AU
.addPreserved
<AAResultsWrapperPass
>();
85 MachineFunctionPass::getAnalysisUsage(AU
);
88 static bool handledStoreType(const MachineInstr
*MI
);
91 static const int MaxWideSize
= 4;
93 using InstrGroup
= std::vector
<MachineInstr
*>;
94 using InstrGroupList
= std::vector
<InstrGroup
>;
96 bool instrAliased(InstrGroup
&Stores
, const MachineMemOperand
&MMO
);
97 bool instrAliased(InstrGroup
&Stores
, const MachineInstr
*MI
);
98 void createStoreGroup(MachineInstr
*BaseStore
, InstrGroup::iterator Begin
,
99 InstrGroup::iterator End
, InstrGroup
&Group
);
100 void createStoreGroups(MachineBasicBlock
&MBB
,
101 InstrGroupList
&StoreGroups
);
102 bool processBasicBlock(MachineBasicBlock
&MBB
);
103 bool processStoreGroup(InstrGroup
&Group
);
104 bool selectStores(InstrGroup::iterator Begin
, InstrGroup::iterator End
,
105 InstrGroup
&OG
, unsigned &TotalSize
, unsigned MaxSize
);
106 bool createWideStores(InstrGroup
&OG
, InstrGroup
&NG
, unsigned TotalSize
);
107 bool replaceStores(InstrGroup
&OG
, InstrGroup
&NG
);
108 bool storesAreAdjacent(const MachineInstr
*S1
, const MachineInstr
*S2
);
111 } // end anonymous namespace
113 char HexagonStoreWidening::ID
= 0;
115 INITIALIZE_PASS_BEGIN(HexagonStoreWidening
, "hexagon-widen-stores",
116 "Hexason Store Widening", false, false)
117 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass
)
118 INITIALIZE_PASS_END(HexagonStoreWidening
, "hexagon-widen-stores",
119 "Hexagon Store Widening", false, false)
121 // Some local helper functions...
122 static unsigned getBaseAddressRegister(const MachineInstr
*MI
) {
123 const MachineOperand
&MO
= MI
->getOperand(0);
124 assert(MO
.isReg() && "Expecting register operand");
128 static int64_t getStoreOffset(const MachineInstr
*MI
) {
129 unsigned OpC
= MI
->getOpcode();
130 assert(HexagonStoreWidening::handledStoreType(MI
) && "Unhandled opcode");
133 case Hexagon::S4_storeirb_io
:
134 case Hexagon::S4_storeirh_io
:
135 case Hexagon::S4_storeiri_io
: {
136 const MachineOperand
&MO
= MI
->getOperand(1);
137 assert(MO
.isImm() && "Expecting immediate offset");
142 llvm_unreachable("Store offset calculation missing for a handled opcode");
146 static const MachineMemOperand
&getStoreTarget(const MachineInstr
*MI
) {
147 assert(!MI
->memoperands_empty() && "Expecting memory operands");
148 return **MI
->memoperands_begin();
151 // Filtering function: any stores whose opcodes are not "approved" of by
152 // this function will not be subjected to widening.
153 inline bool HexagonStoreWidening::handledStoreType(const MachineInstr
*MI
) {
154 // For now, only handle stores of immediate values.
155 // Also, reject stores to stack slots.
156 unsigned Opc
= MI
->getOpcode();
158 case Hexagon::S4_storeirb_io
:
159 case Hexagon::S4_storeirh_io
:
160 case Hexagon::S4_storeiri_io
:
161 // Base address must be a register. (Implement FI later.)
162 return MI
->getOperand(0).isReg();
168 // Check if the machine memory operand MMO is aliased with any of the
169 // stores in the store group Stores.
170 bool HexagonStoreWidening::instrAliased(InstrGroup
&Stores
,
171 const MachineMemOperand
&MMO
) {
175 MemoryLocation
L(MMO
.getValue(), MMO
.getSize(), MMO
.getAAInfo());
177 for (auto SI
: Stores
) {
178 const MachineMemOperand
&SMO
= getStoreTarget(SI
);
182 MemoryLocation
SL(SMO
.getValue(), SMO
.getSize(), SMO
.getAAInfo());
183 if (!AA
->isNoAlias(L
, SL
))
190 // Check if the machine instruction MI accesses any storage aliased with
191 // any store in the group Stores.
192 bool HexagonStoreWidening::instrAliased(InstrGroup
&Stores
,
193 const MachineInstr
*MI
) {
194 for (auto &I
: MI
->memoperands())
195 if (instrAliased(Stores
, *I
))
200 // Inspect a machine basic block, and generate store groups out of stores
201 // encountered in the block.
203 // A store group is a group of stores that use the same base register,
204 // and which can be reordered within that group without altering the
205 // semantics of the program. A single store group could be widened as
206 // a whole, if there existed a single store instruction with the same
207 // semantics as the entire group. In many cases, a single store group
208 // may need more than one wide store.
209 void HexagonStoreWidening::createStoreGroups(MachineBasicBlock
&MBB
,
210 InstrGroupList
&StoreGroups
) {
213 // Copy all instruction pointers from the basic block to a temporary
214 // list. This will allow operating on the list, and modifying its
215 // elements without affecting the basic block.
217 AllInsns
.push_back(&I
);
219 // Traverse all instructions in the AllInsns list, and if we encounter
220 // a store, then try to create a store group starting at that instruction
221 // i.e. a sequence of independent stores that can be widened.
222 for (auto I
= AllInsns
.begin(), E
= AllInsns
.end(); I
!= E
; ++I
) {
223 MachineInstr
*MI
= *I
;
224 // Skip null pointers (processed instructions).
225 if (!MI
|| !handledStoreType(MI
))
228 // Found a store. Try to create a store group.
230 createStoreGroup(MI
, I
+1, E
, G
);
232 StoreGroups
.push_back(G
);
236 // Create a single store group. The stores need to be independent between
237 // themselves, and also there cannot be other instructions between them
238 // that could read or modify storage being stored into.
239 void HexagonStoreWidening::createStoreGroup(MachineInstr
*BaseStore
,
240 InstrGroup::iterator Begin
, InstrGroup::iterator End
, InstrGroup
&Group
) {
241 assert(handledStoreType(BaseStore
) && "Unexpected instruction");
242 unsigned BaseReg
= getBaseAddressRegister(BaseStore
);
245 Group
.push_back(BaseStore
);
247 for (auto I
= Begin
; I
!= End
; ++I
) {
248 MachineInstr
*MI
= *I
;
252 if (handledStoreType(MI
)) {
253 // If this store instruction is aliased with anything already in the
254 // group, terminate the group now.
255 if (instrAliased(Group
, getStoreTarget(MI
)))
257 // If this store is aliased to any of the memory instructions we have
258 // seen so far (that are not a part of this group), terminate the group.
259 if (instrAliased(Other
, getStoreTarget(MI
)))
262 unsigned BR
= getBaseAddressRegister(MI
);
270 // Assume calls are aliased to everything.
271 if (MI
->isCall() || MI
->hasUnmodeledSideEffects())
274 if (MI
->mayLoadOrStore()) {
275 if (MI
->hasOrderedMemoryRef() || instrAliased(Group
, MI
))
282 // Check if store instructions S1 and S2 are adjacent. More precisely,
283 // S2 has to access memory immediately following that accessed by S1.
284 bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr
*S1
,
285 const MachineInstr
*S2
) {
286 if (!handledStoreType(S1
) || !handledStoreType(S2
))
289 const MachineMemOperand
&S1MO
= getStoreTarget(S1
);
291 // Currently only handling immediate stores.
292 int Off1
= S1
->getOperand(1).getImm();
293 int Off2
= S2
->getOperand(1).getImm();
295 return (Off1
>= 0) ? Off1
+S1MO
.getSize() == unsigned(Off2
)
296 : int(Off1
+S1MO
.getSize()) == Off2
;
299 /// Given a sequence of adjacent stores, and a maximum size of a single wide
300 /// store, pick a group of stores that can be replaced by a single store
301 /// of size not exceeding MaxSize. The selected sequence will be recorded
302 /// in OG ("old group" of instructions).
303 /// OG should be empty on entry, and should be left empty if the function
304 /// fails.
305 bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin
,
306 InstrGroup::iterator End
, InstrGroup
&OG
, unsigned &TotalSize
,
308 assert(Begin
!= End
&& "No instructions to analyze");
309 assert(OG
.empty() && "Old group not empty on entry");
311 if (std::distance(Begin
, End
) <= 1)
314 MachineInstr
*FirstMI
= *Begin
;
315 assert(!FirstMI
->memoperands_empty() && "Expecting some memory operands");
316 const MachineMemOperand
&FirstMMO
= getStoreTarget(FirstMI
);
317 unsigned Alignment
= FirstMMO
.getAlign().value();
318 unsigned SizeAccum
= FirstMMO
.getSize();
319 unsigned FirstOffset
= getStoreOffset(FirstMI
);
321 // The initial value of SizeAccum should always be a power of 2.
322 assert(isPowerOf2_32(SizeAccum
) && "First store size not a power of 2");
324 // If the size of the first store equals or exceeds the limit, do nothing.
325 if (SizeAccum
>= MaxSize
)
328 // If the size of the first store is greater than or equal to the alignment
329 // of the address stored to, then the store cannot be made any wider.
330 if (SizeAccum
>= Alignment
)
333 // The offset of a store will put restrictions on how wide the store can be.
334 // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
335 // If the first store already exhausts the offset limits, quit. Test this
336 // by checking if the next wider size would exceed the limit.
337 if ((2*SizeAccum
-1) & FirstOffset
)
340 OG
.push_back(FirstMI
);
341 MachineInstr
*S1
= FirstMI
;
343 // Pow2Num will be the largest number of elements in OG such that the sum
344 // of sizes of stores 0...Pow2Num-1 will be a power of 2.
345 unsigned Pow2Num
= 1;
346 unsigned Pow2Size
= SizeAccum
;
348 // Be greedy: keep accumulating stores as long as they are to adjacent
349 // memory locations, and as long as the total number of bytes stored
350 // does not exceed the limit (MaxSize).
351 // Keep track of when the total size covered is a power of 2, since
352 // this is a size a single store can cover.
353 for (InstrGroup::iterator I
= Begin
+ 1; I
!= End
; ++I
) {
354 MachineInstr
*S2
= *I
;
355 // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
356 // any other store to fill the "hole".
357 if (!storesAreAdjacent(S1
, S2
))
360 unsigned S2Size
= getStoreTarget(S2
).getSize();
361 if (SizeAccum
+ S2Size
> std::min(MaxSize
, Alignment
))
366 if (isPowerOf2_32(SizeAccum
)) {
368 Pow2Size
= SizeAccum
;
370 if ((2*Pow2Size
-1) & FirstOffset
)
376 // The stores don't add up to anything that can be widened. Clean up.
382 // Only leave the stores being widened.
384 TotalSize
= Pow2Size
;
388 /// Given an "old group" OG of stores, create a "new group" NG of instructions
389 /// to replace them. Ideally, NG would only have a single instruction in it,
390 /// but that may only be possible for store-immediate.
391 bool HexagonStoreWidening::createWideStores(InstrGroup
&OG
, InstrGroup
&NG
,
392 unsigned TotalSize
) {
393 // XXX Current limitations:
394 // - only expect stores of immediate values in OG,
395 // - only handle a TotalSize of up to 4.
400 unsigned Acc
= 0; // Value accumulator.
403 for (InstrGroup::iterator I
= OG
.begin(), E
= OG
.end(); I
!= E
; ++I
) {
404 MachineInstr
*MI
= *I
;
405 const MachineMemOperand
&MMO
= getStoreTarget(MI
);
406 MachineOperand
&SO
= MI
->getOperand(2); // Source.
407 assert(SO
.isImm() && "Expecting an immediate operand");
409 unsigned NBits
= MMO
.getSize()*8;
410 unsigned Mask
= (0xFFFFFFFFU
>> (32-NBits
));
411 unsigned Val
= (SO
.getImm() & Mask
) << Shift
;
416 MachineInstr
*FirstSt
= OG
.front();
417 DebugLoc DL
= OG
.back()->getDebugLoc();
418 const MachineMemOperand
&OldM
= getStoreTarget(FirstSt
);
419 MachineMemOperand
*NewM
=
420 MF
->getMachineMemOperand(OldM
.getPointerInfo(), OldM
.getFlags(),
421 TotalSize
, OldM
.getAlign(), OldM
.getAAInfo());
424 // Create mem[hw] = #Acc
425 unsigned WOpc
= (TotalSize
== 2) ? Hexagon::S4_storeirh_io
:
426 (TotalSize
== 4) ? Hexagon::S4_storeiri_io
: 0;
427 assert(WOpc
&& "Unexpected size");
429 int Val
= (TotalSize
== 2) ? int16_t(Acc
) : int(Acc
);
430 const MCInstrDesc
&StD
= TII
->get(WOpc
);
431 MachineOperand
&MR
= FirstSt
->getOperand(0);
432 int64_t Off
= FirstSt
->getOperand(1).getImm();
434 BuildMI(*MF
, DL
, StD
)
435 .addReg(MR
.getReg(), getKillRegState(MR
.isKill()), MR
.getSubReg())
438 StI
->addMemOperand(*MF
, NewM
);
441 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
442 const MCInstrDesc
&TfrD
= TII
->get(Hexagon::A2_tfrsi
);
443 const TargetRegisterClass
*RC
= TII
->getRegClass(TfrD
, 0, TRI
, *MF
);
444 Register VReg
= MF
->getRegInfo().createVirtualRegister(RC
);
445 MachineInstr
*TfrI
= BuildMI(*MF
, DL
, TfrD
, VReg
)
449 unsigned WOpc
= (TotalSize
== 2) ? Hexagon::S2_storerh_io
:
450 (TotalSize
== 4) ? Hexagon::S2_storeri_io
: 0;
451 assert(WOpc
&& "Unexpected size");
453 const MCInstrDesc
&StD
= TII
->get(WOpc
);
454 MachineOperand
&MR
= FirstSt
->getOperand(0);
455 int64_t Off
= FirstSt
->getOperand(1).getImm();
457 BuildMI(*MF
, DL
, StD
)
458 .addReg(MR
.getReg(), getKillRegState(MR
.isKill()), MR
.getSubReg())
460 .addReg(VReg
, RegState::Kill
);
461 StI
->addMemOperand(*MF
, NewM
);
468 // Replace instructions from the old group OG with instructions from the
469 // new group NG. Conceptually, remove all instructions in OG, and then
470 // insert all instructions in NG, starting at where the first instruction
471 // from OG was (in the order in which they appeared in the basic block).
472 // (The ordering in OG does not have to match the order in the basic block.)
473 bool HexagonStoreWidening::replaceStores(InstrGroup
&OG
, InstrGroup
&NG
) {
475 dbgs() << "Replacing:\n";
483 MachineBasicBlock
*MBB
= OG
.back()->getParent();
484 MachineBasicBlock::iterator InsertAt
= MBB
->end();
486 // Need to establish the insertion point. The best one is right before
487 // the first store in the OG, but in the order in which the stores occur
488 // in the program list. Since the ordering in OG does not correspond
489 // to the order in the program list, we need to do some work to find
490 // the insertion point.
492 // Create a set of all instructions in OG (for quick lookup).
493 SmallPtrSet
<MachineInstr
*, 4> InstrSet
;
497 // Traverse the block, until we hit an instruction from OG.
498 for (auto &I
: *MBB
) {
499 if (InstrSet
.count(&I
)) {
505 assert((InsertAt
!= MBB
->end()) && "Cannot locate any store from the group");
507 bool AtBBStart
= false;
509 // InsertAt points at the first instruction that will be removed. We need
510 // to move it out of the way, so it remains valid after removing all the
511 // old stores, and so we are able to recover it back to the proper insertion
512 // position.
513 if (InsertAt
!= MBB
->begin())
519 I
->eraseFromParent();
524 InsertAt
= MBB
->begin();
527 MBB
->insert(InsertAt
, I
);
532 // Break up the group into smaller groups, each of which can be replaced by
533 // a single wide store. Widen each such smaller group and replace the old
534 // instructions with the widened ones.
535 bool HexagonStoreWidening::processStoreGroup(InstrGroup
&Group
) {
536 bool Changed
= false;
537 InstrGroup::iterator I
= Group
.begin(), E
= Group
.end();
538 InstrGroup OG
, NG
; // Old and new groups.
539 unsigned CollectedSize
;
545 bool Succ
= selectStores(I
++, E
, OG
, CollectedSize
, MaxWideSize
) &&
546 createWideStores(OG
, NG
, CollectedSize
) &&
547 replaceStores(OG
, NG
);
551 assert(OG
.size() > 1 && "Created invalid group");
552 assert(distance(I
, E
)+1 >= int(OG
.size()) && "Too many elements");
561 // Process a single basic block: create the store groups, and replace them
562 // with the widened stores, if possible. Processing of each basic block
563 // is independent from processing of any other basic block. This
564 // transformation could be stopped after having processed any basic block
565 // without any ill effects (other than not having performed widening in the
566 // unprocessed blocks). Also, the basic blocks can be processed in any order.
567 bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock
&MBB
) {
569 bool Changed
= false;
571 createStoreGroups(MBB
, SGs
);
573 auto Less
= [] (const MachineInstr
*A
, const MachineInstr
*B
) -> bool {
574 return getStoreOffset(A
) < getStoreOffset(B
);
576 for (auto &G
: SGs
) {
577 assert(G
.size() > 1 && "Store group with fewer than 2 elements");
580 Changed
|= processStoreGroup(G
);
586 bool HexagonStoreWidening::runOnMachineFunction(MachineFunction
&MFn
) {
587 if (skipFunction(MFn
.getFunction()))
591 auto &ST
= MFn
.getSubtarget
<HexagonSubtarget
>();
592 TII
= ST
.getInstrInfo();
593 TRI
= ST
.getRegisterInfo();
594 MRI
= &MFn
.getRegInfo();
595 AA
= &getAnalysis
<AAResultsWrapperPass
>().getAAResults();
597 bool Changed
= false;
600 Changed
|= processBasicBlock(B
);
605 FunctionPass
*llvm::createHexagonStoreWidening() {
606 return new HexagonStoreWidening();