1 //===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that performs load / store related peephole
10 // optimizations. This pass should be run after register allocation.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/IR/DebugLoc.h"
33 #include "llvm/MC/MCAsmInfo.h"
34 #include "llvm/MC/MCRegisterInfo.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/DebugCounter.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <cstdint>
43 #include <functional>
44 #include <iterator>
45 #include <limits>
47 using namespace llvm;
49 #define DEBUG_TYPE "aarch64-ldst-opt"
51 STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
52 STATISTIC(NumPostFolded, "Number of post-index updates folded");
53 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
54 STATISTIC(NumUnscaledPairCreated,
55 "Number of load/store from unscaled generated");
56 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
57 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
59 DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
60 "Controls which pairs are considered for renaming");
62 // The LdStLimit limits how far we search for load/store pairs.
63 static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
64 cl::init(20), cl::Hidden);
66 // The UpdateLimit limits how far we search for update instructions when we form
67 // pre-/post-index instructions.
68 static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
69 cl::Hidden);
71 // Enable register renaming to find additional store pairing opportunities.
72 static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
73 cl::init(true), cl::Hidden);
75 #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
77 namespace {
79 using LdStPairFlags = struct LdStPairFlags {
80 // If a matching instruction is found, MergeForward is set to true if the
81 // merge is to remove the first instruction and replace the second with
82 // a pair-wise insn, and false if the reverse is true.
83 bool MergeForward = false;
85 // SExtIdx gives the index of the result of the load pair that must be
86 // extended. The value of SExtIdx assumes that the paired load produces the
87 // value in this order: (I, returned iterator), i.e., -1 means no value has
88 // to be extended, 0 means I, and 1 means the returned iterator.
89 int SExtIdx = -1;
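  // Illustrative example (registers made up, not from this pass): when an
  // LDRSWui is paired with an LDRWui, the pair is emitted as an LDPWi and the
  // result coming from the sign-extending load is afterwards widened with an
  // SBFMXri. SExtIdx records which of the two results needs that extension.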
91 // If not none, RenameReg can be used to rename the result register of the
92 // first store in a pair. Currently this only works when merging stores
93 // forward.
94 Optional<MCPhysReg> RenameReg = None;
96 LdStPairFlags() = default;
98 void setMergeForward(bool V = true) { MergeForward = V; }
99 bool getMergeForward() const { return MergeForward; }
101 void setSExtIdx(int V) { SExtIdx = V; }
102 int getSExtIdx() const { return SExtIdx; }
104 void setRenameReg(MCPhysReg R) { RenameReg = R; }
105 void clearRenameReg() { RenameReg = None; }
106 Optional<MCPhysReg> getRenameReg() const { return RenameReg; }
109 struct AArch64LoadStoreOpt : public MachineFunctionPass {
110 static char ID;
112 AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
113 initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
116 AliasAnalysis *AA;
117 const AArch64InstrInfo *TII;
118 const TargetRegisterInfo *TRI;
119 const AArch64Subtarget *Subtarget;
121 // Track which register units have been modified and used.
122 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
123 LiveRegUnits DefinedInBB;
125 void getAnalysisUsage(AnalysisUsage &AU) const override {
126 AU.addRequired<AAResultsWrapperPass>();
127 MachineFunctionPass::getAnalysisUsage(AU);
130 // Scan the instructions looking for a load/store that can be combined
131 // with the current instruction into a load/store pair.
132 // Return the matching instruction if one is found, else MBB->end().
133 MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
134 LdStPairFlags &Flags,
135 unsigned Limit,
136 bool FindNarrowMerge);
138 // Scan the instructions looking for a store that writes to the address from
139 // which the current load instruction reads. Return true if one is found.
140 bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
141 MachineBasicBlock::iterator &StoreI);
143 // Merge the two narrow-store instructions indicated into a single wider store.
144 MachineBasicBlock::iterator
145 mergeNarrowZeroStores(MachineBasicBlock::iterator I,
146 MachineBasicBlock::iterator MergeMI,
147 const LdStPairFlags &Flags);
149 // Merge the two instructions indicated into a single pair-wise instruction.
150 MachineBasicBlock::iterator
151 mergePairedInsns(MachineBasicBlock::iterator I,
152 MachineBasicBlock::iterator Paired,
153 const LdStPairFlags &Flags);
155 // Promote the load that reads directly from the address stored to.
156 MachineBasicBlock::iterator
157 promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
158 MachineBasicBlock::iterator StoreI);
160 // Scan the instruction list to find a base register update that can
161 // be combined with the current instruction (a load or store) using
162 // pre or post indexed addressing with writeback. Scan forwards.
163 MachineBasicBlock::iterator
164 findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
165 int UnscaledOffset, unsigned Limit);
167 // Scan the instruction list to find a base register update that can
168 // be combined with the current instruction (a load or store) using
169 // pre or post indexed addressing with writeback. Scan backwards.
170 MachineBasicBlock::iterator
171 findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
173 // Find an instruction that updates the base register of the ld/st
174 // instruction.
175 bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
176 unsigned BaseReg, int Offset);
178 // Merge a pre- or post-index base register update into a ld/st instruction.
179 MachineBasicBlock::iterator
180 mergeUpdateInsn(MachineBasicBlock::iterator I,
181 MachineBasicBlock::iterator Update, bool IsPreIdx);
183 // Find and merge zero store instructions.
184 bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
186 // Find and pair ldr/str instructions.
187 bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
189 // Find and promote load instructions which read directly from store.
190 bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
192 // Find and merge base register updates before or after a ld/st instruction.
193 bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
195 bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
197 bool runOnMachineFunction(MachineFunction &Fn) override;
199 MachineFunctionProperties getRequiredProperties() const override {
200 return MachineFunctionProperties().set(
201 MachineFunctionProperties::Property::NoVRegs);
204 StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
207 char AArch64LoadStoreOpt::ID = 0;
209 } // end anonymous namespace
211 INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
212 AARCH64_LOAD_STORE_OPT_NAME, false, false)
214 static bool isNarrowStore(unsigned Opc) {
215 switch (Opc) {
216 default:
217 return false;
218 case AArch64::STRBBui:
219 case AArch64::STURBBi:
220 case AArch64::STRHHui:
221 case AArch64::STURHHi:
222 return true;
226 // These instructions set the memory tag and either keep the memory contents
227 // unchanged or set them to zero, ignoring the address part of the source register.
228 static bool isTagStore(const MachineInstr &MI) {
229 switch (MI.getOpcode()) {
230 default:
231 return false;
232 case AArch64::STGOffset:
233 case AArch64::STZGOffset:
234 case AArch64::ST2GOffset:
235 case AArch64::STZ2GOffset:
236 return true;
240 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
241 bool *IsValidLdStrOpc = nullptr) {
242 if (IsValidLdStrOpc)
243 *IsValidLdStrOpc = true;
244 switch (Opc) {
245 default:
246 if (IsValidLdStrOpc)
247 *IsValidLdStrOpc = false;
248 return std::numeric_limits<unsigned>::max();
249 case AArch64::STRDui:
250 case AArch64::STURDi:
251 case AArch64::STRDpre:
252 case AArch64::STRQui:
253 case AArch64::STURQi:
254 case AArch64::STRQpre:
255 case AArch64::STRBBui:
256 case AArch64::STURBBi:
257 case AArch64::STRHHui:
258 case AArch64::STURHHi:
259 case AArch64::STRWui:
260 case AArch64::STRWpre:
261 case AArch64::STURWi:
262 case AArch64::STRXui:
263 case AArch64::STRXpre:
264 case AArch64::STURXi:
265 case AArch64::LDRDui:
266 case AArch64::LDURDi:
267 case AArch64::LDRDpre:
268 case AArch64::LDRQui:
269 case AArch64::LDURQi:
270 case AArch64::LDRQpre:
271 case AArch64::LDRWui:
272 case AArch64::LDURWi:
273 case AArch64::LDRWpre:
274 case AArch64::LDRXui:
275 case AArch64::LDURXi:
276 case AArch64::LDRXpre:
277 case AArch64::STRSui:
278 case AArch64::STURSi:
279 case AArch64::STRSpre:
280 case AArch64::LDRSui:
281 case AArch64::LDURSi:
282 case AArch64::LDRSpre:
283 return Opc;
284 case AArch64::LDRSWui:
285 return AArch64::LDRWui;
286 case AArch64::LDURSWi:
287 return AArch64::LDURWi;
291 static unsigned getMatchingWideOpcode(unsigned Opc) {
292 switch (Opc) {
293 default:
294 llvm_unreachable("Opcode has no wide equivalent!");
295 case AArch64::STRBBui:
296 return AArch64::STRHHui;
297 case AArch64::STRHHui:
298 return AArch64::STRWui;
299 case AArch64::STURBBi:
300 return AArch64::STURHHi;
301 case AArch64::STURHHi:
302 return AArch64::STURWi;
303 case AArch64::STURWi:
304 return AArch64::STURXi;
305 case AArch64::STRWui:
306 return AArch64::STRXui;
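// Illustrative sketch (registers and offsets made up) of how the wide opcode
// is used when merging narrow zero stores, see mergeNarrowZeroStores below:
//   strh wzr, [x0]        ; STRHHui, offset 0
//   strh wzr, [x0, #2]    ; STRHHui, offset 1 (scaled)
// become
//   str  wzr, [x0]        ; STRWui, the matching wide opcode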
310 static unsigned getMatchingPairOpcode(unsigned Opc) {
311 switch (Opc) {
312 default:
313 llvm_unreachable("Opcode has no pairwise equivalent!");
314 case AArch64::STRSui:
315 case AArch64::STURSi:
316 return AArch64::STPSi;
317 case AArch64::STRSpre:
318 return AArch64::STPSpre;
319 case AArch64::STRDui:
320 case AArch64::STURDi:
321 return AArch64::STPDi;
322 case AArch64::STRDpre:
323 return AArch64::STPDpre;
324 case AArch64::STRQui:
325 case AArch64::STURQi:
326 return AArch64::STPQi;
327 case AArch64::STRQpre:
328 return AArch64::STPQpre;
329 case AArch64::STRWui:
330 case AArch64::STURWi:
331 return AArch64::STPWi;
332 case AArch64::STRWpre:
333 return AArch64::STPWpre;
334 case AArch64::STRXui:
335 case AArch64::STURXi:
336 return AArch64::STPXi;
337 case AArch64::STRXpre:
338 return AArch64::STPXpre;
339 case AArch64::LDRSui:
340 case AArch64::LDURSi:
341 return AArch64::LDPSi;
342 case AArch64::LDRSpre:
343 return AArch64::LDPSpre;
344 case AArch64::LDRDui:
345 case AArch64::LDURDi:
346 return AArch64::LDPDi;
347 case AArch64::LDRDpre:
348 return AArch64::LDPDpre;
349 case AArch64::LDRQui:
350 case AArch64::LDURQi:
351 return AArch64::LDPQi;
352 case AArch64::LDRQpre:
353 return AArch64::LDPQpre;
354 case AArch64::LDRWui:
355 case AArch64::LDURWi:
356 return AArch64::LDPWi;
357 case AArch64::LDRWpre:
358 return AArch64::LDPWpre;
359 case AArch64::LDRXui:
360 case AArch64::LDURXi:
361 return AArch64::LDPXi;
362 case AArch64::LDRXpre:
363 return AArch64::LDPXpre;
364 case AArch64::LDRSWui:
365 case AArch64::LDURSWi:
366 return AArch64::LDPSWi;
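// Illustrative sketch (registers and offsets made up) of the pairing this
// table enables:
//   ldr x0, [x2]          ; LDRXui
//   ldr x1, [x2, #8]      ; LDRXui, adjacent slot
// become
//   ldp x0, x1, [x2]      ; LDPXi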
370 static unsigned isMatchingStore(MachineInstr &LoadInst,
371 MachineInstr &StoreInst) {
372 unsigned LdOpc = LoadInst.getOpcode();
373 unsigned StOpc = StoreInst.getOpcode();
374 switch (LdOpc) {
375 default:
376 llvm_unreachable("Unsupported load instruction!");
377 case AArch64::LDRBBui:
378 return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
379 StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
380 case AArch64::LDURBBi:
381 return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
382 StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
383 case AArch64::LDRHHui:
384 return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
385 StOpc == AArch64::STRXui;
386 case AArch64::LDURHHi:
387 return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
388 StOpc == AArch64::STURXi;
389 case AArch64::LDRWui:
390 return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
391 case AArch64::LDURWi:
392 return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
393 case AArch64::LDRXui:
394 return StOpc == AArch64::STRXui;
395 case AArch64::LDURXi:
396 return StOpc == AArch64::STURXi;
400 static unsigned getPreIndexedOpcode(unsigned Opc) {
401 // FIXME: We don't currently support creating pre-indexed loads/stores when
402 // the load or store is the unscaled version. If we decide to perform such an
403 // optimization in the future the cases for the unscaled loads/stores will
404 // need to be added here.
405 switch (Opc) {
406 default:
407 llvm_unreachable("Opcode has no pre-indexed equivalent!");
408 case AArch64::STRSui:
409 return AArch64::STRSpre;
410 case AArch64::STRDui:
411 return AArch64::STRDpre;
412 case AArch64::STRQui:
413 return AArch64::STRQpre;
414 case AArch64::STRBBui:
415 return AArch64::STRBBpre;
416 case AArch64::STRHHui:
417 return AArch64::STRHHpre;
418 case AArch64::STRWui:
419 return AArch64::STRWpre;
420 case AArch64::STRXui:
421 return AArch64::STRXpre;
422 case AArch64::LDRSui:
423 return AArch64::LDRSpre;
424 case AArch64::LDRDui:
425 return AArch64::LDRDpre;
426 case AArch64::LDRQui:
427 return AArch64::LDRQpre;
428 case AArch64::LDRBBui:
429 return AArch64::LDRBBpre;
430 case AArch64::LDRHHui:
431 return AArch64::LDRHHpre;
432 case AArch64::LDRWui:
433 return AArch64::LDRWpre;
434 case AArch64::LDRXui:
435 return AArch64::LDRXpre;
436 case AArch64::LDRSWui:
437 return AArch64::LDRSWpre;
438 case AArch64::LDPSi:
439 return AArch64::LDPSpre;
440 case AArch64::LDPSWi:
441 return AArch64::LDPSWpre;
442 case AArch64::LDPDi:
443 return AArch64::LDPDpre;
444 case AArch64::LDPQi:
445 return AArch64::LDPQpre;
446 case AArch64::LDPWi:
447 return AArch64::LDPWpre;
448 case AArch64::LDPXi:
449 return AArch64::LDPXpre;
450 case AArch64::STPSi:
451 return AArch64::STPSpre;
452 case AArch64::STPDi:
453 return AArch64::STPDpre;
454 case AArch64::STPQi:
455 return AArch64::STPQpre;
456 case AArch64::STPWi:
457 return AArch64::STPWpre;
458 case AArch64::STPXi:
459 return AArch64::STPXpre;
460 case AArch64::STGOffset:
461 return AArch64::STGPreIndex;
462 case AArch64::STZGOffset:
463 return AArch64::STZGPreIndex;
464 case AArch64::ST2GOffset:
465 return AArch64::ST2GPreIndex;
466 case AArch64::STZ2GOffset:
467 return AArch64::STZ2GPreIndex;
468 case AArch64::STGPi:
469 return AArch64::STGPpre;
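// Illustrative sketch (registers made up): a scaled store followed by a
// matching base-register update
//   str x0, [x1, #16]     ; STRXui
//   add x1, x1, #16
// can be rewritten with the pre-indexed opcode as
//   str x0, [x1, #16]!    ; STRXpre, writeback folded in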
473 static unsigned getPostIndexedOpcode(unsigned Opc) {
474 switch (Opc) {
475 default:
476 llvm_unreachable("Opcode has no post-indexed wise equivalent!");
477 case AArch64::STRSui:
478 case AArch64::STURSi:
479 return AArch64::STRSpost;
480 case AArch64::STRDui:
481 case AArch64::STURDi:
482 return AArch64::STRDpost;
483 case AArch64::STRQui:
484 case AArch64::STURQi:
485 return AArch64::STRQpost;
486 case AArch64::STRBBui:
487 return AArch64::STRBBpost;
488 case AArch64::STRHHui:
489 return AArch64::STRHHpost;
490 case AArch64::STRWui:
491 case AArch64::STURWi:
492 return AArch64::STRWpost;
493 case AArch64::STRXui:
494 case AArch64::STURXi:
495 return AArch64::STRXpost;
496 case AArch64::LDRSui:
497 case AArch64::LDURSi:
498 return AArch64::LDRSpost;
499 case AArch64::LDRDui:
500 case AArch64::LDURDi:
501 return AArch64::LDRDpost;
502 case AArch64::LDRQui:
503 case AArch64::LDURQi:
504 return AArch64::LDRQpost;
505 case AArch64::LDRBBui:
506 return AArch64::LDRBBpost;
507 case AArch64::LDRHHui:
508 return AArch64::LDRHHpost;
509 case AArch64::LDRWui:
510 case AArch64::LDURWi:
511 return AArch64::LDRWpost;
512 case AArch64::LDRXui:
513 case AArch64::LDURXi:
514 return AArch64::LDRXpost;
515 case AArch64::LDRSWui:
516 return AArch64::LDRSWpost;
517 case AArch64::LDPSi:
518 return AArch64::LDPSpost;
519 case AArch64::LDPSWi:
520 return AArch64::LDPSWpost;
521 case AArch64::LDPDi:
522 return AArch64::LDPDpost;
523 case AArch64::LDPQi:
524 return AArch64::LDPQpost;
525 case AArch64::LDPWi:
526 return AArch64::LDPWpost;
527 case AArch64::LDPXi:
528 return AArch64::LDPXpost;
529 case AArch64::STPSi:
530 return AArch64::STPSpost;
531 case AArch64::STPDi:
532 return AArch64::STPDpost;
533 case AArch64::STPQi:
534 return AArch64::STPQpost;
535 case AArch64::STPWi:
536 return AArch64::STPWpost;
537 case AArch64::STPXi:
538 return AArch64::STPXpost;
539 case AArch64::STGOffset:
540 return AArch64::STGPostIndex;
541 case AArch64::STZGOffset:
542 return AArch64::STZGPostIndex;
543 case AArch64::ST2GOffset:
544 return AArch64::ST2GPostIndex;
545 case AArch64::STZ2GOffset:
546 return AArch64::STZ2GPostIndex;
547 case AArch64::STGPi:
548 return AArch64::STGPpost;
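// Illustrative sketch (registers made up): the post-indexed form updates the
// base after the access instead, e.g.
//   str x0, [x1]          ; STRXui
//   add x1, x1, #16
// becomes
//   str x0, [x1], #16     ; STRXpost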
552 static bool isPairedLdSt(const MachineInstr &MI) {
553 switch (MI.getOpcode()) {
554 default:
555 return false;
556 case AArch64::LDPSi:
557 case AArch64::LDPSWi:
558 case AArch64::LDPDi:
559 case AArch64::LDPQi:
560 case AArch64::LDPWi:
561 case AArch64::LDPXi:
562 case AArch64::STPSi:
563 case AArch64::STPDi:
564 case AArch64::STPQi:
565 case AArch64::STPWi:
566 case AArch64::STPXi:
567 case AArch64::STGPi:
568 return true;
572 static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
574 unsigned OpcA = FirstMI.getOpcode();
575 unsigned OpcB = MI.getOpcode();
577 switch (OpcA) {
578 default:
579 return false;
580 case AArch64::STRSpre:
581 return (OpcB == AArch64::STRSui) || (OpcB == AArch64::STURSi);
582 case AArch64::STRDpre:
583 return (OpcB == AArch64::STRDui) || (OpcB == AArch64::STURDi);
584 case AArch64::STRQpre:
585 return (OpcB == AArch64::STRQui) || (OpcB == AArch64::STURQi);
586 case AArch64::STRWpre:
587 return (OpcB == AArch64::STRWui) || (OpcB == AArch64::STURWi);
588 case AArch64::STRXpre:
589 return (OpcB == AArch64::STRXui) || (OpcB == AArch64::STURXi);
590 case AArch64::LDRSpre:
591 return (OpcB == AArch64::LDRSui) || (OpcB == AArch64::LDURSi);
592 case AArch64::LDRDpre:
593 return (OpcB == AArch64::LDRDui) || (OpcB == AArch64::LDURDi);
594 case AArch64::LDRQpre:
595 return (OpcB == AArch64::LDRQui) || (OpcB == AArch64::LDURQi);
596 case AArch64::LDRWpre:
597 return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
598 case AArch64::LDRXpre:
599 return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
603 // Returns the scale and offset range of pre/post indexed variants of MI.
604 static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
605 int &MinOffset, int &MaxOffset) {
606 bool IsPaired = isPairedLdSt(MI);
607 bool IsTagStore = isTagStore(MI);
608 // ST*G and all paired ldst have the same scale in pre/post-indexed variants
609 // as in the "unsigned offset" variant.
610 // All other pre/post indexed ldst instructions are unscaled.
611 Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
613 if (IsPaired) {
614 MinOffset = -64;
615 MaxOffset = 63;
616 } else {
617 MinOffset = -256;
618 MaxOffset = 255;
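  // For example, by the rules above: the pre/post-indexed variants of STPXi
  // (STPXpre/STPXpost) keep the scale of 8 and an element range of [-64, 63],
  // i.e. byte offsets -512..504, while the variants of STRXui
  // (STRXpre/STRXpost) take an unscaled byte offset in [-256, 255].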
622 static MachineOperand &getLdStRegOp(MachineInstr &MI,
623 unsigned PairedRegOp = 0) {
624 assert(PairedRegOp < 2 && "Unexpected register operand idx.");
625 bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
626 if (IsPreLdSt)
627 PairedRegOp += 1;
628 unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
629 return MI.getOperand(Idx);
632 static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
633 unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
634 return MI.getOperand(Idx);
637 static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
638 unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
639 return MI.getOperand(Idx);
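// For illustration, the operand layouts assumed by the three helpers above
// (using X-register forms as examples):
//   LDRXui:  (Rt, Rn, imm)        -> reg idx 0, base idx 1, offset idx 2
//   LDPXi:   (Rt, Rt2, Rn, imm)   -> regs idx 0/1, base idx 2, offset idx 3
//   LDRXpre: (Rn_wb, Rt, Rn, imm) -> writeback def comes first, so reg idx 1,
//                                    base idx 2, offset idx 3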
642 static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
643 MachineInstr &StoreInst,
644 const AArch64InstrInfo *TII) {
645 assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
646 int LoadSize = TII->getMemScale(LoadInst);
647 int StoreSize = TII->getMemScale(StoreInst);
648 int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
649 ? getLdStOffsetOp(StoreInst).getImm()
650 : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
651 int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
652 ? getLdStOffsetOp(LoadInst).getImm()
653 : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
654 return (UnscaledStOffset <= UnscaledLdOffset) &&
655 (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
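// For example (operands made up): an 8-byte store to [sp, #8] covers bytes
// 8..15, so a 2-byte load from [sp, #10] reads bytes 10..11 and lies entirely
// within the stored range; such a load is a candidate for promotion below.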
658 static bool isPromotableZeroStoreInst(MachineInstr &MI) {
659 unsigned Opc = MI.getOpcode();
660 return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
661 isNarrowStore(Opc)) &&
662 getLdStRegOp(MI).getReg() == AArch64::WZR;
665 static bool isPromotableLoadFromStore(MachineInstr &MI) {
666 switch (MI.getOpcode()) {
667 default:
668 return false;
669 // Scaled instructions.
670 case AArch64::LDRBBui:
671 case AArch64::LDRHHui:
672 case AArch64::LDRWui:
673 case AArch64::LDRXui:
674 // Unscaled instructions.
675 case AArch64::LDURBBi:
676 case AArch64::LDURHHi:
677 case AArch64::LDURWi:
678 case AArch64::LDURXi:
679 return true;
683 static bool isMergeableLdStUpdate(MachineInstr &MI) {
684 unsigned Opc = MI.getOpcode();
685 switch (Opc) {
686 default:
687 return false;
688 // Scaled instructions.
689 case AArch64::STRSui:
690 case AArch64::STRDui:
691 case AArch64::STRQui:
692 case AArch64::STRXui:
693 case AArch64::STRWui:
694 case AArch64::STRHHui:
695 case AArch64::STRBBui:
696 case AArch64::LDRSui:
697 case AArch64::LDRDui:
698 case AArch64::LDRQui:
699 case AArch64::LDRXui:
700 case AArch64::LDRWui:
701 case AArch64::LDRHHui:
702 case AArch64::LDRBBui:
703 case AArch64::STGOffset:
704 case AArch64::STZGOffset:
705 case AArch64::ST2GOffset:
706 case AArch64::STZ2GOffset:
707 case AArch64::STGPi:
708 // Unscaled instructions.
709 case AArch64::STURSi:
710 case AArch64::STURDi:
711 case AArch64::STURQi:
712 case AArch64::STURWi:
713 case AArch64::STURXi:
714 case AArch64::LDURSi:
715 case AArch64::LDURDi:
716 case AArch64::LDURQi:
717 case AArch64::LDURWi:
718 case AArch64::LDURXi:
719 // Paired instructions.
720 case AArch64::LDPSi:
721 case AArch64::LDPSWi:
722 case AArch64::LDPDi:
723 case AArch64::LDPQi:
724 case AArch64::LDPWi:
725 case AArch64::LDPXi:
726 case AArch64::STPSi:
727 case AArch64::STPDi:
728 case AArch64::STPQi:
729 case AArch64::STPWi:
730 case AArch64::STPXi:
731 // Make sure this is a reg+imm (as opposed to an address reloc).
732 if (!getLdStOffsetOp(MI).isImm())
733 return false;
735 return true;
739 MachineBasicBlock::iterator
740 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
741 MachineBasicBlock::iterator MergeMI,
742 const LdStPairFlags &Flags) {
743 assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
744 "Expected promotable zero stores.");
746 MachineBasicBlock::iterator E = I->getParent()->end();
747 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
748 // If NextI is the second of the two instructions to be merged, we need
749 // to skip one further. Either way, the merge will invalidate the iterator,
750 // and we don't need to scan the new instruction, as it's a pairwise
751 // instruction, which we're not considering for further action anyway.
752 if (NextI == MergeMI)
753 NextI = next_nodbg(NextI, E);
755 unsigned Opc = I->getOpcode();
756 bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
757 int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
759 bool MergeForward = Flags.getMergeForward();
760 // Insert our new paired instruction after whichever of the paired
761 // instructions MergeForward indicates.
762 MachineBasicBlock::iterator InsertionPoint = MergeForward ? MergeMI : I;
763 // MergeForward also determines from which instruction we copy the base
764 // register operand, so we get flags compatible with the input code.
765 const MachineOperand &BaseRegOp =
766 MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
768 // Which register is Rt and which is Rt2 depends on the offset order.
769 MachineInstr *RtMI;
770 if (getLdStOffsetOp(*I).getImm() ==
771 getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
772 RtMI = &*MergeMI;
773 else
774 RtMI = &*I;
776 int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
777 // Change the scaled offset from small to large type.
778 if (IsScaled) {
779 assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
780 OffsetImm /= 2;
783 // Construct the new instruction.
784 DebugLoc DL = I->getDebugLoc();
785 MachineBasicBlock *MBB = I->getParent();
786 MachineInstrBuilder MIB;
787 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
788 .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
789 .add(BaseRegOp)
790 .addImm(OffsetImm)
791 .cloneMergedMemRefs({&*I, &*MergeMI})
792 .setMIFlags(I->mergeFlagsWith(*MergeMI));
793 (void)MIB;
795 LLVM_DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
796 LLVM_DEBUG(I->print(dbgs()));
797 LLVM_DEBUG(dbgs() << " ");
798 LLVM_DEBUG(MergeMI->print(dbgs()));
799 LLVM_DEBUG(dbgs() << " with instruction:\n ");
800 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
801 LLVM_DEBUG(dbgs() << "\n");
803 // Erase the old instructions.
804 I->eraseFromParent();
805 MergeMI->eraseFromParent();
806 return NextI;
809 // Apply Fn to all instructions between MI and the beginning of the block, until
810 // a def for DefReg is reached. Returns true iff Fn returns true for all
811 // visited instructions. Stops after visiting Limit instructions.
812 static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg,
813 const TargetRegisterInfo *TRI, unsigned Limit,
814 std::function<bool(MachineInstr &, bool)> &Fn) {
815 auto MBB = MI.getParent();
816 for (MachineInstr &I :
817 instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) {
818 if (!Limit)
819 return false;
820 --Limit;
822 bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) {
823 return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() &&
824 TRI->regsOverlap(MOP.getReg(), DefReg);
826 if (!Fn(I, isDef))
827 return false;
828 if (isDef)
829 break;
831 return true;
834 static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units,
835 const TargetRegisterInfo *TRI) {
837 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
838 if (MOP.isReg() && MOP.isKill())
839 Units.removeReg(MOP.getReg());
841 for (const MachineOperand &MOP : phys_regs_and_masks(MI))
842 if (MOP.isReg() && !MOP.isKill())
843 Units.addReg(MOP.getReg());
846 MachineBasicBlock::iterator
847 AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
848 MachineBasicBlock::iterator Paired,
849 const LdStPairFlags &Flags) {
850 MachineBasicBlock::iterator E = I->getParent()->end();
851 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
852 // If NextI is the second of the two instructions to be merged, we need
853 // to skip one further. Either way, the merge will invalidate the iterator,
854 // and we don't need to scan the new instruction, as it's a pairwise
855 // instruction, which we're not considering for further action anyway.
856 if (NextI == Paired)
857 NextI = next_nodbg(NextI, E);
859 int SExtIdx = Flags.getSExtIdx();
860 unsigned Opc =
861 SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
862 bool IsUnscaled = TII->hasUnscaledLdStOffset(Opc);
863 int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
865 bool MergeForward = Flags.getMergeForward();
867 Optional<MCPhysReg> RenameReg = Flags.getRenameReg();
868 if (MergeForward && RenameReg) {
869 MCRegister RegToRename = getLdStRegOp(*I).getReg();
870 DefinedInBB.addReg(*RenameReg);
872 // Return the sub/super register for RenameReg, matching the size of
873 // OriginalReg.
874 auto GetMatchingSubReg = [this,
875 RenameReg](MCPhysReg OriginalReg) -> MCPhysReg {
876 for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg))
877 if (TRI->getMinimalPhysRegClass(OriginalReg) ==
878 TRI->getMinimalPhysRegClass(SubOrSuper))
879 return SubOrSuper;
880 llvm_unreachable("Should have found matching sub or super register!");
883 std::function<bool(MachineInstr &, bool)> UpdateMIs =
884 [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) {
885 if (IsDef) {
886 bool SeenDef = false;
887 for (auto &MOP : MI.operands()) {
888 // Rename the first explicit definition and all implicit
889 // definitions matching RegToRename.
890 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
891 (!SeenDef || (MOP.isDef() && MOP.isImplicit())) &&
892 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
893 assert((MOP.isImplicit() ||
894 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
895 "Need renamable operands");
896 MOP.setReg(GetMatchingSubReg(MOP.getReg()));
897 SeenDef = true;
900 } else {
901 for (auto &MOP : MI.operands()) {
902 if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
903 TRI->regsOverlap(MOP.getReg(), RegToRename)) {
904 assert((MOP.isImplicit() ||
905 (MOP.isRenamable() && !MOP.isEarlyClobber())) &&
906 "Need renamable operands");
907 MOP.setReg(GetMatchingSubReg(MOP.getReg()));
911 LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n");
912 return true;
914 forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs);
916 #if !defined(NDEBUG)
917 // Make sure the register used for renaming is not used between the paired
918 // instructions. That would trash the content before the new paired
919 // instruction.
920 for (auto &MI :
921 iterator_range<MachineInstrBundleIterator<llvm::MachineInstr>>(
922 std::next(I), std::next(Paired)))
923 assert(all_of(MI.operands(),
924 [this, &RenameReg](const MachineOperand &MOP) {
925 return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
926 !TRI->regsOverlap(MOP.getReg(), *RenameReg);
927 }) &&
928 "Rename register used between paired instruction, trashing the "
929 "content");
930 #endif
933 // Insert our new paired instruction after whichever of the paired
934 // instructions MergeForward indicates.
935 MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
936 // MergeForward also determines from which instruction we copy the base
937 // register operand, so we get flags compatible with the input code.
938 const MachineOperand &BaseRegOp =
939 MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
941 int Offset = getLdStOffsetOp(*I).getImm();
942 int PairedOffset = getLdStOffsetOp(*Paired).getImm();
943 bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
944 if (IsUnscaled != PairedIsUnscaled) {
945 // We're trying to pair instructions that differ in how they are scaled. If
946 // I is scaled then scale the offset of Paired accordingly. Otherwise, do
947 // the opposite (i.e., make Paired's offset unscaled).
948 int MemSize = TII->getMemScale(*Paired);
949 if (PairedIsUnscaled) {
950 // If the unscaled offset isn't a multiple of the MemSize, we can't
951 // pair the operations together.
952 assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
953 "Offset should be a multiple of the stride!");
954 PairedOffset /= MemSize;
955 } else {
956 PairedOffset *= MemSize;
960 // Which register is Rt and which is Rt2 depends on the offset order.
961 // However, for pre load/stores the Rt should be the one of the pre
962 // load/store.
963 MachineInstr *RtMI, *Rt2MI;
964 if (Offset == PairedOffset + OffsetStride &&
965 !AArch64InstrInfo::isPreLdSt(*I)) {
966 RtMI = &*Paired;
967 Rt2MI = &*I;
968 // Here we swapped the assumption made for SExtIdx.
969 // I.e., we turn ldp I, Paired into ldp Paired, I.
970 // Update the index accordingly.
971 if (SExtIdx != -1)
972 SExtIdx = (SExtIdx + 1) % 2;
973 } else {
974 RtMI = &*I;
975 Rt2MI = &*Paired;
977 int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
978 // Scale the immediate offset, if necessary.
979 if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
980 assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
981 "Unscaled offset cannot be scaled.");
982 OffsetImm /= TII->getMemScale(*RtMI);
985 // Construct the new instruction.
986 MachineInstrBuilder MIB;
987 DebugLoc DL = I->getDebugLoc();
988 MachineBasicBlock *MBB = I->getParent();
989 MachineOperand RegOp0 = getLdStRegOp(*RtMI);
990 MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
991 // Kill flags may become invalid when moving stores for pairing.
992 if (RegOp0.isUse()) {
993 if (!MergeForward) {
994 // Clear kill flags on store if moving upwards. Example:
995 // STRWui %w0, ...
996 // USE %w1
997 // STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
998 RegOp0.setIsKill(false);
999 RegOp1.setIsKill(false);
1000 } else {
1001 // Clear kill flags of the first store's register. Example:
1002 // STRWui %w1, ...
1003 // USE kill %w1 ; need to clear kill flag when moving STRWui downwards
1004 // STRW %w0
1005 Register Reg = getLdStRegOp(*I).getReg();
1006 for (MachineInstr &MI : make_range(std::next(I), Paired))
1007 MI.clearRegisterKills(Reg, TRI);
1011 unsigned int MatchPairOpcode = getMatchingPairOpcode(Opc);
1012 MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(MatchPairOpcode));
1014 // Adds the pre-index operand for pre-indexed ld/st pairs.
1015 if (AArch64InstrInfo::isPreLdSt(*RtMI))
1016 MIB.addReg(BaseRegOp.getReg(), RegState::Define);
1018 MIB.add(RegOp0)
1019 .add(RegOp1)
1020 .add(BaseRegOp)
1021 .addImm(OffsetImm)
1022 .cloneMergedMemRefs({&*I, &*Paired})
1023 .setMIFlags(I->mergeFlagsWith(*Paired));
1025 (void)MIB;
1027 LLVM_DEBUG(
1028 dbgs() << "Creating pair load/store. Replacing instructions:\n ");
1029 LLVM_DEBUG(I->print(dbgs()));
1030 LLVM_DEBUG(dbgs() << " ");
1031 LLVM_DEBUG(Paired->print(dbgs()));
1032 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1033 if (SExtIdx != -1) {
1034 // Generate the sign extension for the proper result of the ldp.
1035 // I.e., with X1, that would be:
1036 // %w1 = KILL %w1, implicit-def %x1
1037 // %x1 = SBFMXri killed %x1, 0, 31
1038 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
1039 // Right now, DstMO has the extended register, since it comes from an
1040 // extended opcode.
1041 Register DstRegX = DstMO.getReg();
1042 // Get the W variant of that register.
1043 Register DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
1044 // Update the result of LDP to use the W instead of the X variant.
1045 DstMO.setReg(DstRegW);
1046 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1047 LLVM_DEBUG(dbgs() << "\n");
1048 // Make the machine verifier happy by providing a definition for
1049 // the X register.
1050 // Insert this definition right after the generated LDP, i.e., before
1051 // InsertionPoint.
1052 MachineInstrBuilder MIBKill =
1053 BuildMI(*MBB, InsertionPoint, DL, TII->get(TargetOpcode::KILL), DstRegW)
1054 .addReg(DstRegW)
1055 .addReg(DstRegX, RegState::Define);
1056 MIBKill->getOperand(2).setImplicit();
1057 // Create the sign extension.
1058 MachineInstrBuilder MIBSXTW =
1059 BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::SBFMXri), DstRegX)
1060 .addReg(DstRegX)
1061 .addImm(0)
1062 .addImm(31);
1063 (void)MIBSXTW;
1064 LLVM_DEBUG(dbgs() << " Extend operand:\n ");
1065 LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
1066 } else {
1067 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1069 LLVM_DEBUG(dbgs() << "\n");
1071 if (MergeForward)
1072 for (const MachineOperand &MOP : phys_regs_and_masks(*I))
1073 if (MOP.isReg() && MOP.isKill())
1074 DefinedInBB.addReg(MOP.getReg());
1076 // Erase the old instructions.
1077 I->eraseFromParent();
1078 Paired->eraseFromParent();
1080 return NextI;
1083 MachineBasicBlock::iterator
1084 AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
1085 MachineBasicBlock::iterator StoreI) {
1086 MachineBasicBlock::iterator NextI =
1087 next_nodbg(LoadI, LoadI->getParent()->end());
1089 int LoadSize = TII->getMemScale(*LoadI);
1090 int StoreSize = TII->getMemScale(*StoreI);
1091 Register LdRt = getLdStRegOp(*LoadI).getReg();
1092 const MachineOperand &StMO = getLdStRegOp(*StoreI);
1093 Register StRt = getLdStRegOp(*StoreI).getReg();
1094 bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
1096 assert((IsStoreXReg ||
1097 TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
1098 "Unexpected RegClass");
1100 MachineInstr *BitExtMI;
1101 if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
1102 // Remove the load if the destination register of the load is the same
1103 // register as the one holding the stored value.
1104 if (StRt == LdRt && LoadSize == 8) {
1105 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1106 LoadI->getIterator())) {
1107 if (MI.killsRegister(StRt, TRI)) {
1108 MI.clearRegisterKills(StRt, TRI);
1109 break;
1112 LLVM_DEBUG(dbgs() << "Remove load instruction:\n ");
1113 LLVM_DEBUG(LoadI->print(dbgs()));
1114 LLVM_DEBUG(dbgs() << "\n");
1115 LoadI->eraseFromParent();
1116 return NextI;
1118 // Replace the load with a mov if the load and store have the same size.
1119 BitExtMI =
1120 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1121 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
1122 .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
1123 .add(StMO)
1124 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1125 .setMIFlags(LoadI->getFlags());
1126 } else {
1127 // FIXME: Currently we disable this transformation in big-endian targets as
1128 // performance and correctness are verified only in little-endian.
1129 if (!Subtarget->isLittleEndian())
1130 return NextI;
1131 bool IsUnscaled = TII->hasUnscaledLdStOffset(*LoadI);
1132 assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
1133 "Unsupported ld/st match");
1134 assert(LoadSize <= StoreSize && "Invalid load size");
1135 int UnscaledLdOffset = IsUnscaled
1136 ? getLdStOffsetOp(*LoadI).getImm()
1137 : getLdStOffsetOp(*LoadI).getImm() * LoadSize;
1138 int UnscaledStOffset = IsUnscaled
1139 ? getLdStOffsetOp(*StoreI).getImm()
1140 : getLdStOffsetOp(*StoreI).getImm() * StoreSize;
1141 int Width = LoadSize * 8;
1142 unsigned DestReg =
1143 IsStoreXReg ? Register(TRI->getMatchingSuperReg(
1144 LdRt, AArch64::sub_32, &AArch64::GPR64RegClass))
1145 : LdRt;
1147 assert((UnscaledLdOffset >= UnscaledStOffset &&
1148 (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
1149 "Invalid offset");
1151 int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
1152 int Imms = Immr + Width - 1;
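    // Worked example (operands made up): for str x1, [sp, #8] followed by
    // ldrh w0, [sp, #10], UnscaledLdOffset - UnscaledStOffset = 2, so
    // Immr = 16 and, with Width = 16, Imms = 31; the load is then replaced
    // by a UBFM extracting bits 16..31 of the stored X register.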
1153 if (UnscaledLdOffset == UnscaledStOffset) {
1154 uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
1155 | ((Immr) << 6) // immr
1156 | ((Imms) << 0); // imms
1159 BitExtMI =
1160 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1161 TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
1162 DestReg)
1163 .add(StMO)
1164 .addImm(AndMaskEncoded)
1165 .setMIFlags(LoadI->getFlags());
1166 } else {
1167 BitExtMI =
1168 BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1169 TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
1170 DestReg)
1171 .add(StMO)
1172 .addImm(Immr)
1173 .addImm(Imms)
1174 .setMIFlags(LoadI->getFlags());
1178 // Clear kill flags between store and load.
1179 for (MachineInstr &MI : make_range(StoreI->getIterator(),
1180 BitExtMI->getIterator()))
1181 if (MI.killsRegister(StRt, TRI)) {
1182 MI.clearRegisterKills(StRt, TRI);
1183 break;
1186 LLVM_DEBUG(dbgs() << "Promoting load by replacing :\n ");
1187 LLVM_DEBUG(StoreI->print(dbgs()));
1188 LLVM_DEBUG(dbgs() << " ");
1189 LLVM_DEBUG(LoadI->print(dbgs()));
1190 LLVM_DEBUG(dbgs() << " with instructions:\n ");
1191 LLVM_DEBUG(StoreI->print(dbgs()));
1192 LLVM_DEBUG(dbgs() << " ");
1193 LLVM_DEBUG((BitExtMI)->print(dbgs()));
1194 LLVM_DEBUG(dbgs() << "\n");
1196 // Erase the old instructions.
1197 LoadI->eraseFromParent();
1198 return NextI;
1201 static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
1202 // Convert the byte offset used by unscaled instructions into an "element"
1203 // offset used by the scaled pair load/store instructions.
1204 if (IsUnscaled) {
1205 // If the byte-offset isn't a multiple of the stride, there's no point
1206 // trying to match it.
1207 if (Offset % OffsetStride)
1208 return false;
1209 Offset /= OffsetStride;
1211 return Offset <= 63 && Offset >= -64;
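// For example, LDPXi encodes a 7-bit signed element offset, so with 8-byte
// elements the reachable byte offsets are -512..504; a candidate pair at
// byte offset 520 is rejected here.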
1214 // Do alignment, specialized to power of 2 and for signed ints,
1215 // avoiding having to do a C-style cast from uint_64t to int when
1216 // using alignTo from include/llvm/Support/MathExtras.h.
1217 // FIXME: Move this function to include/MathExtras.h?
1218 static int alignTo(int Num, int PowOf2) {
1219 return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
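// For example, alignTo(5, 4) == 8 and alignTo(-3, 4) == 0, which is the
// rounding-up behavior the offset checks below rely on.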
1222 static bool mayAlias(MachineInstr &MIa,
1223 SmallVectorImpl<MachineInstr *> &MemInsns,
1224 AliasAnalysis *AA) {
1225 for (MachineInstr *MIb : MemInsns)
1226 if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
1227 return true;
1229 return false;
1232 bool AArch64LoadStoreOpt::findMatchingStore(
1233 MachineBasicBlock::iterator I, unsigned Limit,
1234 MachineBasicBlock::iterator &StoreI) {
1235 MachineBasicBlock::iterator B = I->getParent()->begin();
1236 MachineBasicBlock::iterator MBBI = I;
1237 MachineInstr &LoadMI = *I;
1238 Register BaseReg = getLdStBaseOp(LoadMI).getReg();
1240 // If the load is the first instruction in the block, there's obviously
1241 // not any matching store.
1242 if (MBBI == B)
1243 return false;
1245 // Track which register units have been modified and used between the first
1246 // insn and the second insn.
1247 ModifiedRegUnits.clear();
1248 UsedRegUnits.clear();
1250 unsigned Count = 0;
1251 do {
1252 MBBI = prev_nodbg(MBBI, B);
1253 MachineInstr &MI = *MBBI;
1255 // Don't count transient instructions towards the search limit since there
1256 // may be different numbers of them if e.g. debug information is present.
1257 if (!MI.isTransient())
1258 ++Count;
1260 // If the load instruction reads directly from the address to which the
1261 // store instruction writes and the stored value is not modified, we can
1262 // promote the load. Since we do not handle stores with pre-/post-index,
1263 // it's unnecessary to check if BaseReg is modified by the store itself.
1264 // Also, we can't handle stores without an immediate offset operand,
1265 // since the operand might instead be the address of a global variable.
1266 if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
1267 BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
1268 isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
1269 ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
1270 StoreI = MBBI;
1271 return true;
1274 if (MI.isCall())
1275 return false;
1277 // Update modified / uses register units.
1278 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1280 // Otherwise, if the base register is modified, we have no match, so
1281 // return early.
1282 if (!ModifiedRegUnits.available(BaseReg))
1283 return false;
1285 // If we encounter a store aliased with the load, return early.
1286 if (MI.mayStore() && LoadMI.mayAlias(AA, MI, /*UseTBAA*/ false))
1287 return false;
1288 } while (MBBI != B && Count < Limit);
1289 return false;
1292 // Returns true if FirstMI and MI are candidates for merging or pairing.
1293 // Otherwise, returns false.
1294 static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
1295 LdStPairFlags &Flags,
1296 const AArch64InstrInfo *TII) {
1297 // If this is volatile or if pairing is suppressed, not a candidate.
1298 if (MI.hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
1299 return false;
1301 // We should have already checked FirstMI for pair suppression and volatility.
1302 assert(!FirstMI.hasOrderedMemoryRef() &&
1303 !TII->isLdStPairSuppressed(FirstMI) &&
1304 "FirstMI shouldn't get here if either of these checks are true.");
1306 unsigned OpcA = FirstMI.getOpcode();
1307 unsigned OpcB = MI.getOpcode();
1309 // Opcodes match: If the opcodes are pre ld/st there is nothing more to check.
1310 if (OpcA == OpcB)
1311 return !AArch64InstrInfo::isPreLdSt(FirstMI);
1313 // Try to match a sign-extended load/store with a zero-extended load/store.
1314 bool IsValidLdStrOpc, PairIsValidLdStrOpc;
1315 unsigned NonSExtOpc = getMatchingNonSExtOpcode(OpcA, &IsValidLdStrOpc);
1316 assert(IsValidLdStrOpc &&
1317 "Given Opc should be a Load or Store with an immediate");
1318 // OpcA will be the first instruction in the pair.
1319 if (NonSExtOpc == getMatchingNonSExtOpcode(OpcB, &PairIsValidLdStrOpc)) {
1320 Flags.setSExtIdx(NonSExtOpc == (unsigned)OpcA ? 1 : 0);
1321 return true;
1324 // If the second instruction isn't even a mergable/pairable load/store, bail
1325 // out.
1326 if (!PairIsValidLdStrOpc)
1327 return false;
1329 // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
1330 // offsets.
1331 if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
1332 return false;
1334 // The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
1335 // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui
1336 // are candidate pairs that can be merged.
1337 if (isPreLdStPairCandidate(FirstMI, MI))
1338 return true;
1340 // Try to match an unscaled load/store with a scaled load/store.
1341 return TII->hasUnscaledLdStOffset(OpcA) != TII->hasUnscaledLdStOffset(OpcB) &&
1342 getMatchingPairOpcode(OpcA) == getMatchingPairOpcode(OpcB);
1344 // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair?
1347 static bool
1348 canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween,
1349 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1350 const TargetRegisterInfo *TRI) {
1351 if (!FirstMI.mayStore())
1352 return false;
1354 // Check if we can find an unused register which we can use to rename
1355 // the register used by the first load/store.
1356 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1357 MachineFunction &MF = *FirstMI.getParent()->getParent();
1358 if (!RegClass || !MF.getRegInfo().tracksLiveness())
1359 return false;
1361 auto RegToRename = getLdStRegOp(FirstMI).getReg();
1362 // For now, we only rename if the store operand gets killed at the store.
1363 if (!getLdStRegOp(FirstMI).isKill() &&
1364 !any_of(FirstMI.operands(),
1365 [TRI, RegToRename](const MachineOperand &MOP) {
1366 return MOP.isReg() && !MOP.isDebug() && MOP.getReg() &&
1367 MOP.isImplicit() && MOP.isKill() &&
1368 TRI->regsOverlap(RegToRename, MOP.getReg());
1369 })) {
1370 LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n");
1371 return false;
1373 auto canRenameMOP = [TRI](const MachineOperand &MOP) {
1374 if (MOP.isReg()) {
1375 auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg());
1376 // Renaming registers with multiple disjunct sub-registers (e.g. the
1377 // result of a LD3) means that all sub-registers are renamed, potentially
1378 // impacting other instructions we did not check. Bail out.
1379 // Note that this relies on the structure of the AArch64 register file. In
1380 // particular, a subregister cannot be written without overwriting the
1381 // whole register.
1382 if (RegClass->HasDisjunctSubRegs) {
1383 LLVM_DEBUG(
1384 dbgs()
1385 << " Cannot rename operands with multiple disjunct subregisters ("
1386 << MOP << ")\n");
1387 return false;
1390 return MOP.isImplicit() ||
1391 (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied());
1394 bool FoundDef = false;
1396 // For each instruction between FirstMI and the previous def for RegToRename,
1397 // we
1398 // * check if we can rename RegToRename in this instruction
1399 // * collect the registers used and required register classes for RegToRename.
1400 std::function<bool(MachineInstr &, bool)> CheckMIs = [&](MachineInstr &MI,
1401 bool IsDef) {
1402 LLVM_DEBUG(dbgs() << "Checking " << MI << "\n");
1403 // Currently we do not try to rename across frame-setup instructions.
1404 if (MI.getFlag(MachineInstr::FrameSetup)) {
1405 LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently ("
1406 << MI << ")\n");
1407 return false;
1410 UsedInBetween.accumulate(MI);
1412 // For a definition, check that we can rename the definition and exit the
1413 // loop.
1414 FoundDef = IsDef;
1416 // For defs, check if we can rename the first def of RegToRename.
1417 if (FoundDef) {
1418 // For some pseudo instructions, we might not generate code in the end
1419 // (e.g. KILL) and we would end up without a correct def for the rename
1420 // register.
1421 // TODO: This might be overly conservative and we could handle those cases
1422 // in multiple ways:
1423 // 1. Insert an extra copy, to materialize the def.
1424 // 2. Skip pseudo-defs until we find a non-pseudo def.
1425 if (MI.isPseudo()) {
1426 LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI
1427 << "\n");
1428 return false;
1431 for (auto &MOP : MI.operands()) {
1432 if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() ||
1433 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1434 continue;
1435 if (!canRenameMOP(MOP)) {
1436 LLVM_DEBUG(dbgs()
1437 << " Cannot rename " << MOP << " in " << MI << "\n");
1438 return false;
1440 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1442 return true;
1443 } else {
1444 for (auto &MOP : MI.operands()) {
1445 if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() ||
1446 !TRI->regsOverlap(MOP.getReg(), RegToRename))
1447 continue;
1449 if (!canRenameMOP(MOP)) {
1450 LLVM_DEBUG(dbgs()
1451 << " Cannot rename " << MOP << " in " << MI << "\n");
1452 return false;
1454 RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg()));
1457 return true;
1460 if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs))
1461 return false;
1463 if (!FoundDef) {
1464 LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n");
1465 return false;
1467 return true;
1470 // Check if we can find a physical register for renaming. This register must:
1471 // * not be defined up to FirstMI (checking DefinedInBB)
1472 // * not be used between the MI and the defining instruction of the register
1473 // to rename (checked using UsedInBetween).
1474 // * be available in all used register classes (checked using RequiredClasses).
1475 static Optional<MCPhysReg> tryToFindRegisterToRename(
1476 MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB,
1477 LiveRegUnits &UsedInBetween,
1478 SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
1479 const TargetRegisterInfo *TRI) {
1480 auto &MF = *FirstMI.getParent()->getParent();
1481 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1483 // Checks if any sub- or super-register of PR is callee saved.
1484 auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) {
1485 return any_of(TRI->sub_and_superregs_inclusive(PR),
1486 [&MF, TRI](MCPhysReg SubOrSuper) {
1487 return TRI->isCalleeSavedPhysReg(SubOrSuper, MF);
1491 // Check if PR or one of its sub- or super-registers can be used for all
1492 // required register classes.
1493 auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) {
1494 return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) {
1495 return any_of(TRI->sub_and_superregs_inclusive(PR),
1496 [C, TRI](MCPhysReg SubOrSuper) {
1497 return C == TRI->getMinimalPhysRegClass(SubOrSuper);
1502 auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg());
1503 for (const MCPhysReg &PR : *RegClass) {
1504 if (DefinedInBB.available(PR) && UsedInBetween.available(PR) &&
1505 !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) &&
1506 CanBeUsedForAllClasses(PR)) {
1507 DefinedInBB.addReg(PR);
1508 LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI)
1509 << "\n");
1510 return {PR};
1513 LLVM_DEBUG(dbgs() << "No rename register found from "
1514 << TRI->getRegClassName(RegClass) << "\n");
1515 return None;
1518 /// Scan the instructions looking for a load/store that can be combined with the
1519 /// current instruction into a wider equivalent or a load/store pair.
1520 MachineBasicBlock::iterator
1521 AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
1522 LdStPairFlags &Flags, unsigned Limit,
1523 bool FindNarrowMerge) {
1524 MachineBasicBlock::iterator E = I->getParent()->end();
1525 MachineBasicBlock::iterator MBBI = I;
1526 MachineBasicBlock::iterator MBBIWithRenameReg;
1527 MachineInstr &FirstMI = *I;
1528 MBBI = next_nodbg(MBBI, E);
1530 bool MayLoad = FirstMI.mayLoad();
1531 bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
1532 Register Reg = getLdStRegOp(FirstMI).getReg();
1533 Register BaseReg = getLdStBaseOp(FirstMI).getReg();
1534 int Offset = getLdStOffsetOp(FirstMI).getImm();
1535 int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
1536 bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
1538 Optional<bool> MaybeCanRename = None;
1539 if (!EnableRenaming)
1540 MaybeCanRename = {false};
1542 SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses;
1543 LiveRegUnits UsedInBetween;
1544 UsedInBetween.init(*TRI);
1546 Flags.clearRenameReg();
1548 // Track which register units have been modified and used between the first
1549 // insn (inclusive) and the second insn.
1550 ModifiedRegUnits.clear();
1551 UsedRegUnits.clear();
1553 // Remember any instructions that read/write memory between FirstMI and MI.
1554 SmallVector<MachineInstr *, 4> MemInsns;
1556 for (unsigned Count = 0; MBBI != E && Count < Limit;
1557 MBBI = next_nodbg(MBBI, E)) {
1558 MachineInstr &MI = *MBBI;
1560 UsedInBetween.accumulate(MI);
1562 // Don't count transient instructions towards the search limit since there
1563 // may be different numbers of them if e.g. debug information is present.
1564 if (!MI.isTransient())
1565 ++Count;
1567 Flags.setSExtIdx(-1);
1568 if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
1569 getLdStOffsetOp(MI).isImm()) {
1570 assert(MI.mayLoadOrStore() && "Expected memory operation.");
1571 // If we've found another instruction with the same opcode, check to see
1572 // if the base and offset are compatible with our starting instruction.
1573 // These instructions all have scaled immediate operands, so we just
1574 // check for +1/-1. Make sure to check the new instruction offset is
1575 // actually an immediate and not a symbolic reference destined for
1576 // a relocation.
1577 Register MIBaseReg = getLdStBaseOp(MI).getReg();
1578 int MIOffset = getLdStOffsetOp(MI).getImm();
1579 bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
1580 if (IsUnscaled != MIIsUnscaled) {
1581 // We're trying to pair instructions that differ in how they are scaled.
1582 // If FirstMI is scaled then scale the offset of MI accordingly.
1583 // Otherwise, do the opposite (i.e., make MI's offset unscaled).
1584 int MemSize = TII->getMemScale(MI);
1585 if (MIIsUnscaled) {
1586 // If the unscaled offset isn't a multiple of the MemSize, we can't
1587 // pair the operations together: bail and keep looking.
1588 if (MIOffset % MemSize) {
1589 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1590 UsedRegUnits, TRI);
1591 MemInsns.push_back(&MI);
1592 continue;
1594 MIOffset /= MemSize;
1595 } else {
1596 MIOffset *= MemSize;
1600 bool IsPreLdSt = isPreLdStPairCandidate(FirstMI, MI);
1602 if (BaseReg == MIBaseReg) {
1603 // If the offset of the second ld/st is not equal to the size of the
1604 // destination register, it can't be paired with a pre-index ld/st
1605 // pair. Additionally, if the base reg is used or modified, the operations
1606 // can't be paired: bail and keep looking.
1607 if (IsPreLdSt) {
1608 bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
1609 bool IsBaseRegUsed =
1610 !UsedRegUnits.available(getLdStBaseOp(MI).getReg());
1611 bool IsBaseRegModified =
1612 !ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
1613 // If the stored value and the address of the second instruction are
1614 // the same, the store needs to use the updated register and therefore
1615 // must not be folded.
1616 bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
1617 getLdStBaseOp(MI).getReg());
1618 if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
1619 IsMIRegTheSame) {
1620 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1621 UsedRegUnits, TRI);
1622 MemInsns.push_back(&MI);
1623 continue;
1625 } else {
1626 if ((Offset != MIOffset + OffsetStride) &&
1627 (Offset + OffsetStride != MIOffset)) {
1628 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1629 UsedRegUnits, TRI);
1630 MemInsns.push_back(&MI);
1631 continue;
1635 int MinOffset = Offset < MIOffset ? Offset : MIOffset;
1636 if (FindNarrowMerge) {
1637 // If the alignment requirements of the scaled wide load/store
1638 // instruction can't express the offset of the scaled narrow input,
1639 // bail and keep looking. For promotable zero stores, allow only when
1640 // the stored value is the same (i.e., WZR).
1641 if ((!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) ||
1642 (IsPromotableZeroStore && Reg != getLdStRegOp(MI).getReg())) {
1643 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1644 UsedRegUnits, TRI);
1645 MemInsns.push_back(&MI);
1646 continue;
1648 } else {
1649 // Pairwise instructions have a 7-bit signed offset field. Single
1650 // insns have a 12-bit unsigned offset field. If the resultant
1651 // immediate offset of merging these instructions is out of range for
1652 // a pairwise instruction, bail and keep looking.
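// For instance (assuming 64-bit accesses, scale 8), the pair immediate
// covers scaled offsets -64..63, i.e. byte offsets -512..504; a candidate
// at byte offset 1024 (scaled immediate 128) fits a single ldr but no ldp,
// so it is rejected here.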
1653 if (!inBoundsForPair(IsUnscaled, MinOffset, OffsetStride)) {
1654 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1655 UsedRegUnits, TRI);
1656 MemInsns.push_back(&MI);
1657 continue;
1659 // If the alignment requirements of the paired (scaled) instruction
1660 // can't express the offset of the unscaled input, bail and keep
1661 // looking.
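// E.g. (illustrative), an unscaled "ldur x0, [x2, #3]" cannot become half
// of an ldp, since pair offsets must be a multiple of the access size
// (8 here), so the alignTo check below rejects it.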
1662 if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
1663 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
1664 UsedRegUnits, TRI);
1665 MemInsns.push_back(&MI);
1666 continue;
1669 // If the destination register of one load is the same register or a
1670 // sub/super register of the other load, bail and keep looking. A
1671 // load-pair instruction with both destination registers the same is
1672 // UNPREDICTABLE and will result in an exception.
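// E.g., merging "ldr x0, [x2]" with "ldr x0, [x2, #8]" would produce
// "ldp x0, x0, [x2]", which must not be formed.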
1673 if (MayLoad &&
1674 TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
1675 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1676 TRI);
1677 MemInsns.push_back(&MI);
1678 continue;
1681 // If the BaseReg has been modified, then we cannot do the optimization.
1682 // For example, in the following pattern
1683 // ldr x1, [x2]
1684 // ldr x2, [x3]
1685 // ldr x4, [x2, #8],
1686 // the first and third ldr cannot be converted to ldp x1, x4, [x2]
1687 if (!ModifiedRegUnits.available(BaseReg))
1688 return E;
1690 // If the Rt of the second instruction was not modified or used between
1691 // the two instructions and none of the instructions between the second
1692 // and first alias with the second, we can combine the second into the
1693 // first.
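// For example (illustrative sequence):
//   ldr x0, [x2]
//   add x3, x3, #1      ; touches neither x1 nor x2, and not memory
//   ldr x1, [x2, #8]
// nothing in between modifies or uses x1 or aliases the second load, so
// the second load can be combined into the first as "ldp x0, x1, [x2]".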
1694 if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
1695 !(MI.mayLoad() &&
1696 !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
1697 !mayAlias(MI, MemInsns, AA)) {
1699 Flags.setMergeForward(false);
1700 Flags.clearRenameReg();
1701 return MBBI;
1704 // Likewise, if the Rt of the first instruction is not modified or used
1705 // between the two instructions and none of the instructions between the
1706 // first and the second alias with the first, we can combine the first
1707 // into the second.
1708 if (!(MayLoad &&
1709 !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
1710 !mayAlias(FirstMI, MemInsns, AA)) {
1712 if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
1713 Flags.setMergeForward(true);
1714 Flags.clearRenameReg();
1715 return MBBI;
1718 if (DebugCounter::shouldExecute(RegRenamingCounter)) {
1719 if (!MaybeCanRename)
1720 MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
1721 RequiredClasses, TRI)};
1723 if (*MaybeCanRename) {
1724 Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
1725 FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
1726 TRI);
1727 if (MaybeRenameReg) {
1728 Flags.setRenameReg(*MaybeRenameReg);
1729 Flags.setMergeForward(true);
1730 MBBIWithRenameReg = MBBI;
1735 // Unable to combine these instructions due to interference in between.
1736 // Keep looking.
1740 if (Flags.getRenameReg())
1741 return MBBIWithRenameReg;
1743 // The instruction wasn't a matching load or store. Stop searching if we
1744 // encounter a call instruction that might modify memory.
1745 if (MI.isCall())
1746 return E;
1748 // Update modified / used register units.
1749 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1751 // Otherwise, if the base register is modified, we have no match, so
1752 // return early.
1753 if (!ModifiedRegUnits.available(BaseReg))
1754 return E;
1756 // Update list of instructions that read/write memory.
1757 if (MI.mayLoadOrStore())
1758 MemInsns.push_back(&MI);
1760 return E;
1763 MachineBasicBlock::iterator
1764 AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
1765 MachineBasicBlock::iterator Update,
1766 bool IsPreIdx) {
1767 assert((Update->getOpcode() == AArch64::ADDXri ||
1768 Update->getOpcode() == AArch64::SUBXri) &&
1769 "Unexpected base register update instruction to merge!");
1770 MachineBasicBlock::iterator E = I->getParent()->end();
1771 MachineBasicBlock::iterator NextI = next_nodbg(I, E);
1772 // Return the instruction following the merged instruction, which is
1773 // the instruction following our unmerged load, unless that's the add/sub
1774 // instruction we're merging, in which case it's the one after that.
1775 if (NextI == Update)
1776 NextI = next_nodbg(NextI, E);
1778 int Value = Update->getOperand(2).getImm();
1779 assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
1780 "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
1781 if (Update->getOpcode() == AArch64::SUBXri)
1782 Value = -Value;
1784 unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
1785 : getPostIndexedOpcode(I->getOpcode());
1786 MachineInstrBuilder MIB;
1787 int Scale, MinOffset, MaxOffset;
1788 getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
1789 if (!isPairedLdSt(*I)) {
1790 // Non-paired instruction.
1791 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1792 .add(getLdStRegOp(*Update))
1793 .add(getLdStRegOp(*I))
1794 .add(getLdStBaseOp(*I))
1795 .addImm(Value / Scale)
1796 .setMemRefs(I->memoperands())
1797 .setMIFlags(I->mergeFlagsWith(*Update));
1798 } else {
1799 // Paired instruction.
1800 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
1801 .add(getLdStRegOp(*Update))
1802 .add(getLdStRegOp(*I, 0))
1803 .add(getLdStRegOp(*I, 1))
1804 .add(getLdStBaseOp(*I))
1805 .addImm(Value / Scale)
1806 .setMemRefs(I->memoperands())
1807 .setMIFlags(I->mergeFlagsWith(*Update));
1809 (void)MIB;
1811 if (IsPreIdx) {
1812 ++NumPreFolded;
1813 LLVM_DEBUG(dbgs() << "Creating pre-indexed load/store.");
1814 } else {
1815 ++NumPostFolded;
1816 LLVM_DEBUG(dbgs() << "Creating post-indexed load/store.");
1818 LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
1819 LLVM_DEBUG(I->print(dbgs()));
1820 LLVM_DEBUG(dbgs() << " ");
1821 LLVM_DEBUG(Update->print(dbgs()));
1822 LLVM_DEBUG(dbgs() << " with instruction:\n ");
1823 LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
1824 LLVM_DEBUG(dbgs() << "\n");
1826 // Erase the old instructions for the block.
1827 I->eraseFromParent();
1828 Update->eraseFromParent();
1830 return NextI;
1833 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
1834 MachineInstr &MI,
1835 unsigned BaseReg, int Offset) {
1836 switch (MI.getOpcode()) {
1837 default:
1838 break;
1839 case AArch64::SUBXri:
1840 case AArch64::ADDXri:
1841 // Make sure it's a vanilla immediate operand, not a relocation or
1842 // anything else we can't handle.
1843 if (!MI.getOperand(2).isImm())
1844 break;
1845 // Watch out for 1 << 12 shifted value.
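// E.g., "add x1, x1, #1, lsl #12" really adds 4096; the shifted form cannot
// be folded into a pre-/post-index immediate, so it is rejected below.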
1846 if (AArch64_AM::getShiftValue(MI.getOperand(3).getImm()))
1847 break;
1849 // The update instruction source and destination register must be the
1850 // same as the load/store base register.
1851 if (MI.getOperand(0).getReg() != BaseReg ||
1852 MI.getOperand(1).getReg() != BaseReg)
1853 break;
1855 int UpdateOffset = MI.getOperand(2).getImm();
1856 if (MI.getOpcode() == AArch64::SUBXri)
1857 UpdateOffset = -UpdateOffset;
1859 // The immediate must be a multiple of the scaling factor of the pre/post
1860 // indexed instruction.
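// E.g. (illustrative), for "ldp x0, x1, [x2]" the writeback form scales its
// immediate by 8, so "add x2, x2, #24" is a candidate (24 / 8 = 3) while
// "add x2, x2, #20" is not.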
1861 int Scale, MinOffset, MaxOffset;
1862 getPrePostIndexedMemOpInfo(MemMI, Scale, MinOffset, MaxOffset);
1863 if (UpdateOffset % Scale != 0)
1864 break;
1866 // Scaled offset must fit in the instruction immediate.
1867 int ScaledOffset = UpdateOffset / Scale;
1868 if (ScaledOffset > MaxOffset || ScaledOffset < MinOffset)
1869 break;
1871 // If we have a non-zero Offset, we check that it matches the amount
1872 // we're adding to the register.
1873 if (!Offset || Offset == UpdateOffset)
1874 return true;
1875 break;
1877 return false;
1880 static bool needsWinCFI(const MachineFunction *MF) {
1881 return MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1882 MF->getFunction().needsUnwindTableEntry();
1885 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
1886 MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
1887 MachineBasicBlock::iterator E = I->getParent()->end();
1888 MachineInstr &MemMI = *I;
1889 MachineBasicBlock::iterator MBBI = I;
1891 Register BaseReg = getLdStBaseOp(MemMI).getReg();
1892 int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
1894 // Scan forward looking for post-index opportunities. Updating instructions
1895 // can't be formed if the memory instruction doesn't have the offset we're
1896 // looking for.
1897 if (MIUnscaledOffset != UnscaledOffset)
1898 return E;
1900 // If the base register overlaps a source/destination register, we can't
1901 // merge the update. This does not apply to tag store instructions, which
1902 // ignore the address part of the source register.
1903 // Nor does it apply to STGPi, which, unlike normal stores, has no
1904 // unpredictable behavior in this case and always performs writeback after
1905 // reading the source register value.
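// E.g., "ldr x0, [x0]" followed by "add x0, x0, #8" is not turned into
// "ldr x0, [x0], #8": a post-indexed load whose destination equals the
// written-back base register is not a valid transformation here.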
1906 if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
1907 bool IsPairedInsn = isPairedLdSt(MemMI);
1908 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1909 Register DestReg = getLdStRegOp(MemMI, i).getReg();
1910 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1911 return E;
1915 // Track which register units have been modified and used between the first
1916 // insn (inclusive) and the second insn.
1917 ModifiedRegUnits.clear();
1918 UsedRegUnits.clear();
1919 MBBI = next_nodbg(MBBI, E);
1921 // We can't post-increment the stack pointer if any instruction between
1922 // the memory access (I) and the increment (MBBI) can access the memory
1923 // region defined by [SP, MBBI].
1924 const bool BaseRegSP = BaseReg == AArch64::SP;
1925 if (BaseRegSP && needsWinCFI(I->getMF())) {
1926 // FIXME: For now, we always block the optimization over SP on Windows
1927 // targets, as it would require adjusting the unwind/debug info; getting
1928 // the unwind info wrong can actually cause a miscompile.
1929 return E;
1932 for (unsigned Count = 0; MBBI != E && Count < Limit;
1933 MBBI = next_nodbg(MBBI, E)) {
1934 MachineInstr &MI = *MBBI;
1936 // Don't count transient instructions towards the search limit since there
1937 // may be different numbers of them if e.g. debug information is present.
1938 if (!MI.isTransient())
1939 ++Count;
1941 // If we found a match, return it.
1942 if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
1943 return MBBI;
1945 // Update the status of what the instruction clobbered and used.
1946 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
1948 // Otherwise, if the base register is used or modified, we have no match, so
1949 // return early.
1950 // If we are optimizing SP, do not allow instructions that may load or store
1951 // in between the load and the optimized value update.
1952 if (!ModifiedRegUnits.available(BaseReg) ||
1953 !UsedRegUnits.available(BaseReg) ||
1954 (BaseRegSP && MBBI->mayLoadOrStore()))
1955 return E;
1957 return E;
1960 MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
1961 MachineBasicBlock::iterator I, unsigned Limit) {
1962 MachineBasicBlock::iterator B = I->getParent()->begin();
1963 MachineBasicBlock::iterator E = I->getParent()->end();
1964 MachineInstr &MemMI = *I;
1965 MachineBasicBlock::iterator MBBI = I;
1966 MachineFunction &MF = *MemMI.getMF();
1968 Register BaseReg = getLdStBaseOp(MemMI).getReg();
1969 int Offset = getLdStOffsetOp(MemMI).getImm();
1971 // If the load/store is the first instruction in the block, there's obviously
1972 // not any matching update. Ditto if the memory offset isn't zero.
1973 if (MBBI == B || Offset != 0)
1974 return E;
1975 // If the base register overlaps a destination register, we can't
1976 // merge the update.
1977 if (!isTagStore(MemMI)) {
1978 bool IsPairedInsn = isPairedLdSt(MemMI);
1979 for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
1980 Register DestReg = getLdStRegOp(MemMI, i).getReg();
1981 if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
1982 return E;
1986 const bool BaseRegSP = BaseReg == AArch64::SP;
1987 if (BaseRegSP && needsWinCFI(I->getMF())) {
1988 // FIXME: For now, we always block the optimization over SP on Windows
1989 // targets, as it would require adjusting the unwind/debug info; getting
1990 // the unwind info wrong can actually cause a miscompile.
1991 return E;
1994 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1995 unsigned RedZoneSize =
1996 Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
1998 // Track which register units have been modified and used between the first
1999 // insn (inclusive) and the second insn.
2000 ModifiedRegUnits.clear();
2001 UsedRegUnits.clear();
2002 unsigned Count = 0;
2003 bool MemAccessBeforeSPPreInc = false;
2004 do {
2005 MBBI = prev_nodbg(MBBI, B);
2006 MachineInstr &MI = *MBBI;
2008 // Don't count transient instructions towards the search limit since there
2009 // may be different numbers of them if e.g. debug information is present.
2010 if (!MI.isTransient())
2011 ++Count;
2013 // If we found a match, return it.
2014 if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset)) {
2015 // Check that the update value is within our red zone limit (which may be
2016 // zero).
2017 if (MemAccessBeforeSPPreInc && MBBI->getOperand(2).getImm() > RedZoneSize)
2018 return E;
2019 return MBBI;
2022 // Update the status of what the instruction clobbered and used.
2023 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2025 // Otherwise, if the base register is used or modified, we have no match, so
2026 // return early.
2027 if (!ModifiedRegUnits.available(BaseReg) ||
2028 !UsedRegUnits.available(BaseReg))
2029 return E;
2030 // Keep track of whether we have a memory access before an SP pre-increment;
2031 // in that case we need to validate later that the update amount respects
2032 // the red zone.
2033 if (BaseRegSP && MBBI->mayLoadOrStore())
2034 MemAccessBeforeSPPreInc = true;
2035 } while (MBBI != B && Count < Limit);
2036 return E;
2039 bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
2040 MachineBasicBlock::iterator &MBBI) {
2041 MachineInstr &MI = *MBBI;
2042 // If this is a volatile or ordered load, don't mess with it.
2043 if (MI.hasOrderedMemoryRef())
2044 return false;
2046 // Make sure this is a reg+imm.
2047 // FIXME: It is possible to extend it to handle reg+reg cases.
2048 if (!getLdStOffsetOp(MI).isImm())
2049 return false;
2051 // Look backward up to LdStLimit instructions.
2052 MachineBasicBlock::iterator StoreI;
2053 if (findMatchingStore(MBBI, LdStLimit, StoreI)) {
2054 ++NumLoadsFromStoresPromoted;
2055 // Promote the load. Keeping the iterator straight is a
2056 // pain, so we let the merge routine tell us what the next instruction
2057 // is after it's done mucking about.
2058 MBBI = promoteLoadFromStore(MBBI, StoreI);
2059 return true;
2061 return false;
2064 // Merge adjacent zero stores into a wider store.
2065 bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
2066 MachineBasicBlock::iterator &MBBI) {
2067 assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
2068 MachineInstr &MI = *MBBI;
2069 MachineBasicBlock::iterator E = MI.getParent()->end();
2071 if (!TII->isCandidateToMergeOrPair(MI))
2072 return false;
2074 // Look ahead up to LdStLimit instructions for a mergable instruction.
2075 LdStPairFlags Flags;
2076 MachineBasicBlock::iterator MergeMI =
2077 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
2078 if (MergeMI != E) {
2079 ++NumZeroStoresPromoted;
2081 // Keeping the iterator straight is a pain, so we let the merge routine tell
2082 // us what the next instruction is after it's done mucking about.
2083 MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
2084 return true;
2086 return false;
2089 // Find loads and stores that can be merged into a single load or store pair
2090 // instruction.
2091 bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
2092 MachineInstr &MI = *MBBI;
2093 MachineBasicBlock::iterator E = MI.getParent()->end();
2095 if (!TII->isCandidateToMergeOrPair(MI))
2096 return false;
2098 // Early exit if the offset cannot possibly match: pairs have 6 bits of
2099 // positive range, plus we allow one extra in case we find a later insn
2100 // that matches with Offset-1.
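// E.g. (illustrative), a scaled 64-bit load at immediate 70 (byte offset
// 560) can never pair: even a later insn matching at Offset-1 = 69 is
// outside the pair's scaled range of -64..63.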
2101 bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
2102 int Offset = getLdStOffsetOp(MI).getImm();
2103 int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
2104 // Allow one more for offset.
2105 if (Offset > 0)
2106 Offset -= OffsetStride;
2107 if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
2108 return false;
2110 // Look ahead up to LdStLimit instructions for a pairable instruction.
2111 LdStPairFlags Flags;
2112 MachineBasicBlock::iterator Paired =
2113 findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ false);
2114 if (Paired != E) {
2115 ++NumPairCreated;
2116 if (TII->hasUnscaledLdStOffset(MI))
2117 ++NumUnscaledPairCreated;
2118 // Keeping the iterator straight is a pain, so we let the merge routine tell
2119 // us what the next instruction is after it's done mucking about.
2120 auto Prev = std::prev(MBBI);
2121 MBBI = mergePairedInsns(MBBI, Paired, Flags);
2122 // Collect liveness info for instructions between Prev and the new position
2123 // MBBI.
2124 for (auto I = std::next(Prev); I != MBBI; I++)
2125 updateDefinedRegisters(*I, DefinedInBB, TRI);
2127 return true;
2129 return false;
2132 bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2133 (MachineBasicBlock::iterator &MBBI) {
2134 MachineInstr &MI = *MBBI;
2135 MachineBasicBlock::iterator E = MI.getParent()->end();
2136 MachineBasicBlock::iterator Update;
2138 // Look forward to try to form a post-index instruction. For example,
2139 // ldr x0, [x20]
2140 // add x20, x20, #32
2141 // merged into:
2142 // ldr x0, [x20], #32
2143 Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
2144 if (Update != E) {
2145 // Merge the update into the ld/st.
2146 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
2147 return true;
2150 // Don't know how to handle unscaled pre/post-index versions below, so bail.
2151 if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2152 return false;
2154 // Look back to try to find a pre-index instruction. For example,
2155 // add x0, x0, #8
2156 // ldr x1, [x0]
2157 // merged into:
2158 // ldr x1, [x0, #8]!
2159 Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
2160 if (Update != E) {
2161 // Merge the update into the ld/st.
2162 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2163 return true;
2166 // The immediate in the load/store is scaled by the size of the memory
2167 // operation. The immediate in the add we're looking for,
2168 // however, is not, so adjust here.
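// E.g., "ldr x1, [x0, #8]" carries a scaled immediate of 1; the matching
// update is "add x0, x0, #8", so we search with UnscaledOffset = 1 * 8.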
2169 int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
2171 // Look forward to try to find a pre-index instruction. For example,
2172 // ldr x1, [x0, #64]
2173 // add x0, x0, #64
2174 // merged into:
2175 // ldr x1, [x0, #64]!
2176 Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
2177 if (Update != E) {
2178 // Merge the update into the ld/st.
2179 MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
2180 return true;
2183 return false;
2186 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2187 bool EnableNarrowZeroStOpt) {
2189 bool Modified = false;
2190 // Four transformations to do here:
2191 // 1) Find loads that directly read from stores and promote them by
2192 // replacing with mov instructions. If the store is wider than the load,
2193 // the load will be replaced with a bitfield extract.
2194 // e.g.,
2195 // str w1, [x0, #4]
2196 // ldrh w2, [x0, #6]
2197 // ; becomes
2198 // str w1, [x0, #4]
2199 // lsr w2, w1, #16
2200 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2201 MBBI != E;) {
2202 if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
2203 Modified = true;
2204 else
2205 ++MBBI;
2207 // 2) Merge adjacent zero stores into a wider store.
2208 // e.g.,
2209 // strh wzr, [x0]
2210 // strh wzr, [x0, #2]
2211 // ; becomes
2212 // str wzr, [x0]
2213 // e.g.,
2214 // str wzr, [x0]
2215 // str wzr, [x0, #4]
2216 // ; becomes
2217 // str xzr, [x0]
2218 if (EnableNarrowZeroStOpt)
2219 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2220 MBBI != E;) {
2221 if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
2222 Modified = true;
2223 else
2224 ++MBBI;
2226 // 3) Find loads and stores that can be merged into a single load or store
2227 // pair instruction.
2228 // e.g.,
2229 // ldr x0, [x2]
2230 // ldr x1, [x2, #8]
2231 // ; becomes
2232 // ldp x0, x1, [x2]
2234 if (MBB.getParent()->getRegInfo().tracksLiveness()) {
2235 DefinedInBB.clear();
2236 DefinedInBB.addLiveIns(MBB);
2239 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2240 MBBI != E;) {
2241 // Track currently live registers up to this point, to help with
2242 // searching for a rename register on demand.
2243 updateDefinedRegisters(*MBBI, DefinedInBB, TRI);
2244 if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
2245 Modified = true;
2246 else
2247 ++MBBI;
2249 // 4) Find base register updates that can be merged into the load or store
2250 // as a base-reg writeback.
2251 // e.g.,
2252 // ldr x0, [x2]
2253 // add x2, x2, #4
2254 // ; becomes
2255 // ldr x0, [x2], #4
2256 for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2257 MBBI != E;) {
2258 if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
2259 Modified = true;
2260 else
2261 ++MBBI;
2264 return Modified;
2267 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
2268 if (skipFunction(Fn.getFunction()))
2269 return false;
2271 Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
2272 TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
2273 TRI = Subtarget->getRegisterInfo();
2274 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2276 // Resize the modified and used register unit trackers. We do this once
2277 // per function and then clear the register units each time we optimize a load
2278 // or store.
2279 ModifiedRegUnits.init(*TRI);
2280 UsedRegUnits.init(*TRI);
2281 DefinedInBB.init(*TRI);
2283 bool Modified = false;
2284 bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
2285 for (auto &MBB : Fn) {
2286 auto M = optimizeBlock(MBB, enableNarrowZeroStOpt);
2287 Modified |= M;
2290 return Modified;
2293 // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
2294 // stores near one another? Note: The pre-RA instruction scheduler already has
2295 // hooks to try and schedule pairable loads/stores together to improve pairing
2296 // opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
2298 // FIXME: When pairing store instructions it's very possible for this pass to
2299 // hoist a store with a KILL marker above another use (without a KILL marker).
2300 // The resulting IR is invalid, but nothing uses the KILL markers after this
2301 // pass, so it's never caused a problem in practice.
2303 /// createAArch64LoadStoreOptimizationPass - returns an instance of the
2304 /// load / store optimization pass.
2305 FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
2306 return new AArch64LoadStoreOpt();