1 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineOperand.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetRegisterInfo.h"
29 #include "llvm/IR/DebugLoc.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Support/raw_ostream.h"
48 #define DEBUG_TYPE "hexbit"
52 static cl::opt
<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden
,
53 cl::init(true), cl::desc("Preserve subregisters in tied operands"));
54 static cl::opt
<bool> GenExtract("hexbit-extract", cl::Hidden
,
55 cl::init(true), cl::desc("Generate extract instructions"));
56 static cl::opt
<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden
,
57 cl::init(true), cl::desc("Generate bitsplit instructions"));
59 static cl::opt
<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden
,
60 cl::init(std::numeric_limits
<unsigned>::max()));
61 static unsigned CountExtract
= 0;
62 static cl::opt
<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden
,
63 cl::init(std::numeric_limits
<unsigned>::max()));
64 static unsigned CountBitSplit
= 0;
66 static cl::opt
<unsigned> RegisterSetLimit("hexbit-registerset-limit",
67 cl::Hidden
, cl::init(1000));
71 void initializeHexagonBitSimplifyPass(PassRegistry
& Registry
);
72 FunctionPass
*createHexagonBitSimplify();
74 } // end namespace llvm
78 // Set of virtual registers, based on BitVector.
80 RegisterSet() = default;
81 explicit RegisterSet(unsigned s
, bool t
= false) : Bits(s
, t
) {}
82 RegisterSet(const RegisterSet
&RS
) = default;
89 unsigned count() const {
93 unsigned find_first() const {
94 int First
= Bits
.find_first();
100 unsigned find_next(unsigned Prev
) const {
101 int Next
= Bits
.find_next(v2x(Prev
));
107 RegisterSet
&insert(unsigned R
) {
108 unsigned Idx
= v2x(R
);
110 bool Exists
= Bits
.test(Idx
);
114 if (LRU
.size() > RegisterSetLimit
) {
115 unsigned T
= LRU
.front();
122 RegisterSet
&remove(unsigned R
) {
123 unsigned Idx
= v2x(R
);
124 if (Idx
< Bits
.size()) {
125 bool Exists
= Bits
.test(Idx
);
128 auto F
= llvm::find(LRU
, Idx
);
129 assert(F
!= LRU
.end());
136 RegisterSet
&insert(const RegisterSet
&Rs
) {
137 for (unsigned R
= Rs
.find_first(); R
; R
= Rs
.find_next(R
))
141 RegisterSet
&remove(const RegisterSet
&Rs
) {
142 for (unsigned R
= Rs
.find_first(); R
; R
= Rs
.find_next(R
))
147 bool operator[](unsigned R
) const {
148 unsigned Idx
= v2x(R
);
149 return Idx
< Bits
.size() ? Bits
[Idx
] : false;
151 bool has(unsigned R
) const {
152 unsigned Idx
= v2x(R
);
153 if (Idx
>= Bits
.size())
155 return Bits
.test(Idx
);
161 bool includes(const RegisterSet
&Rs
) const {
162 // A.test(B) <=> A-B != {}
163 return !Rs
.Bits
.test(Bits
);
165 bool intersects(const RegisterSet
&Rs
) const {
166 return Bits
.anyCommon(Rs
.Bits
);
171 std::deque
<unsigned> LRU
;
173 void ensure(unsigned Idx
) {
174 if (Bits
.size() <= Idx
)
175 Bits
.resize(std::max(Idx
+1, 32U));
178 static inline unsigned v2x(unsigned v
) {
179 return Register::virtReg2Index(v
);
182 static inline unsigned x2v(unsigned x
) {
183 return Register::index2VirtReg(x
);
188 PrintRegSet(const RegisterSet
&S
, const TargetRegisterInfo
*RI
)
191 friend raw_ostream
&operator<< (raw_ostream
&OS
,
192 const PrintRegSet
&P
);
195 const RegisterSet
&RS
;
196 const TargetRegisterInfo
*TRI
;
199 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegSet
&P
)
200 LLVM_ATTRIBUTE_UNUSED
;
201 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegSet
&P
) {
203 for (unsigned R
= P
.RS
.find_first(); R
; R
= P
.RS
.find_next(R
))
204 OS
<< ' ' << printReg(R
, P
.TRI
);
209 class Transformation
;
211 class HexagonBitSimplify
: public MachineFunctionPass
{
215 HexagonBitSimplify() : MachineFunctionPass(ID
) {}
217 StringRef
getPassName() const override
{
218 return "Hexagon bit simplification";
221 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
222 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
223 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
224 MachineFunctionPass::getAnalysisUsage(AU
);
227 bool runOnMachineFunction(MachineFunction
&MF
) override
;
229 static void getInstrDefs(const MachineInstr
&MI
, RegisterSet
&Defs
);
230 static void getInstrUses(const MachineInstr
&MI
, RegisterSet
&Uses
);
231 static bool isEqual(const BitTracker::RegisterCell
&RC1
, uint16_t B1
,
232 const BitTracker::RegisterCell
&RC2
, uint16_t B2
, uint16_t W
);
233 static bool isZero(const BitTracker::RegisterCell
&RC
, uint16_t B
,
235 static bool getConst(const BitTracker::RegisterCell
&RC
, uint16_t B
,
236 uint16_t W
, uint64_t &U
);
237 static bool replaceReg(Register OldR
, Register NewR
,
238 MachineRegisterInfo
&MRI
);
239 static bool getSubregMask(const BitTracker::RegisterRef
&RR
,
240 unsigned &Begin
, unsigned &Width
, MachineRegisterInfo
&MRI
);
241 static bool replaceRegWithSub(Register OldR
, Register NewR
, unsigned NewSR
,
242 MachineRegisterInfo
&MRI
);
243 static bool replaceSubWithSub(Register OldR
, unsigned OldSR
, Register NewR
,
244 unsigned NewSR
, MachineRegisterInfo
&MRI
);
245 static bool parseRegSequence(const MachineInstr
&I
,
246 BitTracker::RegisterRef
&SL
, BitTracker::RegisterRef
&SH
,
247 const MachineRegisterInfo
&MRI
);
249 static bool getUsedBitsInStore(unsigned Opc
, BitVector
&Bits
,
251 static bool getUsedBits(unsigned Opc
, unsigned OpN
, BitVector
&Bits
,
252 uint16_t Begin
, const HexagonInstrInfo
&HII
);
254 static const TargetRegisterClass
*getFinalVRegClass(
255 const BitTracker::RegisterRef
&RR
, MachineRegisterInfo
&MRI
);
256 static bool isTransparentCopy(const BitTracker::RegisterRef
&RD
,
257 const BitTracker::RegisterRef
&RS
, MachineRegisterInfo
&MRI
);
260 MachineDominatorTree
*MDT
= nullptr;
262 bool visitBlock(MachineBasicBlock
&B
, Transformation
&T
, RegisterSet
&AVs
);
263 static bool hasTiedUse(unsigned Reg
, MachineRegisterInfo
&MRI
,
264 unsigned NewSub
= Hexagon::NoSubRegister
);
267 using HBS
= HexagonBitSimplify
;
269 // The purpose of this class is to provide a common facility to traverse
270 // the function top-down or bottom-up via the dominator tree, and keep
271 // track of the available registers.
272 class Transformation
{
276 Transformation(bool TD
) : TopDown(TD
) {}
277 virtual ~Transformation() = default;
279 virtual bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) = 0;
282 } // end anonymous namespace
284 char HexagonBitSimplify::ID
= 0;
286 INITIALIZE_PASS_BEGIN(HexagonBitSimplify
, "hexagon-bit-simplify",
287 "Hexagon bit simplification", false, false)
288 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass
)
289 INITIALIZE_PASS_END(HexagonBitSimplify
, "hexagon-bit-simplify",
290 "Hexagon bit simplification", false, false)
292 bool HexagonBitSimplify::visitBlock(MachineBasicBlock
&B
, Transformation
&T
,
294 bool Changed
= false;
297 Changed
= T
.processBlock(B
, AVs
);
301 getInstrDefs(I
, Defs
);
302 RegisterSet NewAVs
= AVs
;
305 for (auto *DTN
: children
<MachineDomTreeNode
*>(MDT
->getNode(&B
)))
306 Changed
|= visitBlock(*(DTN
->getBlock()), T
, NewAVs
);
309 Changed
|= T
.processBlock(B
, AVs
);
315 // Utility functions:
317 void HexagonBitSimplify::getInstrDefs(const MachineInstr
&MI
,
319 for (auto &Op
: MI
.operands()) {
320 if (!Op
.isReg() || !Op
.isDef())
322 Register R
= Op
.getReg();
329 void HexagonBitSimplify::getInstrUses(const MachineInstr
&MI
,
331 for (auto &Op
: MI
.operands()) {
332 if (!Op
.isReg() || !Op
.isUse())
334 Register R
= Op
.getReg();
341 // Check if all the bits in range [B, E) in both cells are equal.
342 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell
&RC1
,
343 uint16_t B1
, const BitTracker::RegisterCell
&RC2
, uint16_t B2
,
345 for (uint16_t i
= 0; i
< W
; ++i
) {
346 // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
347 if (RC1
[B1
+i
].Type
== BitTracker::BitValue::Ref
&& RC1
[B1
+i
].RefI
.Reg
== 0)
350 if (RC2
[B2
+i
].Type
== BitTracker::BitValue::Ref
&& RC2
[B2
+i
].RefI
.Reg
== 0)
352 if (RC1
[B1
+i
] != RC2
[B2
+i
])
358 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell
&RC
,
359 uint16_t B
, uint16_t W
) {
360 assert(B
< RC
.width() && B
+W
<= RC
.width());
361 for (uint16_t i
= B
; i
< B
+W
; ++i
)
367 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell
&RC
,
368 uint16_t B
, uint16_t W
, uint64_t &U
) {
369 assert(B
< RC
.width() && B
+W
<= RC
.width());
371 for (uint16_t i
= B
+W
; i
> B
; --i
) {
372 const BitTracker::BitValue
&BV
= RC
[i
-1];
383 bool HexagonBitSimplify::replaceReg(Register OldR
, Register NewR
,
384 MachineRegisterInfo
&MRI
) {
385 if (!OldR
.isVirtual() || !NewR
.isVirtual())
387 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
389 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
390 NextI
= std::next(I
);
396 bool HexagonBitSimplify::replaceRegWithSub(Register OldR
, Register NewR
,
398 MachineRegisterInfo
&MRI
) {
399 if (!OldR
.isVirtual() || !NewR
.isVirtual())
401 if (hasTiedUse(OldR
, MRI
, NewSR
))
403 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
405 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
406 NextI
= std::next(I
);
413 bool HexagonBitSimplify::replaceSubWithSub(Register OldR
, unsigned OldSR
,
414 Register NewR
, unsigned NewSR
,
415 MachineRegisterInfo
&MRI
) {
416 if (!OldR
.isVirtual() || !NewR
.isVirtual())
418 if (OldSR
!= NewSR
&& hasTiedUse(OldR
, MRI
, NewSR
))
420 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
422 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
423 NextI
= std::next(I
);
424 if (I
->getSubReg() != OldSR
)
432 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
433 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
434 // if this succeeded, otherwise return false.
435 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef
&RR
,
436 unsigned &Begin
, unsigned &Width
, MachineRegisterInfo
&MRI
) {
437 const TargetRegisterClass
*RC
= MRI
.getRegClass(RR
.Reg
);
440 Width
= MRI
.getTargetRegisterInfo()->getRegSizeInBits(*RC
);
446 switch (RC
->getID()) {
447 case Hexagon::DoubleRegsRegClassID
:
448 case Hexagon::HvxWRRegClassID
:
449 Width
= MRI
.getTargetRegisterInfo()->getRegSizeInBits(*RC
) / 2;
450 if (RR
.Sub
== Hexagon::isub_hi
|| RR
.Sub
== Hexagon::vsub_hi
)
460 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
462 bool HexagonBitSimplify::parseRegSequence(const MachineInstr
&I
,
463 BitTracker::RegisterRef
&SL
, BitTracker::RegisterRef
&SH
,
464 const MachineRegisterInfo
&MRI
) {
465 assert(I
.getOpcode() == TargetOpcode::REG_SEQUENCE
);
466 unsigned Sub1
= I
.getOperand(2).getImm(), Sub2
= I
.getOperand(4).getImm();
467 auto &DstRC
= *MRI
.getRegClass(I
.getOperand(0).getReg());
468 auto &HRI
= static_cast<const HexagonRegisterInfo
&>(
469 *MRI
.getTargetRegisterInfo());
470 unsigned SubLo
= HRI
.getHexagonSubRegIndex(DstRC
, Hexagon::ps_sub_lo
);
471 unsigned SubHi
= HRI
.getHexagonSubRegIndex(DstRC
, Hexagon::ps_sub_hi
);
472 assert((Sub1
== SubLo
&& Sub2
== SubHi
) || (Sub1
== SubHi
&& Sub2
== SubLo
));
473 if (Sub1
== SubLo
&& Sub2
== SubHi
) {
474 SL
= I
.getOperand(1);
475 SH
= I
.getOperand(3);
478 if (Sub1
== SubHi
&& Sub2
== SubLo
) {
479 SH
= I
.getOperand(1);
480 SL
= I
.getOperand(3);
486 // All stores (except 64-bit stores) take a 32-bit register as the source
487 // of the value to be stored. If the instruction stores into a location
488 // that is shorter than 32 bits, some bits of the source register are not
489 // used. For each store instruction, calculate the set of used bits in
490 // the source register, and set appropriate bits in Bits. Return true if
491 // the bits are calculated, false otherwise.
492 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc
, BitVector
&Bits
,
494 using namespace Hexagon
;
498 case S2_storerb_io
: // memb(Rs32+#s11:0)=Rt32
499 case S2_storerbnew_io
: // memb(Rs32+#s11:0)=Nt8.new
500 case S2_pstorerbt_io
: // if (Pv4) memb(Rs32+#u6:0)=Rt32
501 case S2_pstorerbf_io
: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
502 case S4_pstorerbtnew_io
: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
503 case S4_pstorerbfnew_io
: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
504 case S2_pstorerbnewt_io
: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
505 case S2_pstorerbnewf_io
: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
506 case S4_pstorerbnewtnew_io
: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
507 case S4_pstorerbnewfnew_io
: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
508 case S2_storerb_pi
: // memb(Rx32++#s4:0)=Rt32
509 case S2_storerbnew_pi
: // memb(Rx32++#s4:0)=Nt8.new
510 case S2_pstorerbt_pi
: // if (Pv4) memb(Rx32++#s4:0)=Rt32
511 case S2_pstorerbf_pi
: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
512 case S2_pstorerbtnew_pi
: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
513 case S2_pstorerbfnew_pi
: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
514 case S2_pstorerbnewt_pi
: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
515 case S2_pstorerbnewf_pi
: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
516 case S2_pstorerbnewtnew_pi
: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
517 case S2_pstorerbnewfnew_pi
: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
518 case S4_storerb_ap
: // memb(Re32=#U6)=Rt32
519 case S4_storerbnew_ap
: // memb(Re32=#U6)=Nt8.new
520 case S2_storerb_pr
: // memb(Rx32++Mu2)=Rt32
521 case S2_storerbnew_pr
: // memb(Rx32++Mu2)=Nt8.new
522 case S4_storerb_ur
: // memb(Ru32<<#u2+#U6)=Rt32
523 case S4_storerbnew_ur
: // memb(Ru32<<#u2+#U6)=Nt8.new
524 case S2_storerb_pbr
: // memb(Rx32++Mu2:brev)=Rt32
525 case S2_storerbnew_pbr
: // memb(Rx32++Mu2:brev)=Nt8.new
526 case S2_storerb_pci
: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
527 case S2_storerbnew_pci
: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
528 case S2_storerb_pcr
: // memb(Rx32++I:circ(Mu2))=Rt32
529 case S2_storerbnew_pcr
: // memb(Rx32++I:circ(Mu2))=Nt8.new
530 case S4_storerb_rr
: // memb(Rs32+Ru32<<#u2)=Rt32
531 case S4_storerbnew_rr
: // memb(Rs32+Ru32<<#u2)=Nt8.new
532 case S4_pstorerbt_rr
: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
533 case S4_pstorerbf_rr
: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
534 case S4_pstorerbtnew_rr
: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
535 case S4_pstorerbfnew_rr
: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
536 case S4_pstorerbnewt_rr
: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
537 case S4_pstorerbnewf_rr
: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
538 case S4_pstorerbnewtnew_rr
: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
539 case S4_pstorerbnewfnew_rr
: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
540 case S2_storerbgp
: // memb(gp+#u16:0)=Rt32
541 case S2_storerbnewgp
: // memb(gp+#u16:0)=Nt8.new
542 case S4_pstorerbt_abs
: // if (Pv4) memb(#u6)=Rt32
543 case S4_pstorerbf_abs
: // if (!Pv4) memb(#u6)=Rt32
544 case S4_pstorerbtnew_abs
: // if (Pv4.new) memb(#u6)=Rt32
545 case S4_pstorerbfnew_abs
: // if (!Pv4.new) memb(#u6)=Rt32
546 case S4_pstorerbnewt_abs
: // if (Pv4) memb(#u6)=Nt8.new
547 case S4_pstorerbnewf_abs
: // if (!Pv4) memb(#u6)=Nt8.new
548 case S4_pstorerbnewtnew_abs
: // if (Pv4.new) memb(#u6)=Nt8.new
549 case S4_pstorerbnewfnew_abs
: // if (!Pv4.new) memb(#u6)=Nt8.new
550 Bits
.set(Begin
, Begin
+8);
554 case S2_storerh_io
: // memh(Rs32+#s11:1)=Rt32
555 case S2_storerhnew_io
: // memh(Rs32+#s11:1)=Nt8.new
556 case S2_pstorerht_io
: // if (Pv4) memh(Rs32+#u6:1)=Rt32
557 case S2_pstorerhf_io
: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
558 case S4_pstorerhtnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
559 case S4_pstorerhfnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
560 case S2_pstorerhnewt_io
: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
561 case S2_pstorerhnewf_io
: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
562 case S4_pstorerhnewtnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
563 case S4_pstorerhnewfnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
564 case S2_storerh_pi
: // memh(Rx32++#s4:1)=Rt32
565 case S2_storerhnew_pi
: // memh(Rx32++#s4:1)=Nt8.new
566 case S2_pstorerht_pi
: // if (Pv4) memh(Rx32++#s4:1)=Rt32
567 case S2_pstorerhf_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
568 case S2_pstorerhtnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
569 case S2_pstorerhfnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
570 case S2_pstorerhnewt_pi
: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
571 case S2_pstorerhnewf_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
572 case S2_pstorerhnewtnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
573 case S2_pstorerhnewfnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
574 case S4_storerh_ap
: // memh(Re32=#U6)=Rt32
575 case S4_storerhnew_ap
: // memh(Re32=#U6)=Nt8.new
576 case S2_storerh_pr
: // memh(Rx32++Mu2)=Rt32
577 case S2_storerhnew_pr
: // memh(Rx32++Mu2)=Nt8.new
578 case S4_storerh_ur
: // memh(Ru32<<#u2+#U6)=Rt32
579 case S4_storerhnew_ur
: // memh(Ru32<<#u2+#U6)=Nt8.new
580 case S2_storerh_pbr
: // memh(Rx32++Mu2:brev)=Rt32
581 case S2_storerhnew_pbr
: // memh(Rx32++Mu2:brev)=Nt8.new
582 case S2_storerh_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
583 case S2_storerhnew_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
584 case S2_storerh_pcr
: // memh(Rx32++I:circ(Mu2))=Rt32
585 case S2_storerhnew_pcr
: // memh(Rx32++I:circ(Mu2))=Nt8.new
586 case S4_storerh_rr
: // memh(Rs32+Ru32<<#u2)=Rt32
587 case S4_pstorerht_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
588 case S4_pstorerhf_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
589 case S4_pstorerhtnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
590 case S4_pstorerhfnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
591 case S4_storerhnew_rr
: // memh(Rs32+Ru32<<#u2)=Nt8.new
592 case S4_pstorerhnewt_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
593 case S4_pstorerhnewf_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
594 case S4_pstorerhnewtnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
595 case S4_pstorerhnewfnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
596 case S2_storerhgp
: // memh(gp+#u16:1)=Rt32
597 case S2_storerhnewgp
: // memh(gp+#u16:1)=Nt8.new
598 case S4_pstorerht_abs
: // if (Pv4) memh(#u6)=Rt32
599 case S4_pstorerhf_abs
: // if (!Pv4) memh(#u6)=Rt32
600 case S4_pstorerhtnew_abs
: // if (Pv4.new) memh(#u6)=Rt32
601 case S4_pstorerhfnew_abs
: // if (!Pv4.new) memh(#u6)=Rt32
602 case S4_pstorerhnewt_abs
: // if (Pv4) memh(#u6)=Nt8.new
603 case S4_pstorerhnewf_abs
: // if (!Pv4) memh(#u6)=Nt8.new
604 case S4_pstorerhnewtnew_abs
: // if (Pv4.new) memh(#u6)=Nt8.new
605 case S4_pstorerhnewfnew_abs
: // if (!Pv4.new) memh(#u6)=Nt8.new
606 Bits
.set(Begin
, Begin
+16);
610 case S2_storerf_io
: // memh(Rs32+#s11:1)=Rt.H32
611 case S2_pstorerft_io
: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
612 case S2_pstorerff_io
: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
613 case S4_pstorerftnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
614 case S4_pstorerffnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
615 case S2_storerf_pi
: // memh(Rx32++#s4:1)=Rt.H32
616 case S2_pstorerft_pi
: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
617 case S2_pstorerff_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
618 case S2_pstorerftnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
619 case S2_pstorerffnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
620 case S4_storerf_ap
: // memh(Re32=#U6)=Rt.H32
621 case S2_storerf_pr
: // memh(Rx32++Mu2)=Rt.H32
622 case S4_storerf_ur
: // memh(Ru32<<#u2+#U6)=Rt.H32
623 case S2_storerf_pbr
: // memh(Rx32++Mu2:brev)=Rt.H32
624 case S2_storerf_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
625 case S2_storerf_pcr
: // memh(Rx32++I:circ(Mu2))=Rt.H32
626 case S4_storerf_rr
: // memh(Rs32+Ru32<<#u2)=Rt.H32
627 case S4_pstorerft_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
628 case S4_pstorerff_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
629 case S4_pstorerftnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
630 case S4_pstorerffnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
631 case S2_storerfgp
: // memh(gp+#u16:1)=Rt.H32
632 case S4_pstorerft_abs
: // if (Pv4) memh(#u6)=Rt.H32
633 case S4_pstorerff_abs
: // if (!Pv4) memh(#u6)=Rt.H32
634 case S4_pstorerftnew_abs
: // if (Pv4.new) memh(#u6)=Rt.H32
635 case S4_pstorerffnew_abs
: // if (!Pv4.new) memh(#u6)=Rt.H32
636 Bits
.set(Begin
+16, Begin
+32);
643 // For an instruction with opcode Opc, calculate the set of bits that it
644 // uses in a register in operand OpN. This only calculates the set of used
645 // bits for cases where it does not depend on any operands (as is the case
646 // in shifts, for example). For concrete instructions from a program, the
647 // operand may be a subregister of a larger register, while Bits would
648 // correspond to the larger register in its entirety. Because of that,
649 // the parameter Begin can be used to indicate which bit of Bits should be
650 // considered the LSB of the operand.
651 bool HexagonBitSimplify::getUsedBits(unsigned Opc
, unsigned OpN
,
652 BitVector
&Bits
, uint16_t Begin
, const HexagonInstrInfo
&HII
) {
653 using namespace Hexagon
;
655 const MCInstrDesc
&D
= HII
.get(Opc
);
657 if (OpN
== D
.getNumOperands()-1)
658 return getUsedBitsInStore(Opc
, Bits
, Begin
);
663 // One register source. Used bits: R1[0-7].
670 Bits
.set(Begin
, Begin
+8);
675 // One register source. Used bits: R1[0-15].
683 Bits
.set(Begin
, Begin
+16);
688 // One register source. Used bits: R1[16-31].
691 Bits
.set(Begin
+16, Begin
+32);
696 // Two register sources. Used bits: R1[0-7], R2[0-7].
701 Bits
.set(Begin
, Begin
+8);
706 // Two register sources. Used bits: R1[0-15], R2[0-15].
711 case A2_addh_h16_sat_ll
:
713 case A2_addh_l16_sat_ll
:
716 case A2_subh_h16_sat_ll
:
718 case A2_subh_l16_sat_ll
:
719 case M2_mpy_acc_ll_s0
:
720 case M2_mpy_acc_ll_s1
:
721 case M2_mpy_acc_sat_ll_s0
:
722 case M2_mpy_acc_sat_ll_s1
:
725 case M2_mpy_nac_ll_s0
:
726 case M2_mpy_nac_ll_s1
:
727 case M2_mpy_nac_sat_ll_s0
:
728 case M2_mpy_nac_sat_ll_s1
:
729 case M2_mpy_rnd_ll_s0
:
730 case M2_mpy_rnd_ll_s1
:
731 case M2_mpy_sat_ll_s0
:
732 case M2_mpy_sat_ll_s1
:
733 case M2_mpy_sat_rnd_ll_s0
:
734 case M2_mpy_sat_rnd_ll_s1
:
735 case M2_mpyd_acc_ll_s0
:
736 case M2_mpyd_acc_ll_s1
:
739 case M2_mpyd_nac_ll_s0
:
740 case M2_mpyd_nac_ll_s1
:
741 case M2_mpyd_rnd_ll_s0
:
742 case M2_mpyd_rnd_ll_s1
:
743 case M2_mpyu_acc_ll_s0
:
744 case M2_mpyu_acc_ll_s1
:
747 case M2_mpyu_nac_ll_s0
:
748 case M2_mpyu_nac_ll_s1
:
749 case M2_mpyud_acc_ll_s0
:
750 case M2_mpyud_acc_ll_s1
:
753 case M2_mpyud_nac_ll_s0
:
754 case M2_mpyud_nac_ll_s1
:
755 if (OpN
== 1 || OpN
== 2) {
756 Bits
.set(Begin
, Begin
+16);
761 // Two register sources. Used bits: R1[0-15], R2[16-31].
763 case A2_addh_h16_sat_lh
:
766 case A2_subh_h16_sat_lh
:
767 case M2_mpy_acc_lh_s0
:
768 case M2_mpy_acc_lh_s1
:
769 case M2_mpy_acc_sat_lh_s0
:
770 case M2_mpy_acc_sat_lh_s1
:
773 case M2_mpy_nac_lh_s0
:
774 case M2_mpy_nac_lh_s1
:
775 case M2_mpy_nac_sat_lh_s0
:
776 case M2_mpy_nac_sat_lh_s1
:
777 case M2_mpy_rnd_lh_s0
:
778 case M2_mpy_rnd_lh_s1
:
779 case M2_mpy_sat_lh_s0
:
780 case M2_mpy_sat_lh_s1
:
781 case M2_mpy_sat_rnd_lh_s0
:
782 case M2_mpy_sat_rnd_lh_s1
:
783 case M2_mpyd_acc_lh_s0
:
784 case M2_mpyd_acc_lh_s1
:
787 case M2_mpyd_nac_lh_s0
:
788 case M2_mpyd_nac_lh_s1
:
789 case M2_mpyd_rnd_lh_s0
:
790 case M2_mpyd_rnd_lh_s1
:
791 case M2_mpyu_acc_lh_s0
:
792 case M2_mpyu_acc_lh_s1
:
795 case M2_mpyu_nac_lh_s0
:
796 case M2_mpyu_nac_lh_s1
:
797 case M2_mpyud_acc_lh_s0
:
798 case M2_mpyud_acc_lh_s1
:
801 case M2_mpyud_nac_lh_s0
:
802 case M2_mpyud_nac_lh_s1
:
803 // These four are actually LH.
805 case A2_addh_l16_sat_hl
:
807 case A2_subh_l16_sat_hl
:
809 Bits
.set(Begin
, Begin
+16);
813 Bits
.set(Begin
+16, Begin
+32);
818 // Two register sources, used bits: R1[16-31], R2[0-15].
820 case A2_addh_h16_sat_hl
:
823 case A2_subh_h16_sat_hl
:
824 case M2_mpy_acc_hl_s0
:
825 case M2_mpy_acc_hl_s1
:
826 case M2_mpy_acc_sat_hl_s0
:
827 case M2_mpy_acc_sat_hl_s1
:
830 case M2_mpy_nac_hl_s0
:
831 case M2_mpy_nac_hl_s1
:
832 case M2_mpy_nac_sat_hl_s0
:
833 case M2_mpy_nac_sat_hl_s1
:
834 case M2_mpy_rnd_hl_s0
:
835 case M2_mpy_rnd_hl_s1
:
836 case M2_mpy_sat_hl_s0
:
837 case M2_mpy_sat_hl_s1
:
838 case M2_mpy_sat_rnd_hl_s0
:
839 case M2_mpy_sat_rnd_hl_s1
:
840 case M2_mpyd_acc_hl_s0
:
841 case M2_mpyd_acc_hl_s1
:
844 case M2_mpyd_nac_hl_s0
:
845 case M2_mpyd_nac_hl_s1
:
846 case M2_mpyd_rnd_hl_s0
:
847 case M2_mpyd_rnd_hl_s1
:
848 case M2_mpyu_acc_hl_s0
:
849 case M2_mpyu_acc_hl_s1
:
852 case M2_mpyu_nac_hl_s0
:
853 case M2_mpyu_nac_hl_s1
:
854 case M2_mpyud_acc_hl_s0
:
855 case M2_mpyud_acc_hl_s1
:
858 case M2_mpyud_nac_hl_s0
:
859 case M2_mpyud_nac_hl_s1
:
861 Bits
.set(Begin
+16, Begin
+32);
865 Bits
.set(Begin
, Begin
+16);
870 // Two register sources, used bits: R1[16-31], R2[16-31].
872 case A2_addh_h16_sat_hh
:
875 case A2_subh_h16_sat_hh
:
876 case M2_mpy_acc_hh_s0
:
877 case M2_mpy_acc_hh_s1
:
878 case M2_mpy_acc_sat_hh_s0
:
879 case M2_mpy_acc_sat_hh_s1
:
882 case M2_mpy_nac_hh_s0
:
883 case M2_mpy_nac_hh_s1
:
884 case M2_mpy_nac_sat_hh_s0
:
885 case M2_mpy_nac_sat_hh_s1
:
886 case M2_mpy_rnd_hh_s0
:
887 case M2_mpy_rnd_hh_s1
:
888 case M2_mpy_sat_hh_s0
:
889 case M2_mpy_sat_hh_s1
:
890 case M2_mpy_sat_rnd_hh_s0
:
891 case M2_mpy_sat_rnd_hh_s1
:
892 case M2_mpyd_acc_hh_s0
:
893 case M2_mpyd_acc_hh_s1
:
896 case M2_mpyd_nac_hh_s0
:
897 case M2_mpyd_nac_hh_s1
:
898 case M2_mpyd_rnd_hh_s0
:
899 case M2_mpyd_rnd_hh_s1
:
900 case M2_mpyu_acc_hh_s0
:
901 case M2_mpyu_acc_hh_s1
:
904 case M2_mpyu_nac_hh_s0
:
905 case M2_mpyu_nac_hh_s1
:
906 case M2_mpyud_acc_hh_s0
:
907 case M2_mpyud_acc_hh_s1
:
910 case M2_mpyud_nac_hh_s0
:
911 case M2_mpyud_nac_hh_s1
:
912 if (OpN
== 1 || OpN
== 2) {
913 Bits
.set(Begin
+16, Begin
+32);
922 // Calculate the register class that matches Reg:Sub. For example, if
923 // %1 is a double register, then %1:isub_hi would match the "int"
925 const TargetRegisterClass
*HexagonBitSimplify::getFinalVRegClass(
926 const BitTracker::RegisterRef
&RR
, MachineRegisterInfo
&MRI
) {
927 if (!RR
.Reg
.isVirtual())
929 auto *RC
= MRI
.getRegClass(RR
.Reg
);
932 auto &HRI
= static_cast<const HexagonRegisterInfo
&>(
933 *MRI
.getTargetRegisterInfo());
935 auto VerifySR
= [&HRI
] (const TargetRegisterClass
*RC
, unsigned Sub
) -> void {
937 assert(Sub
== HRI
.getHexagonSubRegIndex(*RC
, Hexagon::ps_sub_lo
) ||
938 Sub
== HRI
.getHexagonSubRegIndex(*RC
, Hexagon::ps_sub_hi
));
941 switch (RC
->getID()) {
942 case Hexagon::DoubleRegsRegClassID
:
943 VerifySR(RC
, RR
.Sub
);
944 return &Hexagon::IntRegsRegClass
;
945 case Hexagon::HvxWRRegClassID
:
946 VerifySR(RC
, RR
.Sub
);
947 return &Hexagon::HvxVRRegClass
;
952 // Check if RD could be replaced with RS at any possible use of RD.
953 // For example a predicate register cannot be replaced with a integer
954 // register, but a 64-bit register with a subregister can be replaced
955 // with a 32-bit register.
956 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef
&RD
,
957 const BitTracker::RegisterRef
&RS
, MachineRegisterInfo
&MRI
) {
958 if (!RD
.Reg
.isVirtual() || !RS
.Reg
.isVirtual())
960 // Return false if one (or both) classes are nullptr.
961 auto *DRC
= getFinalVRegClass(RD
, MRI
);
965 return DRC
== getFinalVRegClass(RS
, MRI
);
968 bool HexagonBitSimplify::hasTiedUse(unsigned Reg
, MachineRegisterInfo
&MRI
,
970 if (!PreserveTiedOps
)
972 return llvm::any_of(MRI
.use_operands(Reg
),
973 [NewSub
] (const MachineOperand
&Op
) -> bool {
974 return Op
.getSubReg() != NewSub
&& Op
.isTied();
980 class DeadCodeElimination
{
982 DeadCodeElimination(MachineFunction
&mf
, MachineDominatorTree
&mdt
)
983 : MF(mf
), HII(*MF
.getSubtarget
<HexagonSubtarget
>().getInstrInfo()),
984 MDT(mdt
), MRI(mf
.getRegInfo()) {}
987 return runOnNode(MDT
.getRootNode());
991 bool isDead(unsigned R
) const;
992 bool runOnNode(MachineDomTreeNode
*N
);
995 const HexagonInstrInfo
&HII
;
996 MachineDominatorTree
&MDT
;
997 MachineRegisterInfo
&MRI
;
1000 } // end anonymous namespace
1002 bool DeadCodeElimination::isDead(unsigned R
) const {
1003 for (const MachineOperand
&MO
: MRI
.use_operands(R
)) {
1004 const MachineInstr
*UseI
= MO
.getParent();
1005 if (UseI
->isDebugInstr())
1007 if (UseI
->isPHI()) {
1008 assert(!UseI
->getOperand(0).getSubReg());
1009 Register DR
= UseI
->getOperand(0).getReg();
1018 bool DeadCodeElimination::runOnNode(MachineDomTreeNode
*N
) {
1019 bool Changed
= false;
1021 for (auto *DTN
: children
<MachineDomTreeNode
*>(N
))
1022 Changed
|= runOnNode(DTN
);
1024 MachineBasicBlock
*B
= N
->getBlock();
1025 std::vector
<MachineInstr
*> Instrs
;
1026 for (MachineInstr
&MI
: llvm::reverse(*B
))
1027 Instrs
.push_back(&MI
);
1029 for (auto *MI
: Instrs
) {
1030 unsigned Opc
= MI
->getOpcode();
1031 // Do not touch lifetime markers. This is why the target-independent DCE
1033 if (Opc
== TargetOpcode::LIFETIME_START
||
1034 Opc
== TargetOpcode::LIFETIME_END
)
1037 if (MI
->isInlineAsm())
1039 // Delete PHIs if possible.
1040 if (!MI
->isPHI() && !MI
->isSafeToMove(Store
))
1043 bool AllDead
= true;
1044 SmallVector
<unsigned,2> Regs
;
1045 for (auto &Op
: MI
->operands()) {
1046 if (!Op
.isReg() || !Op
.isDef())
1048 Register R
= Op
.getReg();
1049 if (!R
.isVirtual() || !isDead(R
)) {
1059 for (unsigned Reg
: Regs
)
1060 MRI
.markUsesInDebugValueAsUndef(Reg
);
1069 // Eliminate redundant instructions
1071 // This transformation will identify instructions where the output register
1072 // is the same as one of its input registers. This only works on instructions
1073 // that define a single register (unlike post-increment loads, for example).
1074 // The equality check is actually more detailed: the code calculates which
1075 // bits of the output are used, and only compares these bits with the input
1077 // If the output matches an input, the instruction is replaced with COPY.
1078 // The copies will be removed by another transformation.
1079 class RedundantInstrElimination
: public Transformation
{
1081 RedundantInstrElimination(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1082 const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1083 : Transformation(true), HII(hii
), HRI(hri
), MRI(mri
), BT(bt
) {}
1085 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1088 bool isLossyShiftLeft(const MachineInstr
&MI
, unsigned OpN
,
1089 unsigned &LostB
, unsigned &LostE
);
1090 bool isLossyShiftRight(const MachineInstr
&MI
, unsigned OpN
,
1091 unsigned &LostB
, unsigned &LostE
);
1092 bool computeUsedBits(unsigned Reg
, BitVector
&Bits
);
1093 bool computeUsedBits(const MachineInstr
&MI
, unsigned OpN
, BitVector
&Bits
,
1095 bool usedBitsEqual(BitTracker::RegisterRef RD
, BitTracker::RegisterRef RS
);
1097 const HexagonInstrInfo
&HII
;
1098 const HexagonRegisterInfo
&HRI
;
1099 MachineRegisterInfo
&MRI
;
1103 } // end anonymous namespace
1105 // Check if the instruction is a lossy shift left, where the input being
1106 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1107 // of bit indices that are lost.
1108 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr
&MI
,
1109 unsigned OpN
, unsigned &LostB
, unsigned &LostE
) {
1110 using namespace Hexagon
;
1112 unsigned Opc
= MI
.getOpcode();
1113 unsigned ImN
, RegN
, Width
;
1120 case S2_asl_i_p_acc
:
1121 case S2_asl_i_p_and
:
1122 case S2_asl_i_p_nac
:
1124 case S2_asl_i_p_xacc
:
1134 case S2_addasl_rrri
:
1135 case S4_andi_asl_ri
:
1137 case S4_addi_asl_ri
:
1138 case S4_subi_asl_ri
:
1139 case S2_asl_i_r_acc
:
1140 case S2_asl_i_r_and
:
1141 case S2_asl_i_r_nac
:
1143 case S2_asl_i_r_sat
:
1144 case S2_asl_i_r_xacc
:
1156 assert(MI
.getOperand(ImN
).isImm());
1157 unsigned S
= MI
.getOperand(ImN
).getImm();
1165 // Check if the instruction is a lossy shift right, where the input being
1166 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1167 // of bit indices that are lost.
1168 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr
&MI
,
1169 unsigned OpN
, unsigned &LostB
, unsigned &LostE
) {
1170 using namespace Hexagon
;
1172 unsigned Opc
= MI
.getOpcode();
1180 case S2_asr_i_p_acc
:
1181 case S2_asr_i_p_and
:
1182 case S2_asr_i_p_nac
:
1184 case S2_lsr_i_p_acc
:
1185 case S2_lsr_i_p_and
:
1186 case S2_lsr_i_p_nac
:
1188 case S2_lsr_i_p_xacc
:
1197 case S4_andi_lsr_ri
:
1199 case S4_addi_lsr_ri
:
1200 case S4_subi_lsr_ri
:
1201 case S2_asr_i_r_acc
:
1202 case S2_asr_i_r_and
:
1203 case S2_asr_i_r_nac
:
1205 case S2_lsr_i_r_acc
:
1206 case S2_lsr_i_r_and
:
1207 case S2_lsr_i_r_nac
:
1209 case S2_lsr_i_r_xacc
:
1221 assert(MI
.getOperand(ImN
).isImm());
1222 unsigned S
= MI
.getOperand(ImN
).getImm();
1228 // Calculate the bit vector that corresponds to the used bits of register Reg.
1229 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
1230 // culation fails (i.e. the used bits are unknown), it returns false. Other-
1231 // wise, it returns true and sets the corresponding bits in Bits.
1232 bool RedundantInstrElimination::computeUsedBits(unsigned Reg
, BitVector
&Bits
) {
1233 BitVector
Used(Bits
.size());
1234 RegisterSet Visited
;
1235 std::vector
<unsigned> Pending
;
1236 Pending
.push_back(Reg
);
1238 for (unsigned i
= 0; i
< Pending
.size(); ++i
) {
1239 unsigned R
= Pending
[i
];
1243 for (auto I
= MRI
.use_begin(R
), E
= MRI
.use_end(); I
!= E
; ++I
) {
1244 BitTracker::RegisterRef UR
= *I
;
1246 if (!HBS::getSubregMask(UR
, B
, W
, MRI
))
1248 MachineInstr
&UseI
= *I
->getParent();
1249 if (UseI
.isPHI() || UseI
.isCopy()) {
1250 Register DefR
= UseI
.getOperand(0).getReg();
1251 if (!DefR
.isVirtual())
1253 Pending
.push_back(DefR
);
1255 if (!computeUsedBits(UseI
, I
.getOperandNo(), Used
, B
))
1264 // Calculate the bits used by instruction MI in a register in operand OpN.
1265 // Return true/false if the calculation succeeds/fails. If is succeeds, set
1266 // used bits in Bits. This function does not reset any bits in Bits, so
1267 // subsequent calls over different instructions will result in the union
1268 // of the used bits in all these instructions.
1269 // The register in question may be used with a sub-register, whereas Bits
1270 // holds the bits for the entire register. To keep track of that, the
1271 // argument Begin indicates where in Bits is the lowest-significant bit
1272 // of the register used in operand OpN. For example, in instruction:
1273 // %1 = S2_lsr_i_r %2:isub_hi, 10
1274 // the operand 1 is a 32-bit register, which happens to be a subregister
1275 // of the 64-bit register %2, and that subregister starts at position 32.
1276 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
1278 bool RedundantInstrElimination::computeUsedBits(const MachineInstr
&MI
,
1279 unsigned OpN
, BitVector
&Bits
, uint16_t Begin
) {
1280 unsigned Opc
= MI
.getOpcode();
1281 BitVector
T(Bits
.size());
1282 bool GotBits
= HBS::getUsedBits(Opc
, OpN
, T
, Begin
, HII
);
1283 // Even if we don't have bits yet, we could still provide some information
1284 // if the instruction is a lossy shift: the lost bits will be marked as
1287 if (isLossyShiftLeft(MI
, OpN
, LB
, LE
) || isLossyShiftRight(MI
, OpN
, LB
, LE
)) {
1288 assert(MI
.getOperand(OpN
).isReg());
1289 BitTracker::RegisterRef RR
= MI
.getOperand(OpN
);
1290 const TargetRegisterClass
*RC
= HBS::getFinalVRegClass(RR
, MRI
);
1291 uint16_t Width
= HRI
.getRegSizeInBits(*RC
);
1294 T
.set(Begin
, Begin
+Width
);
1295 assert(LB
<= LE
&& LB
< Width
&& LE
<= Width
);
1296 T
.reset(Begin
+LB
, Begin
+LE
);
1304 // Calculates the used bits in RD ("defined register"), and checks if these
1305 // bits in RS ("used register") and RD are identical.
1306 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD
,
1307 BitTracker::RegisterRef RS
) {
1308 const BitTracker::RegisterCell
&DC
= BT
.lookup(RD
.Reg
);
1309 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
1312 if (!HBS::getSubregMask(RD
, DB
, DW
, MRI
))
1315 if (!HBS::getSubregMask(RS
, SB
, SW
, MRI
))
1320 BitVector
Used(DC
.width());
1321 if (!computeUsedBits(RD
.Reg
, Used
))
1324 for (unsigned i
= 0; i
!= DW
; ++i
)
1325 if (Used
[i
+DB
] && DC
[DB
+i
] != SC
[SB
+i
])
1330 bool RedundantInstrElimination::processBlock(MachineBasicBlock
&B
,
1331 const RegisterSet
&) {
1332 if (!BT
.reached(&B
))
1334 bool Changed
= false;
1336 for (auto I
= B
.begin(), E
= B
.end(); I
!= E
; ++I
) {
1337 MachineInstr
*MI
= &*I
;
1339 if (MI
->getOpcode() == TargetOpcode::COPY
)
1341 if (MI
->isPHI() || MI
->hasUnmodeledSideEffects() || MI
->isInlineAsm())
1343 unsigned NumD
= MI
->getDesc().getNumDefs();
1347 BitTracker::RegisterRef RD
= MI
->getOperand(0);
1348 if (!BT
.has(RD
.Reg
))
1350 const BitTracker::RegisterCell
&DC
= BT
.lookup(RD
.Reg
);
1351 auto At
= MachineBasicBlock::iterator(MI
);
1353 // Find a source operand that is equal to the result.
1354 for (auto &Op
: MI
->uses()) {
1357 BitTracker::RegisterRef RS
= Op
;
1358 if (!BT
.has(RS
.Reg
))
1360 if (!HBS::isTransparentCopy(RD
, RS
, MRI
))
1364 if (!HBS::getSubregMask(RS
, BN
, BW
, MRI
))
1367 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
1368 if (!usedBitsEqual(RD
, RS
) && !HBS::isEqual(DC
, 0, SC
, BN
, BW
))
1371 // If found, replace the instruction with a COPY.
1372 const DebugLoc
&DL
= MI
->getDebugLoc();
1373 const TargetRegisterClass
*FRC
= HBS::getFinalVRegClass(RD
, MRI
);
1374 Register NewR
= MRI
.createVirtualRegister(FRC
);
1375 MachineInstr
*CopyI
=
1376 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::COPY
), NewR
)
1377 .addReg(RS
.Reg
, 0, RS
.Sub
);
1378 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
1379 // This pass can create copies between registers that don't have the
1380 // exact same values. Updating the tracker has to involve updating
1381 // all dependent cells. Example:
1382 // %1 = inst %2 ; %1 != %2, but used bits are equal
1384 // %3 = copy %2 ; <- inserted
1385 // ... = %3 ; <- replaced from %2
1386 // Indirectly, we can create a "copy" between %1 and %2 even
1387 // though their exact values do not match.
1399 // Recognize instructions that produce constant values known at compile-time.
1400 // Replace them with register definitions that load these constants directly.
1401 class ConstGeneration
: public Transformation
{
1403 ConstGeneration(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1404 MachineRegisterInfo
&mri
)
1405 : Transformation(true), HII(hii
), MRI(mri
), BT(bt
) {}
1407 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1408 static bool isTfrConst(const MachineInstr
&MI
);
1411 Register
genTfrConst(const TargetRegisterClass
*RC
, int64_t C
,
1412 MachineBasicBlock
&B
, MachineBasicBlock::iterator At
,
1415 const HexagonInstrInfo
&HII
;
1416 MachineRegisterInfo
&MRI
;
1420 } // end anonymous namespace
1422 bool ConstGeneration::isTfrConst(const MachineInstr
&MI
) {
1423 unsigned Opc
= MI
.getOpcode();
1425 case Hexagon::A2_combineii
:
1426 case Hexagon::A4_combineii
:
1427 case Hexagon::A2_tfrsi
:
1428 case Hexagon::A2_tfrpi
:
1429 case Hexagon::PS_true
:
1430 case Hexagon::PS_false
:
1431 case Hexagon::CONST32
:
1432 case Hexagon::CONST64
:
1438 // Generate a transfer-immediate instruction that is appropriate for the
1439 // register class and the actual value being transferred.
1440 Register
ConstGeneration::genTfrConst(const TargetRegisterClass
*RC
, int64_t C
,
1441 MachineBasicBlock
&B
,
1442 MachineBasicBlock::iterator At
,
1444 Register Reg
= MRI
.createVirtualRegister(RC
);
1445 if (RC
== &Hexagon::IntRegsRegClass
) {
1446 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrsi
), Reg
)
1447 .addImm(int32_t(C
));
1451 if (RC
== &Hexagon::DoubleRegsRegClass
) {
1453 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrpi
), Reg
)
1458 unsigned Lo
= Lo_32(C
), Hi
= Hi_32(C
);
1459 if (isInt
<8>(Lo
) || isInt
<8>(Hi
)) {
1460 unsigned Opc
= isInt
<8>(Lo
) ? Hexagon::A2_combineii
1461 : Hexagon::A4_combineii
;
1462 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
)
1463 .addImm(int32_t(Hi
))
1464 .addImm(int32_t(Lo
));
1467 MachineFunction
*MF
= B
.getParent();
1468 auto &HST
= MF
->getSubtarget
<HexagonSubtarget
>();
1470 // Disable CONST64 for tiny core since it takes a LD resource.
1471 if (!HST
.isTinyCore() ||
1472 MF
->getFunction().hasOptSize()) {
1473 BuildMI(B
, At
, DL
, HII
.get(Hexagon::CONST64
), Reg
)
1479 if (RC
== &Hexagon::PredRegsRegClass
) {
1482 Opc
= Hexagon::PS_false
;
1483 else if ((C
& 0xFF) == 0xFF)
1484 Opc
= Hexagon::PS_true
;
1487 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
);
1494 bool ConstGeneration::processBlock(MachineBasicBlock
&B
, const RegisterSet
&) {
1495 if (!BT
.reached(&B
))
1497 bool Changed
= false;
1500 for (auto I
= B
.begin(), E
= B
.end(); I
!= E
; ++I
) {
1504 HBS::getInstrDefs(*I
, Defs
);
1505 if (Defs
.count() != 1)
1507 Register DR
= Defs
.find_first();
1508 if (!DR
.isVirtual())
1511 const BitTracker::RegisterCell
&DRC
= BT
.lookup(DR
);
1512 if (HBS::getConst(DRC
, 0, DRC
.width(), U
)) {
1514 DebugLoc DL
= I
->getDebugLoc();
1515 auto At
= I
->isPHI() ? B
.getFirstNonPHI() : I
;
1516 Register ImmReg
= genTfrConst(MRI
.getRegClass(DR
), C
, B
, At
, DL
);
1518 HBS::replaceReg(DR
, ImmReg
, MRI
);
1519 BT
.put(ImmReg
, DRC
);
1529 // Identify pairs of available registers which hold identical values.
1530 // In such cases, only one of them needs to be calculated, the other one
1531 // will be defined as a copy of the first.
1532 class CopyGeneration
: public Transformation
{
1534 CopyGeneration(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1535 const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1536 : Transformation(true), HII(hii
), HRI(hri
), MRI(mri
), BT(bt
) {}
1538 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1541 bool findMatch(const BitTracker::RegisterRef
&Inp
,
1542 BitTracker::RegisterRef
&Out
, const RegisterSet
&AVs
);
1544 const HexagonInstrInfo
&HII
;
1545 const HexagonRegisterInfo
&HRI
;
1546 MachineRegisterInfo
&MRI
;
1548 RegisterSet Forbidden
;
1551 // Eliminate register copies RD = RS, by replacing the uses of RD with
1553 class CopyPropagation
: public Transformation
{
1555 CopyPropagation(const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1556 : Transformation(false), HRI(hri
), MRI(mri
) {}
1558 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1560 static bool isCopyReg(unsigned Opc
, bool NoConv
);
1563 bool propagateRegCopy(MachineInstr
&MI
);
1565 const HexagonRegisterInfo
&HRI
;
1566 MachineRegisterInfo
&MRI
;
1569 } // end anonymous namespace
1571 /// Check if there is a register in AVs that is identical to Inp. If so,
1572 /// set Out to the found register. The output may be a pair Reg:Sub.
1573 bool CopyGeneration::findMatch(const BitTracker::RegisterRef
&Inp
,
1574 BitTracker::RegisterRef
&Out
, const RegisterSet
&AVs
) {
1575 if (!BT
.has(Inp
.Reg
))
1577 const BitTracker::RegisterCell
&InpRC
= BT
.lookup(Inp
.Reg
);
1578 auto *FRC
= HBS::getFinalVRegClass(Inp
, MRI
);
1580 if (!HBS::getSubregMask(Inp
, B
, W
, MRI
))
1583 for (Register R
= AVs
.find_first(); R
; R
= AVs
.find_next(R
)) {
1584 if (!BT
.has(R
) || Forbidden
[R
])
1586 const BitTracker::RegisterCell
&RC
= BT
.lookup(R
);
1587 unsigned RW
= RC
.width();
1589 if (FRC
!= MRI
.getRegClass(R
))
1591 if (!HBS::isTransparentCopy(R
, Inp
, MRI
))
1593 if (!HBS::isEqual(InpRC
, B
, RC
, 0, W
))
1599 // Check if there is a super-register, whose part (with a subregister)
1600 // is equal to the input.
1601 // Only do double registers for now.
1604 if (MRI
.getRegClass(R
) != &Hexagon::DoubleRegsRegClass
)
1607 if (HBS::isEqual(InpRC
, B
, RC
, 0, W
))
1608 Out
.Sub
= Hexagon::isub_lo
;
1609 else if (HBS::isEqual(InpRC
, B
, RC
, W
, W
))
1610 Out
.Sub
= Hexagon::isub_hi
;
1614 if (HBS::isTransparentCopy(Out
, Inp
, MRI
))
1620 bool CopyGeneration::processBlock(MachineBasicBlock
&B
,
1621 const RegisterSet
&AVs
) {
1622 if (!BT
.reached(&B
))
1624 RegisterSet
AVB(AVs
);
1625 bool Changed
= false;
1628 for (auto I
= B
.begin(), E
= B
.end(); I
!= E
; ++I
, AVB
.insert(Defs
)) {
1630 HBS::getInstrDefs(*I
, Defs
);
1632 unsigned Opc
= I
->getOpcode();
1633 if (CopyPropagation::isCopyReg(Opc
, false) ||
1634 ConstGeneration::isTfrConst(*I
))
1637 DebugLoc DL
= I
->getDebugLoc();
1638 auto At
= I
->isPHI() ? B
.getFirstNonPHI() : I
;
1640 for (Register R
= Defs
.find_first(); R
; R
= Defs
.find_next(R
)) {
1641 BitTracker::RegisterRef MR
;
1642 auto *FRC
= HBS::getFinalVRegClass(R
, MRI
);
1644 if (findMatch(R
, MR
, AVB
)) {
1645 Register NewR
= MRI
.createVirtualRegister(FRC
);
1646 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::COPY
), NewR
)
1647 .addReg(MR
.Reg
, 0, MR
.Sub
);
1648 BT
.put(BitTracker::RegisterRef(NewR
), BT
.get(MR
));
1649 HBS::replaceReg(R
, NewR
, MRI
);
1650 Forbidden
.insert(R
);
1654 if (FRC
== &Hexagon::DoubleRegsRegClass
||
1655 FRC
== &Hexagon::HvxWRRegClass
) {
1656 // Try to generate REG_SEQUENCE.
1657 unsigned SubLo
= HRI
.getHexagonSubRegIndex(*FRC
, Hexagon::ps_sub_lo
);
1658 unsigned SubHi
= HRI
.getHexagonSubRegIndex(*FRC
, Hexagon::ps_sub_hi
);
1659 BitTracker::RegisterRef TL
= { R
, SubLo
};
1660 BitTracker::RegisterRef TH
= { R
, SubHi
};
1661 BitTracker::RegisterRef ML
, MH
;
1662 if (findMatch(TL
, ML
, AVB
) && findMatch(TH
, MH
, AVB
)) {
1663 auto *FRC
= HBS::getFinalVRegClass(R
, MRI
);
1664 Register NewR
= MRI
.createVirtualRegister(FRC
);
1665 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::REG_SEQUENCE
), NewR
)
1666 .addReg(ML
.Reg
, 0, ML
.Sub
)
1668 .addReg(MH
.Reg
, 0, MH
.Sub
)
1670 BT
.put(BitTracker::RegisterRef(NewR
), BT
.get(R
));
1671 HBS::replaceReg(R
, NewR
, MRI
);
1672 Forbidden
.insert(R
);
1681 bool CopyPropagation::isCopyReg(unsigned Opc
, bool NoConv
) {
1683 case TargetOpcode::COPY
:
1684 case TargetOpcode::REG_SEQUENCE
:
1685 case Hexagon::A4_combineir
:
1686 case Hexagon::A4_combineri
:
1688 case Hexagon::A2_tfr
:
1689 case Hexagon::A2_tfrp
:
1690 case Hexagon::A2_combinew
:
1691 case Hexagon::V6_vcombine
:
1699 bool CopyPropagation::propagateRegCopy(MachineInstr
&MI
) {
1700 bool Changed
= false;
1701 unsigned Opc
= MI
.getOpcode();
1702 BitTracker::RegisterRef RD
= MI
.getOperand(0);
1703 assert(MI
.getOperand(0).getSubReg() == 0);
1706 case TargetOpcode::COPY
:
1707 case Hexagon::A2_tfr
:
1708 case Hexagon::A2_tfrp
: {
1709 BitTracker::RegisterRef RS
= MI
.getOperand(1);
1710 if (!HBS::isTransparentCopy(RD
, RS
, MRI
))
1713 Changed
= HBS::replaceRegWithSub(RD
.Reg
, RS
.Reg
, RS
.Sub
, MRI
);
1715 Changed
= HBS::replaceReg(RD
.Reg
, RS
.Reg
, MRI
);
1718 case TargetOpcode::REG_SEQUENCE
: {
1719 BitTracker::RegisterRef SL
, SH
;
1720 if (HBS::parseRegSequence(MI
, SL
, SH
, MRI
)) {
1721 const TargetRegisterClass
&RC
= *MRI
.getRegClass(RD
.Reg
);
1722 unsigned SubLo
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_lo
);
1723 unsigned SubHi
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_hi
);
1724 Changed
= HBS::replaceSubWithSub(RD
.Reg
, SubLo
, SL
.Reg
, SL
.Sub
, MRI
);
1725 Changed
|= HBS::replaceSubWithSub(RD
.Reg
, SubHi
, SH
.Reg
, SH
.Sub
, MRI
);
1729 case Hexagon::A2_combinew
:
1730 case Hexagon::V6_vcombine
: {
1731 const TargetRegisterClass
&RC
= *MRI
.getRegClass(RD
.Reg
);
1732 unsigned SubLo
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_lo
);
1733 unsigned SubHi
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_hi
);
1734 BitTracker::RegisterRef RH
= MI
.getOperand(1), RL
= MI
.getOperand(2);
1735 Changed
= HBS::replaceSubWithSub(RD
.Reg
, SubLo
, RL
.Reg
, RL
.Sub
, MRI
);
1736 Changed
|= HBS::replaceSubWithSub(RD
.Reg
, SubHi
, RH
.Reg
, RH
.Sub
, MRI
);
1739 case Hexagon::A4_combineir
:
1740 case Hexagon::A4_combineri
: {
1741 unsigned SrcX
= (Opc
== Hexagon::A4_combineir
) ? 2 : 1;
1742 unsigned Sub
= (Opc
== Hexagon::A4_combineir
) ? Hexagon::isub_lo
1744 BitTracker::RegisterRef RS
= MI
.getOperand(SrcX
);
1745 Changed
= HBS::replaceSubWithSub(RD
.Reg
, Sub
, RS
.Reg
, RS
.Sub
, MRI
);
1752 bool CopyPropagation::processBlock(MachineBasicBlock
&B
, const RegisterSet
&) {
1753 std::vector
<MachineInstr
*> Instrs
;
1754 for (MachineInstr
&MI
: llvm::reverse(B
))
1755 Instrs
.push_back(&MI
);
1757 bool Changed
= false;
1758 for (auto *I
: Instrs
) {
1759 unsigned Opc
= I
->getOpcode();
1760 if (!CopyPropagation::isCopyReg(Opc
, true))
1762 Changed
|= propagateRegCopy(*I
);
1770 // Recognize patterns that can be simplified and replace them with the
1772 // This is by no means complete
1773 class BitSimplification
: public Transformation
{
1775 BitSimplification(BitTracker
&bt
, const MachineDominatorTree
&mdt
,
1776 const HexagonInstrInfo
&hii
, const HexagonRegisterInfo
&hri
,
1777 MachineRegisterInfo
&mri
, MachineFunction
&mf
)
1778 : Transformation(true), MDT(mdt
), HII(hii
), HRI(hri
), MRI(mri
),
1781 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1784 struct RegHalf
: public BitTracker::RegisterRef
{
1785 bool Low
; // Low/High halfword.
1788 bool matchHalf(unsigned SelfR
, const BitTracker::RegisterCell
&RC
,
1789 unsigned B
, RegHalf
&RH
);
1790 bool validateReg(BitTracker::RegisterRef R
, unsigned Opc
, unsigned OpNum
);
1792 bool matchPackhl(unsigned SelfR
, const BitTracker::RegisterCell
&RC
,
1793 BitTracker::RegisterRef
&Rs
, BitTracker::RegisterRef
&Rt
);
1794 unsigned getCombineOpcode(bool HLow
, bool LLow
);
1796 bool genStoreUpperHalf(MachineInstr
*MI
);
1797 bool genStoreImmediate(MachineInstr
*MI
);
1798 bool genPackhl(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1799 const BitTracker::RegisterCell
&RC
);
1800 bool genExtractHalf(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1801 const BitTracker::RegisterCell
&RC
);
1802 bool genCombineHalf(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1803 const BitTracker::RegisterCell
&RC
);
1804 bool genExtractLow(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1805 const BitTracker::RegisterCell
&RC
);
1806 bool genBitSplit(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1807 const BitTracker::RegisterCell
&RC
, const RegisterSet
&AVs
);
1808 bool simplifyTstbit(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1809 const BitTracker::RegisterCell
&RC
);
1810 bool simplifyExtractLow(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1811 const BitTracker::RegisterCell
&RC
, const RegisterSet
&AVs
);
1812 bool simplifyRCmp0(MachineInstr
*MI
, BitTracker::RegisterRef RD
);
1814 // Cache of created instructions to avoid creating duplicates.
1815 // XXX Currently only used by genBitSplit.
1816 std::vector
<MachineInstr
*> NewMIs
;
1818 const MachineDominatorTree
&MDT
;
1819 const HexagonInstrInfo
&HII
;
1820 const HexagonRegisterInfo
&HRI
;
1821 MachineRegisterInfo
&MRI
;
1822 MachineFunction
&MF
;
1826 } // end anonymous namespace
1828 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
1829 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
1830 // set the information about the found register in RH.
1831 bool BitSimplification::matchHalf(unsigned SelfR
,
1832 const BitTracker::RegisterCell
&RC
, unsigned B
, RegHalf
&RH
) {
1833 // XXX This could be searching in the set of available registers, in case
1834 // the match is not exact.
1836 // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1837 // register and all the bits B..B+15 match between RC and the register.
1838 // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1839 // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1842 while (I
< B
+16 && RC
[I
].num())
1847 Register Reg
= RC
[I
].RefI
.Reg
;
1848 unsigned P
= RC
[I
].RefI
.Pos
; // The RefI.Pos will be advanced by I-B.
1851 unsigned Pos
= P
- (I
-B
);
1853 if (Reg
== 0 || Reg
== SelfR
) // Don't match "self".
1855 if (!Reg
.isVirtual())
1860 const BitTracker::RegisterCell
&SC
= BT
.lookup(Reg
);
1861 if (Pos
+16 > SC
.width())
1864 for (unsigned i
= 0; i
< 16; ++i
) {
1865 const BitTracker::BitValue
&RV
= RC
[i
+B
];
1866 if (RV
.Type
== BitTracker::BitValue::Ref
) {
1867 if (RV
.RefI
.Reg
!= Reg
)
1869 if (RV
.RefI
.Pos
!= i
+Pos
)
1873 if (RC
[i
+B
] != SC
[i
+Pos
])
1880 Sub
= Hexagon::isub_lo
;
1884 Sub
= Hexagon::isub_lo
;
1888 Sub
= Hexagon::isub_hi
;
1892 Sub
= Hexagon::isub_hi
;
1902 // If the subregister is not valid with the register, set it to 0.
1903 if (!HBS::getFinalVRegClass(RH
, MRI
))
1909 bool BitSimplification::validateReg(BitTracker::RegisterRef R
, unsigned Opc
,
1911 auto *OpRC
= HII
.getRegClass(HII
.get(Opc
), OpNum
, &HRI
, MF
);
1912 auto *RRC
= HBS::getFinalVRegClass(R
, MRI
);
1913 return OpRC
->hasSubClassEq(RRC
);
1916 // Check if RC matches the pattern of a S2_packhl. If so, return true and
1917 // set the inputs Rs and Rt.
1918 bool BitSimplification::matchPackhl(unsigned SelfR
,
1919 const BitTracker::RegisterCell
&RC
, BitTracker::RegisterRef
&Rs
,
1920 BitTracker::RegisterRef
&Rt
) {
1921 RegHalf L1
, H1
, L2
, H2
;
1923 if (!matchHalf(SelfR
, RC
, 0, L2
) || !matchHalf(SelfR
, RC
, 16, L1
))
1925 if (!matchHalf(SelfR
, RC
, 32, H2
) || !matchHalf(SelfR
, RC
, 48, H1
))
1928 // Rs = H1.L1, Rt = H2.L2
1929 if (H1
.Reg
!= L1
.Reg
|| H1
.Sub
!= L1
.Sub
|| H1
.Low
|| !L1
.Low
)
1931 if (H2
.Reg
!= L2
.Reg
|| H2
.Sub
!= L2
.Sub
|| H2
.Low
|| !L2
.Low
)
1939 unsigned BitSimplification::getCombineOpcode(bool HLow
, bool LLow
) {
1940 return HLow
? LLow
? Hexagon::A2_combine_ll
1941 : Hexagon::A2_combine_lh
1942 : LLow
? Hexagon::A2_combine_hl
1943 : Hexagon::A2_combine_hh
;
1946 // If MI stores the upper halfword of a register (potentially obtained via
1947 // shifts or extracts), replace it with a storerf instruction. This could
1948 // cause the "extraction" code to become dead.
1949 bool BitSimplification::genStoreUpperHalf(MachineInstr
*MI
) {
1950 unsigned Opc
= MI
->getOpcode();
1951 if (Opc
!= Hexagon::S2_storerh_io
)
1954 MachineOperand
&ValOp
= MI
->getOperand(2);
1955 BitTracker::RegisterRef RS
= ValOp
;
1956 if (!BT
.has(RS
.Reg
))
1958 const BitTracker::RegisterCell
&RC
= BT
.lookup(RS
.Reg
);
1960 unsigned B
= (RS
.Sub
== Hexagon::isub_hi
) ? 32 : 0;
1961 if (!matchHalf(0, RC
, B
, H
))
1965 MI
->setDesc(HII
.get(Hexagon::S2_storerf_io
));
1966 ValOp
.setReg(H
.Reg
);
1967 ValOp
.setSubReg(H
.Sub
);
1971 // If MI stores a value known at compile-time, and the value is within a range
1972 // that avoids using constant-extenders, replace it with a store-immediate.
1973 bool BitSimplification::genStoreImmediate(MachineInstr
*MI
) {
1974 unsigned Opc
= MI
->getOpcode();
1977 case Hexagon::S2_storeri_io
:
1980 case Hexagon::S2_storerh_io
:
1983 case Hexagon::S2_storerb_io
:
1989 // Avoid stores to frame-indices (due to an unknown offset).
1990 if (!MI
->getOperand(0).isReg())
1992 MachineOperand
&OffOp
= MI
->getOperand(1);
1996 int64_t Off
= OffOp
.getImm();
1997 // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
1998 if (!isUIntN(6+Align
, Off
) || (Off
& ((1<<Align
)-1)))
2001 BitTracker::RegisterRef RS
= MI
->getOperand(2);
2002 if (!BT
.has(RS
.Reg
))
2004 const BitTracker::RegisterCell
&RC
= BT
.lookup(RS
.Reg
);
2006 if (!HBS::getConst(RC
, 0, RC
.width(), U
))
2009 // Only consider 8-bit values to avoid constant-extenders.
2012 case Hexagon::S2_storerb_io
:
2015 case Hexagon::S2_storerh_io
:
2018 case Hexagon::S2_storeri_io
:
2022 // Opc is already checked above to be one of the three store instructions.
2023 // This silences a -Wuninitialized false positive on GCC 5.4.
2024 llvm_unreachable("Unexpected store opcode");
2029 MI
->removeOperand(2);
2031 case Hexagon::S2_storerb_io
:
2032 MI
->setDesc(HII
.get(Hexagon::S4_storeirb_io
));
2034 case Hexagon::S2_storerh_io
:
2035 MI
->setDesc(HII
.get(Hexagon::S4_storeirh_io
));
2037 case Hexagon::S2_storeri_io
:
2038 MI
->setDesc(HII
.get(Hexagon::S4_storeiri_io
));
2041 MI
->addOperand(MachineOperand::CreateImm(V
));
2045 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
2046 // last instruction in a sequence that results in something equivalent to
2047 // the pack-halfwords. The intent is to cause the entire sequence to become
2049 bool BitSimplification::genPackhl(MachineInstr
*MI
,
2050 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
) {
2051 unsigned Opc
= MI
->getOpcode();
2052 if (Opc
== Hexagon::S2_packhl
)
2054 BitTracker::RegisterRef Rs
, Rt
;
2055 if (!matchPackhl(RD
.Reg
, RC
, Rs
, Rt
))
2057 if (!validateReg(Rs
, Hexagon::S2_packhl
, 1) ||
2058 !validateReg(Rt
, Hexagon::S2_packhl
, 2))
2061 MachineBasicBlock
&B
= *MI
->getParent();
2062 Register NewR
= MRI
.createVirtualRegister(&Hexagon::DoubleRegsRegClass
);
2063 DebugLoc DL
= MI
->getDebugLoc();
2064 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2065 : MachineBasicBlock::iterator(MI
);
2066 BuildMI(B
, At
, DL
, HII
.get(Hexagon::S2_packhl
), NewR
)
2067 .addReg(Rs
.Reg
, 0, Rs
.Sub
)
2068 .addReg(Rt
.Reg
, 0, Rt
.Sub
);
2069 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
2070 BT
.put(BitTracker::RegisterRef(NewR
), RC
);
2074 // If MI produces halfword of the input in the low half of the output,
2075 // replace it with zero-extend or extractu.
2076 bool BitSimplification::genExtractHalf(MachineInstr
*MI
,
2077 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
) {
2079 // Check for halfword in low 16 bits, zeros elsewhere.
2080 if (!matchHalf(RD
.Reg
, RC
, 0, L
) || !HBS::isZero(RC
, 16, 16))
2083 unsigned Opc
= MI
->getOpcode();
2084 MachineBasicBlock
&B
= *MI
->getParent();
2085 DebugLoc DL
= MI
->getDebugLoc();
2087 // Prefer zxth, since zxth can go in any slot, while extractu only in
2090 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2091 : MachineBasicBlock::iterator(MI
);
2092 if (L
.Low
&& Opc
!= Hexagon::A2_zxth
) {
2093 if (validateReg(L
, Hexagon::A2_zxth
, 1)) {
2094 NewR
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
2095 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_zxth
), NewR
)
2096 .addReg(L
.Reg
, 0, L
.Sub
);
2098 } else if (!L
.Low
&& Opc
!= Hexagon::S2_lsr_i_r
) {
2099 if (validateReg(L
, Hexagon::S2_lsr_i_r
, 1)) {
2100 NewR
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
2101 BuildMI(B
, MI
, DL
, HII
.get(Hexagon::S2_lsr_i_r
), NewR
)
2102 .addReg(L
.Reg
, 0, L
.Sub
)
2108 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
2109 BT
.put(BitTracker::RegisterRef(NewR
), RC
);
2113 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
2115 bool BitSimplification::genCombineHalf(MachineInstr
*MI
,
2116 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
) {
2118 // Check for combine h/l
2119 if (!matchHalf(RD
.Reg
, RC
, 0, L
) || !matchHalf(RD
.Reg
, RC
, 16, H
))
2121 // Do nothing if this is just a reg copy.
2122 if (L
.Reg
== H
.Reg
&& L
.Sub
== H
.Sub
&& !H
.Low
&& L
.Low
)
2125 unsigned Opc
= MI
->getOpcode();
2126 unsigned COpc
= getCombineOpcode(H
.Low
, L
.Low
);
2129 if (!validateReg(H
, COpc
, 1) || !validateReg(L
, COpc
, 2))
2132 MachineBasicBlock
&B
= *MI
->getParent();
2133 DebugLoc DL
= MI
->getDebugLoc();
2134 Register NewR
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
2135 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2136 : MachineBasicBlock::iterator(MI
);
2137 BuildMI(B
, At
, DL
, HII
.get(COpc
), NewR
)
2138 .addReg(H
.Reg
, 0, H
.Sub
)
2139 .addReg(L
.Reg
, 0, L
.Sub
);
2140 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
2141 BT
.put(BitTracker::RegisterRef(NewR
), RC
);
2145 // If MI resets high bits of a register and keeps the lower ones, replace it
2146 // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
2147 bool BitSimplification::genExtractLow(MachineInstr
*MI
,
2148 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
) {
2149 unsigned Opc
= MI
->getOpcode();
2151 case Hexagon::A2_zxtb
:
2152 case Hexagon::A2_zxth
:
2153 case Hexagon::S2_extractu
:
2156 if (Opc
== Hexagon::A2_andir
&& MI
->getOperand(2).isImm()) {
2157 int32_t Imm
= MI
->getOperand(2).getImm();
2162 if (MI
->hasUnmodeledSideEffects() || MI
->isInlineAsm())
2164 unsigned W
= RC
.width();
2165 while (W
> 0 && RC
[W
-1].is(0))
2167 if (W
== 0 || W
== RC
.width())
2169 unsigned NewOpc
= (W
== 8) ? Hexagon::A2_zxtb
2170 : (W
== 16) ? Hexagon::A2_zxth
2171 : (W
< 10) ? Hexagon::A2_andir
2172 : Hexagon::S2_extractu
;
2173 MachineBasicBlock
&B
= *MI
->getParent();
2174 DebugLoc DL
= MI
->getDebugLoc();
2176 for (auto &Op
: MI
->uses()) {
2179 BitTracker::RegisterRef RS
= Op
;
2180 if (!BT
.has(RS
.Reg
))
2182 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
2184 if (!HBS::getSubregMask(RS
, BN
, BW
, MRI
))
2186 if (BW
< W
|| !HBS::isEqual(RC
, 0, SC
, BN
, W
))
2188 if (!validateReg(RS
, NewOpc
, 1))
2191 Register NewR
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
2192 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2193 : MachineBasicBlock::iterator(MI
);
2194 auto MIB
= BuildMI(B
, At
, DL
, HII
.get(NewOpc
), NewR
)
2195 .addReg(RS
.Reg
, 0, RS
.Sub
);
2196 if (NewOpc
== Hexagon::A2_andir
)
2197 MIB
.addImm((1 << W
) - 1);
2198 else if (NewOpc
== Hexagon::S2_extractu
)
2199 MIB
.addImm(W
).addImm(0);
2200 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
2201 BT
.put(BitTracker::RegisterRef(NewR
), RC
);
2207 bool BitSimplification::genBitSplit(MachineInstr
*MI
,
2208 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
,
2209 const RegisterSet
&AVs
) {
2212 if (MaxBitSplit
.getNumOccurrences()) {
2213 if (CountBitSplit
>= MaxBitSplit
)
2217 unsigned Opc
= MI
->getOpcode();
2219 case Hexagon::A4_bitsplit
:
2220 case Hexagon::A4_bitspliti
:
2224 unsigned W
= RC
.width();
2228 auto ctlz
= [] (const BitTracker::RegisterCell
&C
) -> unsigned {
2229 unsigned Z
= C
.width();
2230 while (Z
> 0 && C
[Z
-1].is(0))
2232 return C
.width() - Z
;
2235 // Count the number of leading zeros in the target RC.
2236 unsigned Z
= ctlz(RC
);
2237 if (Z
== 0 || Z
== W
)
2240 // A simplistic analysis: assume the source register (the one being split)
2241 // is fully unknown, and that all its bits are self-references.
2242 const BitTracker::BitValue
&B0
= RC
[0];
2243 if (B0
.Type
!= BitTracker::BitValue::Ref
)
2246 unsigned SrcR
= B0
.RefI
.Reg
;
2248 unsigned Pos
= B0
.RefI
.Pos
;
2250 // All the non-zero bits should be consecutive bits from the same register.
2251 for (unsigned i
= 1; i
< W
-Z
; ++i
) {
2252 const BitTracker::BitValue
&V
= RC
[i
];
2253 if (V
.Type
!= BitTracker::BitValue::Ref
)
2255 if (V
.RefI
.Reg
!= SrcR
|| V
.RefI
.Pos
!= Pos
+i
)
2259 // Now, find the other bitfield among AVs.
2260 for (unsigned S
= AVs
.find_first(); S
; S
= AVs
.find_next(S
)) {
2261 // The number of leading zeros here should be the number of trailing
2263 unsigned SRC
= MRI
.getRegClass(S
)->getID();
2264 if (SRC
!= Hexagon::IntRegsRegClassID
&&
2265 SRC
!= Hexagon::DoubleRegsRegClassID
)
2269 const BitTracker::RegisterCell
&SC
= BT
.lookup(S
);
2270 if (SC
.width() != W
|| ctlz(SC
) != W
-Z
)
2272 // The Z lower bits should now match SrcR.
2273 const BitTracker::BitValue
&S0
= SC
[0];
2274 if (S0
.Type
!= BitTracker::BitValue::Ref
|| S0
.RefI
.Reg
!= SrcR
)
2276 unsigned P
= S0
.RefI
.Pos
;
2278 if (Pos
<= P
&& (Pos
+ W
-Z
) != P
)
2280 if (P
< Pos
&& (P
+ Z
) != Pos
)
2282 // The starting bitfield position must be at a subregister boundary.
2283 if (std::min(P
, Pos
) != 0 && std::min(P
, Pos
) != 32)
2287 for (I
= 1; I
< Z
; ++I
) {
2288 const BitTracker::BitValue
&V
= SC
[I
];
2289 if (V
.Type
!= BitTracker::BitValue::Ref
)
2291 if (V
.RefI
.Reg
!= SrcR
|| V
.RefI
.Pos
!= P
+I
)
2297 // Generate bitsplit where S is defined.
2298 if (MaxBitSplit
.getNumOccurrences())
2300 MachineInstr
*DefS
= MRI
.getVRegDef(S
);
2301 assert(DefS
!= nullptr);
2302 DebugLoc DL
= DefS
->getDebugLoc();
2303 MachineBasicBlock
&B
= *DefS
->getParent();
2304 auto At
= DefS
->isPHI() ? B
.getFirstNonPHI()
2305 : MachineBasicBlock::iterator(DefS
);
2306 if (MRI
.getRegClass(SrcR
)->getID() == Hexagon::DoubleRegsRegClassID
)
2307 SrcSR
= (std::min(Pos
, P
) == 32) ? Hexagon::isub_hi
: Hexagon::isub_lo
;
2308 if (!validateReg({SrcR
,SrcSR
}, Hexagon::A4_bitspliti
, 1))
2310 unsigned ImmOp
= Pos
<= P
? W
-Z
: Z
;
2312 // Find an existing bitsplit instruction if one already exists.
2314 for (MachineInstr
*In
: NewMIs
) {
2315 if (In
->getOpcode() != Hexagon::A4_bitspliti
)
2317 MachineOperand
&Op1
= In
->getOperand(1);
2318 if (Op1
.getReg() != SrcR
|| Op1
.getSubReg() != SrcSR
)
2320 if (In
->getOperand(2).getImm() != ImmOp
)
2322 // Check if the target register is available here.
2323 MachineOperand
&Op0
= In
->getOperand(0);
2324 MachineInstr
*DefI
= MRI
.getVRegDef(Op0
.getReg());
2325 assert(DefI
!= nullptr);
2326 if (!MDT
.dominates(DefI
, &*At
))
2329 // Found one that can be reused.
2330 assert(Op0
.getSubReg() == 0);
2331 NewR
= Op0
.getReg();
2335 NewR
= MRI
.createVirtualRegister(&Hexagon::DoubleRegsRegClass
);
2336 auto NewBS
= BuildMI(B
, At
, DL
, HII
.get(Hexagon::A4_bitspliti
), NewR
)
2337 .addReg(SrcR
, 0, SrcSR
)
2339 NewMIs
.push_back(NewBS
);
2342 HBS::replaceRegWithSub(RD
.Reg
, NewR
, Hexagon::isub_lo
, MRI
);
2343 HBS::replaceRegWithSub(S
, NewR
, Hexagon::isub_hi
, MRI
);
2345 HBS::replaceRegWithSub(S
, NewR
, Hexagon::isub_lo
, MRI
);
2346 HBS::replaceRegWithSub(RD
.Reg
, NewR
, Hexagon::isub_hi
, MRI
);
2354 // Check for tstbit simplification opportunity, where the bit being checked
2355 // can be tracked back to another register. For example:
2356 // %2 = S2_lsr_i_r %1, 5
2357 // %3 = S2_tstbit_i %2, 0
2359 // %3 = S2_tstbit_i %1, 5
2360 bool BitSimplification::simplifyTstbit(MachineInstr
*MI
,
2361 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
) {
2362 unsigned Opc
= MI
->getOpcode();
2363 if (Opc
!= Hexagon::S2_tstbit_i
)
2366 unsigned BN
= MI
->getOperand(2).getImm();
2367 BitTracker::RegisterRef RS
= MI
->getOperand(1);
2369 DebugLoc DL
= MI
->getDebugLoc();
2370 if (!BT
.has(RS
.Reg
) || !HBS::getSubregMask(RS
, F
, W
, MRI
))
2372 MachineBasicBlock
&B
= *MI
->getParent();
2373 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2374 : MachineBasicBlock::iterator(MI
);
2376 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
2377 const BitTracker::BitValue
&V
= SC
[F
+BN
];
2378 if (V
.Type
== BitTracker::BitValue::Ref
&& V
.RefI
.Reg
!= RS
.Reg
) {
2379 const TargetRegisterClass
*TC
= MRI
.getRegClass(V
.RefI
.Reg
);
2380 // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
2381 // a double register, need to use a subregister and adjust bit
2383 unsigned P
= std::numeric_limits
<unsigned>::max();
2384 BitTracker::RegisterRef
RR(V
.RefI
.Reg
, 0);
2385 if (TC
== &Hexagon::DoubleRegsRegClass
) {
2387 RR
.Sub
= Hexagon::isub_lo
;
2390 RR
.Sub
= Hexagon::isub_hi
;
2392 } else if (TC
== &Hexagon::IntRegsRegClass
) {
2395 if (P
!= std::numeric_limits
<unsigned>::max()) {
2396 Register NewR
= MRI
.createVirtualRegister(&Hexagon::PredRegsRegClass
);
2397 BuildMI(B
, At
, DL
, HII
.get(Hexagon::S2_tstbit_i
), NewR
)
2398 .addReg(RR
.Reg
, 0, RR
.Sub
)
2400 HBS::replaceReg(RD
.Reg
, NewR
, MRI
);
2404 } else if (V
.is(0) || V
.is(1)) {
2405 Register NewR
= MRI
.createVirtualRegister(&Hexagon::PredRegsRegClass
);
2406 unsigned NewOpc
= V
.is(0) ? Hexagon::PS_false
: Hexagon::PS_true
;
2407 BuildMI(B
, At
, DL
, HII
.get(NewOpc
), NewR
);
2408 HBS::replaceReg(RD
.Reg
, NewR
, MRI
);
2415 // Detect whether RD is a bitfield extract (sign- or zero-extended) of
2416 // some register from the AVs set. Create a new corresponding instruction
2417 // at the location of MI. The intent is to recognize situations where
2418 // a sequence of instructions performs an operation that is equivalent to
2419 // an extract operation, such as a shift left followed by a shift right.
2420 bool BitSimplification::simplifyExtractLow(MachineInstr
*MI
,
2421 BitTracker::RegisterRef RD
, const BitTracker::RegisterCell
&RC
,
2422 const RegisterSet
&AVs
) {
2425 if (MaxExtract
.getNumOccurrences()) {
2426 if (CountExtract
>= MaxExtract
)
2431 unsigned W
= RC
.width();
2436 // The code is mostly class-independent, except for the part that generates
2437 // the extract instruction, and establishes the source register (in case it
2438 // needs to use a subregister).
2439 const TargetRegisterClass
*FRC
= HBS::getFinalVRegClass(RD
, MRI
);
2440 if (FRC
!= &Hexagon::IntRegsRegClass
&& FRC
!= &Hexagon::DoubleRegsRegClass
)
2442 assert(RD
.Sub
== 0);
2445 // If the cell has a form of 00..0xx..x with k zeros and n remaining
2446 // bits, this could be an extractu of the n bits, but it could also be
2447 // an extractu of a longer field which happens to have 0s in the top
2449 // The same logic applies to sign-extended fields.
2451 // Do not check for the extended extracts, since it would expand the
2452 // search space quite a bit. The search may be expensive as it is.
2454 const BitTracker::BitValue
&TopV
= RC
[W
-1];
2456 // Eliminate candidates that have self-referential bits, since they
2457 // cannot be extracts from other registers. Also, skip registers that
2458 // have compile-time constant values.
2459 bool IsConst
= true;
2460 for (unsigned I
= 0; I
!= W
; ++I
) {
2461 const BitTracker::BitValue
&V
= RC
[I
];
2462 if (V
.Type
== BitTracker::BitValue::Ref
&& V
.RefI
.Reg
== RD
.Reg
)
2464 IsConst
= IsConst
&& (V
.is(0) || V
.is(1));
2469 if (TopV
.is(0) || TopV
.is(1)) {
2470 bool S
= TopV
.is(1);
2471 for (--W
; W
> 0 && RC
[W
-1].is(S
); --W
)
2475 // The sign bit must be a part of the field being extended.
2479 // This could still be a sign-extended extract.
2480 assert(TopV
.Type
== BitTracker::BitValue::Ref
);
2481 if (TopV
.RefI
.Reg
== RD
.Reg
|| TopV
.RefI
.Pos
== W
-1)
2483 for (--W
; W
> 0 && RC
[W
-1] == TopV
; --W
)
2485 // The top bits of RC are copies of TopV. One occurrence of TopV will
2486 // be a part of the field.
2491 // This would be just a copy. It should be handled elsewhere.
2496 dbgs() << __func__
<< " on reg: " << printReg(RD
.Reg
, &HRI
, RD
.Sub
)
2498 dbgs() << "Cell: " << RC
<< '\n';
2499 dbgs() << "Expected bitfield size: " << Len
<< " bits, "
2500 << (Signed
? "sign" : "zero") << "-extended\n";
2503 bool Changed
= false;
2505 for (unsigned R
= AVs
.find_first(); R
!= 0; R
= AVs
.find_next(R
)) {
2508 const BitTracker::RegisterCell
&SC
= BT
.lookup(R
);
2509 unsigned SW
= SC
.width();
2511 // The source can be longer than the destination, as long as its size is
2512 // a multiple of the size of the destination. Also, we would need to be
2513 // able to refer to the subregister in the source that would be of the
2514 // same size as the destination, but only check the sizes here.
2515 if (SW
< RW
|| (SW
% RW
) != 0)
2518 // The field can start at any offset in SC as long as it contains Len
2519 // bits and does not cross subregister boundary (if the source register
2520 // is longer than the destination).
2522 while (Off
<= SW
-Len
) {
2523 unsigned OE
= (Off
+Len
)/RW
;
2525 // The assumption here is that if the source (R) is longer than the
2526 // destination, then the destination is a sequence of words of
2527 // size RW, and each such word in R can be accessed via a subregister.
2529 // If the beginning and the end of the field cross the subregister
2530 // boundary, advance to the next subregister.
2534 if (HBS::isEqual(RC
, 0, SC
, Off
, Len
))
2543 unsigned ExtOpc
= 0;
2546 ExtOpc
= Signed
? Hexagon::A2_sxtb
: Hexagon::A2_zxtb
;
2548 ExtOpc
= Signed
? Hexagon::A2_sxth
: Hexagon::A2_zxth
;
2549 else if (Len
< 10 && !Signed
)
2550 ExtOpc
= Hexagon::A2_andir
;
2554 Signed
? (RW
== 32 ? Hexagon::S4_extract
: Hexagon::S4_extractp
)
2555 : (RW
== 32 ? Hexagon::S2_extractu
: Hexagon::S2_extractup
);
2558 // This only recognizes isub_lo and isub_hi.
2559 if (RW
!= SW
&& RW
*2 != SW
)
2562 SR
= (Off
/RW
== 0) ? Hexagon::isub_lo
: Hexagon::isub_hi
;
2565 if (!validateReg({R
,SR
}, ExtOpc
, 1))
2568 // Don't generate the same instruction as the one being optimized.
2569 if (MI
->getOpcode() == ExtOpc
) {
2570 // All possible ExtOpc's have the source in operand(1).
2571 const MachineOperand
&SrcOp
= MI
->getOperand(1);
2572 if (SrcOp
.getReg() == R
)
2576 DebugLoc DL
= MI
->getDebugLoc();
2577 MachineBasicBlock
&B
= *MI
->getParent();
2578 Register NewR
= MRI
.createVirtualRegister(FRC
);
2579 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2580 : MachineBasicBlock::iterator(MI
);
2581 auto MIB
= BuildMI(B
, At
, DL
, HII
.get(ExtOpc
), NewR
)
2584 case Hexagon::A2_sxtb
:
2585 case Hexagon::A2_zxtb
:
2586 case Hexagon::A2_sxth
:
2587 case Hexagon::A2_zxth
:
2589 case Hexagon::A2_andir
:
2590 MIB
.addImm((1u << Len
) - 1);
2592 case Hexagon::S4_extract
:
2593 case Hexagon::S2_extractu
:
2594 case Hexagon::S4_extractp
:
2595 case Hexagon::S2_extractup
:
2600 llvm_unreachable("Unexpected opcode");
2603 HBS::replaceReg(RD
.Reg
, NewR
, MRI
);
2604 BT
.put(BitTracker::RegisterRef(NewR
), RC
);
2612 bool BitSimplification::simplifyRCmp0(MachineInstr
*MI
,
2613 BitTracker::RegisterRef RD
) {
2614 unsigned Opc
= MI
->getOpcode();
2615 if (Opc
!= Hexagon::A4_rcmpeqi
&& Opc
!= Hexagon::A4_rcmpneqi
)
2617 MachineOperand
&CmpOp
= MI
->getOperand(2);
2618 if (!CmpOp
.isImm() || CmpOp
.getImm() != 0)
2621 const TargetRegisterClass
*FRC
= HBS::getFinalVRegClass(RD
, MRI
);
2622 if (FRC
!= &Hexagon::IntRegsRegClass
&& FRC
!= &Hexagon::DoubleRegsRegClass
)
2624 assert(RD
.Sub
== 0);
2626 MachineBasicBlock
&B
= *MI
->getParent();
2627 const DebugLoc
&DL
= MI
->getDebugLoc();
2628 auto At
= MI
->isPHI() ? B
.getFirstNonPHI()
2629 : MachineBasicBlock::iterator(MI
);
2631 bool KnownNZ
= false;
2633 BitTracker::RegisterRef SR
= MI
->getOperand(1);
2634 if (!BT
.has(SR
.Reg
))
2636 const BitTracker::RegisterCell
&SC
= BT
.lookup(SR
.Reg
);
2638 if (!HBS::getSubregMask(SR
, F
, W
, MRI
))
2641 for (uint16_t I
= F
; I
!= F
+W
; ++I
) {
2642 const BitTracker::BitValue
&V
= SC
[I
];
2649 auto ReplaceWithConst
= [&](int C
) {
2650 Register NewR
= MRI
.createVirtualRegister(FRC
);
2651 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrsi
), NewR
)
2653 HBS::replaceReg(RD
.Reg
, NewR
, MRI
);
2654 BitTracker::RegisterCell
NewRC(W
);
2655 for (uint16_t I
= 0; I
!= W
; ++I
) {
2656 NewRC
[I
] = BitTracker::BitValue(C
& 1);
2657 C
= unsigned(C
) >> 1;
2659 BT
.put(BitTracker::RegisterRef(NewR
), NewRC
);
2663 auto IsNonZero
= [] (const MachineOperand
&Op
) {
2664 if (Op
.isGlobal() || Op
.isBlockAddress())
2667 return Op
.getImm() != 0;
2669 return !Op
.getCImm()->isZero();
2671 return !Op
.getFPImm()->isZero();
2675 auto IsZero
= [] (const MachineOperand
&Op
) {
2676 if (Op
.isGlobal() || Op
.isBlockAddress())
2679 return Op
.getImm() == 0;
2681 return Op
.getCImm()->isZero();
2683 return Op
.getFPImm()->isZero();
2687 // If the source register is known to be 0 or non-0, the comparison can
2688 // be folded to a load of a constant.
2689 if (KnownZ
|| KnownNZ
) {
2690 assert(KnownZ
!= KnownNZ
&& "Register cannot be both 0 and non-0");
2691 return ReplaceWithConst(KnownZ
== (Opc
== Hexagon::A4_rcmpeqi
));
2694 // Special case: if the compare comes from a C2_muxii, then we know the
2695 // two possible constants that can be the source value.
2696 MachineInstr
*InpDef
= MRI
.getVRegDef(SR
.Reg
);
2699 if (SR
.Sub
== 0 && InpDef
->getOpcode() == Hexagon::C2_muxii
) {
2700 MachineOperand
&Src1
= InpDef
->getOperand(2);
2701 MachineOperand
&Src2
= InpDef
->getOperand(3);
2702 // Check if both are non-zero.
2703 bool KnownNZ1
= IsNonZero(Src1
), KnownNZ2
= IsNonZero(Src2
);
2704 if (KnownNZ1
&& KnownNZ2
)
2705 return ReplaceWithConst(Opc
== Hexagon::A4_rcmpneqi
);
2706 // Check if both are zero.
2707 bool KnownZ1
= IsZero(Src1
), KnownZ2
= IsZero(Src2
);
2708 if (KnownZ1
&& KnownZ2
)
2709 return ReplaceWithConst(Opc
== Hexagon::A4_rcmpeqi
);
2711 // If for both operands we know that they are either 0 or non-0,
2712 // replace the comparison with a C2_muxii, using the same predicate
2713 // register, but with operands substituted with 0/1 accordingly.
2714 if ((KnownZ1
|| KnownNZ1
) && (KnownZ2
|| KnownNZ2
)) {
2715 Register NewR
= MRI
.createVirtualRegister(FRC
);
2716 BuildMI(B
, At
, DL
, HII
.get(Hexagon::C2_muxii
), NewR
)
2717 .addReg(InpDef
->getOperand(1).getReg())
2718 .addImm(KnownZ1
== (Opc
== Hexagon::A4_rcmpeqi
))
2719 .addImm(KnownZ2
== (Opc
== Hexagon::A4_rcmpeqi
));
2720 HBS::replaceReg(RD
.Reg
, NewR
, MRI
);
2721 // Create a new cell with only the least significant bit unknown.
2722 BitTracker::RegisterCell
NewRC(W
);
2723 NewRC
[0] = BitTracker::BitValue::self();
2724 NewRC
.fill(1, W
, BitTracker::BitValue::Zero
);
2725 BT
.put(BitTracker::RegisterRef(NewR
), NewRC
);
2733 bool BitSimplification::processBlock(MachineBasicBlock
&B
,
2734 const RegisterSet
&AVs
) {
2735 if (!BT
.reached(&B
))
2737 bool Changed
= false;
2738 RegisterSet AVB
= AVs
;
2741 for (auto I
= B
.begin(), E
= B
.end(); I
!= E
; ++I
, AVB
.insert(Defs
)) {
2742 MachineInstr
*MI
= &*I
;
2744 HBS::getInstrDefs(*MI
, Defs
);
2746 unsigned Opc
= MI
->getOpcode();
2747 if (Opc
== TargetOpcode::COPY
|| Opc
== TargetOpcode::REG_SEQUENCE
)
2750 if (MI
->mayStore()) {
2751 bool T
= genStoreUpperHalf(MI
);
2752 T
= T
|| genStoreImmediate(MI
);
2757 if (Defs
.count() != 1)
2759 const MachineOperand
&Op0
= MI
->getOperand(0);
2760 if (!Op0
.isReg() || !Op0
.isDef())
2762 BitTracker::RegisterRef RD
= Op0
;
2763 if (!BT
.has(RD
.Reg
))
2765 const TargetRegisterClass
*FRC
= HBS::getFinalVRegClass(RD
, MRI
);
2766 const BitTracker::RegisterCell
&RC
= BT
.lookup(RD
.Reg
);
2768 if (FRC
->getID() == Hexagon::DoubleRegsRegClassID
) {
2769 bool T
= genPackhl(MI
, RD
, RC
);
2770 T
= T
|| simplifyExtractLow(MI
, RD
, RC
, AVB
);
2775 if (FRC
->getID() == Hexagon::IntRegsRegClassID
) {
2776 bool T
= genBitSplit(MI
, RD
, RC
, AVB
);
2777 T
= T
|| simplifyExtractLow(MI
, RD
, RC
, AVB
);
2778 T
= T
|| genExtractHalf(MI
, RD
, RC
);
2779 T
= T
|| genCombineHalf(MI
, RD
, RC
);
2780 T
= T
|| genExtractLow(MI
, RD
, RC
);
2781 T
= T
|| simplifyRCmp0(MI
, RD
);
2786 if (FRC
->getID() == Hexagon::PredRegsRegClassID
) {
2787 bool T
= simplifyTstbit(MI
, RD
, RC
);
2795 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction
&MF
) {
2796 if (skipFunction(MF
.getFunction()))
2799 auto &HST
= MF
.getSubtarget
<HexagonSubtarget
>();
2800 auto &HRI
= *HST
.getRegisterInfo();
2801 auto &HII
= *HST
.getInstrInfo();
2803 MDT
= &getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
2804 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
2807 Changed
= DeadCodeElimination(MF
, *MDT
).run();
2809 const HexagonEvaluator
HE(HRI
, MRI
, HII
, MF
);
2810 BitTracker
BT(HE
, MF
);
2811 LLVM_DEBUG(BT
.trace(true));
2814 MachineBasicBlock
&Entry
= MF
.front();
2816 RegisterSet AIG
; // Available registers for IG.
2817 ConstGeneration
ImmG(BT
, HII
, MRI
);
2818 Changed
|= visitBlock(Entry
, ImmG
, AIG
);
2820 RegisterSet ARE
; // Available registers for RIE.
2821 RedundantInstrElimination
RIE(BT
, HII
, HRI
, MRI
);
2822 bool Ried
= visitBlock(Entry
, RIE
, ARE
);
2828 RegisterSet ACG
; // Available registers for CG.
2829 CopyGeneration
CopyG(BT
, HII
, HRI
, MRI
);
2830 Changed
|= visitBlock(Entry
, CopyG
, ACG
);
2832 RegisterSet ACP
; // Available registers for CP.
2833 CopyPropagation
CopyP(HRI
, MRI
);
2834 Changed
|= visitBlock(Entry
, CopyP
, ACP
);
2836 Changed
= DeadCodeElimination(MF
, *MDT
).run() || Changed
;
2839 RegisterSet ABS
; // Available registers for BS.
2840 BitSimplification
BitS(BT
, *MDT
, HII
, HRI
, MRI
, MF
);
2841 Changed
|= visitBlock(Entry
, BitS
, ABS
);
2843 Changed
= DeadCodeElimination(MF
, *MDT
).run() || Changed
;
2849 DeadCodeElimination(MF
, *MDT
).run();
2854 // Recognize loops where the code at the end of the loop matches the code
2855 // before the entry of the loop, and the matching code is such that is can
2856 // be simplified. This pass relies on the bit simplification above and only
2857 // prepares code in a way that can be handled by the bit simplifcation.
2859 // This is the motivating testcase (and explanation):
2862 // loop0(.LBB0_2, r1) // %for.body.preheader
2863 // r5:4 = memd(r0++#8)
2866 // r3 = lsr(r4, #16)
2867 // r7:6 = combine(r5, r5)
2870 // r3 = insert(r5, #16, #16)
2871 // r7:6 = vlsrw(r7:6, #16)
2876 // memh(r2+#6) = r6 # R6 is really R5.H
2881 // memh(r2+#2) = r3 # R3 is really R4.H
2884 // r5:4 = memd(r0++#8)
2886 // { # "Shuffling" code that sets up R3 and R6
2887 // r3 = lsr(r4, #16) # so that their halves can be stored in the
2888 // r7:6 = combine(r5, r5) # next iteration. This could be folded into
2889 // } # the stores if the code was at the beginning
2890 // { # of the loop iteration. Since the same code
2891 // r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
2892 // r7:6 = vlsrw(r7:6, #16) # there.
2899 // loop0(.LBB0_2, r1)
2900 // r5:4 = memd(r0++#8)
2905 // memh(r2+#6) = r5.h
2910 // memh(r2+#2) = r4.h
2913 // r5:4 = memd(r0++#8)
2918 FunctionPass
*createHexagonLoopRescheduling();
2919 void initializeHexagonLoopReschedulingPass(PassRegistry
&);
2921 } // end namespace llvm
2925 class HexagonLoopRescheduling
: public MachineFunctionPass
{
2929 HexagonLoopRescheduling() : MachineFunctionPass(ID
) {
2930 initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
2933 bool runOnMachineFunction(MachineFunction
&MF
) override
;
2936 const HexagonInstrInfo
*HII
= nullptr;
2937 const HexagonRegisterInfo
*HRI
= nullptr;
2938 MachineRegisterInfo
*MRI
= nullptr;
2939 BitTracker
*BTP
= nullptr;
2942 LoopCand(MachineBasicBlock
*lb
, MachineBasicBlock
*pb
,
2943 MachineBasicBlock
*eb
) : LB(lb
), PB(pb
), EB(eb
) {}
2945 MachineBasicBlock
*LB
, *PB
, *EB
;
2947 using InstrList
= std::vector
<MachineInstr
*>;
2949 BitTracker::RegisterRef Inp
, Out
;
2953 PhiInfo(MachineInstr
&P
, MachineBasicBlock
&B
);
2956 BitTracker::RegisterRef LR
, PR
; // Loop Register, Preheader Register
2957 MachineBasicBlock
*LB
, *PB
; // Loop Block, Preheader Block
2960 static unsigned getDefReg(const MachineInstr
*MI
);
2961 bool isConst(unsigned Reg
) const;
2962 bool isBitShuffle(const MachineInstr
*MI
, unsigned DefR
) const;
2963 bool isStoreInput(const MachineInstr
*MI
, unsigned DefR
) const;
2964 bool isShuffleOf(unsigned OutR
, unsigned InpR
) const;
2965 bool isSameShuffle(unsigned OutR1
, unsigned InpR1
, unsigned OutR2
,
2966 unsigned &InpR2
) const;
2967 void moveGroup(InstrGroup
&G
, MachineBasicBlock
&LB
, MachineBasicBlock
&PB
,
2968 MachineBasicBlock::iterator At
, unsigned OldPhiR
, unsigned NewPredR
);
2969 bool processLoop(LoopCand
&C
);
2972 } // end anonymous namespace
2974 char HexagonLoopRescheduling::ID
= 0;
2976 INITIALIZE_PASS(HexagonLoopRescheduling
, "hexagon-loop-resched",
2977 "Hexagon Loop Rescheduling", false, false)
2979 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr
&P
,
2980 MachineBasicBlock
&B
) {
2981 DefR
= HexagonLoopRescheduling::getDefReg(&P
);
2984 for (unsigned i
= 1, n
= P
.getNumOperands(); i
< n
; i
+= 2) {
2985 const MachineOperand
&OpB
= P
.getOperand(i
+1);
2986 if (OpB
.getMBB() == &B
) {
2987 LR
= P
.getOperand(i
);
2991 PR
= P
.getOperand(i
);
2995 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr
*MI
) {
2997 HBS::getInstrDefs(*MI
, Defs
);
2998 if (Defs
.count() != 1)
3000 return Defs
.find_first();
3003 bool HexagonLoopRescheduling::isConst(unsigned Reg
) const {
3006 const BitTracker::RegisterCell
&RC
= BTP
->lookup(Reg
);
3007 for (unsigned i
= 0, w
= RC
.width(); i
< w
; ++i
) {
3008 const BitTracker::BitValue
&V
= RC
[i
];
3009 if (!V
.is(0) && !V
.is(1))
3015 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr
*MI
,
3016 unsigned DefR
) const {
3017 unsigned Opc
= MI
->getOpcode();
3019 case TargetOpcode::COPY
:
3020 case Hexagon::S2_lsr_i_r
:
3021 case Hexagon::S2_asr_i_r
:
3022 case Hexagon::S2_asl_i_r
:
3023 case Hexagon::S2_lsr_i_p
:
3024 case Hexagon::S2_asr_i_p
:
3025 case Hexagon::S2_asl_i_p
:
3026 case Hexagon::S2_insert
:
3027 case Hexagon::A2_or
:
3028 case Hexagon::A2_orp
:
3029 case Hexagon::A2_and
:
3030 case Hexagon::A2_andp
:
3031 case Hexagon::A2_combinew
:
3032 case Hexagon::A4_combineri
:
3033 case Hexagon::A4_combineir
:
3034 case Hexagon::A2_combineii
:
3035 case Hexagon::A4_combineii
:
3036 case Hexagon::A2_combine_ll
:
3037 case Hexagon::A2_combine_lh
:
3038 case Hexagon::A2_combine_hl
:
3039 case Hexagon::A2_combine_hh
:
3045 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr
*MI
,
3046 unsigned InpR
) const {
3047 for (unsigned i
= 0, n
= MI
->getNumOperands(); i
< n
; ++i
) {
3048 const MachineOperand
&Op
= MI
->getOperand(i
);
3051 if (Op
.getReg() == InpR
)
3057 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR
, unsigned InpR
) const {
3058 if (!BTP
->has(OutR
) || !BTP
->has(InpR
))
3060 const BitTracker::RegisterCell
&OutC
= BTP
->lookup(OutR
);
3061 for (unsigned i
= 0, w
= OutC
.width(); i
< w
; ++i
) {
3062 const BitTracker::BitValue
&V
= OutC
[i
];
3063 if (V
.Type
!= BitTracker::BitValue::Ref
)
3065 if (V
.RefI
.Reg
!= InpR
)
3071 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1
, unsigned InpR1
,
3072 unsigned OutR2
, unsigned &InpR2
) const {
3073 if (!BTP
->has(OutR1
) || !BTP
->has(InpR1
) || !BTP
->has(OutR2
))
3075 const BitTracker::RegisterCell
&OutC1
= BTP
->lookup(OutR1
);
3076 const BitTracker::RegisterCell
&OutC2
= BTP
->lookup(OutR2
);
3077 unsigned W
= OutC1
.width();
3078 unsigned MatchR
= 0;
3079 if (W
!= OutC2
.width())
3081 for (unsigned i
= 0; i
< W
; ++i
) {
3082 const BitTracker::BitValue
&V1
= OutC1
[i
], &V2
= OutC2
[i
];
3083 if (V1
.Type
!= V2
.Type
|| V1
.Type
== BitTracker::BitValue::One
)
3085 if (V1
.Type
!= BitTracker::BitValue::Ref
)
3087 if (V1
.RefI
.Pos
!= V2
.RefI
.Pos
)
3089 if (V1
.RefI
.Reg
!= InpR1
)
3091 if (V2
.RefI
.Reg
== 0 || V2
.RefI
.Reg
== OutR2
)
3094 MatchR
= V2
.RefI
.Reg
;
3095 else if (V2
.RefI
.Reg
!= MatchR
)
3102 void HexagonLoopRescheduling::moveGroup(InstrGroup
&G
, MachineBasicBlock
&LB
,
3103 MachineBasicBlock
&PB
, MachineBasicBlock::iterator At
, unsigned OldPhiR
,
3104 unsigned NewPredR
) {
3105 DenseMap
<unsigned,unsigned> RegMap
;
3107 const TargetRegisterClass
*PhiRC
= MRI
->getRegClass(NewPredR
);
3108 Register PhiR
= MRI
->createVirtualRegister(PhiRC
);
3109 BuildMI(LB
, At
, At
->getDebugLoc(), HII
->get(TargetOpcode::PHI
), PhiR
)
3114 RegMap
.insert(std::make_pair(G
.Inp
.Reg
, PhiR
));
3116 for (const MachineInstr
*SI
: llvm::reverse(G
.Ins
)) {
3117 unsigned DR
= getDefReg(SI
);
3118 const TargetRegisterClass
*RC
= MRI
->getRegClass(DR
);
3119 Register NewDR
= MRI
->createVirtualRegister(RC
);
3120 DebugLoc DL
= SI
->getDebugLoc();
3122 auto MIB
= BuildMI(LB
, At
, DL
, HII
->get(SI
->getOpcode()), NewDR
);
3123 for (const MachineOperand
&Op
: SI
->operands()) {
3130 unsigned UseR
= RegMap
[Op
.getReg()];
3131 MIB
.addReg(UseR
, 0, Op
.getSubReg());
3133 RegMap
.insert(std::make_pair(DR
, NewDR
));
3136 HBS::replaceReg(OldPhiR
, RegMap
[G
.Out
.Reg
], *MRI
);
3139 bool HexagonLoopRescheduling::processLoop(LoopCand
&C
) {
3140 LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C
.LB
)
3142 std::vector
<PhiInfo
> Phis
;
3143 for (auto &I
: *C
.LB
) {
3146 unsigned PR
= getDefReg(&I
);
3149 bool BadUse
= false, GoodUse
= false;
3150 for (const MachineOperand
&MO
: MRI
->use_operands(PR
)) {
3151 const MachineInstr
*UseI
= MO
.getParent();
3152 if (UseI
->getParent() != C
.LB
) {
3156 if (isBitShuffle(UseI
, PR
) || isStoreInput(UseI
, PR
))
3159 if (BadUse
|| !GoodUse
)
3162 Phis
.push_back(PhiInfo(I
, *C
.LB
));
3166 dbgs() << "Phis: {";
3167 for (auto &I
: Phis
) {
3168 dbgs() << ' ' << printReg(I
.DefR
, HRI
) << "=phi("
3169 << printReg(I
.PR
.Reg
, HRI
, I
.PR
.Sub
) << ":b" << I
.PB
->getNumber()
3170 << ',' << printReg(I
.LR
.Reg
, HRI
, I
.LR
.Sub
) << ":b"
3171 << I
.LB
->getNumber() << ')';
3179 bool Changed
= false;
3182 // Go backwards in the block: for each bit shuffling instruction, check
3183 // if that instruction could potentially be moved to the front of the loop:
3184 // the output of the loop cannot be used in a non-shuffling instruction
3186 for (MachineInstr
&MI
: llvm::reverse(*C
.LB
)) {
3187 if (MI
.isTerminator())
3193 HBS::getInstrDefs(MI
, Defs
);
3194 if (Defs
.count() != 1)
3196 Register DefR
= Defs
.find_first();
3197 if (!DefR
.isVirtual())
3199 if (!isBitShuffle(&MI
, DefR
))
3202 bool BadUse
= false;
3203 for (auto UI
= MRI
->use_begin(DefR
), UE
= MRI
->use_end(); UI
!= UE
; ++UI
) {
3204 MachineInstr
*UseI
= UI
->getParent();
3205 if (UseI
->getParent() == C
.LB
) {
3206 if (UseI
->isPHI()) {
3207 // If the use is in a phi node in this loop, then it should be
3208 // the value corresponding to the back edge.
3209 unsigned Idx
= UI
.getOperandNo();
3210 if (UseI
->getOperand(Idx
+1).getMBB() != C
.LB
)
3213 if (!llvm::is_contained(ShufIns
, UseI
))
3217 // There is a use outside of the loop, but there is no epilog block
3218 // suitable for a copy-out.
3219 if (C
.EB
== nullptr)
3228 ShufIns
.push_back(&MI
);
3231 // Partition the list of shuffling instructions into instruction groups,
3232 // where each group has to be moved as a whole (i.e. a group is a chain of
3233 // dependent instructions). A group produces a single live output register,
3234 // which is meant to be the input of the loop phi node (although this is
3235 // not checked here yet). It also uses a single register as its input,
3236 // which is some value produced in the loop body. After moving the group
3237 // to the beginning of the loop, that input register would need to be
3238 // the loop-carried register (through a phi node) instead of the (currently
3239 // loop-carried) output register.
3240 using InstrGroupList
= std::vector
<InstrGroup
>;
3241 InstrGroupList Groups
;
3243 for (unsigned i
= 0, n
= ShufIns
.size(); i
< n
; ++i
) {
3244 MachineInstr
*SI
= ShufIns
[i
];
3249 G
.Ins
.push_back(SI
);
3250 G
.Out
.Reg
= getDefReg(SI
);
3252 HBS::getInstrUses(*SI
, Inputs
);
3254 for (unsigned j
= i
+1; j
< n
; ++j
) {
3255 MachineInstr
*MI
= ShufIns
[j
];
3259 HBS::getInstrDefs(*MI
, Defs
);
3260 // If this instruction does not define any pending inputs, skip it.
3261 if (!Defs
.intersects(Inputs
))
3263 // Otherwise, add it to the current group and remove the inputs that
3264 // are defined by MI.
3265 G
.Ins
.push_back(MI
);
3266 Inputs
.remove(Defs
);
3267 // Then add all registers used by MI.
3268 HBS::getInstrUses(*MI
, Inputs
);
3269 ShufIns
[j
] = nullptr;
3272 // Only add a group if it requires at most one register.
3273 if (Inputs
.count() > 1)
3275 auto LoopInpEq
= [G
] (const PhiInfo
&P
) -> bool {
3276 return G
.Out
.Reg
== P
.LR
.Reg
;
3278 if (llvm::none_of(Phis
, LoopInpEq
))
3281 G
.Inp
.Reg
= Inputs
.find_first();
3282 Groups
.push_back(G
);
3286 for (unsigned i
= 0, n
= Groups
.size(); i
< n
; ++i
) {
3287 InstrGroup
&G
= Groups
[i
];
3288 dbgs() << "Group[" << i
<< "] inp: "
3289 << printReg(G
.Inp
.Reg
, HRI
, G
.Inp
.Sub
)
3290 << " out: " << printReg(G
.Out
.Reg
, HRI
, G
.Out
.Sub
) << "\n";
3291 for (const MachineInstr
*MI
: G
.Ins
)
3292 dbgs() << " " << MI
;
3296 for (InstrGroup
&G
: Groups
) {
3297 if (!isShuffleOf(G
.Out
.Reg
, G
.Inp
.Reg
))
3299 auto LoopInpEq
= [G
] (const PhiInfo
&P
) -> bool {
3300 return G
.Out
.Reg
== P
.LR
.Reg
;
3302 auto F
= llvm::find_if(Phis
, LoopInpEq
);
3303 if (F
== Phis
.end())
3306 if (!isSameShuffle(G
.Out
.Reg
, G
.Inp
.Reg
, F
->PR
.Reg
, PrehR
)) {
3307 const MachineInstr
*DefPrehR
= MRI
->getVRegDef(F
->PR
.Reg
);
3308 unsigned Opc
= DefPrehR
->getOpcode();
3309 if (Opc
!= Hexagon::A2_tfrsi
&& Opc
!= Hexagon::A2_tfrpi
)
3311 if (!DefPrehR
->getOperand(1).isImm())
3313 if (DefPrehR
->getOperand(1).getImm() != 0)
3315 const TargetRegisterClass
*RC
= MRI
->getRegClass(G
.Inp
.Reg
);
3316 if (RC
!= MRI
->getRegClass(F
->PR
.Reg
)) {
3317 PrehR
= MRI
->createVirtualRegister(RC
);
3318 unsigned TfrI
= (RC
== &Hexagon::IntRegsRegClass
) ? Hexagon::A2_tfrsi
3319 : Hexagon::A2_tfrpi
;
3320 auto T
= C
.PB
->getFirstTerminator();
3321 DebugLoc DL
= (T
!= C
.PB
->end()) ? T
->getDebugLoc() : DebugLoc();
3322 BuildMI(*C
.PB
, T
, DL
, HII
->get(TfrI
), PrehR
)
3328 // isSameShuffle could match with PrehR being of a wider class than
3329 // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
3330 // it would match for the input being a 32-bit register, and PrehR
3331 // being a 64-bit register (where the low 32 bits match). This could
3332 // be handled, but for now skip these cases.
3333 if (MRI
->getRegClass(PrehR
) != MRI
->getRegClass(G
.Inp
.Reg
))
3335 moveGroup(G
, *F
->LB
, *F
->PB
, F
->LB
->getFirstNonPHI(), F
->DefR
, PrehR
);
3342 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction
&MF
) {
3343 if (skipFunction(MF
.getFunction()))
3346 auto &HST
= MF
.getSubtarget
<HexagonSubtarget
>();
3347 HII
= HST
.getInstrInfo();
3348 HRI
= HST
.getRegisterInfo();
3349 MRI
= &MF
.getRegInfo();
3350 const HexagonEvaluator
HE(*HRI
, *MRI
, *HII
, MF
);
3351 BitTracker
BT(HE
, MF
);
3352 LLVM_DEBUG(BT
.trace(true));
3356 std::vector
<LoopCand
> Cand
;
3358 for (auto &B
: MF
) {
3359 if (B
.pred_size() != 2 || B
.succ_size() != 2)
3361 MachineBasicBlock
*PB
= nullptr;
3362 bool IsLoop
= false;
3363 for (MachineBasicBlock
*Pred
: B
.predecessors()) {
3372 MachineBasicBlock
*EB
= nullptr;
3373 for (MachineBasicBlock
*Succ
: B
.successors()) {
3376 // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
3377 // edge from B to EP is non-critical.
3378 if (Succ
->pred_size() == 1)
3383 Cand
.push_back(LoopCand(&B
, PB
, EB
));
3386 bool Changed
= false;
3387 for (auto &C
: Cand
)
3388 Changed
|= processLoop(C
);
3393 //===----------------------------------------------------------------------===//
3394 // Public Constructor Functions
3395 //===----------------------------------------------------------------------===//
3397 FunctionPass
*llvm::createHexagonLoopRescheduling() {
3398 return new HexagonLoopRescheduling();
3401 FunctionPass
*llvm::createHexagonBitSimplify() {
3402 return new HexagonBitSimplify();