//===- HexagonBitSimplify.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "BitTracker.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "hexbit"

using namespace llvm;

static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
  cl::init(true), cl::desc("Preserve subregisters in tied operands"));
static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
  cl::init(true), cl::desc("Generate extract instructions"));
static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
  cl::init(true), cl::desc("Generate bitsplit instructions"));

static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountExtract = 0;
static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountBitSplit = 0;
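// Note: CountExtract and CountBitSplit track how many extract/bitsplit
// rewrites have been emitted so far; together with the hexbit-max-* options
// above they appear to exist mainly to cap (and bisect) those transformations
// while debugging.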
namespace llvm {

  void initializeHexagonBitSimplifyPass(PassRegistry &Registry);
  FunctionPass *createHexagonBitSimplify();

} // end namespace llvm
namespace {

  // Set of virtual registers, based on BitVector.
  struct RegisterSet : private BitVector {
    RegisterSet() = default;
    explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
    RegisterSet(const RegisterSet &RS) = default;

    using BitVector::clear;
    using BitVector::count;

    unsigned find_first() const {
      int First = BitVector::find_first();
      if (First < 0)
        return 0;
      return x2v(First);
    }

    unsigned find_next(unsigned Prev) const {
      int Next = BitVector::find_next(v2x(Prev));
      if (Next < 0)
        return 0;
      return x2v(Next);
    }

    RegisterSet &insert(unsigned R) {
      unsigned Idx = v2x(R);
      ensure(Idx);
      return static_cast<RegisterSet&>(BitVector::set(Idx));
    }
    RegisterSet &remove(unsigned R) {
      unsigned Idx = v2x(R);
      if (Idx >= size())
        return *this;
      return static_cast<RegisterSet&>(BitVector::reset(Idx));
    }

    RegisterSet &insert(const RegisterSet &Rs) {
      return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
    }
    RegisterSet &remove(const RegisterSet &Rs) {
      return static_cast<RegisterSet&>(BitVector::reset(Rs));
    }

    reference operator[](unsigned R) {
      unsigned Idx = v2x(R);
      ensure(Idx);
      return BitVector::operator[](Idx);
    }
    bool operator[](unsigned R) const {
      unsigned Idx = v2x(R);
      assert(Idx < size());
      return BitVector::operator[](Idx);
    }
    bool has(unsigned R) const {
      unsigned Idx = v2x(R);
      if (Idx >= size())
        return false;
      return BitVector::test(Idx);
    }

    bool empty() const {
      return !BitVector::any();
    }
    bool includes(const RegisterSet &Rs) const {
      // A.BitVector::test(B)  <=>  A-B != {}
      return !Rs.BitVector::test(*this);
    }
    bool intersects(const RegisterSet &Rs) const {
      return BitVector::anyCommon(Rs);
    }

  private:
    void ensure(unsigned Idx) {
      if (size() <= Idx)
        resize(std::max(Idx+1, 32U));
    }

    static inline unsigned v2x(unsigned v) {
      return Register::virtReg2Index(v);
    }

    static inline unsigned x2v(unsigned x) {
      return Register::index2VirtReg(x);
    }
  };
  struct PrintRegSet {
    PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
      : RS(S), TRI(RI) {}

    friend raw_ostream &operator<< (raw_ostream &OS,
          const PrintRegSet &P);

  private:
    const RegisterSet &RS;
    const TargetRegisterInfo *TRI;
  };

  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
    LLVM_ATTRIBUTE_UNUSED;
  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
    OS << '{';
    for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
      OS << ' ' << printReg(R, P.TRI);
    OS << " }";
    return OS;
  }
  class Transformation;

  class HexagonBitSimplify : public MachineFunctionPass {
  public:
    static char ID;

    HexagonBitSimplify() : MachineFunctionPass(ID) {}

    StringRef getPassName() const override {
      return "Hexagon bit simplification";
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
    static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
    static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
        const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
    static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
        uint16_t W);
    static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
        uint16_t W, uint64_t &U);
    static bool replaceReg(Register OldR, Register NewR,
        MachineRegisterInfo &MRI);
    static bool getSubregMask(const BitTracker::RegisterRef &RR,
        unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
    static bool replaceRegWithSub(Register OldR, Register NewR, unsigned NewSR,
        MachineRegisterInfo &MRI);
    static bool replaceSubWithSub(Register OldR, unsigned OldSR, Register NewR,
        unsigned NewSR, MachineRegisterInfo &MRI);
    static bool parseRegSequence(const MachineInstr &I,
        BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
        const MachineRegisterInfo &MRI);

    static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
        uint16_t Begin);
    static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
        uint16_t Begin, const HexagonInstrInfo &HII);

    static const TargetRegisterClass *getFinalVRegClass(
        const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
    static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
        const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);

  private:
    MachineDominatorTree *MDT = nullptr;

    bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
    static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
        unsigned NewSub = Hexagon::NoSubRegister);
  };

  using HBS = HexagonBitSimplify;
  // The purpose of this class is to provide a common facility to traverse
  // the function top-down or bottom-up via the dominator tree, and keep
  // track of the available registers.
  class Transformation {
  public:
    bool TopDown;

    Transformation(bool TD) : TopDown(TD) {}
    virtual ~Transformation() = default;

    virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
  };
} // end anonymous namespace

char HexagonBitSimplify::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
      "Hexagon bit simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
      "Hexagon bit simplification", false, false)
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
      RegisterSet &AVs) {
  bool Changed = false;

  if (T.TopDown)
    Changed = T.processBlock(B, AVs);

  RegisterSet Defs;
  for (auto &I : B)
    getInstrDefs(I, Defs);
  RegisterSet NewAVs = AVs;
  NewAVs.insert(Defs);

  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
    Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);

  if (!T.TopDown)
    Changed |= T.processBlock(B, AVs);

  return Changed;
}
// Utility functions:

void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
      RegisterSet &Defs) {
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isDef())
      continue;
    Register R = Op.getReg();
    if (!R.isVirtual())
      continue;
    Defs.insert(R);
  }
}

void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
      RegisterSet &Uses) {
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
      continue;
    Register R = Op.getReg();
    if (!R.isVirtual())
      continue;
    Uses.insert(R);
  }
}
// Check if the bits in the range [B1, B1+W) in RC1 are equal to the bits in
// the range [B2, B2+W) in RC2.
bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
      uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
      uint16_t W) {
  for (uint16_t i = 0; i < W; ++i) {
    // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
    if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
      return false;
    // Same for RC2[i].
    if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
      return false;
    if (RC1[B1+i] != RC2[B2+i])
      return false;
  }
  return true;
}
bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W) {
  assert(B < RC.width() && B+W <= RC.width());
  for (uint16_t i = B; i < B+W; ++i)
    if (!RC[i].is(0))
      return false;
  return true;
}
bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W, uint64_t &U) {
  assert(B < RC.width() && B+W <= RC.width());
  int64_t T = 0;
  for (uint16_t i = B+W; i > B; --i) {
    const BitTracker::BitValue &BV = RC[i-1];
    if (BV.is(0))
      T = T*2;
    else if (BV.is(1))
      T = T*2 + 1;
    else
      return false;
  }
  U = T;
  return true;
}
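// Example: with B=0 and W=4, a cell whose four low bits are known to be
// 1,0,1,1 (LSB first) yields U = 0b1101 = 13; any unknown bit in the range
// makes getConst return false.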
bool HexagonBitSimplify::replaceReg(Register OldR, Register NewR,
      MachineRegisterInfo &MRI) {
  if (!OldR.isVirtual() || !NewR.isVirtual())
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(Begin) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    I->setReg(NewR);
  }
  return Begin != End;
}
bool HexagonBitSimplify::replaceRegWithSub(Register OldR, Register NewR,
      unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!OldR.isVirtual() || !NewR.isVirtual())
    return false;
  if (hasTiedUse(OldR, MRI, NewSR))
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(Begin) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    I->setReg(NewR);
    I->setSubReg(NewSR);
  }
  return Begin != End;
}
bool HexagonBitSimplify::replaceSubWithSub(Register OldR, unsigned OldSR,
      Register NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!OldR.isVirtual() || !NewR.isVirtual())
    return false;
  if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
    return false;
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  decltype(Begin) NextI;
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    if (I->getSubReg() != OldSR)
      continue;
    I->setReg(NewR);
    I->setSubReg(NewSR);
  }
  return Begin != End;
}
// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
// of Sub in Reg, and set Width to the size of Sub in bits. Return true if
// this succeeded, otherwise return false.
bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
      unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
  const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
  if (RR.Sub == 0) {
    Begin = 0;
    Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
    return true;
  }

  Begin = 0;

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
    case Hexagon::HvxWRRegClassID:
      Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
      if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
        Begin = Width;
      break;
    default:
      return false;
  }
  return true;
}
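// Example: for a virtual register in DoubleRegs, %X:isub_hi yields Begin=32
// and Width=32, while a plain %X (no subregister) yields Begin=0, Width=64.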
// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
// subregister.
bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
      BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
      const MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
  unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
  auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
  auto &HRI = static_cast<const HexagonRegisterInfo&>(
                  *MRI.getTargetRegisterInfo());
  unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
  unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
  assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));

  if (Sub1 == SubLo && Sub2 == SubHi) {
    SL = I.getOperand(1);
    SH = I.getOperand(3);
    return true;
  }
  if (Sub1 == SubHi && Sub2 == SubLo) {
    SH = I.getOperand(1);
    SL = I.getOperand(3);
    return true;
  }
  return false;
}
// All stores (except 64-bit stores) take a 32-bit register as the source
// of the value to be stored. If the instruction stores into a location
// that is shorter than 32 bits, some bits of the source register are not
// used. For each store instruction, calculate the set of used bits in
// the source register, and set appropriate bits in Bits. Return true if
// the bits are calculated, false otherwise.
bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
      uint16_t Begin) {
  using namespace Hexagon;

  switch (Opc) {
    case S2_storerb_io:            // memb(Rs32+#s11:0)=Rt32
    case S2_storerbnew_io:         // memb(Rs32+#s11:0)=Nt8.new
    case S2_pstorerbt_io:          // if (Pv4) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbf_io:          // if (!Pv4) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbtnew_io:       // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbfnew_io:       // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbnewt_io:       // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S2_pstorerbnewf_io:       // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewtnew_io:    // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewfnew_io:    // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S2_storerb_pi:            // memb(Rx32++#s4:0)=Rt32
    case S2_storerbnew_pi:         // memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbt_pi:          // if (Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbf_pi:          // if (!Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbtnew_pi:       // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbfnew_pi:       // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbnewt_pi:       // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewf_pi:       // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewtnew_pi:    // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewfnew_pi:    // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S4_storerb_ap:            // memb(Re32=#U6)=Rt32
    case S4_storerbnew_ap:         // memb(Re32=#U6)=Nt8.new
    case S2_storerb_pr:            // memb(Rx32++Mu2)=Rt32
    case S2_storerbnew_pr:         // memb(Rx32++Mu2)=Nt8.new
    case S4_storerb_ur:            // memb(Ru32<<#u2+#U6)=Rt32
    case S4_storerbnew_ur:         // memb(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerb_pbr:           // memb(Rx32++Mu2:brev)=Rt32
    case S2_storerbnew_pbr:        // memb(Rx32++Mu2:brev)=Nt8.new
    case S2_storerb_pci:           // memb(Rx32++#s4:0:circ(Mu2))=Rt32
    case S2_storerbnew_pci:        // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
    case S2_storerb_pcr:           // memb(Rx32++I:circ(Mu2))=Rt32
    case S2_storerbnew_pcr:        // memb(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerb_rr:            // memb(Rs32+Ru32<<#u2)=Rt32
    case S4_storerbnew_rr:         // memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbt_rr:          // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbf_rr:          // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbtnew_rr:       // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbfnew_rr:       // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbnewt_rr:       // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewf_rr:       // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewtnew_rr:    // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewfnew_rr:    // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerbgp:             // memb(gp+#u16:0)=Rt32
    case S2_storerbnewgp:          // memb(gp+#u16:0)=Nt8.new
    case S4_pstorerbt_abs:         // if (Pv4) memb(#u6)=Rt32
    case S4_pstorerbf_abs:         // if (!Pv4) memb(#u6)=Rt32
    case S4_pstorerbtnew_abs:      // if (Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbfnew_abs:      // if (!Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbnewt_abs:      // if (Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewf_abs:      // if (!Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewtnew_abs:   // if (Pv4.new) memb(#u6)=Nt8.new
    case S4_pstorerbnewfnew_abs:   // if (!Pv4.new) memb(#u6)=Nt8.new
      Bits.set(Begin, Begin+8);
      return true;

    case S2_storerh_io:            // memh(Rs32+#s11:1)=Rt32
    case S2_storerhnew_io:         // memh(Rs32+#s11:1)=Nt8.new
    case S2_pstorerht_io:          // if (Pv4) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhf_io:          // if (!Pv4) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhtnew_io:       // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhfnew_io:       // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhnewt_io:       // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S2_pstorerhnewf_io:       // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewtnew_io:    // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewfnew_io:    // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S2_storerh_pi:            // memh(Rx32++#s4:1)=Rt32
    case S2_storerhnew_pi:         // memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerht_pi:          // if (Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhf_pi:          // if (!Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhtnew_pi:       // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhfnew_pi:       // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhnewt_pi:       // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewf_pi:       // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewtnew_pi:    // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewfnew_pi:    // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S4_storerh_ap:            // memh(Re32=#U6)=Rt32
    case S4_storerhnew_ap:         // memh(Re32=#U6)=Nt8.new
    case S2_storerh_pr:            // memh(Rx32++Mu2)=Rt32
    case S2_storerhnew_pr:         // memh(Rx32++Mu2)=Nt8.new
    case S4_storerh_ur:            // memh(Ru32<<#u2+#U6)=Rt32
    case S4_storerhnew_ur:         // memh(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerh_pbr:           // memh(Rx32++Mu2:brev)=Rt32
    case S2_storerhnew_pbr:        // memh(Rx32++Mu2:brev)=Nt8.new
    case S2_storerh_pci:           // memh(Rx32++#s4:1:circ(Mu2))=Rt32
    case S2_storerhnew_pci:        // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
    case S2_storerh_pcr:           // memh(Rx32++I:circ(Mu2))=Rt32
    case S2_storerhnew_pcr:        // memh(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerh_rr:            // memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerht_rr:          // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhf_rr:          // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhtnew_rr:       // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhfnew_rr:       // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_storerhnew_rr:         // memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewt_rr:       // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewf_rr:       // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewtnew_rr:    // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewfnew_rr:    // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerhgp:             // memh(gp+#u16:1)=Rt32
    case S2_storerhnewgp:          // memh(gp+#u16:1)=Nt8.new
    case S4_pstorerht_abs:         // if (Pv4) memh(#u6)=Rt32
    case S4_pstorerhf_abs:         // if (!Pv4) memh(#u6)=Rt32
    case S4_pstorerhtnew_abs:      // if (Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhfnew_abs:      // if (!Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhnewt_abs:      // if (Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewf_abs:      // if (!Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewtnew_abs:   // if (Pv4.new) memh(#u6)=Nt8.new
    case S4_pstorerhnewfnew_abs:   // if (!Pv4.new) memh(#u6)=Nt8.new
      Bits.set(Begin, Begin+16);
      return true;

    case S2_storerf_io:            // memh(Rs32+#s11:1)=Rt.H32
    case S2_pstorerft_io:          // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S2_pstorerff_io:          // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerftnew_io:       // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerffnew_io:       // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S2_storerf_pi:            // memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerft_pi:          // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerff_pi:          // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerftnew_pi:       // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerffnew_pi:       // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S4_storerf_ap:            // memh(Re32=#U6)=Rt.H32
    case S2_storerf_pr:            // memh(Rx32++Mu2)=Rt.H32
    case S4_storerf_ur:            // memh(Ru32<<#u2+#U6)=Rt.H32
    case S2_storerf_pbr:           // memh(Rx32++Mu2:brev)=Rt.H32
    case S2_storerf_pci:           // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
    case S2_storerf_pcr:           // memh(Rx32++I:circ(Mu2))=Rt.H32
    case S4_storerf_rr:            // memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerft_rr:          // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerff_rr:          // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerftnew_rr:       // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerffnew_rr:       // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S2_storerfgp:             // memh(gp+#u16:1)=Rt.H32
    case S4_pstorerft_abs:         // if (Pv4) memh(#u6)=Rt.H32
    case S4_pstorerff_abs:         // if (!Pv4) memh(#u6)=Rt.H32
    case S4_pstorerftnew_abs:      // if (Pv4.new) memh(#u6)=Rt.H32
    case S4_pstorerffnew_abs:      // if (!Pv4.new) memh(#u6)=Rt.H32
      Bits.set(Begin+16, Begin+32);
      return true;
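      // Note: the S2_storerf_* forms above ("=Rt.H32") store the upper
      // halfword of the source register, which is why they mark bits
      // [16,32) as used rather than [0,16).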
  }

  return false;
}

// For an instruction with opcode Opc, calculate the set of bits that it
// uses in a register in operand OpN. This only calculates the set of used
// bits for cases where it does not depend on any operands (as is the case
// in shifts, for example). For concrete instructions from a program, the
// operand may be a subregister of a larger register, while Bits would
// correspond to the larger register in its entirety. Because of that,
// the parameter Begin can be used to indicate which bit of Bits should be
// considered the LSB of the operand.
bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
      BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
  using namespace Hexagon;

  const MCInstrDesc &D = HII.get(Opc);
  if (D.mayStore()) {
    if (OpN == D.getNumOperands()-1)
      return getUsedBitsInStore(Opc, Bits, Begin);
    return false;
  }

  switch (Opc) {
    // One register source. Used bits: R1[0-7].
      Bits.set(Begin, Begin+8);

    // One register source. Used bits: R1[0-15].
      Bits.set(Begin, Begin+16);

    // One register source. Used bits: R1[16-31].
      Bits.set(Begin+16, Begin+32);

    // Two register sources. Used bits: R1[0-7], R2[0-7].
      Bits.set(Begin, Begin+8);

    // Two register sources. Used bits: R1[0-15], R2[0-15].
    case A2_addh_h16_sat_ll:
    case A2_addh_l16_sat_ll:
    case A2_subh_h16_sat_ll:
    case A2_subh_l16_sat_ll:
    case M2_mpy_acc_ll_s0:
    case M2_mpy_acc_ll_s1:
    case M2_mpy_acc_sat_ll_s0:
    case M2_mpy_acc_sat_ll_s1:
    case M2_mpy_nac_ll_s0:
    case M2_mpy_nac_ll_s1:
    case M2_mpy_nac_sat_ll_s0:
    case M2_mpy_nac_sat_ll_s1:
    case M2_mpy_rnd_ll_s0:
    case M2_mpy_rnd_ll_s1:
    case M2_mpy_sat_ll_s0:
    case M2_mpy_sat_ll_s1:
    case M2_mpy_sat_rnd_ll_s0:
    case M2_mpy_sat_rnd_ll_s1:
    case M2_mpyd_acc_ll_s0:
    case M2_mpyd_acc_ll_s1:
    case M2_mpyd_nac_ll_s0:
    case M2_mpyd_nac_ll_s1:
    case M2_mpyd_rnd_ll_s0:
    case M2_mpyd_rnd_ll_s1:
    case M2_mpyu_acc_ll_s0:
    case M2_mpyu_acc_ll_s1:
    case M2_mpyu_nac_ll_s0:
    case M2_mpyu_nac_ll_s1:
    case M2_mpyud_acc_ll_s0:
    case M2_mpyud_acc_ll_s1:
    case M2_mpyud_nac_ll_s0:
    case M2_mpyud_nac_ll_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      break;

    // Two register sources. Used bits: R1[0-15], R2[16-31].
    case A2_addh_h16_sat_lh:
    case A2_subh_h16_sat_lh:
    case M2_mpy_acc_lh_s0:
    case M2_mpy_acc_lh_s1:
    case M2_mpy_acc_sat_lh_s0:
    case M2_mpy_acc_sat_lh_s1:
    case M2_mpy_nac_lh_s0:
    case M2_mpy_nac_lh_s1:
    case M2_mpy_nac_sat_lh_s0:
    case M2_mpy_nac_sat_lh_s1:
    case M2_mpy_rnd_lh_s0:
    case M2_mpy_rnd_lh_s1:
    case M2_mpy_sat_lh_s0:
    case M2_mpy_sat_lh_s1:
    case M2_mpy_sat_rnd_lh_s0:
    case M2_mpy_sat_rnd_lh_s1:
    case M2_mpyd_acc_lh_s0:
    case M2_mpyd_acc_lh_s1:
    case M2_mpyd_nac_lh_s0:
    case M2_mpyd_nac_lh_s1:
    case M2_mpyd_rnd_lh_s0:
    case M2_mpyd_rnd_lh_s1:
    case M2_mpyu_acc_lh_s0:
    case M2_mpyu_acc_lh_s1:
    case M2_mpyu_nac_lh_s0:
    case M2_mpyu_nac_lh_s1:
    case M2_mpyud_acc_lh_s0:
    case M2_mpyud_acc_lh_s1:
    case M2_mpyud_nac_lh_s0:
    case M2_mpyud_nac_lh_s1:
    // These four are actually LH.
    case A2_addh_l16_sat_hl:
    case A2_subh_l16_sat_hl:
      if (OpN == 1) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      if (OpN == 2) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      break;

    // Two register sources, used bits: R1[16-31], R2[0-15].
    case A2_addh_h16_sat_hl:
    case A2_subh_h16_sat_hl:
    case M2_mpy_acc_hl_s0:
    case M2_mpy_acc_hl_s1:
    case M2_mpy_acc_sat_hl_s0:
    case M2_mpy_acc_sat_hl_s1:
    case M2_mpy_nac_hl_s0:
    case M2_mpy_nac_hl_s1:
    case M2_mpy_nac_sat_hl_s0:
    case M2_mpy_nac_sat_hl_s1:
    case M2_mpy_rnd_hl_s0:
    case M2_mpy_rnd_hl_s1:
    case M2_mpy_sat_hl_s0:
    case M2_mpy_sat_hl_s1:
    case M2_mpy_sat_rnd_hl_s0:
    case M2_mpy_sat_rnd_hl_s1:
    case M2_mpyd_acc_hl_s0:
    case M2_mpyd_acc_hl_s1:
    case M2_mpyd_nac_hl_s0:
    case M2_mpyd_nac_hl_s1:
    case M2_mpyd_rnd_hl_s0:
    case M2_mpyd_rnd_hl_s1:
    case M2_mpyu_acc_hl_s0:
    case M2_mpyu_acc_hl_s1:
    case M2_mpyu_nac_hl_s0:
    case M2_mpyu_nac_hl_s1:
    case M2_mpyud_acc_hl_s0:
    case M2_mpyud_acc_hl_s1:
    case M2_mpyud_nac_hl_s0:
    case M2_mpyud_nac_hl_s1:
      if (OpN == 1) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      if (OpN == 2) {
        Bits.set(Begin, Begin+16);
        return true;
      }
      break;

    // Two register sources, used bits: R1[16-31], R2[16-31].
    case A2_addh_h16_sat_hh:
    case A2_subh_h16_sat_hh:
    case M2_mpy_acc_hh_s0:
    case M2_mpy_acc_hh_s1:
    case M2_mpy_acc_sat_hh_s0:
    case M2_mpy_acc_sat_hh_s1:
    case M2_mpy_nac_hh_s0:
    case M2_mpy_nac_hh_s1:
    case M2_mpy_nac_sat_hh_s0:
    case M2_mpy_nac_sat_hh_s1:
    case M2_mpy_rnd_hh_s0:
    case M2_mpy_rnd_hh_s1:
    case M2_mpy_sat_hh_s0:
    case M2_mpy_sat_hh_s1:
    case M2_mpy_sat_rnd_hh_s0:
    case M2_mpy_sat_rnd_hh_s1:
    case M2_mpyd_acc_hh_s0:
    case M2_mpyd_acc_hh_s1:
    case M2_mpyd_nac_hh_s0:
    case M2_mpyd_nac_hh_s1:
    case M2_mpyd_rnd_hh_s0:
    case M2_mpyd_rnd_hh_s1:
    case M2_mpyu_acc_hh_s0:
    case M2_mpyu_acc_hh_s1:
    case M2_mpyu_nac_hh_s0:
    case M2_mpyu_nac_hh_s1:
    case M2_mpyud_acc_hh_s0:
    case M2_mpyud_acc_hh_s1:
    case M2_mpyud_nac_hh_s0:
    case M2_mpyud_nac_hh_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin+16, Begin+32);
        return true;
      }
      break;
  }

  return false;
}
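// The opcode suffixes (_ll, _lh, _hl, _hh) generally encode which halfword of
// each source operand is read, and the Bits.set ranges in each group mirror
// that; a few opcodes are grouped against their literal suffix (see the
// "These four are actually LH" note above).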
// Calculate the register class that matches Reg:Sub. For example, if
// %1 is a double register, then %1:isub_hi would match the "int"
// register class.
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
      const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
  if (!RR.Reg.isVirtual())
    return nullptr;
  auto *RC = MRI.getRegClass(RR.Reg);
  if (RR.Sub == 0)
    return RC;
  auto &HRI = static_cast<const HexagonRegisterInfo&>(
                  *MRI.getTargetRegisterInfo());

  auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
    assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
           Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));
  };

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::IntRegsRegClass;
    case Hexagon::HvxWRRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::HvxVRRegClass;
  }
  return nullptr;
}
// Check if RD could be replaced with RS at any possible use of RD.
// For example a predicate register cannot be replaced with an integer
// register, but a 64-bit register with a subregister can be replaced
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
  if (!RD.Reg.isVirtual() || !RS.Reg.isVirtual())
    return false;
  // Return false if one (or both) classes are nullptr.
  auto *DRC = getFinalVRegClass(RD, MRI);
  if (!DRC)
    return false;

  return DRC == getFinalVRegClass(RS, MRI);
}
bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
      unsigned NewSub) {
  if (!PreserveTiedOps)
    return false;
  return llvm::any_of(MRI.use_operands(Reg),
                      [NewSub] (const MachineOperand &Op) -> bool {
                        return Op.getSubReg() != NewSub && Op.isTied();
                      });
}
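// A tied use must stay consistent with its tied def, so rewriting such a use
// to a different subregister could break the tie; with hexbit-keep-tied (the
// default) such registers are simply left alone by the replace* helpers.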
namespace {

  class DeadCodeElimination {
  public:
    DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
      : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
        MDT(mdt), MRI(mf.getRegInfo()) {}

    bool run() {
      return runOnNode(MDT.getRootNode());
    }

  private:
    bool isDead(unsigned R) const;
    bool runOnNode(MachineDomTreeNode *N);

    MachineFunction &MF;
    const HexagonInstrInfo &HII;
    MachineDominatorTree &MDT;
    MachineRegisterInfo &MRI;
  };

} // end anonymous namespace
bool DeadCodeElimination::isDead(unsigned R) const {
  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
    MachineInstr *UseI = I->getParent();
    if (UseI->isDebugValue())
      continue;
    if (UseI->isPHI()) {
      assert(!UseI->getOperand(0).getSubReg());
      Register DR = UseI->getOperand(0).getReg();
      if (DR == R)
        continue;
    }
    return false;
  }
  return true;
}
bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
  bool Changed = false;

  for (auto *DTN : children<MachineDomTreeNode*>(N))
    Changed |= runOnNode(DTN);

  MachineBasicBlock *B = N->getBlock();
  std::vector<MachineInstr*> Instrs;
  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
    Instrs.push_back(&*I);

  for (auto MI : Instrs) {
    unsigned Opc = MI->getOpcode();
    // Do not touch lifetime markers. This is why the target-independent DCE
    // cannot be used.
    if (Opc == TargetOpcode::LIFETIME_START ||
        Opc == TargetOpcode::LIFETIME_END)
      continue;
    bool Store = false;
    if (MI->isInlineAsm())
      continue;
    // Delete PHIs if possible.
    if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
      continue;

    bool AllDead = true;
    SmallVector<unsigned,2> Regs;
    for (auto &Op : MI->operands()) {
      if (!Op.isReg() || !Op.isDef())
        continue;
      Register R = Op.getReg();
      if (!R.isVirtual() || !isDead(R)) {
        AllDead = false;
        break;
      }
      Regs.push_back(R);
    }
    if (!AllDead)
      continue;

    B->erase(MI);
    for (unsigned i = 0, n = Regs.size(); i != n; ++i)
      MRI.markUsesInDebugValueAsUndef(Regs[i]);
    Changed = true;
  }

  return Changed;
}
namespace {

  // Eliminate redundant instructions
  //
  // This transformation will identify instructions where the output register
  // is the same as one of its input registers. This only works on instructions
  // that define a single register (unlike post-increment loads, for example).
  // The equality check is actually more detailed: the code calculates which
  // bits of the output are used, and only compares these bits with the input
  // register.
  // If the output matches an input, the instruction is replaced with COPY.
  // The copies will be removed by another transformation.
  class RedundantInstrElimination : public Transformation {
  public:
    RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
          const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
        : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

  private:
    bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
          unsigned &LostB, unsigned &LostE);
    bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
          unsigned &LostB, unsigned &LostE);
    bool computeUsedBits(unsigned Reg, BitVector &Bits);
    bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
          uint16_t Begin);
    bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);

    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };

} // end anonymous namespace
// Check if the instruction is a lossy shift left, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;

  unsigned Opc = MI.getOpcode();
  unsigned ImN, RegN, Width;

  switch (Opc) {
    case S2_asl_i_p_acc:
    case S2_asl_i_p_and:
    case S2_asl_i_p_nac:
    case S2_asl_i_p_xacc:
      ImN = 3;
      RegN = 2;
      Width = 64;
      break;
    case S2_asl_i_r:
      ImN = 2;
      RegN = 1;
      Width = 32;
      break;
    case S2_addasl_rrri:
    case S4_andi_asl_ri:
    case S4_addi_asl_ri:
    case S4_subi_asl_ri:
    case S2_asl_i_r_acc:
    case S2_asl_i_r_and:
    case S2_asl_i_r_nac:
    case S2_asl_i_r_sat:
    case S2_asl_i_r_xacc:
      ImN = 3;
      RegN = 2;
      Width = 32;
      break;
    default:
      return false;
  }

  if (OpN != RegN)
    return false;

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();
  if (S == 0)
    return false;
  LostB = Width-S;
  LostE = Width;
  return true;
}
// Check if the instruction is a lossy shift right, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;

  unsigned Opc = MI.getOpcode();
  unsigned ImN, RegN, Width;

  switch (Opc) {
    case S2_asr_i_p_acc:
    case S2_asr_i_p_and:
    case S2_asr_i_p_nac:
    case S2_lsr_i_p_acc:
    case S2_lsr_i_p_and:
    case S2_lsr_i_p_nac:
    case S2_lsr_i_p_xacc:
      ImN = 3;
      RegN = 2;
      Width = 64;
      break;
    case S4_andi_lsr_ri:
    case S4_addi_lsr_ri:
    case S4_subi_lsr_ri:
    case S2_asr_i_r_acc:
    case S2_asr_i_r_and:
    case S2_asr_i_r_nac:
    case S2_lsr_i_r_acc:
    case S2_lsr_i_r_and:
    case S2_lsr_i_r_nac:
    case S2_lsr_i_r_xacc:
      ImN = 3;
      RegN = 2;
      Width = 32;
      break;
    default:
      return false;
  }

  if (OpN != RegN)
    return false;

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();
  LostB = 0;
  LostE = S;
  return true;
}
// Calculate the bit vector that corresponds to the used bits of register Reg.
// The vector Bits has the same size as Reg (in bits). If the calculation
// fails (i.e. the used bits are unknown), it returns false. Otherwise, it
// returns true and sets the corresponding bits in Bits.
bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
  BitVector Used(Bits.size());
  RegisterSet Visited;
  std::vector<unsigned> Pending;
  Pending.push_back(Reg);

  for (unsigned i = 0; i < Pending.size(); ++i) {
    unsigned R = Pending[i];
    if (Visited.has(R))
      continue;
    Visited.insert(R);
    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
      BitTracker::RegisterRef UR = *I;
      unsigned B, W;
      if (!HBS::getSubregMask(UR, B, W, MRI))
        return false;
      MachineInstr &UseI = *I->getParent();
      if (UseI.isPHI() || UseI.isCopy()) {
        Register DefR = UseI.getOperand(0).getReg();
        if (!DefR.isVirtual())
          return false;
        Pending.push_back(DefR);
      } else {
        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
          return false;
      }
    }
  }
  Bits |= Used;
  return true;
}
// Calculate the bits used by instruction MI in a register in operand OpN.
// Return true/false if the calculation succeeds/fails. If it succeeds, set
// used bits in Bits. This function does not reset any bits in Bits, so
// subsequent calls over different instructions will result in the union
// of the used bits in all these instructions.
// The register in question may be used with a sub-register, whereas Bits
// holds the bits for the entire register. To keep track of that, the
// argument Begin indicates where in Bits is the lowest-significant bit
// of the register used in operand OpN. For example, in instruction:
//   %1 = S2_lsr_i_r %2:isub_hi, 10
// the operand 1 is a 32-bit register, which happens to be a subregister
// of the 64-bit register %2, and that subregister starts at position 32.
// In this case Begin=32, since Bits[32] would be the lowest-significant bit
// of %2:isub_hi.
bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
      unsigned OpN, BitVector &Bits, uint16_t Begin) {
  unsigned Opc = MI.getOpcode();
  BitVector T(Bits.size());
  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
  // Even if we don't have bits yet, we could still provide some information
  // if the instruction is a lossy shift: the lost bits will be marked as
  // not used.
  unsigned LB, LE;
  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
    assert(MI.getOperand(OpN).isReg());
    BitTracker::RegisterRef RR = MI.getOperand(OpN);
    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
    uint16_t Width = HRI.getRegSizeInBits(*RC);

    if (!GotBits)
      T.set(Begin, Begin+Width);
    assert(LB <= LE && LB < Width && LE <= Width);
    T.reset(Begin+LB, Begin+LE);
    GotBits = true;
  }
  if (GotBits)
    Bits |= T;
  return GotBits;
}
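// Example of the lossy-shift handling above: for a 32-bit source that is
// shifted right by 10, isLossyShiftRight reports [0,10) as lost, so all of
// [Begin, Begin+32) is marked used except [Begin, Begin+10); those low input
// bits cannot influence the result and are ignored by the equality check.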
// Calculates the used bits in RD ("defined register"), and checks if these
// bits in RS ("used register") and RD are identical.
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
      BitTracker::RegisterRef RS) {
  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);

  unsigned DB, DW;
  if (!HBS::getSubregMask(RD, DB, DW, MRI))
    return false;
  unsigned SB, SW;
  if (!HBS::getSubregMask(RS, SB, SW, MRI))
    return false;
  if (SW != DW)
    return false;

  BitVector Used(DC.width());
  if (!computeUsedBits(RD.Reg, Used))
    return false;

  for (unsigned i = 0; i != DW; ++i)
    if (Used[i+DB] && DC[DB+i] != SC[SB+i])
      return false;
  return true;
}
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
      const RegisterSet&) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
    NextI = std::next(I);
    MachineInstr *MI = &*I;

    if (MI->getOpcode() == TargetOpcode::COPY)
      continue;
    if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
      continue;
    unsigned NumD = MI->getDesc().getNumDefs();
    if (NumD != 1)
      continue;

    BitTracker::RegisterRef RD = MI->getOperand(0);
    if (!BT.has(RD.Reg))
      continue;
    const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
    auto At = MachineBasicBlock::iterator(MI);

    // Find a source operand that is equal to the result.
    for (auto &Op : MI->uses()) {
      if (!Op.isReg())
        continue;
      BitTracker::RegisterRef RS = Op;
      if (!BT.has(RS.Reg))
        continue;
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        continue;

      unsigned BN, BW;
      if (!HBS::getSubregMask(RS, BN, BW, MRI))
        continue;

      const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
      if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
        continue;

      // If found, replace the instruction with a COPY.
      const DebugLoc &DL = MI->getDebugLoc();
      const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
      Register NewR = MRI.createVirtualRegister(FRC);
      MachineInstr *CopyI =
          BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
            .addReg(RS.Reg, 0, RS.Sub);
      HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
      // This pass can create copies between registers that don't have the
      // exact same values. Updating the tracker has to involve updating
      // all dependent cells. Example:
      //   %1  = inst %2     ; %1 != %2, but used bits are equal
      //
      //   %3  = copy %2     ; <- inserted
      //   ... = %3          ; <- replaced from %2
      // Indirectly, we can create a "copy" between %1 and %2 even
      // though their exact values do not match.
      BT.visit(*CopyI);

      Changed = true;
      break;
    }
  }

  return Changed;
}
namespace {

  // Recognize instructions that produce constant values known at compile-time.
  // Replace them with register definitions that load these constants directly.
  class ConstGeneration : public Transformation {
  public:
    ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
        MachineRegisterInfo &mri)
      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
    static bool isTfrConst(const MachineInstr &MI);

  private:
    Register genTfrConst(const TargetRegisterClass *RC, int64_t C,
        MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
  };

} // end anonymous namespace
bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_tfrsi:
    case Hexagon::A2_tfrpi:
    case Hexagon::PS_true:
    case Hexagon::PS_false:
    case Hexagon::CONST32:
    case Hexagon::CONST64:
      return true;
  }
  return false;
}
ConstGeneration::genTfrConst(const TargetRegisterClass
*RC
, int64_t C
,
1414 MachineBasicBlock
&B
,
1415 MachineBasicBlock::iterator At
,
1417 Register Reg
= MRI
.createVirtualRegister(RC
);
1418 if (RC
== &Hexagon::IntRegsRegClass
) {
1419 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrsi
), Reg
)
1420 .addImm(int32_t(C
));
1424 if (RC
== &Hexagon::DoubleRegsRegClass
) {
1426 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrpi
), Reg
)
1431 unsigned Lo
= Lo_32(C
), Hi
= Hi_32(C
);
1432 if (isInt
<8>(Lo
) || isInt
<8>(Hi
)) {
1433 unsigned Opc
= isInt
<8>(Lo
) ? Hexagon::A2_combineii
1434 : Hexagon::A4_combineii
;
1435 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
)
1436 .addImm(int32_t(Hi
))
1437 .addImm(int32_t(Lo
));
1440 MachineFunction
*MF
= B
.getParent();
1441 auto &HST
= MF
->getSubtarget
<HexagonSubtarget
>();
1443 // Disable CONST64 for tiny core since it takes a LD resource.
1444 if (!HST
.isTinyCore() ||
1445 MF
->getFunction().hasOptSize()) {
1446 BuildMI(B
, At
, DL
, HII
.get(Hexagon::CONST64
), Reg
)
1452 if (RC
== &Hexagon::PredRegsRegClass
) {
1455 Opc
= Hexagon::PS_false
;
1456 else if ((C
& 0xFF) == 0xFF)
1457 Opc
= Hexagon::PS_true
;
1460 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
);
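// For predicate registers only the all-zeros and all-ones bit patterns are
// materialized directly (PS_false / PS_true); other known constants are not
// handled by genTfrConst for this register class.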
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
    if (isTfrConst(*I))
      continue;
    Defs.clear();
    HBS::getInstrDefs(*I, Defs);
    if (Defs.count() != 1)
      continue;
    Register DR = Defs.find_first();
    if (!DR.isVirtual())
      continue;
    uint64_t U;
    const BitTracker::RegisterCell &DRC = BT.lookup(DR);
    if (HBS::getConst(DRC, 0, DRC.width(), U)) {
      int64_t C = U;
      DebugLoc DL = I->getDebugLoc();
      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
      Register ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
      if (ImmReg) {
        HBS::replaceReg(DR, ImmReg, MRI);
        BT.put(ImmReg, DRC);
        Changed = true;
      }
    }
  }
  return Changed;
}
namespace {

  // Identify pairs of available registers which hold identical values.
  // In such cases, only one of them needs to be calculated, the other one
  // will be defined as a copy of the first.
  class CopyGeneration : public Transformation {
  public:
    CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
        const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

  private:
    bool findMatch(const BitTracker::RegisterRef &Inp,
        BitTracker::RegisterRef &Out, const RegisterSet &AVs);

    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
    BitTracker &BT;
    RegisterSet Forbidden;
  };

  // Eliminate register copies RD = RS, by replacing the uses of RD with
  // RS.
  class CopyPropagation : public Transformation {
  public:
    CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(false), HRI(hri), MRI(mri) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

    static bool isCopyReg(unsigned Opc, bool NoConv);

  private:
    bool propagateRegCopy(MachineInstr &MI);

    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
  };

} // end anonymous namespace
/// Check if there is a register in AVs that is identical to Inp. If so,
/// set Out to the found register. The output may be a pair Reg:Sub.
bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
      BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
  if (!BT.has(Inp.Reg))
    return false;
  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
  auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
  unsigned B, W;
  if (!HBS::getSubregMask(Inp, B, W, MRI))
    return false;

  for (Register R = AVs.find_first(); R; R = AVs.find_next(R)) {
    if (!BT.has(R) || Forbidden[R])
      continue;
    const BitTracker::RegisterCell &RC = BT.lookup(R);
    unsigned RW = RC.width();
    if (W == RW) {
      if (FRC != MRI.getRegClass(R))
        continue;
      if (!HBS::isTransparentCopy(R, Inp, MRI))
        continue;
      if (!HBS::isEqual(InpRC, B, RC, 0, W))
        continue;
      Out.Reg = R;
      Out.Sub = 0;
      return true;
    }
    // Check if there is a super-register, whose part (with a subregister)
    // is equal to the input.
    // Only do double registers for now.
    if (W*2 != RW)
      continue;
    if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
      continue;

    if (HBS::isEqual(InpRC, B, RC, 0, W))
      Out.Sub = Hexagon::isub_lo;
    else if (HBS::isEqual(InpRC, B, RC, W, W))
      Out.Sub = Hexagon::isub_hi;
    else
      continue;
    Out.Reg = R;
    if (HBS::isTransparentCopy(Out, Inp, MRI))
      return true;
  }
  return false;
}
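// Besides exact same-width matches, findMatch also accepts either half of an
// available 64-bit DoubleRegs register, returning it as Reg:isub_lo or
// Reg:isub_hi; wider HVX pairs are deliberately not considered here for now.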
bool CopyGeneration::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
    return false;
  RegisterSet AVB(AVs);
  bool Changed = false;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
       ++I, AVB.insert(Defs)) {
    NextI = std::next(I);
    Defs.clear();
    HBS::getInstrDefs(*I, Defs);

    unsigned Opc = I->getOpcode();
    if (CopyPropagation::isCopyReg(Opc, false) ||
        ConstGeneration::isTfrConst(*I))
      continue;

    DebugLoc DL = I->getDebugLoc();
    auto At = I->isPHI() ? B.getFirstNonPHI() : I;

    for (Register R = Defs.find_first(); R; R = Defs.find_next(R)) {
      BitTracker::RegisterRef MR;
      auto *FRC = HBS::getFinalVRegClass(R, MRI);

      if (findMatch(R, MR, AVB)) {
        Register NewR = MRI.createVirtualRegister(FRC);
        BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
            .addReg(MR.Reg, 0, MR.Sub);
        BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
        HBS::replaceReg(R, NewR, MRI);
        Forbidden.insert(R);
        Changed = true;
        continue;
      }

      if (FRC == &Hexagon::DoubleRegsRegClass ||
          FRC == &Hexagon::HvxWRRegClass) {
        // Try to generate REG_SEQUENCE.
        unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
        unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
        BitTracker::RegisterRef TL = { R, SubLo };
        BitTracker::RegisterRef TH = { R, SubHi };
        BitTracker::RegisterRef ML, MH;
        if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
          auto *FRC = HBS::getFinalVRegClass(R, MRI);
          Register NewR = MRI.createVirtualRegister(FRC);
          BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
              .addReg(ML.Reg, 0, ML.Sub)
              .addImm(SubLo)
              .addReg(MH.Reg, 0, MH.Sub)
              .addImm(SubHi);
          BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
          HBS::replaceReg(R, NewR, MRI);
          Forbidden.insert(R);
          Changed = true;
        }
      }
    }
  }

  return Changed;
}
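// Once a register has been redefined through a copy (or REG_SEQUENCE) of a
// match, it is added to Forbidden, presumably so that findMatch does not
// later pick it again as the source for yet another copy of the same value.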
bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
  switch (Opc) {
    case TargetOpcode::COPY:
    case TargetOpcode::REG_SEQUENCE:
    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri:
      return !NoConv;
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp:
    case Hexagon::A2_combinew:
    case Hexagon::V6_vcombine:
      return true;
    default:
      break;
  }
  return false;
}
bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
  bool Changed = false;
  unsigned Opc = MI.getOpcode();
  BitTracker::RegisterRef RD = MI.getOperand(0);
  assert(MI.getOperand(0).getSubReg() == 0);

  switch (Opc) {
    case TargetOpcode::COPY:
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp: {
      BitTracker::RegisterRef RS = MI.getOperand(1);
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        break;
      if (RS.Sub != 0)
        Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
      else
        Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
      break;
    }
    case TargetOpcode::REG_SEQUENCE: {
      BitTracker::RegisterRef SL, SH;
      if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
        const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
        unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
        unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
        Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
        Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
      }
      break;
    }
    case Hexagon::A2_combinew:
    case Hexagon::V6_vcombine: {
      const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
      unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
      unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
      BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
      Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
      Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
      break;
    }
    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri: {
      unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
      unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
                                                    : Hexagon::isub_hi;
      BitTracker::RegisterRef RS = MI.getOperand(SrcX);
      Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
      break;
    }
  }

  return Changed;
}
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  std::vector<MachineInstr*> Instrs;
  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
    Instrs.push_back(&*I);

  bool Changed = false;
  for (auto I : Instrs) {
    unsigned Opc = I->getOpcode();
    if (!CopyPropagation::isCopyReg(Opc, true))
      continue;
    Changed |= propagateRegCopy(*I);
  }

  return Changed;
}
namespace {

  // Recognize patterns that can be simplified and replace them with the
  // simpler forms.
  // This is by no means complete
  class BitSimplification : public Transformation {
  public:
    BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
        const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
        MachineRegisterInfo &mri, MachineFunction &mf)
      : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
        MF(mf), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

  private:
    struct RegHalf : public BitTracker::RegisterRef {
      bool Low;  // Low/High halfword.
    };

    bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
          unsigned B, RegHalf &RH);
    bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);

    bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
          BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
    unsigned getCombineOpcode(bool HLow, bool LLow);

    bool genStoreUpperHalf(MachineInstr *MI);
    bool genStoreImmediate(MachineInstr *MI);
    bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
    bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
    bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);

    // Cache of created instructions to avoid creating duplicates.
    // XXX Currently only used by genBitSplit.
    std::vector<MachineInstr*> NewMIs;

    const MachineDominatorTree &MDT;
    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
    MachineFunction &MF;
    BitTracker &BT;
  };

} // end anonymous namespace
// Check if the bits [B..B+16) in register cell RC form a valid halfword,
// i.e. [0..16), [16..32), etc. of some register. If so, return true and
// set the information about the found register in RH.
bool BitSimplification::matchHalf(unsigned SelfR,
      const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
  // XXX This could be searching in the set of available registers, in case
  // the match is not exact.

  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
  // register and all the bits B..B+15 match between RC and the register.
  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
  unsigned I = B;
  while (I < B+16 && RC[I].num())
    I++;
  if (I == B+16)
    return false;

  Register Reg = RC[I].RefI.Reg;
  unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
  if (P < I-B)
    return false;
  unsigned Pos = P - (I-B);

  if (Reg == 0 || Reg == SelfR)    // Don't match "self".
    return false;
  if (!Reg.isVirtual())
    return false;

  const BitTracker::RegisterCell &SC = BT.lookup(Reg);
  if (Pos+16 > SC.width())
    return false;

  for (unsigned i = 0; i < 16; ++i) {
    const BitTracker::BitValue &RV = RC[i+B];
    if (RV.Type == BitTracker::BitValue::Ref) {
      if (RV.RefI.Reg != Reg)
        return false;
      if (RV.RefI.Pos != i+Pos)
        return false;
    } else {
      if (RC[i+B] != SC[i+Pos])
        return false;
    }
  }

      Sub = Hexagon::isub_lo;
      Sub = Hexagon::isub_lo;
      Sub = Hexagon::isub_hi;
      Sub = Hexagon::isub_hi;
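  // (Selection logic elided in this excerpt: Sub and the Low flag are chosen
  //  from the position of the matched halfword within the referenced
  //  register; offsets 0 and 16 select isub_lo, 32 and 48 select isub_hi,
  //  with Low set for offsets 0 and 32, and any other offset failing the
  //  match.)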
  // If the subregister is not valid with the register, set it to 0.
  if (!HBS::getFinalVRegClass(RH, MRI))
    RH.Sub = 0;

  return true;
}

bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
      unsigned OpNum) {
  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
  auto *RRC = HBS::getFinalVRegClass(R, MRI);
  return OpRC->hasSubClassEq(RRC);
}
// Check if RC matches the pattern of a S2_packhl. If so, return true and
// set the inputs Rs and Rt.
bool BitSimplification::matchPackhl(unsigned SelfR,
      const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
      BitTracker::RegisterRef &Rt) {
  RegHalf L1, H1, L2, H2;

  if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
    return false;
  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
    return false;

  // Rs = H1.L1, Rt = H2.L2
  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
    return false;
  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
    return false;

  Rs = H1;
  Rt = H2;
  return true;
}
unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
  return HLow ? LLow ? Hexagon::A2_combine_ll
                     : Hexagon::A2_combine_lh
              : LLow ? Hexagon::A2_combine_hl
                     : Hexagon::A2_combine_hh;
}
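// Opcode selection above, spelled out:
//   HLow  && LLow   -> A2_combine_ll
//   HLow  && !LLow  -> A2_combine_lh
//   !HLow && LLow   -> A2_combine_hl
//   !HLow && !LLow  -> A2_combine_hh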
// If MI stores the upper halfword of a register (potentially obtained via
// shifts or extracts), replace it with a storerf instruction. This could
// cause the "extraction" code to become dead.
bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_storerh_io)
    return false;

  MachineOperand &ValOp = MI->getOperand(2);
  BitTracker::RegisterRef RS = ValOp;
  if (!BT.has(RS.Reg))
    return false;
  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
  RegHalf H;
  if (!matchHalf(0, RC, 0, H))
    return false;
  if (H.Low)
    return false;
  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
  ValOp.setReg(H.Reg);
  ValOp.setSubReg(H.Sub);
  return true;
}
// If MI stores a value known at compile-time, and the value is within a range
// that avoids using constant-extenders, replace it with a store-immediate.
bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  unsigned Align = 0;
  switch (Opc) {
    case Hexagon::S2_storeri_io:
      Align++;
      LLVM_FALLTHROUGH;
    case Hexagon::S2_storerh_io:
      Align++;
      LLVM_FALLTHROUGH;
    case Hexagon::S2_storerb_io:
      break;
    default:
      return false;
  }

  // Avoid stores to frame-indices (due to an unknown offset).
  if (!MI->getOperand(0).isReg())
    return false;
  MachineOperand &OffOp = MI->getOperand(1);
  if (!OffOp.isImm())
    return false;

  int64_t Off = OffOp.getImm();
  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
    return false;
  // Source register:
  BitTracker::RegisterRef RS = MI->getOperand(2);
  if (!BT.has(RS.Reg))
    return false;
  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
  uint64_t U;
  if (!HBS::getConst(RC, 0, RC.width(), U))
    return false;

  // Only consider 8-bit values to avoid constant-extenders.
  int V;
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      V = int8_t(U);
      break;
    case Hexagon::S2_storerh_io:
      V = int16_t(U);
      break;
    case Hexagon::S2_storeri_io:
      V = int32_t(U);
      break;
    default:
      // Opc is already checked above to be one of the three store instructions.
      // This silences a -Wuninitialized false positive on GCC 5.4.
      llvm_unreachable("Unexpected store opcode");
  }
  if (!isInt<8>(V))
    return false;

  MI->RemoveOperand(2);
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
      break;
    case Hexagon::S2_storerh_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
      break;
    case Hexagon::S2_storeri_io:
      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
      break;
  }
  MI->addOperand(MachineOperand::CreateImm(V));
  return true;
}
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
// dead.
bool BitSimplification::genPackhl(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc == Hexagon::S2_packhl)
    return false;
  BitTracker::RegisterRef Rs, Rt;
  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
    return false;
  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
      !validateReg(Rt, Hexagon::S2_packhl, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  DebugLoc DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
      .addReg(Rs.Reg, 0, Rs.Sub)
      .addReg(Rt.Reg, 0, Rt.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
// If MI produces a halfword of the input in the low half of the output,
// replace it with zero-extend or extractu.
bool BitSimplification::genExtractHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L;
  // Check for halfword in low 16 bits, zeros elsewhere.
  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
    return false;

  unsigned Opc = MI->getOpcode();
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Prefer zxth, since zxth can go in any slot, while extractu only in
  // slots 2 and 3.
  unsigned NewR = 0;
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  if (L.Low && Opc != Hexagon::A2_zxth) {
    if (validateReg(L, Hexagon::A2_zxth, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
          .addReg(L.Reg, 0, L.Sub);
    }
  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
    if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
          .addReg(L.Reg, 0, L.Sub)
          .addImm(16);
    }
  }
  if (NewR == 0)
    return false;
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
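// For illustration (registers are hypothetical): if the cell of %2 shows that
// its low 16 bits equal the high halfword of %1 and the upper 16 bits are
// zero, the value can instead be produced by
//   %2 = S2_lsr_i_r %1, 16
// and if the matched halfword is the low one of %1, by
//   %2 = A2_zxth %1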
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// combine.
bool BitSimplification::genCombineHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L, H;
  // Check for combine h/l
  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
    return false;
  // Do nothing if this is just a reg copy.
  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
    return false;

  unsigned Opc = MI->getOpcode();
  unsigned COpc = getCombineOpcode(H.Low, L.Low);
  if (COpc == Opc)
    return false;
  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(COpc), NewR)
      .addReg(H.Reg, 0, H.Sub)
      .addReg(L.Reg, 0, L.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
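// For illustration (registers are hypothetical): a value whose high halfword
// is %1.H and whose low halfword is %2.L can be rebuilt in one instruction:
//   %3 = A2_combine_hl %1, %2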
// If MI resets high bits of a register and keeps the lower ones, replace it
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
bool BitSimplification::genExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A2_zxtb:
    case Hexagon::A2_zxth:
    case Hexagon::S2_extractu:
      return false;
  }
  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
    int32_t Imm = MI->getOperand(2).getImm();
    if (isInt<10>(Imm))
      return false;
  }

  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
    return false;
  unsigned W = RC.width();
  while (W > 0 && RC[W-1].is(0))
    W--;
  if (W == 0 || W == RC.width())
    return false;
  unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
                  : (W == 16) ? Hexagon::A2_zxth
                  : (W < 10)  ? Hexagon::A2_andir
                              : Hexagon::S2_extractu;
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  for (auto &Op : MI->uses()) {
    if (!Op.isReg())
      continue;
    BitTracker::RegisterRef RS = Op;
    if (!BT.has(RS.Reg))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
    unsigned BN, BW;
    if (!HBS::getSubregMask(RS, BN, BW, MRI))
      continue;
    if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
      continue;
    if (!validateReg(RS, NewOpc, 1))
      continue;

    Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
                  .addReg(RS.Reg, 0, RS.Sub);
    if (NewOpc == Hexagon::A2_andir)
      MIB.addImm((1 << W) - 1);
    else if (NewOpc == Hexagon::S2_extractu)
      MIB.addImm(W).addImm(0);
    HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    return true;
  }

  return false;
}
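// For illustration (registers and mask are hypothetical): if the cell of
//   %2 = A2_and %1, %0
// shows 24 leading zeros and its low 8 bits equal to the low 8 bits of %1,
// the and can be replaced with
//   %2 = A2_zxtb %1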
bool BitSimplification::genBitSplit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenBitSplit)
    return false;
  if (MaxBitSplit.getNumOccurrences()) {
    if (CountBitSplit >= MaxBitSplit)
      return false;
  }

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A4_bitsplit:
    case Hexagon::A4_bitspliti:
      return false;
  }

  unsigned W = RC.width();
  if (W != 32)
    return false;

  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
    unsigned Z = C.width();
    while (Z > 0 && C[Z-1].is(0))
      Z--;
    return C.width() - Z;
  };

  // Count the number of leading zeros in the target RC.
  unsigned Z = ctlz(RC);
  if (Z == 0 || Z == W)
    return false;

  // A simplistic analysis: assume the source register (the one being split)
  // is fully unknown, and that all its bits are self-references.
  const BitTracker::BitValue &B0 = RC[0];
  if (B0.Type != BitTracker::BitValue::Ref)
    return false;

  unsigned SrcR = B0.RefI.Reg;
  unsigned SrcSR = 0;
  unsigned Pos = B0.RefI.Pos;

  // All the non-zero bits should be consecutive bits from the same register.
  for (unsigned i = 1; i < W-Z; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      return false;
    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
      return false;
  }

  // Now, find the other bitfield among AVs.
  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
    // The number of leading zeros here should be the number of trailing
    // non-zeros in RC.
    unsigned SRC = MRI.getRegClass(S)->getID();
    if (SRC != Hexagon::IntRegsRegClassID &&
        SRC != Hexagon::DoubleRegsRegClassID)
      continue;
    if (!BT.has(S))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(S);
    if (SC.width() != W || ctlz(SC) != W-Z)
      continue;
    // The Z lower bits should now match SrcR.
    const BitTracker::BitValue &S0 = SC[0];
    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
      continue;
    unsigned P = S0.RefI.Pos;

    if (Pos <= P && (Pos + W-Z) != P)
      continue;
    if (P < Pos && (P + Z) != Pos)
      continue;
    // The starting bitfield position must be at a subregister boundary.
    if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
      continue;

    unsigned I;
    for (I = 1; I < Z; ++I) {
      const BitTracker::BitValue &V = SC[I];
      if (V.Type != BitTracker::BitValue::Ref)
        break;
      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
        break;
    }
    if (I != Z)
      continue;

    // Generate bitsplit where S is defined.
    if (MaxBitSplit.getNumOccurrences())
      CountBitSplit++;
    MachineInstr *DefS = MRI.getVRegDef(S);
    assert(DefS != nullptr);
    DebugLoc DL = DefS->getDebugLoc();
    MachineBasicBlock &B = *DefS->getParent();
    auto At = DefS->isPHI() ? B.getFirstNonPHI()
                            : MachineBasicBlock::iterator(DefS);
    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
      SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
    if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
      continue;
    unsigned ImmOp = Pos <= P ? W-Z : Z;

    // Find an existing bitsplit instruction if one already exists.
    unsigned NewR = 0;
    for (MachineInstr *In : NewMIs) {
      if (In->getOpcode() != Hexagon::A4_bitspliti)
        continue;
      MachineOperand &Op1 = In->getOperand(1);
      if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
        continue;
      if (In->getOperand(2).getImm() != ImmOp)
        continue;
      // Check if the target register is available here.
      MachineOperand &Op0 = In->getOperand(0);
      MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
      assert(DefI != nullptr);
      if (!MDT.dominates(DefI, &*At))
        continue;

      // Found one that can be reused.
      assert(Op0.getSubReg() == 0);
      NewR = Op0.getReg();
      break;
    }
    if (NewR == 0) {
      NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
      auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
                       .addReg(SrcR, 0, SrcSR)
                       .addImm(ImmOp);
      NewMIs.push_back(NewBS);
    }
    if (Pos <= P) {
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_hi, MRI);
    } else {
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
    }
    return true;
  }

  return false;
}
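// For illustration (registers are hypothetical, with W = 32 and Z = 24):
//   %2 = S2_extractu %1, 8, 0     ; low 8 bits of %1, 24 leading zeros
//   %3 = S2_lsr_i_r %1, 8         ; upper 24 bits of %1, 8 leading zeros
// Both values can be produced by a single
//   %4:doubleregs = A4_bitspliti %1, 8
// with %2 replaced by %4.isub_lo and %3 by %4.isub_hi.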
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
//   %2 = S2_lsr_i_r %1, 5
//   %3 = S2_tstbit_i %2, 0
// The above could be rewritten as
//   %3 = S2_tstbit_i %1, 5
bool BitSimplification::simplifyTstbit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_tstbit_i)
    return false;

  unsigned BN = MI->getOperand(2).getImm();
  BitTracker::RegisterRef RS = MI->getOperand(1);
  unsigned F, W;
  DebugLoc DL = MI->getDebugLoc();
  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
    return false;
  MachineBasicBlock &B = *MI->getParent();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);

  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
  const BitTracker::BitValue &V = SC[F+BN];
  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
    const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
    // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
    // a double register, need to use a subregister and adjust bit
    // position accordingly.
    unsigned P = std::numeric_limits<unsigned>::max();
    BitTracker::RegisterRef RR(V.RefI.Reg, 0);
    if (TC == &Hexagon::DoubleRegsRegClass) {
      if (V.RefI.Pos < 32) {
        P = V.RefI.Pos;
        RR.Sub = Hexagon::isub_lo;
      } else {
        P = V.RefI.Pos - 32;
        RR.Sub = Hexagon::isub_hi;
      }
    } else if (TC == &Hexagon::IntRegsRegClass) {
      P = V.RefI.Pos;
    }
    if (P != std::numeric_limits<unsigned>::max()) {
      Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
          .addReg(RR.Reg, 0, RR.Sub)
          .addImm(P);
      HBS::replaceReg(RD.Reg, NewR, MRI);
      return true;
    }
  } else if (V.is(0) || V.is(1)) {
    Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
    BuildMI(B, At, DL, HII.get(NewOpc), NewR);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    return true;
  }

  return false;
}
// Detect whether RD is a bitfield extract (sign- or zero-extended) of
// some register from the AVs set. Create a new corresponding instruction
// at the location of MI. The intent is to recognize situations where
// a sequence of instructions performs an operation that is equivalent to
// an extract operation, such as a shift left followed by a shift right.
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenExtract)
    return false;
  if (MaxExtract.getNumOccurrences()) {
    if (CountExtract >= MaxExtract)
      return false;
    CountExtract++;
  }

  unsigned W = RC.width();
  unsigned RW = W;
  unsigned Len;
  bool Signed;

  // The code is mostly class-independent, except for the part that generates
  // the extract instruction, and establishes the source register (in case it
  // needs to use a subregister).
  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  // If the cell has a form of 00..0xx..x with k zeros and n remaining
  // bits, this could be an extractu of the n bits, but it could also be
  // an extractu of a longer field which happens to have 0s in the top
  // bit positions.
  // The same logic applies to sign-extended fields.
  //
  // Do not check for the extended extracts, since it would expand the
  // search space quite a bit. The search may be expensive as it is.

  const BitTracker::BitValue &TopV = RC[W-1];

  // Eliminate candidates that have self-referential bits, since they
  // cannot be extracts from other registers. Also, skip registers that
  // have compile-time constant values.
  bool IsConst = true;
  for (unsigned I = 0; I != W; ++I) {
    const BitTracker::BitValue &V = RC[I];
    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
      return false;
    IsConst = IsConst && (V.is(0) || V.is(1));
  }
  if (IsConst)
    return false;

  if (TopV.is(0) || TopV.is(1)) {
    bool S = TopV.is(1);
    for (--W; W > 0 && RC[W-1].is(S); --W)
      ;
    Len = W;
    Signed = S;
    // The sign bit must be a part of the field being extended.
    if (Signed)
      ++Len;
  } else {
    // This could still be a sign-extended extract.
    assert(TopV.Type == BitTracker::BitValue::Ref);
    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
      return false;
    for (--W; W > 0 && RC[W-1] == TopV; --W)
      ;
    // The top bits of RC are copies of TopV. One occurrence of TopV will
    // be a part of the field.
    Len = W + 1;
    Signed = true;
  }

  // This would be just a copy. It should be handled elsewhere.
  if (Len == RW)
    return false;

  LLVM_DEBUG({
    dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
           << ", MI: " << *MI;
    dbgs() << "Cell: " << RC << '\n';
    dbgs() << "Expected bitfield size: " << Len << " bits, "
           << (Signed ? "sign" : "zero") << "-extended\n";
  });

  bool Changed = false;

  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
    if (!BT.has(R))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(R);
    unsigned SW = SC.width();

    // The source can be longer than the destination, as long as its size is
    // a multiple of the size of the destination. Also, we would need to be
    // able to refer to the subregister in the source that would be of the
    // same size as the destination, but only check the sizes here.
    if (SW < RW || (SW % RW) != 0)
      continue;

    // The field can start at any offset in SC as long as it contains Len
    // bits and does not cross subregister boundary (if the source register
    // is longer than the destination).
    unsigned Off = 0;
    while (Off <= SW-Len) {
      unsigned OE = (Off+Len)/RW;
      if (OE != Off/RW) {
        // The assumption here is that if the source (R) is longer than the
        // destination, then the destination is a sequence of words of
        // size RW, and each such word in R can be accessed via a subregister.
        //
        // If the beginning and the end of the field cross the subregister
        // boundary, advance to the next subregister.
        Off = OE*RW;
        continue;
      }
      if (HBS::isEqual(RC, 0, SC, Off, Len))
        break;
      ++Off;
    }
    if (Off > SW-Len)
      continue;

    unsigned ExtOpc = 0;
    if (Off == 0) {
      if (Len == 8)
        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
      else if (Len == 16)
        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
      else if (Len < 10 && !Signed)
        ExtOpc = Hexagon::A2_andir;
    }
    if (ExtOpc == 0) {
      ExtOpc =
          Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
                 : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
    }
    unsigned SR = 0;
    // This only recognizes isub_lo and isub_hi.
    if (RW != SW && RW*2 != SW)
      continue;
    if (RW != SW)
      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;

    if (!validateReg({R,SR}, ExtOpc, 1))
      continue;

    // Don't generate the same instruction as the one being optimized.
    if (MI->getOpcode() == ExtOpc) {
      // All possible ExtOpc's have the source in operand(1).
      const MachineOperand &SrcOp = MI->getOperand(1);
      if (SrcOp.getReg() == R)
        continue;
    }

    DebugLoc DL = MI->getDebugLoc();
    MachineBasicBlock &B = *MI->getParent();
    Register NewR = MRI.createVirtualRegister(FRC);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
                  .addReg(R, 0, SR);
    switch (ExtOpc) {
      case Hexagon::A2_sxtb:
      case Hexagon::A2_zxtb:
      case Hexagon::A2_sxth:
      case Hexagon::A2_zxth:
        break;
      case Hexagon::A2_andir:
        MIB.addImm((1u << Len) - 1);
        break;
      case Hexagon::S4_extract:
      case Hexagon::S2_extractu:
      case Hexagon::S4_extractp:
      case Hexagon::S2_extractup:
        MIB.addImm(Len)
           .addImm(Off);
        break;
      default:
        llvm_unreachable("Unexpected opcode");
    }

    HBS::replaceReg(RD.Reg, NewR, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    Changed = true;
    break;
  }

  return Changed;
}
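// For illustration (registers are hypothetical): in the sequence
//   %2 = S2_asl_i_r %1, 24
//   %3 = S2_asr_i_r %2, 24
// the cell of %3 is the sign-extended low 8 bits of %1, so the second shift
// can be replaced with
//   %3 = A2_sxtb %1
// leaving the left shift as a candidate for dead-code elimination.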
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
      BitTracker::RegisterRef RD) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
    return false;
  MachineOperand &CmpOp = MI->getOperand(2);
  if (!CmpOp.isImm() || CmpOp.getImm() != 0)
    return false;

  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  MachineBasicBlock &B = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  bool KnownZ = true;
  bool KnownNZ = false;

  BitTracker::RegisterRef SR = MI->getOperand(1);
  if (!BT.has(SR.Reg))
    return false;
  const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
  unsigned F, W;
  if (!HBS::getSubregMask(SR, F, W, MRI))
    return false;

  for (uint16_t I = F; I != F+W; ++I) {
    const BitTracker::BitValue &V = SC[I];
    if (!V.is(0))
      KnownZ = false;
    if (V.is(1))
      KnownNZ = true;
  }

  auto ReplaceWithConst = [&](int C) {
    Register NewR = MRI.createVirtualRegister(FRC);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
        .addImm(C);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    BitTracker::RegisterCell NewRC(W);
    for (uint16_t I = 0; I != W; ++I) {
      NewRC[I] = BitTracker::BitValue(C & 1);
      C = unsigned(C) >> 1;
    }
    BT.put(BitTracker::RegisterRef(NewR), NewRC);
    return true;
  };

  auto IsNonZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return true;
    if (Op.isImm())
      return Op.getImm() != 0;
    if (Op.isCImm())
      return !Op.getCImm()->isZero();
    if (Op.isFPImm())
      return !Op.getFPImm()->isZero();
    return false;
  };

  auto IsZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return false;
    if (Op.isImm())
      return Op.getImm() == 0;
    if (Op.isCImm())
      return Op.getCImm()->isZero();
    if (Op.isFPImm())
      return Op.getFPImm()->isZero();
    return false;
  };

  // If the source register is known to be 0 or non-0, the comparison can
  // be folded to a load of a constant.
  if (KnownZ || KnownNZ) {
    assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
    return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
  }

  // Special case: if the compare comes from a C2_muxii, then we know the
  // two possible constants that can be the source value.
  MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
  if (!InpDef)
    return false;
  if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
    MachineOperand &Src1 = InpDef->getOperand(2);
    MachineOperand &Src2 = InpDef->getOperand(3);
    // Check if both are non-zero.
    bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
    if (KnownNZ1 && KnownNZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
    // Check if both are zero.
    bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
    if (KnownZ1 && KnownZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);

    // If for both operands we know that they are either 0 or non-0,
    // replace the comparison with a C2_muxii, using the same predicate
    // register, but with operands substituted with 0/1 accordingly.
    if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
      Register NewR = MRI.createVirtualRegister(FRC);
      BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
          .addReg(InpDef->getOperand(1).getReg())
          .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
          .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
      HBS::replaceReg(RD.Reg, NewR, MRI);
      // Create a new cell with only the least significant bit unknown.
      BitTracker::RegisterCell NewRC(W);
      NewRC[0] = BitTracker::BitValue::self();
      NewRC.fill(1, W, BitTracker::BitValue::Zero);
      BT.put(BitTracker::RegisterRef(NewR), NewRC);
      return true;
    }
  }

  return false;
}
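// For illustration (registers and constants are hypothetical):
//   %1 = C2_muxii %0, 3, 5
//   %2 = A4_rcmpneqi %1, 0
// Both mux operands are non-zero, so %2 is always 1 and can be replaced with
//   %2 = A2_tfrsi 1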
bool BitSimplification::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;
  RegisterSet AVB = AVs;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
    MachineInstr *MI = &*I;
    Defs.clear();
    HBS::getInstrDefs(*MI, Defs);

    unsigned Opc = MI->getOpcode();
    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
      continue;

    if (MI->mayStore()) {
      bool T = genStoreUpperHalf(MI);
      T = T || genStoreImmediate(MI);
      Changed |= T;
      continue;
    }

    if (Defs.count() != 1)
      continue;
    const MachineOperand &Op0 = MI->getOperand(0);
    if (!Op0.isReg() || !Op0.isDef())
      continue;
    BitTracker::RegisterRef RD = Op0;
    if (!BT.has(RD.Reg))
      continue;
    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);

    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
      bool T = genPackhl(MI, RD, RC);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
      bool T = genBitSplit(MI, RD, RC, AVB);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      T = T || genExtractHalf(MI, RD, RC);
      T = T || genCombineHalf(MI, RD, RC);
      T = T || genExtractLow(MI, RD, RC);
      T = T || simplifyRCmp0(MI, RD);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
      bool T = simplifyTstbit(MI, RD, RC);
      Changed |= T;
      continue;
    }
  }
  return Changed;
}
bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HRI = *HST.getRegisterInfo();
  auto &HII = *HST.getInstrInfo();

  MDT = &getAnalysis<MachineDominatorTree>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed;

  Changed = DeadCodeElimination(MF, *MDT).run();

  const HexagonEvaluator HE(HRI, MRI, HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();

  MachineBasicBlock &Entry = MF.front();

  RegisterSet AIG;  // Available registers for IG.
  ConstGeneration ImmG(BT, HII, MRI);
  Changed |= visitBlock(Entry, ImmG, AIG);

  RegisterSet ARE;  // Available registers for RIE.
  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
  bool Ried = visitBlock(Entry, RIE, ARE);
  if (Ried) {
    Changed = true;
    BT.run();
  }

  RegisterSet ACG;  // Available registers for CG.
  CopyGeneration CopyG(BT, HII, HRI, MRI);
  Changed |= visitBlock(Entry, CopyG, ACG);

  RegisterSet ACP;  // Available registers for CP.
  CopyPropagation CopyP(HRI, MRI);
  Changed |= visitBlock(Entry, CopyP, ACP);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  BT.run();
  RegisterSet ABS;  // Available registers for BS.
  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
  Changed |= visitBlock(Entry, BitS, ABS);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  if (Changed) {
    for (auto &B : MF)
      for (auto &I : B)
        I.clearKillInfo();
    DeadCodeElimination(MF, *MDT).run();
  }
  return Changed;
}
// Recognize loops where the code at the end of the loop matches the code
// before the entry of the loop, and the matching code is such that it can
// be simplified. This pass relies on the bit simplification above and only
// prepares code in a way that can be handled by the bit simplification.
//
// This is the motivating testcase (and explanation):
//
// {
//   loop0(.LBB0_2, r1)         // %for.body.preheader
//   r5:4 = memd(r0++#8)
// }
// {
//   r3 = lsr(r4, #16)
//   r7:6 = combine(r5, r5)
// }
// {
//   r3 = insert(r5, #16, #16)
//   r7:6 = vlsrw(r7:6, #16)
// }
// .LBB0_2:
// {
//   memh(r2+#6) = r6            # R6 is really R5.H
// }
// {
//   memh(r2+#2) = r3            # R3 is really R4.H
// }
// {
//   r5:4 = memd(r0++#8)
// }
// {                             # "Shuffling" code that sets up R3 and R6
//   r3 = lsr(r4, #16)           # so that their halves can be stored in the
//   r7:6 = combine(r5, r5)      # next iteration. This could be folded into
// }                             # the stores if the code was at the beginning
// {                             # of the loop iteration. Since the same code
//   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
//   r7:6 = vlsrw(r7:6, #16)     # there.
// }:endloop0
//
// The outcome:
//
// {
//   loop0(.LBB0_2, r1)
//   r5:4 = memd(r0++#8)
// }
// .LBB0_2:
// {
//   memh(r2+#6) = r5.h
// }
// {
//   memh(r2+#2) = r4.h
// }
// {
//   r5:4 = memd(r0++#8)
// }:endloop0
namespace llvm {

  FunctionPass *createHexagonLoopRescheduling();
  void initializeHexagonLoopReschedulingPass(PassRegistry&);

} // end namespace llvm

namespace {

  class HexagonLoopRescheduling : public MachineFunctionPass {
  public:
    static char ID;

    HexagonLoopRescheduling() : MachineFunctionPass(ID) {
      initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    const HexagonInstrInfo *HII = nullptr;
    const HexagonRegisterInfo *HRI = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    BitTracker *BTP = nullptr;

    struct LoopCand {
      LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
            MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}

      MachineBasicBlock *LB, *PB, *EB;
    };
    using InstrList = std::vector<MachineInstr *>;
    struct InstrGroup {
      BitTracker::RegisterRef Inp, Out;
      InstrList Ins;
    };
    struct PhiInfo {
      PhiInfo(MachineInstr &P, MachineBasicBlock &B);

      unsigned DefR;
      BitTracker::RegisterRef LR, PR;   // Loop Register, Preheader Register
      MachineBasicBlock *LB, *PB;       // Loop Block, Preheader Block
    };

    static unsigned getDefReg(const MachineInstr *MI);
    bool isConst(unsigned Reg) const;
    bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
    bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
    bool isShuffleOf(unsigned OutR, unsigned InpR) const;
    bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
          unsigned &InpR2) const;
    void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
          MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
    bool processLoop(LoopCand &C);
  };

} // end anonymous namespace

char HexagonLoopRescheduling::ID = 0;

INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
                "Hexagon Loop Rescheduling", false, false)
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
      MachineBasicBlock &B) {
  DefR = HexagonLoopRescheduling::getDefReg(&P);
  LB = &B;
  PB = nullptr;
  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
    const MachineOperand &OpB = P.getOperand(i+1);
    if (OpB.getMBB() == &B) {
      LR = P.getOperand(i);
      continue;
    }
    PR = P.getOperand(i);
    PB = OpB.getMBB();
  }
}
unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
  RegisterSet Defs;
  HBS::getInstrDefs(*MI, Defs);
  if (Defs.count() != 1)
    return 0;
  return Defs.find_first();
}
bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
  if (!BTP->has(Reg))
    return false;
  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (!V.is(0) && !V.is(1))
      return false;
  }
  return true;
}
bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
      unsigned DefR) const {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case TargetOpcode::COPY:
    case Hexagon::S2_lsr_i_r:
    case Hexagon::S2_asr_i_r:
    case Hexagon::S2_asl_i_r:
    case Hexagon::S2_lsr_i_p:
    case Hexagon::S2_asr_i_p:
    case Hexagon::S2_asl_i_p:
    case Hexagon::S2_insert:
    case Hexagon::A2_or:
    case Hexagon::A2_orp:
    case Hexagon::A2_and:
    case Hexagon::A2_andp:
    case Hexagon::A2_combinew:
    case Hexagon::A4_combineri:
    case Hexagon::A4_combineir:
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_combine_ll:
    case Hexagon::A2_combine_lh:
    case Hexagon::A2_combine_hl:
    case Hexagon::A2_combine_hh:
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
      unsigned InpR) const {
  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
    const MachineOperand &Op = MI->getOperand(i);
    if (!Op.isReg())
      continue;
    if (Op.getReg() == InpR)
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
  if (!BTP->has(OutR) || !BTP->has(InpR))
    return false;
  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = OutC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      continue;
    if (V.RefI.Reg != InpR)
      return false;
  }
  return true;
}
, unsigned InpR1
,
3046 unsigned OutR2
, unsigned &InpR2
) const {
3047 if (!BTP
->has(OutR1
) || !BTP
->has(InpR1
) || !BTP
->has(OutR2
))
3049 const BitTracker::RegisterCell
&OutC1
= BTP
->lookup(OutR1
);
3050 const BitTracker::RegisterCell
&OutC2
= BTP
->lookup(OutR2
);
3051 unsigned W
= OutC1
.width();
3052 unsigned MatchR
= 0;
3053 if (W
!= OutC2
.width())
3055 for (unsigned i
= 0; i
< W
; ++i
) {
3056 const BitTracker::BitValue
&V1
= OutC1
[i
], &V2
= OutC2
[i
];
3057 if (V1
.Type
!= V2
.Type
|| V1
.Type
== BitTracker::BitValue::One
)
3059 if (V1
.Type
!= BitTracker::BitValue::Ref
)
3061 if (V1
.RefI
.Pos
!= V2
.RefI
.Pos
)
3063 if (V1
.RefI
.Reg
!= InpR1
)
3065 if (V2
.RefI
.Reg
== 0 || V2
.RefI
.Reg
== OutR2
)
3068 MatchR
= V2
.RefI
.Reg
;
3069 else if (V2
.RefI
.Reg
!= MatchR
)
void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
      unsigned NewPredR) {
  DenseMap<unsigned,unsigned> RegMap;

  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
  Register PhiR = MRI->createVirtualRegister(PhiRC);
  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
      .addReg(NewPredR)
      .addMBB(&PB)
      .addReg(G.Inp.Reg)
      .addMBB(&LB);
  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));

  for (unsigned i = G.Ins.size(); i > 0; --i) {
    const MachineInstr *SI = G.Ins[i-1];
    unsigned DR = getDefReg(SI);
    const TargetRegisterClass *RC = MRI->getRegClass(DR);
    Register NewDR = MRI->createVirtualRegister(RC);
    DebugLoc DL = SI->getDebugLoc();

    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
    for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
      const MachineOperand &Op = SI->getOperand(j);
      if (!Op.isReg()) {
        MIB.add(Op);
        continue;
      }
      if (!Op.isUse())
        continue;
      unsigned UseR = RegMap[Op.getReg()];
      MIB.addReg(UseR, 0, Op.getSubReg());
    }
    RegMap.insert(std::make_pair(DR, NewDR));
  }

  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
}
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
  LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
                    << "\n");
  std::vector<PhiInfo> Phis;
  for (auto &I : *C.LB) {
    if (!I.isPHI())
      break;
    unsigned PR = getDefReg(&I);
    if (PR == 0)
      continue;
    bool BadUse = false, GoodUse = false;
    for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() != C.LB) {
        BadUse = true;
        break;
      }
      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
        GoodUse = true;
    }
    if (BadUse || !GoodUse)
      continue;
    Phis.push_back(PhiInfo(I, *C.LB));
  }

  LLVM_DEBUG({
    dbgs() << "Phis: {";
    for (auto &I : Phis) {
      dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
             << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
             << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
             << I.LB->getNumber() << ')';
    }
    dbgs() << " }\n";
  });

  bool Changed = false;
  if (Phis.empty())
    return Changed;

  // Go backwards in the block: for each bit shuffling instruction, check
  // if that instruction could potentially be moved to the front of the loop:
  // the output of the loop cannot be used in a non-shuffling instruction
  // in this loop.
  InstrList ShufIns;
  for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
    if (I->isTerminator())
      continue;
    if (I->isPHI())
      break;

    RegisterSet Defs;
    HBS::getInstrDefs(*I, Defs);
    if (Defs.count() != 1)
      continue;
    Register DefR = Defs.find_first();
    if (!DefR.isVirtual())
      continue;
    if (!isBitShuffle(&*I, DefR))
      continue;

    bool BadUse = false;
    for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() == C.LB) {
        if (UseI->isPHI()) {
          // If the use is in a phi node in this loop, then it should be
          // the value corresponding to the back edge.
          unsigned Idx = UI.getOperandNo();
          if (UseI->getOperand(Idx+1).getMBB() != C.LB)
            BadUse = true;
        } else {
          auto F = find(ShufIns, UseI);
          if (F == ShufIns.end())
            BadUse = true;
        }
      } else {
        // There is a use outside of the loop, but there is no epilog block
        // suitable for a copy-out.
        if (C.EB == nullptr)
          BadUse = true;
      }
      if (BadUse)
        break;
    }

    if (BadUse)
      continue;
    ShufIns.push_back(&*I);
  }

  // Partition the list of shuffling instructions into instruction groups,
  // where each group has to be moved as a whole (i.e. a group is a chain of
  // dependent instructions). A group produces a single live output register,
  // which is meant to be the input of the loop phi node (although this is
  // not checked here yet). It also uses a single register as its input,
  // which is some value produced in the loop body. After moving the group
  // to the beginning of the loop, that input register would need to be
  // the loop-carried register (through a phi node) instead of the (currently
  // loop-carried) output register.
  using InstrGroupList = std::vector<InstrGroup>;
  InstrGroupList Groups;

  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
    MachineInstr *SI = ShufIns[i];
    if (SI == nullptr)
      continue;

    InstrGroup G;
    G.Ins.push_back(SI);
    G.Out.Reg = getDefReg(SI);
    RegisterSet Inputs;
    HBS::getInstrUses(*SI, Inputs);

    for (unsigned j = i+1; j < n; ++j) {
      MachineInstr *MI = ShufIns[j];
      if (MI == nullptr)
        continue;
      RegisterSet Defs;
      HBS::getInstrDefs(*MI, Defs);
      // If this instruction does not define any pending inputs, skip it.
      if (!Defs.intersects(Inputs))
        continue;
      // Otherwise, add it to the current group and remove the inputs that
      // are defined by MI.
      G.Ins.push_back(MI);
      Inputs.remove(Defs);
      // Then add all registers used by MI.
      HBS::getInstrUses(*MI, Inputs);
      ShufIns[j] = nullptr;
    }

    // Only add a group if it requires at most one register.
    if (Inputs.count() > 1)
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
      continue;

    G.Inp.Reg = Inputs.find_first();
    Groups.push_back(G);
  }

  LLVM_DEBUG({
    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
      InstrGroup &G = Groups[i];
      dbgs() << "Group[" << i << "] inp: "
             << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
             << " out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
      for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
        dbgs() << "  " << *G.Ins[j];
    }
  });

  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
    InstrGroup &G = Groups[i];
    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    auto F = llvm::find_if(Phis, LoopInpEq);
    if (F == Phis.end())
      continue;
    unsigned PrehR = 0;
    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
      const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
      unsigned Opc = DefPrehR->getOpcode();
      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
        continue;
      if (!DefPrehR->getOperand(1).isImm())
        continue;
      if (DefPrehR->getOperand(1).getImm() != 0)
        continue;
      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
      if (RC != MRI->getRegClass(F->PR.Reg)) {
        PrehR = MRI->createVirtualRegister(RC);
        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
                                                          : Hexagon::A2_tfrpi;
        auto T = C.PB->getFirstTerminator();
        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
        BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
            .addImm(0);
      } else {
        PrehR = F->PR.Reg;
      }
    }
    // isSameShuffle could match with PrehR being of a wider class than
    // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
    // it would match for the input being a 32-bit register, and PrehR
    // being a 64-bit register (where the low 32 bits match). This could
    // be handled, but for now skip these cases.
    if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
      continue;
    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
    Changed = true;
  }

  return Changed;
}
bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();
  BTP = &BT;

  std::vector<LoopCand> Cand;

  for (auto &B : MF) {
    if (B.pred_size() != 2 || B.succ_size() != 2)
      continue;
    MachineBasicBlock *PB = nullptr;
    bool IsLoop = false;
    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
      if (*PI != &B)
        PB = *PI;
      else
        IsLoop = true;
    }
    if (!IsLoop)
      continue;

    MachineBasicBlock *EB = nullptr;
    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
      if (*SI == &B)
        continue;
      // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
      // edge from B to EP is non-critical).
      if ((*SI)->pred_size() == 1)
        EB = *SI;
      break;
    }

    Cand.push_back(LoopCand(&B, PB, EB));
  }

  bool Changed = false;
  for (auto &C : Cand)
    Changed |= processLoop(C);

  return Changed;
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createHexagonLoopRescheduling() {
  return new HexagonLoopRescheduling();
}

FunctionPass *llvm::createHexagonBitSimplify() {
  return new HexagonBitSimplify();
}