//===- HexagonBitSimplify.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "BitTracker.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "hexbit"
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
  cl::init(true), cl::desc("Preserve subregisters in tied operands"));
static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
  cl::init(true), cl::desc("Generate extract instructions"));
static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
  cl::init(true), cl::desc("Generate bitsplit instructions"));

static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountExtract = 0;
static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
  cl::init(std::numeric_limits<unsigned>::max()));
static unsigned CountBitSplit = 0;

void initializeHexagonBitSimplifyPass(PassRegistry &Registry);
FunctionPass *createHexagonBitSimplify();

} // end namespace llvm
// Set of virtual registers, based on BitVector.
struct RegisterSet : private BitVector {
  RegisterSet() = default;
  explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
  RegisterSet(const RegisterSet &RS) = default;

  using BitVector::clear;
  using BitVector::count;

  unsigned find_first() const {
    int First = BitVector::find_first();

  unsigned find_next(unsigned Prev) const {
    int Next = BitVector::find_next(v2x(Prev));

  RegisterSet &insert(unsigned R) {
    unsigned Idx = v2x(R);
    return static_cast<RegisterSet&>(BitVector::set(Idx));

  RegisterSet &remove(unsigned R) {
    unsigned Idx = v2x(R);
    return static_cast<RegisterSet&>(BitVector::reset(Idx));

  RegisterSet &insert(const RegisterSet &Rs) {
    return static_cast<RegisterSet&>(BitVector::operator|=(Rs));

  RegisterSet &remove(const RegisterSet &Rs) {
    return static_cast<RegisterSet&>(BitVector::reset(Rs));

  reference operator[](unsigned R) {
    unsigned Idx = v2x(R);
    return BitVector::operator[](Idx);

  bool operator[](unsigned R) const {
    unsigned Idx = v2x(R);
    assert(Idx < size());
    return BitVector::operator[](Idx);

  bool has(unsigned R) const {
    unsigned Idx = v2x(R);
    return BitVector::test(Idx);

    return !BitVector::any();

  bool includes(const RegisterSet &Rs) const {
    // A.BitVector::test(B) <=> A-B != {}
    return !Rs.BitVector::test(*this);

  bool intersects(const RegisterSet &Rs) const {
    return BitVector::anyCommon(Rs);

  void ensure(unsigned Idx) {
    resize(std::max(Idx+1, 32U));

  static inline unsigned v2x(unsigned v) {
    return Register::virtReg2Index(v);

  static inline unsigned x2v(unsigned x) {
    return Register::index2VirtReg(x);

  PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)

  friend raw_ostream &operator<< (raw_ostream &OS,
        const PrintRegSet &P);

  const RegisterSet &RS;
  const TargetRegisterInfo *TRI;

  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
    LLVM_ATTRIBUTE_UNUSED;
  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
    for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
      OS << ' ' << printReg(R, P.TRI);
  class Transformation;

  class HexagonBitSimplify : public MachineFunctionPass {
    HexagonBitSimplify() : MachineFunctionPass(ID) {}

    StringRef getPassName() const override {
      return "Hexagon bit simplification";

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);

    bool runOnMachineFunction(MachineFunction &MF) override;

    static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
    static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
    static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
          const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
    static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
    static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
          uint16_t W, uint64_t &U);
    static bool replaceReg(unsigned OldR, unsigned NewR,
          MachineRegisterInfo &MRI);
    static bool getSubregMask(const BitTracker::RegisterRef &RR,
          unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
    static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
          unsigned NewSR, MachineRegisterInfo &MRI);
    static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
          unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
    static bool parseRegSequence(const MachineInstr &I,
          BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
          const MachineRegisterInfo &MRI);

    static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
    static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
          uint16_t Begin, const HexagonInstrInfo &HII);

    static const TargetRegisterClass *getFinalVRegClass(
          const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
    static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
          const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);

    MachineDominatorTree *MDT = nullptr;

    bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
    static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
          unsigned NewSub = Hexagon::NoSubRegister);

  using HBS = HexagonBitSimplify;
  // The purpose of this class is to provide a common facility to traverse
  // the function top-down or bottom-up via the dominator tree, and keep
  // track of the available registers.
  class Transformation {
    Transformation(bool TD) : TopDown(TD) {}
    virtual ~Transformation() = default;

    virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;

} // end anonymous namespace

char HexagonBitSimplify::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
      "Hexagon bit simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
      "Hexagon bit simplification", false, false)
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
  bool Changed = false;

    Changed = T.processBlock(B, AVs);

    getInstrDefs(I, Defs);
  RegisterSet NewAVs = AVs;

  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
    Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);

    Changed |= T.processBlock(B, AVs);
//
// Utility functions:
//
void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isDef())
    Register R = Op.getReg();
    if (!Register::isVirtualRegister(R))

void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
  for (auto &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
    Register R = Op.getReg();
    if (!Register::isVirtualRegister(R))
// Check if all the bits in range [B, E) in both cells are equal.
bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
      uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
  for (uint16_t i = 0; i < W; ++i) {
    // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
    if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
    if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
    if (RC1[B1+i] != RC2[B2+i])

bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W) {
  assert(B < RC.width() && B+W <= RC.width());
  for (uint16_t i = B; i < B+W; ++i)

bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
      uint16_t B, uint16_t W, uint64_t &U) {
  assert(B < RC.width() && B+W <= RC.width());
  for (uint16_t i = B+W; i > B; --i) {
    const BitTracker::BitValue &BV = RC[i-1];
bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
      MachineRegisterInfo &MRI) {
  if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);

bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
      unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
  if (hasTiedUse(OldR, MRI, NewSR))
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);

bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
      unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
  if (!Register::isVirtualRegister(OldR) || !Register::isVirtualRegister(NewR))
  if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
  auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
  for (auto I = Begin; I != End; I = NextI) {
    NextI = std::next(I);
    if (I->getSubReg() != OldSR)
// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
// of Sub in Reg, and set Width to the size of Sub in bits. Return true,
// if this succeeded, otherwise return false.
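// For example, for a DoubleRegs register %R, the ref %R:isub_lo yields
// Begin = 0 and Width = 32, while %R:isub_hi yields Begin = 32 and
// Width = 32; with no subregister, Begin = 0 and Width is the full register
// size.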
bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
      unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
  const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
    Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
    case Hexagon::HvxWRRegClassID:
      Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
      if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)

// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
      BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
      const MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
  unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
  auto &DstRC = *MRI.getRegClass(I.getOperand(0).getReg());
  auto &HRI = static_cast<const HexagonRegisterInfo&>(
                  *MRI.getTargetRegisterInfo());
  unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
  unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
  assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
  if (Sub1 == SubLo && Sub2 == SubHi) {
    SL = I.getOperand(1);
    SH = I.getOperand(3);
  if (Sub1 == SubHi && Sub2 == SubLo) {
    SH = I.getOperand(1);
    SL = I.getOperand(3);
// All stores (except 64-bit stores) take a 32-bit register as the source
// of the value to be stored. If the instruction stores into a location
// that is shorter than 32 bits, some bits of the source register are not
// used. For each store instruction, calculate the set of used bits in
// the source register, and set appropriate bits in Bits. Return true if
// the bits are calculated, false otherwise.
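// For example, a byte store such as S2_storerb_io only stores Rt[7:0],
// so only bits [Begin, Begin+8) of the source register are marked as used.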
bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
  using namespace Hexagon;

    case S2_storerb_io:           // memb(Rs32+#s11:0)=Rt32
    case S2_storerbnew_io:        // memb(Rs32+#s11:0)=Nt8.new
    case S2_pstorerbt_io:         // if (Pv4) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbf_io:         // if (!Pv4) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbtnew_io:      // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S4_pstorerbfnew_io:      // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
    case S2_pstorerbnewt_io:      // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S2_pstorerbnewf_io:      // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewtnew_io:   // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S4_pstorerbnewfnew_io:   // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    case S2_storerb_pi:           // memb(Rx32++#s4:0)=Rt32
    case S2_storerbnew_pi:        // memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbt_pi:         // if (Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbf_pi:         // if (!Pv4) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbtnew_pi:      // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbfnew_pi:      // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
    case S2_pstorerbnewt_pi:      // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewf_pi:      // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewtnew_pi:   // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S2_pstorerbnewfnew_pi:   // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    case S4_storerb_ap:           // memb(Re32=#U6)=Rt32
    case S4_storerbnew_ap:        // memb(Re32=#U6)=Nt8.new
    case S2_storerb_pr:           // memb(Rx32++Mu2)=Rt32
    case S2_storerbnew_pr:        // memb(Rx32++Mu2)=Nt8.new
    case S4_storerb_ur:           // memb(Ru32<<#u2+#U6)=Rt32
    case S4_storerbnew_ur:        // memb(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerb_pbr:          // memb(Rx32++Mu2:brev)=Rt32
    case S2_storerbnew_pbr:       // memb(Rx32++Mu2:brev)=Nt8.new
    case S2_storerb_pci:          // memb(Rx32++#s4:0:circ(Mu2))=Rt32
    case S2_storerbnew_pci:       // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
    case S2_storerb_pcr:          // memb(Rx32++I:circ(Mu2))=Rt32
    case S2_storerbnew_pcr:       // memb(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerb_rr:           // memb(Rs32+Ru32<<#u2)=Rt32
    case S4_storerbnew_rr:        // memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbt_rr:         // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbf_rr:         // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbtnew_rr:      // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbfnew_rr:      // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerbnewt_rr:      // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewf_rr:      // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewtnew_rr:   // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerbnewfnew_rr:   // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerbgp:            // memb(gp+#u16:0)=Rt32
    case S2_storerbnewgp:         // memb(gp+#u16:0)=Nt8.new
    case S4_pstorerbt_abs:        // if (Pv4) memb(#u6)=Rt32
    case S4_pstorerbf_abs:        // if (!Pv4) memb(#u6)=Rt32
    case S4_pstorerbtnew_abs:     // if (Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbfnew_abs:     // if (!Pv4.new) memb(#u6)=Rt32
    case S4_pstorerbnewt_abs:     // if (Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewf_abs:     // if (!Pv4) memb(#u6)=Nt8.new
    case S4_pstorerbnewtnew_abs:  // if (Pv4.new) memb(#u6)=Nt8.new
    case S4_pstorerbnewfnew_abs:  // if (!Pv4.new) memb(#u6)=Nt8.new
      Bits.set(Begin, Begin+8);

    case S2_storerh_io:           // memh(Rs32+#s11:1)=Rt32
    case S2_storerhnew_io:        // memh(Rs32+#s11:1)=Nt8.new
    case S2_pstorerht_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhf_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhtnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S4_pstorerhfnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
    case S2_pstorerhnewt_io:      // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S2_pstorerhnewf_io:      // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewtnew_io:   // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S4_pstorerhnewfnew_io:   // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    case S2_storerh_pi:           // memh(Rx32++#s4:1)=Rt32
    case S2_storerhnew_pi:        // memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerht_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhf_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhtnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhfnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
    case S2_pstorerhnewt_pi:      // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewf_pi:      // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewtnew_pi:   // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S2_pstorerhnewfnew_pi:   // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    case S4_storerh_ap:           // memh(Re32=#U6)=Rt32
    case S4_storerhnew_ap:        // memh(Re32=#U6)=Nt8.new
    case S2_storerh_pr:           // memh(Rx32++Mu2)=Rt32
    case S2_storerhnew_pr:        // memh(Rx32++Mu2)=Nt8.new
    case S4_storerh_ur:           // memh(Ru32<<#u2+#U6)=Rt32
    case S4_storerhnew_ur:        // memh(Ru32<<#u2+#U6)=Nt8.new
    case S2_storerh_pbr:          // memh(Rx32++Mu2:brev)=Rt32
    case S2_storerhnew_pbr:       // memh(Rx32++Mu2:brev)=Nt8.new
    case S2_storerh_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt32
    case S2_storerhnew_pci:       // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
    case S2_storerh_pcr:          // memh(Rx32++I:circ(Mu2))=Rt32
    case S2_storerhnew_pcr:       // memh(Rx32++I:circ(Mu2))=Nt8.new
    case S4_storerh_rr:           // memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerht_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhf_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhtnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_pstorerhfnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    case S4_storerhnew_rr:        // memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewt_rr:      // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewf_rr:      // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewtnew_rr:   // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S4_pstorerhnewfnew_rr:   // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    case S2_storerhgp:            // memh(gp+#u16:1)=Rt32
    case S2_storerhnewgp:         // memh(gp+#u16:1)=Nt8.new
    case S4_pstorerht_abs:        // if (Pv4) memh(#u6)=Rt32
    case S4_pstorerhf_abs:        // if (!Pv4) memh(#u6)=Rt32
    case S4_pstorerhtnew_abs:     // if (Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhfnew_abs:     // if (!Pv4.new) memh(#u6)=Rt32
    case S4_pstorerhnewt_abs:     // if (Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewf_abs:     // if (!Pv4) memh(#u6)=Nt8.new
    case S4_pstorerhnewtnew_abs:  // if (Pv4.new) memh(#u6)=Nt8.new
    case S4_pstorerhnewfnew_abs:  // if (!Pv4.new) memh(#u6)=Nt8.new
      Bits.set(Begin, Begin+16);

    case S2_storerf_io:           // memh(Rs32+#s11:1)=Rt.H32
    case S2_pstorerft_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S2_pstorerff_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerftnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S4_pstorerffnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    case S2_storerf_pi:           // memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerft_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerff_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerftnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S2_pstorerffnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    case S4_storerf_ap:           // memh(Re32=#U6)=Rt.H32
    case S2_storerf_pr:           // memh(Rx32++Mu2)=Rt.H32
    case S4_storerf_ur:           // memh(Ru32<<#u2+#U6)=Rt.H32
    case S2_storerf_pbr:          // memh(Rx32++Mu2:brev)=Rt.H32
    case S2_storerf_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
    case S2_storerf_pcr:          // memh(Rx32++I:circ(Mu2))=Rt.H32
    case S4_storerf_rr:           // memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerft_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerff_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerftnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S4_pstorerffnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    case S2_storerfgp:            // memh(gp+#u16:1)=Rt.H32
    case S4_pstorerft_abs:        // if (Pv4) memh(#u6)=Rt.H32
    case S4_pstorerff_abs:        // if (!Pv4) memh(#u6)=Rt.H32
    case S4_pstorerftnew_abs:     // if (Pv4.new) memh(#u6)=Rt.H32
    case S4_pstorerffnew_abs:     // if (!Pv4.new) memh(#u6)=Rt.H32
      Bits.set(Begin+16, Begin+32);
// For an instruction with opcode Opc, calculate the set of bits that it
// uses in a register in operand OpN. This only calculates the set of used
// bits for cases where it does not depend on any operands (as is the case
// in shifts, for example). For concrete instructions from a program, the
// operand may be a subregister of a larger register, while Bits would
// correspond to the larger register in its entirety. Because of that,
// the parameter Begin can be used to indicate which bit of Bits should be
// considered the LSB of the operand.
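// For example, the halfword multiplies that read the low halves of both
// register operands (the "ll" forms below) mark bits [Begin, Begin+16) as
// used when OpN names either source operand.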
bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
      BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
  using namespace Hexagon;

  const MCInstrDesc &D = HII.get(Opc);
  if (OpN == D.getNumOperands()-1)
    return getUsedBitsInStore(Opc, Bits, Begin);

    // One register source. Used bits: R1[0-7].
      Bits.set(Begin, Begin+8);

    // One register source. Used bits: R1[0-15].
      Bits.set(Begin, Begin+16);

    // One register source. Used bits: R1[16-31].
      Bits.set(Begin+16, Begin+32);

    // Two register sources. Used bits: R1[0-7], R2[0-7].
      Bits.set(Begin, Begin+8);

    // Two register sources. Used bits: R1[0-15], R2[0-15].
    case A2_addh_h16_sat_ll:
    case A2_addh_l16_sat_ll:
    case A2_subh_h16_sat_ll:
    case A2_subh_l16_sat_ll:
    case M2_mpy_acc_ll_s0:
    case M2_mpy_acc_ll_s1:
    case M2_mpy_acc_sat_ll_s0:
    case M2_mpy_acc_sat_ll_s1:
    case M2_mpy_nac_ll_s0:
    case M2_mpy_nac_ll_s1:
    case M2_mpy_nac_sat_ll_s0:
    case M2_mpy_nac_sat_ll_s1:
    case M2_mpy_rnd_ll_s0:
    case M2_mpy_rnd_ll_s1:
    case M2_mpy_sat_ll_s0:
    case M2_mpy_sat_ll_s1:
    case M2_mpy_sat_rnd_ll_s0:
    case M2_mpy_sat_rnd_ll_s1:
    case M2_mpyd_acc_ll_s0:
    case M2_mpyd_acc_ll_s1:
    case M2_mpyd_nac_ll_s0:
    case M2_mpyd_nac_ll_s1:
    case M2_mpyd_rnd_ll_s0:
    case M2_mpyd_rnd_ll_s1:
    case M2_mpyu_acc_ll_s0:
    case M2_mpyu_acc_ll_s1:
    case M2_mpyu_nac_ll_s0:
    case M2_mpyu_nac_ll_s1:
    case M2_mpyud_acc_ll_s0:
    case M2_mpyud_acc_ll_s1:
    case M2_mpyud_nac_ll_s0:
    case M2_mpyud_nac_ll_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin, Begin+16);

    // Two register sources. Used bits: R1[0-15], R2[16-31].
    case A2_addh_h16_sat_lh:
    case A2_subh_h16_sat_lh:
    case M2_mpy_acc_lh_s0:
    case M2_mpy_acc_lh_s1:
    case M2_mpy_acc_sat_lh_s0:
    case M2_mpy_acc_sat_lh_s1:
    case M2_mpy_nac_lh_s0:
    case M2_mpy_nac_lh_s1:
    case M2_mpy_nac_sat_lh_s0:
    case M2_mpy_nac_sat_lh_s1:
    case M2_mpy_rnd_lh_s0:
    case M2_mpy_rnd_lh_s1:
    case M2_mpy_sat_lh_s0:
    case M2_mpy_sat_lh_s1:
    case M2_mpy_sat_rnd_lh_s0:
    case M2_mpy_sat_rnd_lh_s1:
    case M2_mpyd_acc_lh_s0:
    case M2_mpyd_acc_lh_s1:
    case M2_mpyd_nac_lh_s0:
    case M2_mpyd_nac_lh_s1:
    case M2_mpyd_rnd_lh_s0:
    case M2_mpyd_rnd_lh_s1:
    case M2_mpyu_acc_lh_s0:
    case M2_mpyu_acc_lh_s1:
    case M2_mpyu_nac_lh_s0:
    case M2_mpyu_nac_lh_s1:
    case M2_mpyud_acc_lh_s0:
    case M2_mpyud_acc_lh_s1:
    case M2_mpyud_nac_lh_s0:
    case M2_mpyud_nac_lh_s1:
    // These four are actually LH.
    case A2_addh_l16_sat_hl:
    case A2_subh_l16_sat_hl:
        Bits.set(Begin, Begin+16);
        Bits.set(Begin+16, Begin+32);

    // Two register sources, used bits: R1[16-31], R2[0-15].
    case A2_addh_h16_sat_hl:
    case A2_subh_h16_sat_hl:
    case M2_mpy_acc_hl_s0:
    case M2_mpy_acc_hl_s1:
    case M2_mpy_acc_sat_hl_s0:
    case M2_mpy_acc_sat_hl_s1:
    case M2_mpy_nac_hl_s0:
    case M2_mpy_nac_hl_s1:
    case M2_mpy_nac_sat_hl_s0:
    case M2_mpy_nac_sat_hl_s1:
    case M2_mpy_rnd_hl_s0:
    case M2_mpy_rnd_hl_s1:
    case M2_mpy_sat_hl_s0:
    case M2_mpy_sat_hl_s1:
    case M2_mpy_sat_rnd_hl_s0:
    case M2_mpy_sat_rnd_hl_s1:
    case M2_mpyd_acc_hl_s0:
    case M2_mpyd_acc_hl_s1:
    case M2_mpyd_nac_hl_s0:
    case M2_mpyd_nac_hl_s1:
    case M2_mpyd_rnd_hl_s0:
    case M2_mpyd_rnd_hl_s1:
    case M2_mpyu_acc_hl_s0:
    case M2_mpyu_acc_hl_s1:
    case M2_mpyu_nac_hl_s0:
    case M2_mpyu_nac_hl_s1:
    case M2_mpyud_acc_hl_s0:
    case M2_mpyud_acc_hl_s1:
    case M2_mpyud_nac_hl_s0:
    case M2_mpyud_nac_hl_s1:
        Bits.set(Begin+16, Begin+32);
        Bits.set(Begin, Begin+16);

    // Two register sources, used bits: R1[16-31], R2[16-31].
    case A2_addh_h16_sat_hh:
    case A2_subh_h16_sat_hh:
    case M2_mpy_acc_hh_s0:
    case M2_mpy_acc_hh_s1:
    case M2_mpy_acc_sat_hh_s0:
    case M2_mpy_acc_sat_hh_s1:
    case M2_mpy_nac_hh_s0:
    case M2_mpy_nac_hh_s1:
    case M2_mpy_nac_sat_hh_s0:
    case M2_mpy_nac_sat_hh_s1:
    case M2_mpy_rnd_hh_s0:
    case M2_mpy_rnd_hh_s1:
    case M2_mpy_sat_hh_s0:
    case M2_mpy_sat_hh_s1:
    case M2_mpy_sat_rnd_hh_s0:
    case M2_mpy_sat_rnd_hh_s1:
    case M2_mpyd_acc_hh_s0:
    case M2_mpyd_acc_hh_s1:
    case M2_mpyd_nac_hh_s0:
    case M2_mpyd_nac_hh_s1:
    case M2_mpyd_rnd_hh_s0:
    case M2_mpyd_rnd_hh_s1:
    case M2_mpyu_acc_hh_s0:
    case M2_mpyu_acc_hh_s1:
    case M2_mpyu_nac_hh_s0:
    case M2_mpyu_nac_hh_s1:
    case M2_mpyud_acc_hh_s0:
    case M2_mpyud_acc_hh_s1:
    case M2_mpyud_nac_hh_s0:
    case M2_mpyud_nac_hh_s1:
      if (OpN == 1 || OpN == 2) {
        Bits.set(Begin+16, Begin+32);
// Calculate the register class that matches Reg:Sub. For example, if
// %1 is a double register, then %1:isub_hi would match the "int"
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
      const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
  if (!Register::isVirtualRegister(RR.Reg))
  auto *RC = MRI.getRegClass(RR.Reg);

  auto &HRI = static_cast<const HexagonRegisterInfo&>(
                  *MRI.getTargetRegisterInfo());
  auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
    assert(Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_lo) ||
           Sub == HRI.getHexagonSubRegIndex(*RC, Hexagon::ps_sub_hi));

  switch (RC->getID()) {
    case Hexagon::DoubleRegsRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::IntRegsRegClass;
    case Hexagon::HvxWRRegClassID:
      VerifySR(RC, RR.Sub);
      return &Hexagon::HvxVRRegClass;
// Check if RD could be replaced with RS at any possible use of RD.
// For example a predicate register cannot be replaced with an integer
// register, but a 64-bit register with a subregister can be replaced
// with a 32-bit register.
bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
  if (!Register::isVirtualRegister(RD.Reg) ||
      !Register::isVirtualRegister(RS.Reg))
  // Return false if one (or both) classes are nullptr.
  auto *DRC = getFinalVRegClass(RD, MRI);
  return DRC == getFinalVRegClass(RS, MRI);
bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
  if (!PreserveTiedOps)
  return llvm::any_of(MRI.use_operands(Reg),
                      [NewSub] (const MachineOperand &Op) -> bool {
                        return Op.getSubReg() != NewSub && Op.isTied();
  class DeadCodeElimination {
    DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
      : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
        MDT(mdt), MRI(mf.getRegInfo()) {}

      return runOnNode(MDT.getRootNode());

    bool isDead(unsigned R) const;
    bool runOnNode(MachineDomTreeNode *N);

    const HexagonInstrInfo &HII;
    MachineDominatorTree &MDT;
    MachineRegisterInfo &MRI;

} // end anonymous namespace
bool DeadCodeElimination::isDead(unsigned R) const {
  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
    MachineInstr *UseI = I->getParent();
    if (UseI->isDebugValue())
    assert(!UseI->getOperand(0).getSubReg());
    Register DR = UseI->getOperand(0).getReg();

bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
  bool Changed = false;

  for (auto *DTN : children<MachineDomTreeNode*>(N))
    Changed |= runOnNode(DTN);

  MachineBasicBlock *B = N->getBlock();
  std::vector<MachineInstr*> Instrs;
  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
    Instrs.push_back(&*I);

  for (auto MI : Instrs) {
    unsigned Opc = MI->getOpcode();
    // Do not touch lifetime markers. This is why the target-independent DCE
    if (Opc == TargetOpcode::LIFETIME_START ||
        Opc == TargetOpcode::LIFETIME_END)
    if (MI->isInlineAsm())
    // Delete PHIs if possible.
    if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))

    bool AllDead = true;
    SmallVector<unsigned,2> Regs;
    for (auto &Op : MI->operands()) {
      if (!Op.isReg() || !Op.isDef())
      Register R = Op.getReg();
      if (!Register::isVirtualRegister(R) || !isDead(R)) {

    for (unsigned i = 0, n = Regs.size(); i != n; ++i)
      MRI.markUsesInDebugValueAsUndef(Regs[i]);
//
// Eliminate redundant instructions
//
// This transformation will identify instructions where the output register
// is the same as one of its input registers. This only works on instructions
// that define a single register (unlike post-increment loads, for example).
// The equality check is actually more detailed: the code calculates which
// bits of the output are used, and only compares these bits with the input
// If the output matches an input, the instruction is replaced with COPY.
// The copies will be removed by another transformation.
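// Illustration (assuming all uses of %1 only read its low halfword):
//   %1 = A2_zxth %0        ; bits 0-15 of %1 equal bits 0-15 of %0
//   ... uses of %1 ...
// Since the used bits of %1 match the corresponding bits of %0, %1 can be
// redefined as a COPY of %0 and the zero-extension becomes dead.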
  class RedundantInstrElimination : public Transformation {
    RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
          const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

    bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
          unsigned &LostB, unsigned &LostE);
    bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
          unsigned &LostB, unsigned &LostE);
    bool computeUsedBits(unsigned Reg, BitVector &Bits);
    bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
    bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);

    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;

} // end anonymous namespace
// Check if the instruction is a lossy shift left, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;

  unsigned Opc = MI.getOpcode();
  unsigned ImN, RegN, Width;

    case S2_asl_i_p_acc:
    case S2_asl_i_p_and:
    case S2_asl_i_p_nac:
    case S2_asl_i_p_xacc:
    case S2_addasl_rrri:
    case S4_andi_asl_ri:
    case S4_addi_asl_ri:
    case S4_subi_asl_ri:
    case S2_asl_i_r_acc:
    case S2_asl_i_r_and:
    case S2_asl_i_r_nac:
    case S2_asl_i_r_sat:
    case S2_asl_i_r_xacc:

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();

// Check if the instruction is a lossy shift right, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
      unsigned OpN, unsigned &LostB, unsigned &LostE) {
  using namespace Hexagon;

  unsigned Opc = MI.getOpcode();

    case S2_asr_i_p_acc:
    case S2_asr_i_p_and:
    case S2_asr_i_p_nac:
    case S2_lsr_i_p_acc:
    case S2_lsr_i_p_and:
    case S2_lsr_i_p_nac:
    case S2_lsr_i_p_xacc:
    case S4_andi_lsr_ri:
    case S4_addi_lsr_ri:
    case S4_subi_lsr_ri:
    case S2_asr_i_r_acc:
    case S2_asr_i_r_and:
    case S2_asr_i_r_nac:
    case S2_lsr_i_r_acc:
    case S2_lsr_i_r_and:
    case S2_lsr_i_r_nac:
    case S2_lsr_i_r_xacc:

  assert(MI.getOperand(ImN).isImm());
  unsigned S = MI.getOperand(ImN).getImm();
// Calculate the bit vector that corresponds to the used bits of register Reg.
// The vector Bits has the same size, as the size of Reg in bits. If the cal-
// culation fails (i.e. the used bits are unknown), it returns false. Other-
// wise, it returns true and sets the corresponding bits in Bits.
bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
  BitVector Used(Bits.size());
  RegisterSet Visited;
  std::vector<unsigned> Pending;
  Pending.push_back(Reg);

  for (unsigned i = 0; i < Pending.size(); ++i) {
    unsigned R = Pending[i];
    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
      BitTracker::RegisterRef UR = *I;
      if (!HBS::getSubregMask(UR, B, W, MRI))
      MachineInstr &UseI = *I->getParent();
      if (UseI.isPHI() || UseI.isCopy()) {
        Register DefR = UseI.getOperand(0).getReg();
        if (!Register::isVirtualRegister(DefR))
        Pending.push_back(DefR);
        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
// Calculate the bits used by instruction MI in a register in operand OpN.
// Return true/false if the calculation succeeds/fails. If it succeeds, set
// used bits in Bits. This function does not reset any bits in Bits, so
// subsequent calls over different instructions will result in the union
// of the used bits in all these instructions.
// The register in question may be used with a sub-register, whereas Bits
// holds the bits for the entire register. To keep track of that, the
// argument Begin indicates where in Bits is the lowest-significant bit
// of the register used in operand OpN. For example, in instruction:
//   %1 = S2_lsr_i_r %2:isub_hi, 10
// the operand 1 is a 32-bit register, which happens to be a subregister
// of the 64-bit register %2, and that subregister starts at position 32.
// In this case Begin=32, since Bits[32] would be the lowest-significant bit
bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
      unsigned OpN, BitVector &Bits, uint16_t Begin) {
  unsigned Opc = MI.getOpcode();
  BitVector T(Bits.size());
  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
  // Even if we don't have bits yet, we could still provide some information
  // if the instruction is a lossy shift: the lost bits will be marked as
  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
    assert(MI.getOperand(OpN).isReg());
    BitTracker::RegisterRef RR = MI.getOperand(OpN);
    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
    uint16_t Width = HRI.getRegSizeInBits(*RC);

    T.set(Begin, Begin+Width);
    assert(LB <= LE && LB < Width && LE <= Width);
    T.reset(Begin+LB, Begin+LE);
// Calculates the used bits in RD ("defined register"), and checks if these
// bits in RS ("used register") and RD are identical.
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
      BitTracker::RegisterRef RS) {
  const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);

  if (!HBS::getSubregMask(RD, DB, DW, MRI))
  if (!HBS::getSubregMask(RS, SB, SW, MRI))

  BitVector Used(DC.width());
  if (!computeUsedBits(RD.Reg, Used))

  for (unsigned i = 0; i != DW; ++i)
    if (Used[i+DB] && DC[DB+i] != SC[SB+i])
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
      const RegisterSet&) {
  if (!BT.reached(&B))
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
    NextI = std::next(I);
    MachineInstr *MI = &*I;

    if (MI->getOpcode() == TargetOpcode::COPY)
    if (MI->isPHI() || MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
    unsigned NumD = MI->getDesc().getNumDefs();

    BitTracker::RegisterRef RD = MI->getOperand(0);
    if (!BT.has(RD.Reg))
    const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
    auto At = MachineBasicBlock::iterator(MI);

    // Find a source operand that is equal to the result.
    for (auto &Op : MI->uses()) {
      BitTracker::RegisterRef RS = Op;
      if (!BT.has(RS.Reg))
      if (!HBS::isTransparentCopy(RD, RS, MRI))

      if (!HBS::getSubregMask(RS, BN, BW, MRI))

      const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
      if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))

      // If found, replace the instruction with a COPY.
      const DebugLoc &DL = MI->getDebugLoc();
      const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
      Register NewR = MRI.createVirtualRegister(FRC);
      MachineInstr *CopyI =
          BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
            .addReg(RS.Reg, 0, RS.Sub);
      HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
      // This pass can create copies between registers that don't have the
      // exact same values. Updating the tracker has to involve updating
      // all dependent cells. Example:
      //   %1  = inst %2     ; %1 != %2, but used bits are equal
      //
      //   %3 = copy %2      ; <- inserted
      //   ... = %3          ; <- replaced from %2
      // Indirectly, we can create a "copy" between %1 and %2 even
      // though their exact values do not match.
  // Recognize instructions that produce constant values known at compile-time.
  // Replace them with register definitions that load these constants directly.
  class ConstGeneration : public Transformation {
    ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
          MachineRegisterInfo &mri)
      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
    static bool isTfrConst(const MachineInstr &MI);

    unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
          MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);

    const HexagonInstrInfo &HII;
    MachineRegisterInfo &MRI;

} // end anonymous namespace
bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_tfrsi:
    case Hexagon::A2_tfrpi:
    case Hexagon::PS_true:
    case Hexagon::PS_false:
    case Hexagon::CONST32:
    case Hexagon::CONST64:
// Generate a transfer-immediate instruction that is appropriate for the
// register class and the actual value being transferred.
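// For example, a 64-bit constant whose high or low half fits in 8 bits,
// such as 0x0000000100000002, can be materialized with A2_combineii #1, #2;
// other 64-bit values fall back to CONST64 (see below).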
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
      MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
  Register Reg = MRI.createVirtualRegister(RC);
  if (RC == &Hexagon::IntRegsRegClass) {
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
      .addImm(int32_t(C));

  if (RC == &Hexagon::DoubleRegsRegClass) {
      BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)

    unsigned Lo = Lo_32(C), Hi = Hi_32(C);
    if (isInt<8>(Lo) || isInt<8>(Hi)) {
      unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
                                  : Hexagon::A4_combineii;
      BuildMI(B, At, DL, HII.get(Opc), Reg)
        .addImm(int32_t(Hi))
        .addImm(int32_t(Lo));

    BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)

  if (RC == &Hexagon::PredRegsRegClass) {
      Opc = Hexagon::PS_false;
    else if ((C & 0xFF) == 0xFF)
      Opc = Hexagon::PS_true;
    BuildMI(B, At, DL, HII.get(Opc), Reg);
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  if (!BT.reached(&B))
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(); I != E; ++I) {
    HBS::getInstrDefs(*I, Defs);
    if (Defs.count() != 1)
    unsigned DR = Defs.find_first();
    if (!Register::isVirtualRegister(DR))

    const BitTracker::RegisterCell &DRC = BT.lookup(DR);
    if (HBS::getConst(DRC, 0, DRC.width(), U)) {
      DebugLoc DL = I->getDebugLoc();
      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
      unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
      HBS::replaceReg(DR, ImmReg, MRI);
      BT.put(ImmReg, DRC);
  // Identify pairs of available registers which hold identical values.
  // In such cases, only one of them needs to be calculated, the other one
  // will be defined as a copy of the first.
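  // For example, if an available register %5 already holds the same bits as
  // a newly defined %7, a fresh register is defined as COPY %5 and the uses
  // of %7 are rewritten to it, so the computation of %7 can later be removed.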
  class CopyGeneration : public Transformation {
    CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
          const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

    bool findMatch(const BitTracker::RegisterRef &Inp,
          BitTracker::RegisterRef &Out, const RegisterSet &AVs);

    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
    RegisterSet Forbidden;

  // Eliminate register copies RD = RS, by replacing the uses of RD with
  class CopyPropagation : public Transformation {
    CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
      : Transformation(false), HRI(hri), MRI(mri) {}

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
    static bool isCopyReg(unsigned Opc, bool NoConv);
    bool propagateRegCopy(MachineInstr &MI);

    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;

} // end anonymous namespace
/// Check if there is a register in AVs that is identical to Inp. If so,
/// set Out to the found register. The output may be a pair Reg:Sub.
bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
      BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
  if (!BT.has(Inp.Reg))
  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
  auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
  if (!HBS::getSubregMask(Inp, B, W, MRI))

  for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
    if (!BT.has(R) || Forbidden[R])

    const BitTracker::RegisterCell &RC = BT.lookup(R);
    unsigned RW = RC.width();
      if (FRC != MRI.getRegClass(R))
      if (!HBS::isTransparentCopy(R, Inp, MRI))
      if (!HBS::isEqual(InpRC, B, RC, 0, W))

    // Check if there is a super-register, whose part (with a subregister)
    // is equal to the input.
    // Only do double registers for now.
    if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)

    if (HBS::isEqual(InpRC, B, RC, 0, W))
      Out.Sub = Hexagon::isub_lo;
    else if (HBS::isEqual(InpRC, B, RC, W, W))
      Out.Sub = Hexagon::isub_hi;

    if (HBS::isTransparentCopy(Out, Inp, MRI))
bool CopyGeneration::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
  RegisterSet AVB(AVs);
  bool Changed = false;

  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
       ++I, AVB.insert(Defs)) {
    NextI = std::next(I);
    HBS::getInstrDefs(*I, Defs);

    unsigned Opc = I->getOpcode();
    if (CopyPropagation::isCopyReg(Opc, false) ||
        ConstGeneration::isTfrConst(*I))

    DebugLoc DL = I->getDebugLoc();
    auto At = I->isPHI() ? B.getFirstNonPHI() : I;

    for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
      BitTracker::RegisterRef MR;
      auto *FRC = HBS::getFinalVRegClass(R, MRI);

      if (findMatch(R, MR, AVB)) {
        Register NewR = MRI.createVirtualRegister(FRC);
        BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
          .addReg(MR.Reg, 0, MR.Sub);
        BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
        HBS::replaceReg(R, NewR, MRI);
        Forbidden.insert(R);

      if (FRC == &Hexagon::DoubleRegsRegClass ||
          FRC == &Hexagon::HvxWRRegClass) {
        // Try to generate REG_SEQUENCE.
        unsigned SubLo = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_lo);
        unsigned SubHi = HRI.getHexagonSubRegIndex(*FRC, Hexagon::ps_sub_hi);
        BitTracker::RegisterRef TL = { R, SubLo };
        BitTracker::RegisterRef TH = { R, SubHi };
        BitTracker::RegisterRef ML, MH;
        if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
          auto *FRC = HBS::getFinalVRegClass(R, MRI);
          Register NewR = MRI.createVirtualRegister(FRC);
          BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
            .addReg(ML.Reg, 0, ML.Sub)
            .addReg(MH.Reg, 0, MH.Sub)
          BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
          HBS::replaceReg(R, NewR, MRI);
          Forbidden.insert(R);
bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
    case TargetOpcode::COPY:
    case TargetOpcode::REG_SEQUENCE:
    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri:
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp:
    case Hexagon::A2_combinew:
    case Hexagon::V6_vcombine:
bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
  bool Changed = false;
  unsigned Opc = MI.getOpcode();
  BitTracker::RegisterRef RD = MI.getOperand(0);
  assert(MI.getOperand(0).getSubReg() == 0);

    case TargetOpcode::COPY:
    case Hexagon::A2_tfr:
    case Hexagon::A2_tfrp: {
      BitTracker::RegisterRef RS = MI.getOperand(1);
      if (!HBS::isTransparentCopy(RD, RS, MRI))
        Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
        Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);

    case TargetOpcode::REG_SEQUENCE: {
      BitTracker::RegisterRef SL, SH;
      if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
        const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
        unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
        unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
        Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
        Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);

    case Hexagon::A2_combinew:
    case Hexagon::V6_vcombine: {
      const TargetRegisterClass &RC = *MRI.getRegClass(RD.Reg);
      unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
      unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
      BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
      Changed  = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
      Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);

    case Hexagon::A4_combineir:
    case Hexagon::A4_combineri: {
      unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
      unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
      BitTracker::RegisterRef RS = MI.getOperand(SrcX);
      Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
  std::vector<MachineInstr*> Instrs;
  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
    Instrs.push_back(&*I);

  bool Changed = false;
  for (auto I : Instrs) {
    unsigned Opc = I->getOpcode();
    if (!CopyPropagation::isCopyReg(Opc, true))
    Changed |= propagateRegCopy(*I);
  // Recognize patterns that can be simplified and replace them with the
  // This is by no means complete
  class BitSimplification : public Transformation {
    BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
          const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
          MachineRegisterInfo &mri, MachineFunction &mf)
      : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),

    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;

    struct RegHalf : public BitTracker::RegisterRef {
      bool Low;  // Low/High halfword.

    bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
          unsigned B, RegHalf &RH);
    bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);

    bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
          BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
    unsigned getCombineOpcode(bool HLow, bool LLow);

    bool genStoreUpperHalf(MachineInstr *MI);
    bool genStoreImmediate(MachineInstr *MI);
    bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
    bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC);
    bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
          const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
    bool simplifyRCmp0(MachineInstr *MI, BitTracker::RegisterRef RD);

    // Cache of created instructions to avoid creating duplicates.
    // XXX Currently only used by genBitSplit.
    std::vector<MachineInstr*> NewMIs;

    const MachineDominatorTree &MDT;
    const HexagonInstrInfo &HII;
    const HexagonRegisterInfo &HRI;
    MachineRegisterInfo &MRI;
    MachineFunction &MF;

} // end anonymous namespace
// Check if the bits [B..B+16) in register cell RC form a valid halfword,
// i.e. [0..16), [16..32), etc. of some register. If so, return true and
// set the information about the found register in RH.
bool BitSimplification::matchHalf(unsigned SelfR,
      const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
  // XXX This could be searching in the set of available registers, in case
  // the match is not exact.

  // Match 16-bit chunks, where the RC[B..B+15] references exactly one
  // register and all the bits B..B+15 match between RC and the register.
  // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
  // and RC = { [0]:0 [1-15]:v1[1-15]... }.
  while (I < B+16 && RC[I].num())

  unsigned Reg = RC[I].RefI.Reg;
  unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
  unsigned Pos = P - (I-B);

  if (Reg == 0 || Reg == SelfR)    // Don't match "self".
  if (!Register::isVirtualRegister(Reg))

  const BitTracker::RegisterCell &SC = BT.lookup(Reg);
  if (Pos+16 > SC.width())

  for (unsigned i = 0; i < 16; ++i) {
    const BitTracker::BitValue &RV = RC[i+B];
    if (RV.Type == BitTracker::BitValue::Ref) {
      if (RV.RefI.Reg != Reg)
      if (RV.RefI.Pos != i+Pos)
    if (RC[i+B] != SC[i+Pos])

      Sub = Hexagon::isub_lo;
      Sub = Hexagon::isub_lo;
      Sub = Hexagon::isub_hi;
      Sub = Hexagon::isub_hi;

  // If the subregister is not valid with the register, set it to 0.
  if (!HBS::getFinalVRegClass(RH, MRI))
bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
  auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
  auto *RRC = HBS::getFinalVRegClass(R, MRI);
  return OpRC->hasSubClassEq(RRC);
// Check if RC matches the pattern of a S2_packhl. If so, return true and
// set the inputs Rs and Rt.
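// In terms of bit positions, a match requires RC = { Rs.H at [48,64),
// Rt.H at [32,48), Rs.L at [16,32), Rt.L at [0,16) }, which is the layout
// this code expects from S2_packhl.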
bool BitSimplification::matchPackhl(unsigned SelfR,
      const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
      BitTracker::RegisterRef &Rt) {
  RegHalf L1, H1, L2, H2;

  if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))

  // Rs = H1.L1, Rt = H2.L2
  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)

unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
  return HLow ? LLow ? Hexagon::A2_combine_ll
                     : Hexagon::A2_combine_lh
              : LLow ? Hexagon::A2_combine_hl
                     : Hexagon::A2_combine_hh;
// If MI stores the upper halfword of a register (potentially obtained via
// shifts or extracts), replace it with a storerf instruction. This could
// cause the "extraction" code to become dead.
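// For example (illustrative):
//   %1 = S2_lsr_i_r %0, 16
//   S2_storerh_io %base, #off, %1
// can be rewritten as S2_storerf_io %base, #off, %0 (store the high
// halfword of %0 directly), potentially making the shift dead.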
bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_storerh_io)

  MachineOperand &ValOp = MI->getOperand(2);
  BitTracker::RegisterRef RS = ValOp;
  if (!BT.has(RS.Reg))
  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
  if (!matchHalf(0, RC, 0, H))

  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
  ValOp.setReg(H.Reg);
  ValOp.setSubReg(H.Sub);
// If MI stores a value known at compile-time, and the value is within a range
// that avoids using constant-extenders, replace it with a store-immediate.
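// For example, if %1 is known to be 5, then
//   S2_storeri_io %base, #8, %1
// can be rewritten as S4_storeiri_io %base, #8, #5, removing the register
// dependence on %1.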
bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
    case Hexagon::S2_storeri_io:
    case Hexagon::S2_storerh_io:
    case Hexagon::S2_storerb_io:

  // Avoid stores to frame-indices (due to an unknown offset).
  if (!MI->getOperand(0).isReg())
  MachineOperand &OffOp = MI->getOperand(1);
  int64_t Off = OffOp.getImm();
  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))

  BitTracker::RegisterRef RS = MI->getOperand(2);
  if (!BT.has(RS.Reg))
  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
  if (!HBS::getConst(RC, 0, RC.width(), U))

  // Only consider 8-bit values to avoid constant-extenders.
    case Hexagon::S2_storerb_io:
    case Hexagon::S2_storerh_io:
    case Hexagon::S2_storeri_io:
      // Opc is already checked above to be one of the three store instructions.
      // This silences a -Wuninitialized false positive on GCC 5.4.
      llvm_unreachable("Unexpected store opcode");

  MI->RemoveOperand(2);
    case Hexagon::S2_storerb_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
    case Hexagon::S2_storerh_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
    case Hexagon::S2_storeri_io:
      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
  MI->addOperand(MachineOperand::CreateImm(V));
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
// dead.
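// S2_packhl %Rs, %Rt produces, from the most to the least significant
// halfword, { Rs.h, Rt.h, Rs.l, Rt.l }. Any single-def instruction whose
// tracked output cell has exactly that layout (as recognized by matchPackhl
// above) is rewritten here into the one S2_packhl (register names are
// illustrative).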
bool BitSimplification::genPackhl(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc == Hexagon::S2_packhl)
    return false;
  BitTracker::RegisterRef Rs, Rt;
  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
    return false;
  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
      !validateReg(Rt, Hexagon::S2_packhl, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  Register NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  DebugLoc DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
      .addReg(Rs.Reg, 0, Rs.Sub)
      .addReg(Rt.Reg, 0, Rt.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
// If MI produces a halfword of the input in the low half of the output,
// replace it with a zero-extend or extractu.
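// For example (illustrative operands):
//   %1 = S2_extractu %0, 16, 0          ; low halfword of %0, zeros above
// becomes
//   %1 = A2_zxth %0
// and when the interesting halfword is %0.h, an S2_lsr_i_r %0, 16 is
// generated instead.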
bool BitSimplification::genExtractHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L;
  // Check for halfword in low 16 bits, zeros elsewhere.
  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
    return false;

  unsigned Opc = MI->getOpcode();
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Prefer zxth, since zxth can go in any slot, while extractu only in
  // slots 2 and 3.
  unsigned NewR = 0;
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  if (L.Low && Opc != Hexagon::A2_zxth) {
    if (validateReg(L, Hexagon::A2_zxth, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
          .addReg(L.Reg, 0, L.Sub);
    }
  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
    if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
          .addReg(L.Reg, 0, L.Sub)
          .addImm(16);
    }
  }

  if (NewR == 0)
    return false;

  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// corresponding A2_combine_* instruction.
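// For example (illustrative operands): if the tracked cell of %3 is
// { %0.l, %1.l } (high halfword from %0.l, low halfword from %1.l), the
// instruction defining %3 is replaced with
//   %3 = A2_combine_ll %0, %1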
bool BitSimplification::genCombineHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L, H;
  // Check for combine h/l.
  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
    return false;
  // Do nothing if this is just a reg copy.
  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
    return false;

  unsigned Opc = MI->getOpcode();
  unsigned COpc = getCombineOpcode(H.Low, L.Low);
  if (COpc == Opc)
    return false;
  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(COpc), NewR)
      .addReg(H.Reg, 0, H.Sub)
      .addReg(L.Reg, 0, L.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
// If MI resets high bits of a register and keeps the lower ones, replace it
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
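// For example (illustrative operands):
//   %1 = S2_extractu %0, 8, 0
// becomes
//   %1 = A2_zxtb %0
// while a zero-extended field narrower than 10 bits (other than 8) uses
// A2_andir with the mask (1 << W) - 1, and wider fields keep S2_extractu
// (see the selection of NewOpc below).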
bool BitSimplification::genExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A2_zxtb:
    case Hexagon::A2_zxth:
    case Hexagon::S2_extractu:
      return false;
  }
  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
    int32_t Imm = MI->getOperand(2).getImm();
    if (isInt<10>(Imm))
      return false;
  }

  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
    return false;
  unsigned W = RC.width();
  while (W > 0 && RC[W-1].is(0))
    W--;
  if (W == 0 || W == RC.width())
    return false;
  unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
                  : (W == 16) ? Hexagon::A2_zxth
                  : (W < 10)  ? Hexagon::A2_andir
                  : Hexagon::S2_extractu;
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  for (auto &Op : MI->uses()) {
    if (!Op.isReg())
      continue;
    BitTracker::RegisterRef RS = Op;
    if (!BT.has(RS.Reg))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
    unsigned BN, BW;
    if (!HBS::getSubregMask(RS, BN, BW, MRI))
      continue;
    if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
      continue;
    if (!validateReg(RS, NewOpc, 1))
      continue;

    Register NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
                  .addReg(RS.Reg, 0, RS.Sub);
    if (NewOpc == Hexagon::A2_andir)
      MIB.addImm((1 << W) - 1);
    else if (NewOpc == Hexagon::S2_extractu)
      MIB.addImm(W).addImm(0);
    HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    return true;
  }

  return false;
}
bool BitSimplification::genBitSplit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenBitSplit)
    return false;
  if (MaxBitSplit.getNumOccurrences()) {
    if (CountBitSplit >= MaxBitSplit)
      return false;
  }

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A4_bitsplit:
    case Hexagon::A4_bitspliti:
      return false;
  }

  unsigned W = RC.width();
  if (W != 32)
    return false;

  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
    unsigned Z = C.width();
    while (Z > 0 && C[Z-1].is(0))
      --Z;
    return C.width() - Z;
  };

  // Count the number of leading zeros in the target RC.
  unsigned Z = ctlz(RC);
  if (Z == 0 || Z == W)
    return false;

  // A simplistic analysis: assume the source register (the one being split)
  // is fully unknown, and that all its bits are self-references.
  const BitTracker::BitValue &B0 = RC[0];
  if (B0.Type != BitTracker::BitValue::Ref)
    return false;

  unsigned SrcR = B0.RefI.Reg;
  unsigned SrcSR = 0;
  unsigned Pos = B0.RefI.Pos;

  // All the non-zero bits should be consecutive bits from the same register.
  for (unsigned i = 1; i < W-Z; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      return false;
    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
      return false;
  }

  // Now, find the other bitfield among AVs.
  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
    // The number of leading zeros here should be the number of trailing
    // non-zeros in RC.
    unsigned SRC = MRI.getRegClass(S)->getID();
    if (SRC != Hexagon::IntRegsRegClassID &&
        SRC != Hexagon::DoubleRegsRegClassID)
      continue;
    if (!BT.has(S))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(S);
    if (SC.width() != W || ctlz(SC) != W-Z)
      continue;
    // The Z lower bits should now match SrcR.
    const BitTracker::BitValue &S0 = SC[0];
    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
      continue;
    unsigned P = S0.RefI.Pos;

    if (Pos <= P && (Pos + W-Z) != P)
      continue;
    if (P < Pos && (P + Z) != Pos)
      continue;

    // The starting bitfield position must be at a subregister boundary.
    if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
      continue;

    unsigned I;
    for (I = 1; I < Z; ++I) {
      const BitTracker::BitValue &V = SC[I];
      if (V.Type != BitTracker::BitValue::Ref)
        break;
      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
        break;
    }
    if (I != Z)
      continue;

    // Generate bitsplit where S is defined.
    if (MaxBitSplit.getNumOccurrences())
      CountBitSplit++;
    MachineInstr *DefS = MRI.getVRegDef(S);
    assert(DefS != nullptr);
    DebugLoc DL = DefS->getDebugLoc();
    MachineBasicBlock &B = *DefS->getParent();
    auto At = DefS->isPHI() ? B.getFirstNonPHI()
                            : MachineBasicBlock::iterator(DefS);
    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
      SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
    if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
      continue;
    unsigned ImmOp = Pos <= P ? W-Z : Z;

    // Find an existing bitsplit instruction if one already exists.
    unsigned NewR = 0;
    for (MachineInstr *In : NewMIs) {
      if (In->getOpcode() != Hexagon::A4_bitspliti)
        continue;
      MachineOperand &Op1 = In->getOperand(1);
      if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
        continue;
      if (In->getOperand(2).getImm() != ImmOp)
        continue;
      // Check if the target register is available here.
      MachineOperand &Op0 = In->getOperand(0);
      MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
      assert(DefI != nullptr);
      if (!MDT.dominates(DefI, &*At))
        continue;

      // Found one that can be reused.
      assert(Op0.getSubReg() == 0);
      NewR = Op0.getReg();
      break;
    }
    if (NewR == 0) {
      NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
      auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
                      .addReg(SrcR, 0, SrcSR)
                      .addImm(ImmOp);
      NewMIs.push_back(NewBS);
    }
    if (Pos <= P) {
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_hi, MRI);
    } else {
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
    }
    return true;
  }

  return false;
}
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
//   %2 = S2_lsr_i_r %1, 5
//   %3 = S2_tstbit_i %2, 0
// =>
//   %3 = S2_tstbit_i %1, 5
bool BitSimplification::simplifyTstbit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_tstbit_i)
    return false;

  unsigned BN = MI->getOperand(2).getImm();
  BitTracker::RegisterRef RS = MI->getOperand(1);
  unsigned F, W;
  DebugLoc DL = MI->getDebugLoc();
  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
    return false;
  MachineBasicBlock &B = *MI->getParent();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);

  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
  const BitTracker::BitValue &V = SC[F+BN];
  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
    const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
    // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
    // a double register, need to use a subregister and adjust bit
    // number.
    unsigned P = std::numeric_limits<unsigned>::max();
    BitTracker::RegisterRef RR(V.RefI.Reg, 0);
    if (TC == &Hexagon::DoubleRegsRegClass) {
      if (V.RefI.Pos < 32) {
        P = V.RefI.Pos;
        RR.Sub = Hexagon::isub_lo;
      } else {
        P = V.RefI.Pos - 32;
        RR.Sub = Hexagon::isub_hi;
      }
    } else if (TC == &Hexagon::IntRegsRegClass) {
      P = V.RefI.Pos;
    }
    if (P != std::numeric_limits<unsigned>::max()) {
      unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
          .addReg(RR.Reg, 0, RR.Sub)
          .addImm(P);
      HBS::replaceReg(RD.Reg, NewR, MRI);
      return true;
    }
  } else if (V.is(0) || V.is(1)) {
    Register NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
    BuildMI(B, At, DL, HII.get(NewOpc), NewR);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    return true;
  }

  return false;
}
// Detect whether RD is a bitfield extract (sign- or zero-extended) of
// some register from the AVs set. Create a new corresponding instruction
// at the location of MI. The intent is to recognize situations where
// a sequence of instructions performs an operation that is equivalent to
// an extract operation, such as a shift left followed by a shift right.
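// For example (illustrative operands):
//   %1 = S2_asl_i_r %0, 24
//   %2 = S2_asr_i_r %1, 24              ; sign-extends the low byte of %0
// becomes
//   %2 = A2_sxtb %0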
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenExtract)
    return false;
  if (MaxExtract.getNumOccurrences()) {
    if (CountExtract >= MaxExtract)
      return false;
    CountExtract++;
  }

  unsigned W = RC.width();
  unsigned RW = W;
  unsigned Len;
  bool Signed;

  // The code is mostly class-independent, except for the part that generates
  // the extract instruction, and establishes the source register (in case it
  // needs to use a subregister).
  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  // If the cell has a form of 00..0xx..x with k zeros and n remaining
  // bits, this could be an extractu of the n bits, but it could also be
  // an extractu of a longer field which happens to have 0s in the top
  // bit positions.
  // The same logic applies to sign-extended fields.
  //
  // Do not check for the extended extracts, since it would expand the
  // search space quite a bit. The search may be expensive as it is.

  const BitTracker::BitValue &TopV = RC[W-1];

  // Eliminate candidates that have self-referential bits, since they
  // cannot be extracts from other registers. Also, skip registers that
  // have compile-time constant values.
  bool IsConst = true;
  for (unsigned I = 0; I != W; ++I) {
    const BitTracker::BitValue &V = RC[I];
    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
      return false;
    IsConst = IsConst && (V.is(0) || V.is(1));
  }
  if (IsConst)
    return false;

  if (TopV.is(0) || TopV.is(1)) {
    bool S = TopV.is(1);
    for (--W; W > 0 && RC[W-1].is(S); --W)
      ;
    Len = W;
    Signed = S;
    // The sign bit must be a part of the field being extended.
    if (Signed)
      ++Len;
  } else {
    // This could still be a sign-extended extract.
    assert(TopV.Type == BitTracker::BitValue::Ref);
    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
      return false;
    for (--W; W > 0 && RC[W-1] == TopV; --W)
      ;
    // The top bits of RC are copies of TopV. One occurrence of TopV will
    // be a part of the field.
    Len = W + 1;
    Signed = true;
  }

  // This would be just a copy. It should be handled elsewhere.
  if (Len == RW)
    return false;

  LLVM_DEBUG({
    dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
           << ", MI: " << *MI;
    dbgs() << "Cell: " << RC << '\n';
    dbgs() << "Expected bitfield size: " << Len << " bits, "
           << (Signed ? "sign" : "zero") << "-extended\n";
  });

  bool Changed = false;

  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
    if (!BT.has(R))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(R);
    unsigned SW = SC.width();

    // The source can be longer than the destination, as long as its size is
    // a multiple of the size of the destination. Also, we would need to be
    // able to refer to the subregister in the source that would be of the
    // same size as the destination, but only check the sizes here.
    if (SW < RW || (SW % RW) != 0)
      continue;

    // The field can start at any offset in SC as long as it contains Len
    // bits and does not cross subregister boundary (if the source register
    // is longer than the destination).
    unsigned Off = 0;
    while (Off <= SW-Len) {
      unsigned OE = (Off+Len)/RW;
      if (OE != Off/RW) {
        // The assumption here is that if the source (R) is longer than the
        // destination, then the destination is a sequence of words of
        // size RW, and each such word in R can be accessed via a subregister.
        //
        // If the beginning and the end of the field cross the subregister
        // boundary, advance to the next subregister.
        Off = OE*RW;
        continue;
      }
      if (HBS::isEqual(RC, 0, SC, Off, Len))
        break;
      ++Off;
    }

    if (Off > SW-Len)
      continue;

    // Found a match.
    unsigned ExtOpc = 0;
    if (Off == 0) {
      if (Len == 8)
        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
      else if (Len == 16)
        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
      else if (Len < 10 && !Signed)
        ExtOpc = Hexagon::A2_andir;
    }
    if (ExtOpc == 0) {
      ExtOpc =
        Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
               : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
    }
    unsigned SR = 0;
    // This only recognizes isub_lo and isub_hi.
    if (RW != SW && RW*2 != SW)
      continue;
    if (RW != SW)
      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
    if (!validateReg({R,SR}, ExtOpc, 1))
      continue;

    // Don't generate the same instruction as the one being optimized.
    if (MI->getOpcode() == ExtOpc) {
      // All possible ExtOpc's have the source in operand(1).
      const MachineOperand &SrcOp = MI->getOperand(1);
      if (SrcOp.getReg() == R)
        continue;
    }

    DebugLoc DL = MI->getDebugLoc();
    MachineBasicBlock &B = *MI->getParent();
    Register NewR = MRI.createVirtualRegister(FRC);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
                  .addReg(R, 0, SR);
    switch (ExtOpc) {
      case Hexagon::A2_sxtb:
      case Hexagon::A2_zxtb:
      case Hexagon::A2_sxth:
      case Hexagon::A2_zxth:
        break;
      case Hexagon::A2_andir:
        MIB.addImm((1u << Len) - 1);
        break;
      case Hexagon::S4_extract:
      case Hexagon::S2_extractu:
      case Hexagon::S4_extractp:
      case Hexagon::S2_extractup:
        MIB.addImm(Len)
           .addImm(Off);
        break;
      default:
        llvm_unreachable("Unexpected opcode");
    }

    HBS::replaceReg(RD.Reg, NewR, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    Changed = true;
    break;
  }

  return Changed;
}
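
// Simplify a register compare against zero (A4_rcmpeqi/A4_rcmpneqi with an
// immediate of 0): if the bit tracker proves the source to be all-zero or
// definitely non-zero, the result is a constant; if the source comes from a
// C2_muxii whose operands are known zero/non-zero, the compare folds into a
// C2_muxii of 0/1 on the same predicate.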
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
      BitTracker::RegisterRef RD) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
    return false;
  MachineOperand &CmpOp = MI->getOperand(2);
  if (!CmpOp.isImm() || CmpOp.getImm() != 0)
    return false;

  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  MachineBasicBlock &B = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  bool KnownZ = true;
  bool KnownNZ = false;

  BitTracker::RegisterRef SR = MI->getOperand(1);
  if (!BT.has(SR.Reg))
    return false;
  const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
  unsigned F, W;
  if (!HBS::getSubregMask(SR, F, W, MRI))
    return false;

  for (uint16_t I = F; I != F+W; ++I) {
    const BitTracker::BitValue &V = SC[I];
    if (!V.is(0))
      KnownZ = false;
    if (V.is(1))
      KnownNZ = true;
  }

  auto ReplaceWithConst = [&](int C) {
    Register NewR = MRI.createVirtualRegister(FRC);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
        .addImm(C);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    BitTracker::RegisterCell NewRC(W);
    for (uint16_t I = 0; I != W; ++I) {
      NewRC[I] = BitTracker::BitValue(C & 1);
      C = unsigned(C) >> 1;
    }
    BT.put(BitTracker::RegisterRef(NewR), NewRC);
    return true;
  };

  auto IsNonZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return true;
    if (Op.isImm())
      return Op.getImm() != 0;
    if (Op.isCImm())
      return !Op.getCImm()->isZero();
    if (Op.isFPImm())
      return !Op.getFPImm()->isZero();
    return false;
  };

  auto IsZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return false;
    if (Op.isImm())
      return Op.getImm() == 0;
    if (Op.isCImm())
      return Op.getCImm()->isZero();
    if (Op.isFPImm())
      return Op.getFPImm()->isZero();
    return false;
  };

  // If the source register is known to be 0 or non-0, the comparison can
  // be folded to a load of a constant.
  if (KnownZ || KnownNZ) {
    assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
    return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
  }

  // Special case: if the compare comes from a C2_muxii, then we know the
  // two possible constants that can be the source value.
  MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
  if (!InpDef)
    return false;
  if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
    MachineOperand &Src1 = InpDef->getOperand(2);
    MachineOperand &Src2 = InpDef->getOperand(3);
    // Check if both are non-zero.
    bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
    if (KnownNZ1 && KnownNZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
    // Check if both are zero.
    bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
    if (KnownZ1 && KnownZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);

    // If for both operands we know that they are either 0 or non-0,
    // replace the comparison with a C2_muxii, using the same predicate
    // register, but with operands substituted with 0/1 accordingly.
    if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
      Register NewR = MRI.createVirtualRegister(FRC);
      BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
          .addReg(InpDef->getOperand(1).getReg())
          .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
          .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
      HBS::replaceReg(RD.Reg, NewR, MRI);
      // Create a new cell with only the least significant bit unknown.
      BitTracker::RegisterCell NewRC(W);
      NewRC[0] = BitTracker::BitValue::self();
      NewRC.fill(1, W, BitTracker::BitValue::Zero);
      BT.put(BitTracker::RegisterRef(NewR), NewRC);
      return true;
    }
  }

  return false;
}
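
// Walk the block, accumulating the set of registers defined so far, and try
// the transformations that match each instruction: store rewrites for
// stores, packhl/extract for 64-bit definitions, the bitsplit/extract/
// combine/rcmp0 family for 32-bit definitions, and the tstbit simplification
// for predicate definitions.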
bool BitSimplification::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;
  RegisterSet AVB = AVs;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
    MachineInstr *MI = &*I;
    Defs.clear();
    HBS::getInstrDefs(*MI, Defs);

    unsigned Opc = MI->getOpcode();
    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
      continue;

    if (MI->mayStore()) {
      bool T = genStoreUpperHalf(MI);
      T = T || genStoreImmediate(MI);
      Changed |= T;
      continue;
    }

    if (Defs.count() != 1)
      continue;
    const MachineOperand &Op0 = MI->getOperand(0);
    if (!Op0.isReg() || !Op0.isDef())
      continue;
    BitTracker::RegisterRef RD = Op0;
    if (!BT.has(RD.Reg))
      continue;
    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);

    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
      bool T = genPackhl(MI, RD, RC);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
      bool T = genBitSplit(MI, RD, RC, AVB);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      T = T || genExtractHalf(MI, RD, RC);
      T = T || genCombineHalf(MI, RD, RC);
      T = T || genExtractLow(MI, RD, RC);
      T = T || simplifyRCmp0(MI, RD);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
      bool T = simplifyTstbit(MI, RD, RC);
      Changed |= T;
      continue;
    }
  }
  return Changed;
}
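
// Pass driver: run dead-code elimination first, then the individual phases
// (constant generation, redundant-instruction elimination, copy generation
// and propagation, and the bit simplifications above), re-running the bit
// tracker and dead-code elimination in between so each phase sees
// up-to-date bit information.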
bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HRI = *HST.getRegisterInfo();
  auto &HII = *HST.getInstrInfo();

  MDT = &getAnalysis<MachineDominatorTree>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed;

  Changed = DeadCodeElimination(MF, *MDT).run();

  const HexagonEvaluator HE(HRI, MRI, HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();

  MachineBasicBlock &Entry = MF.front();

  RegisterSet AIG;  // Available registers for IG.
  ConstGeneration ImmG(BT, HII, MRI);
  Changed |= visitBlock(Entry, ImmG, AIG);

  RegisterSet ARE;  // Available registers for RIE.
  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
  bool Ried = visitBlock(Entry, RIE, ARE);
  if (Ried) {
    Changed = true;
    BT.run();
  }

  RegisterSet ACG;  // Available registers for CG.
  CopyGeneration CopyG(BT, HII, HRI, MRI);
  Changed |= visitBlock(Entry, CopyG, ACG);

  RegisterSet ACP;  // Available registers for CP.
  CopyPropagation CopyP(HRI, MRI);
  Changed |= visitBlock(Entry, CopyP, ACP);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  BT.run();
  RegisterSet ABS;  // Available registers for BS.
  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
  Changed |= visitBlock(Entry, BitS, ABS);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  if (Changed) {
    for (auto &B : MF)
      for (auto &I : B)
        I.clearKillInfo();
    DeadCodeElimination(MF, *MDT).run();
  }
  return Changed;
}
// Recognize loops where the code at the end of the loop matches the code
// before the entry of the loop, and the matching code is such that it can
// be simplified. This pass relies on the bit simplification above and only
// prepares code in a way that can be handled by the bit simplification.
//
// This is the motivating testcase (and explanation):
//
//   loop0(.LBB0_2, r1)            // %for.body.preheader
//   r5:4 = memd(r0++#8)
//   ...
//   r3 = lsr(r4, #16)
//   r7:6 = combine(r5, r5)
//   ...
//   r3 = insert(r5, #16, #16)
//   r7:6 = vlsrw(r7:6, #16)
//   ...
//   memh(r2+#6) = r6              # R6 is really R5.H
//   ...
//   memh(r2+#2) = r3              # R3 is really R4.H
//   ...
//   r5:4 = memd(r0++#8)
//   ...
//   {   # "Shuffling" code that sets up R3 and R6
//     r3 = lsr(r4, #16)           # so that their halves can be stored in the
//     r7:6 = combine(r5, r5)      # next iteration. This could be folded into
//   }   # the stores if the code was at the beginning
//   {   # of the loop iteration. Since the same code
//     r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
//     r7:6 = vlsrw(r7:6, #16)     # there.
//   }
//   ...
// The expected result:
//
//   loop0(.LBB0_2, r1)
//   r5:4 = memd(r0++#8)
//   ...
//   memh(r2+#6) = r5.h
//   ...
//   memh(r2+#2) = r4.h
//   ...
//   r5:4 = memd(r0++#8)
//   ...
namespace llvm {

  FunctionPass *createHexagonLoopRescheduling();
  void initializeHexagonLoopReschedulingPass(PassRegistry&);

} // end namespace llvm
namespace {

  class HexagonLoopRescheduling : public MachineFunctionPass {
  public:
    static char ID;

    HexagonLoopRescheduling() : MachineFunctionPass(ID) {
      initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    const HexagonInstrInfo *HII = nullptr;
    const HexagonRegisterInfo *HRI = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    BitTracker *BTP = nullptr;

    struct LoopCand {
      LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
            MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}

      MachineBasicBlock *LB, *PB, *EB;
    };
    using InstrList = std::vector<MachineInstr *>;
    struct InstrGroup {
      BitTracker::RegisterRef Inp, Out;
      InstrList Ins;
    };
    struct PhiInfo {
      PhiInfo(MachineInstr &P, MachineBasicBlock &B);

      unsigned DefR;
      BitTracker::RegisterRef LR, PR;   // Loop Register, Preheader Register
      MachineBasicBlock *LB, *PB;       // Loop Block, Preheader Block
    };

    static unsigned getDefReg(const MachineInstr *MI);
    bool isConst(unsigned Reg) const;
    bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
    bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
    bool isShuffleOf(unsigned OutR, unsigned InpR) const;
    bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
        unsigned &InpR2) const;
    void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
        MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
    bool processLoop(LoopCand &C);
  };

} // end anonymous namespace
char HexagonLoopRescheduling::ID = 0;

INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
  "Hexagon Loop Rescheduling", false, false)
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
      MachineBasicBlock &B) {
  DefR = HexagonLoopRescheduling::getDefReg(&P);
  LB = &B;
  PB = nullptr;
  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
    const MachineOperand &OpB = P.getOperand(i+1);
    if (OpB.getMBB() == &B) {
      LR = P.getOperand(i);
      continue;
    }
    PB = OpB.getMBB();
    PR = P.getOperand(i);
  }
}
unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
  RegisterSet Defs;
  HBS::getInstrDefs(*MI, Defs);
  if (Defs.count() != 1)
    return 0;
  return Defs.find_first();
}
bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
  if (!BTP->has(Reg))
    return false;
  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (!V.is(0) && !V.is(1))
      return false;
  }
  return true;
}
bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
      unsigned DefR) const {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case TargetOpcode::COPY:
    case Hexagon::S2_lsr_i_r:
    case Hexagon::S2_asr_i_r:
    case Hexagon::S2_asl_i_r:
    case Hexagon::S2_lsr_i_p:
    case Hexagon::S2_asr_i_p:
    case Hexagon::S2_asl_i_p:
    case Hexagon::S2_insert:
    case Hexagon::A2_or:
    case Hexagon::A2_orp:
    case Hexagon::A2_and:
    case Hexagon::A2_andp:
    case Hexagon::A2_combinew:
    case Hexagon::A4_combineri:
    case Hexagon::A4_combineir:
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_combine_ll:
    case Hexagon::A2_combine_lh:
    case Hexagon::A2_combine_hl:
    case Hexagon::A2_combine_hh:
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
      unsigned InpR) const {
  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
    const MachineOperand &Op = MI->getOperand(i);
    if (!Op.isReg())
      continue;
    if (Op.getReg() == InpR)
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
  if (!BTP->has(OutR) || !BTP->has(InpR))
    return false;
  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = OutC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      continue;
    if (V.RefI.Reg != InpR)
      return false;
  }
  return true;
}

bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
      unsigned OutR2, unsigned &InpR2) const {
  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
    return false;
  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
  unsigned W = OutC1.width();
  unsigned MatchR = 0;
  if (W != OutC2.width())
    return false;
  for (unsigned i = 0; i < W; ++i) {
    const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
    if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
      return false;
    if (V1.Type != BitTracker::BitValue::Ref)
      continue;
    if (V1.RefI.Pos != V2.RefI.Pos)
      return false;
    if (V1.RefI.Reg != InpR1)
      return false;
    if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
      return false;
    if (MatchR == 0)
      MatchR = V2.RefI.Reg;
    else if (V2.RefI.Reg != MatchR)
      return false;
  }
  InpR2 = MatchR;
  return true;
}
void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
      unsigned NewPredR) {
  DenseMap<unsigned,unsigned> RegMap;

  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
  Register PhiR = MRI->createVirtualRegister(PhiRC);
  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
      .addReg(NewPredR)
      .addMBB(&PB)
      .addReg(G.Out.Reg)
      .addMBB(&LB);
  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));

  for (unsigned i = G.Ins.size(); i > 0; --i) {
    const MachineInstr *SI = G.Ins[i-1];
    unsigned DR = getDefReg(SI);
    const TargetRegisterClass *RC = MRI->getRegClass(DR);
    Register NewDR = MRI->createVirtualRegister(RC);
    DebugLoc DL = SI->getDebugLoc();

    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
    for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
      const MachineOperand &Op = SI->getOperand(j);
      if (!Op.isReg()) {
        MIB.add(Op);
        continue;
      }
      if (!Op.isUse())
        continue;
      unsigned UseR = RegMap[Op.getReg()];
      MIB.addReg(UseR, 0, Op.getSubReg());
    }
    RegMap.insert(std::make_pair(DR, NewDR));
  }

  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
}
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
  LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
                    << "\n");
  std::vector<PhiInfo> Phis;
  for (auto &I : *C.LB) {
    if (!I.isPHI())
      break;
    unsigned PR = getDefReg(&I);
    if (isConst(PR))
      continue;
    bool BadUse = false, GoodUse = false;
    for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() != C.LB) {
        BadUse = true;
        break;
      }
      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
        GoodUse = true;
    }
    if (BadUse || !GoodUse)
      continue;

    Phis.push_back(PhiInfo(I, *C.LB));
  }

  LLVM_DEBUG({
    dbgs() << "Phis: {";
    for (auto &I : Phis) {
      dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
             << printReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
             << ',' << printReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
             << I.LB->getNumber() << ')';
    }
    dbgs() << " }\n";
  });

  if (Phis.empty())
    return false;

  bool Changed = false;
  InstrList ShufIns;

  // Go backwards in the block: for each bit shuffling instruction, check
  // if that instruction could potentially be moved to the front of the loop:
  // the output of the loop cannot be used in a non-shuffling instruction
  // in this loop.
  for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
    if (I->isTerminator())
      continue;
    if (I->isPHI())
      break;

    RegisterSet Defs;
    HBS::getInstrDefs(*I, Defs);
    if (Defs.count() != 1)
      continue;
    unsigned DefR = Defs.find_first();
    if (!Register::isVirtualRegister(DefR))
      continue;
    if (!isBitShuffle(&*I, DefR))
      continue;

    bool BadUse = false;
    for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() == C.LB) {
        if (UseI->isPHI()) {
          // If the use is in a phi node in this loop, then it should be
          // the value corresponding to the back edge.
          unsigned Idx = UI.getOperandNo();
          if (UseI->getOperand(Idx+1).getMBB() != C.LB)
            BadUse = true;
        } else {
          auto F = find(ShufIns, UseI);
          if (F == ShufIns.end())
            BadUse = true;
        }
      } else {
        // There is a use outside of the loop, but there is no epilog block
        // suitable for a copy-out.
        if (C.EB == nullptr)
          BadUse = true;
      }
      if (BadUse)
        break;
    }

    if (BadUse)
      continue;
    ShufIns.push_back(&*I);
  }

  // Partition the list of shuffling instructions into instruction groups,
  // where each group has to be moved as a whole (i.e. a group is a chain of
  // dependent instructions). A group produces a single live output register,
  // which is meant to be the input of the loop phi node (although this is
  // not checked here yet). It also uses a single register as its input,
  // which is some value produced in the loop body. After moving the group
  // to the beginning of the loop, that input register would need to be
  // the loop-carried register (through a phi node) instead of the (currently
  // loop-carried) output register.
  using InstrGroupList = std::vector<InstrGroup>;
  InstrGroupList Groups;

  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
    MachineInstr *SI = ShufIns[i];
    if (SI == nullptr)
      continue;

    InstrGroup G;
    G.Ins.push_back(SI);
    G.Out.Reg = getDefReg(SI);
    RegisterSet Inputs;
    HBS::getInstrUses(*SI, Inputs);

    for (unsigned j = i+1; j < n; ++j) {
      MachineInstr *MI = ShufIns[j];
      if (MI == nullptr)
        continue;
      RegisterSet Defs;
      HBS::getInstrDefs(*MI, Defs);
      // If this instruction does not define any pending inputs, skip it.
      if (!Defs.intersects(Inputs))
        continue;
      // Otherwise, add it to the current group and remove the inputs that
      // are defined by MI.
      G.Ins.push_back(MI);
      Inputs.remove(Defs);
      // Then add all registers used by MI.
      HBS::getInstrUses(*MI, Inputs);
      ShufIns[j] = nullptr;
    }

    // Only add a group if it requires at most one register.
    if (Inputs.count() > 1)
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
      continue;

    G.Inp.Reg = Inputs.find_first();
    Groups.push_back(G);
  }

  LLVM_DEBUG({
    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
      InstrGroup &G = Groups[i];
      dbgs() << "Group[" << i << "] inp: "
             << printReg(G.Inp.Reg, HRI, G.Inp.Sub)
             << "  out: " << printReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
      for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
        dbgs() << "  " << *G.Ins[j];
    }
  });

  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
    InstrGroup &G = Groups[i];
    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    auto F = llvm::find_if(Phis, LoopInpEq);
    if (F == Phis.end())
      continue;
    unsigned PrehR = 0;
    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
      const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
      unsigned Opc = DefPrehR->getOpcode();
      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
        continue;
      if (!DefPrehR->getOperand(1).isImm())
        continue;
      if (DefPrehR->getOperand(1).getImm() != 0)
        continue;
      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
      if (RC != MRI->getRegClass(F->PR.Reg)) {
        PrehR = MRI->createVirtualRegister(RC);
        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
                                                          : Hexagon::A2_tfrpi;
        auto T = C.PB->getFirstTerminator();
        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
        BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
            .addImm(0);
      } else {
        PrehR = F->PR.Reg;
      }
    }
    // isSameShuffle could match with PrehR being of a wider class than
    // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
    // it would match for the input being a 32-bit register, and PrehR
    // being a 64-bit register (where the low 32 bits match). This could
    // be handled, but for now skip these cases.
    if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
      continue;
    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
    Changed = true;
  }

  return Changed;
}
bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();
  BTP = &BT;

  std::vector<LoopCand> Cand;

  for (auto &B : MF) {
    if (B.pred_size() != 2 || B.succ_size() != 2)
      continue;

    MachineBasicBlock *PB = nullptr;
    bool IsLoop = false;
    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
      if (*PI != &B)
        PB = *PI;
      else
        IsLoop = true;
    }
    if (!IsLoop)
      continue;

    MachineBasicBlock *EB = nullptr;
    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
      if (*SI == &B)
        continue;
      // Set EB to the epilog block, if it has only 1 predecessor (i.e. the
      // edge from B to EB is non-critical).
      if ((*SI)->pred_size() == 1)
        EB = *SI;
      break;
    }

    Cand.push_back(LoopCand(&B, PB, EB));
  }

  bool Changed = false;
  for (auto &C : Cand)
    Changed |= processLoop(C);

  return Changed;
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createHexagonLoopRescheduling() {
  return new HexagonLoopRescheduling();
}

FunctionPass *llvm::createHexagonBitSimplify() {
  return new HexagonBitSimplify();
}