1 //===- HexagonBitSimplify.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineOperand.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/TargetRegisterInfo.h"
29 #include "llvm/IR/DebugLoc.h"
30 #include "llvm/MC/MCInstrDesc.h"
31 #include "llvm/Pass.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
46 #define DEBUG_TYPE "hexbit"
50 static cl::opt
<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden
,
51 cl::init(true), cl::desc("Preserve subregisters in tied operands"));
52 static cl::opt
<bool> GenExtract("hexbit-extract", cl::Hidden
,
53 cl::init(true), cl::desc("Generate extract instructions"));
54 static cl::opt
<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden
,
55 cl::init(true), cl::desc("Generate bitsplit instructions"));
57 static cl::opt
<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden
,
58 cl::init(std::numeric_limits
<unsigned>::max()));
59 static unsigned CountExtract
= 0;
60 static cl::opt
<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden
,
61 cl::init(std::numeric_limits
<unsigned>::max()));
62 static unsigned CountBitSplit
= 0;
66 void initializeHexagonBitSimplifyPass(PassRegistry
& Registry
);
67 FunctionPass
*createHexagonBitSimplify();
69 } // end namespace llvm
73 // Set of virtual registers, based on BitVector.
74 struct RegisterSet
: private BitVector
{
75 RegisterSet() = default;
76 explicit RegisterSet(unsigned s
, bool t
= false) : BitVector(s
, t
) {}
77 RegisterSet(const RegisterSet
&RS
) = default;
79 using BitVector::clear
;
80 using BitVector::count
;
82 unsigned find_first() const {
83 int First
= BitVector::find_first();
89 unsigned find_next(unsigned Prev
) const {
90 int Next
= BitVector::find_next(v2x(Prev
));
96 RegisterSet
&insert(unsigned R
) {
97 unsigned Idx
= v2x(R
);
99 return static_cast<RegisterSet
&>(BitVector::set(Idx
));
101 RegisterSet
&remove(unsigned R
) {
102 unsigned Idx
= v2x(R
);
105 return static_cast<RegisterSet
&>(BitVector::reset(Idx
));
108 RegisterSet
&insert(const RegisterSet
&Rs
) {
109 return static_cast<RegisterSet
&>(BitVector::operator|=(Rs
));
111 RegisterSet
&remove(const RegisterSet
&Rs
) {
112 return static_cast<RegisterSet
&>(BitVector::reset(Rs
));
115 reference
operator[](unsigned R
) {
116 unsigned Idx
= v2x(R
);
118 return BitVector::operator[](Idx
);
120 bool operator[](unsigned R
) const {
121 unsigned Idx
= v2x(R
);
122 assert(Idx
< size());
123 return BitVector::operator[](Idx
);
125 bool has(unsigned R
) const {
126 unsigned Idx
= v2x(R
);
129 return BitVector::test(Idx
);
133 return !BitVector::any();
135 bool includes(const RegisterSet
&Rs
) const {
136 // A.BitVector::test(B) <=> A-B != {}
137 return !Rs
.BitVector::test(*this);
139 bool intersects(const RegisterSet
&Rs
) const {
140 return BitVector::anyCommon(Rs
);
144 void ensure(unsigned Idx
) {
146 resize(std::max(Idx
+1, 32U));
149 static inline unsigned v2x(unsigned v
) {
150 return TargetRegisterInfo::virtReg2Index(v
);
153 static inline unsigned x2v(unsigned x
) {
154 return TargetRegisterInfo::index2VirtReg(x
);
159 PrintRegSet(const RegisterSet
&S
, const TargetRegisterInfo
*RI
)
162 friend raw_ostream
&operator<< (raw_ostream
&OS
,
163 const PrintRegSet
&P
);
166 const RegisterSet
&RS
;
167 const TargetRegisterInfo
*TRI
;
170 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegSet
&P
)
171 LLVM_ATTRIBUTE_UNUSED
;
172 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegSet
&P
) {
174 for (unsigned R
= P
.RS
.find_first(); R
; R
= P
.RS
.find_next(R
))
175 OS
<< ' ' << printReg(R
, P
.TRI
);
180 class Transformation
;
182 class HexagonBitSimplify
: public MachineFunctionPass
{
186 HexagonBitSimplify() : MachineFunctionPass(ID
) {}
188 StringRef
getPassName() const override
{
189 return "Hexagon bit simplification";
192 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
193 AU
.addRequired
<MachineDominatorTree
>();
194 AU
.addPreserved
<MachineDominatorTree
>();
195 MachineFunctionPass::getAnalysisUsage(AU
);
198 bool runOnMachineFunction(MachineFunction
&MF
) override
;
200 static void getInstrDefs(const MachineInstr
&MI
, RegisterSet
&Defs
);
201 static void getInstrUses(const MachineInstr
&MI
, RegisterSet
&Uses
);
202 static bool isEqual(const BitTracker::RegisterCell
&RC1
, uint16_t B1
,
203 const BitTracker::RegisterCell
&RC2
, uint16_t B2
, uint16_t W
);
204 static bool isZero(const BitTracker::RegisterCell
&RC
, uint16_t B
,
206 static bool getConst(const BitTracker::RegisterCell
&RC
, uint16_t B
,
207 uint16_t W
, uint64_t &U
);
208 static bool replaceReg(unsigned OldR
, unsigned NewR
,
209 MachineRegisterInfo
&MRI
);
210 static bool getSubregMask(const BitTracker::RegisterRef
&RR
,
211 unsigned &Begin
, unsigned &Width
, MachineRegisterInfo
&MRI
);
212 static bool replaceRegWithSub(unsigned OldR
, unsigned NewR
,
213 unsigned NewSR
, MachineRegisterInfo
&MRI
);
214 static bool replaceSubWithSub(unsigned OldR
, unsigned OldSR
,
215 unsigned NewR
, unsigned NewSR
, MachineRegisterInfo
&MRI
);
216 static bool parseRegSequence(const MachineInstr
&I
,
217 BitTracker::RegisterRef
&SL
, BitTracker::RegisterRef
&SH
,
218 const MachineRegisterInfo
&MRI
);
220 static bool getUsedBitsInStore(unsigned Opc
, BitVector
&Bits
,
222 static bool getUsedBits(unsigned Opc
, unsigned OpN
, BitVector
&Bits
,
223 uint16_t Begin
, const HexagonInstrInfo
&HII
);
225 static const TargetRegisterClass
*getFinalVRegClass(
226 const BitTracker::RegisterRef
&RR
, MachineRegisterInfo
&MRI
);
227 static bool isTransparentCopy(const BitTracker::RegisterRef
&RD
,
228 const BitTracker::RegisterRef
&RS
, MachineRegisterInfo
&MRI
);
231 MachineDominatorTree
*MDT
= nullptr;
233 bool visitBlock(MachineBasicBlock
&B
, Transformation
&T
, RegisterSet
&AVs
);
234 static bool hasTiedUse(unsigned Reg
, MachineRegisterInfo
&MRI
,
235 unsigned NewSub
= Hexagon::NoSubRegister
);
238 using HBS
= HexagonBitSimplify
;
240 // The purpose of this class is to provide a common facility to traverse
241 // the function top-down or bottom-up via the dominator tree, and keep
242 // track of the available registers.
243 class Transformation
{
247 Transformation(bool TD
) : TopDown(TD
) {}
248 virtual ~Transformation() = default;
250 virtual bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) = 0;
253 } // end anonymous namespace
255 char HexagonBitSimplify::ID
= 0;
257 INITIALIZE_PASS_BEGIN(HexagonBitSimplify
, "hexagon-bit-simplify",
258 "Hexagon bit simplification", false, false)
259 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
260 INITIALIZE_PASS_END(HexagonBitSimplify
, "hexagon-bit-simplify",
261 "Hexagon bit simplification", false, false)
263 bool HexagonBitSimplify::visitBlock(MachineBasicBlock
&B
, Transformation
&T
,
265 bool Changed
= false;
268 Changed
= T
.processBlock(B
, AVs
);
272 getInstrDefs(I
, Defs
);
273 RegisterSet NewAVs
= AVs
;
276 for (auto *DTN
: children
<MachineDomTreeNode
*>(MDT
->getNode(&B
)))
277 Changed
|= visitBlock(*(DTN
->getBlock()), T
, NewAVs
);
280 Changed
|= T
.processBlock(B
, AVs
);
286 // Utility functions:
288 void HexagonBitSimplify::getInstrDefs(const MachineInstr
&MI
,
290 for (auto &Op
: MI
.operands()) {
291 if (!Op
.isReg() || !Op
.isDef())
293 unsigned R
= Op
.getReg();
294 if (!TargetRegisterInfo::isVirtualRegister(R
))
300 void HexagonBitSimplify::getInstrUses(const MachineInstr
&MI
,
302 for (auto &Op
: MI
.operands()) {
303 if (!Op
.isReg() || !Op
.isUse())
305 unsigned R
= Op
.getReg();
306 if (!TargetRegisterInfo::isVirtualRegister(R
))
312 // Check if all the bits in range [B, E) in both cells are equal.
313 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell
&RC1
,
314 uint16_t B1
, const BitTracker::RegisterCell
&RC2
, uint16_t B2
,
316 for (uint16_t i
= 0; i
< W
; ++i
) {
317 // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
318 if (RC1
[B1
+i
].Type
== BitTracker::BitValue::Ref
&& RC1
[B1
+i
].RefI
.Reg
== 0)
321 if (RC2
[B2
+i
].Type
== BitTracker::BitValue::Ref
&& RC2
[B2
+i
].RefI
.Reg
== 0)
323 if (RC1
[B1
+i
] != RC2
[B2
+i
])
329 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell
&RC
,
330 uint16_t B
, uint16_t W
) {
331 assert(B
< RC
.width() && B
+W
<= RC
.width());
332 for (uint16_t i
= B
; i
< B
+W
; ++i
)
338 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell
&RC
,
339 uint16_t B
, uint16_t W
, uint64_t &U
) {
340 assert(B
< RC
.width() && B
+W
<= RC
.width());
342 for (uint16_t i
= B
+W
; i
> B
; --i
) {
343 const BitTracker::BitValue
&BV
= RC
[i
-1];
354 bool HexagonBitSimplify::replaceReg(unsigned OldR
, unsigned NewR
,
355 MachineRegisterInfo
&MRI
) {
356 if (!TargetRegisterInfo::isVirtualRegister(OldR
) ||
357 !TargetRegisterInfo::isVirtualRegister(NewR
))
359 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
361 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
362 NextI
= std::next(I
);
368 bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR
, unsigned NewR
,
369 unsigned NewSR
, MachineRegisterInfo
&MRI
) {
370 if (!TargetRegisterInfo::isVirtualRegister(OldR
) ||
371 !TargetRegisterInfo::isVirtualRegister(NewR
))
373 if (hasTiedUse(OldR
, MRI
, NewSR
))
375 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
377 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
378 NextI
= std::next(I
);
385 bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR
, unsigned OldSR
,
386 unsigned NewR
, unsigned NewSR
, MachineRegisterInfo
&MRI
) {
387 if (!TargetRegisterInfo::isVirtualRegister(OldR
) ||
388 !TargetRegisterInfo::isVirtualRegister(NewR
))
390 if (OldSR
!= NewSR
&& hasTiedUse(OldR
, MRI
, NewSR
))
392 auto Begin
= MRI
.use_begin(OldR
), End
= MRI
.use_end();
394 for (auto I
= Begin
; I
!= End
; I
= NextI
) {
395 NextI
= std::next(I
);
396 if (I
->getSubReg() != OldSR
)
404 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
405 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
406 // if this succeeded, otherwise return false.
407 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef
&RR
,
408 unsigned &Begin
, unsigned &Width
, MachineRegisterInfo
&MRI
) {
409 const TargetRegisterClass
*RC
= MRI
.getRegClass(RR
.Reg
);
412 Width
= MRI
.getTargetRegisterInfo()->getRegSizeInBits(*RC
);
418 switch (RC
->getID()) {
419 case Hexagon::DoubleRegsRegClassID
:
420 case Hexagon::HvxWRRegClassID
:
421 Width
= MRI
.getTargetRegisterInfo()->getRegSizeInBits(*RC
) / 2;
422 if (RR
.Sub
== Hexagon::isub_hi
|| RR
.Sub
== Hexagon::vsub_hi
)
432 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
434 bool HexagonBitSimplify::parseRegSequence(const MachineInstr
&I
,
435 BitTracker::RegisterRef
&SL
, BitTracker::RegisterRef
&SH
,
436 const MachineRegisterInfo
&MRI
) {
437 assert(I
.getOpcode() == TargetOpcode::REG_SEQUENCE
);
438 unsigned Sub1
= I
.getOperand(2).getImm(), Sub2
= I
.getOperand(4).getImm();
439 auto &DstRC
= *MRI
.getRegClass(I
.getOperand(0).getReg());
440 auto &HRI
= static_cast<const HexagonRegisterInfo
&>(
441 *MRI
.getTargetRegisterInfo());
442 unsigned SubLo
= HRI
.getHexagonSubRegIndex(DstRC
, Hexagon::ps_sub_lo
);
443 unsigned SubHi
= HRI
.getHexagonSubRegIndex(DstRC
, Hexagon::ps_sub_hi
);
444 assert((Sub1
== SubLo
&& Sub2
== SubHi
) || (Sub1
== SubHi
&& Sub2
== SubLo
));
445 if (Sub1
== SubLo
&& Sub2
== SubHi
) {
446 SL
= I
.getOperand(1);
447 SH
= I
.getOperand(3);
450 if (Sub1
== SubHi
&& Sub2
== SubLo
) {
451 SH
= I
.getOperand(1);
452 SL
= I
.getOperand(3);
458 // All stores (except 64-bit stores) take a 32-bit register as the source
459 // of the value to be stored. If the instruction stores into a location
460 // that is shorter than 32 bits, some bits of the source register are not
461 // used. For each store instruction, calculate the set of used bits in
462 // the source register, and set appropriate bits in Bits. Return true if
463 // the bits are calculated, false otherwise.
464 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc
, BitVector
&Bits
,
466 using namespace Hexagon
;
470 case S2_storerb_io
: // memb(Rs32+#s11:0)=Rt32
471 case S2_storerbnew_io
: // memb(Rs32+#s11:0)=Nt8.new
472 case S2_pstorerbt_io
: // if (Pv4) memb(Rs32+#u6:0)=Rt32
473 case S2_pstorerbf_io
: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
474 case S4_pstorerbtnew_io
: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
475 case S4_pstorerbfnew_io
: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
476 case S2_pstorerbnewt_io
: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
477 case S2_pstorerbnewf_io
: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
478 case S4_pstorerbnewtnew_io
: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
479 case S4_pstorerbnewfnew_io
: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
480 case S2_storerb_pi
: // memb(Rx32++#s4:0)=Rt32
481 case S2_storerbnew_pi
: // memb(Rx32++#s4:0)=Nt8.new
482 case S2_pstorerbt_pi
: // if (Pv4) memb(Rx32++#s4:0)=Rt32
483 case S2_pstorerbf_pi
: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
484 case S2_pstorerbtnew_pi
: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
485 case S2_pstorerbfnew_pi
: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
486 case S2_pstorerbnewt_pi
: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
487 case S2_pstorerbnewf_pi
: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
488 case S2_pstorerbnewtnew_pi
: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
489 case S2_pstorerbnewfnew_pi
: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
490 case S4_storerb_ap
: // memb(Re32=#U6)=Rt32
491 case S4_storerbnew_ap
: // memb(Re32=#U6)=Nt8.new
492 case S2_storerb_pr
: // memb(Rx32++Mu2)=Rt32
493 case S2_storerbnew_pr
: // memb(Rx32++Mu2)=Nt8.new
494 case S4_storerb_ur
: // memb(Ru32<<#u2+#U6)=Rt32
495 case S4_storerbnew_ur
: // memb(Ru32<<#u2+#U6)=Nt8.new
496 case S2_storerb_pbr
: // memb(Rx32++Mu2:brev)=Rt32
497 case S2_storerbnew_pbr
: // memb(Rx32++Mu2:brev)=Nt8.new
498 case S2_storerb_pci
: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
499 case S2_storerbnew_pci
: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
500 case S2_storerb_pcr
: // memb(Rx32++I:circ(Mu2))=Rt32
501 case S2_storerbnew_pcr
: // memb(Rx32++I:circ(Mu2))=Nt8.new
502 case S4_storerb_rr
: // memb(Rs32+Ru32<<#u2)=Rt32
503 case S4_storerbnew_rr
: // memb(Rs32+Ru32<<#u2)=Nt8.new
504 case S4_pstorerbt_rr
: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
505 case S4_pstorerbf_rr
: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
506 case S4_pstorerbtnew_rr
: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
507 case S4_pstorerbfnew_rr
: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
508 case S4_pstorerbnewt_rr
: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
509 case S4_pstorerbnewf_rr
: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
510 case S4_pstorerbnewtnew_rr
: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
511 case S4_pstorerbnewfnew_rr
: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
512 case S2_storerbgp
: // memb(gp+#u16:0)=Rt32
513 case S2_storerbnewgp
: // memb(gp+#u16:0)=Nt8.new
514 case S4_pstorerbt_abs
: // if (Pv4) memb(#u6)=Rt32
515 case S4_pstorerbf_abs
: // if (!Pv4) memb(#u6)=Rt32
516 case S4_pstorerbtnew_abs
: // if (Pv4.new) memb(#u6)=Rt32
517 case S4_pstorerbfnew_abs
: // if (!Pv4.new) memb(#u6)=Rt32
518 case S4_pstorerbnewt_abs
: // if (Pv4) memb(#u6)=Nt8.new
519 case S4_pstorerbnewf_abs
: // if (!Pv4) memb(#u6)=Nt8.new
520 case S4_pstorerbnewtnew_abs
: // if (Pv4.new) memb(#u6)=Nt8.new
521 case S4_pstorerbnewfnew_abs
: // if (!Pv4.new) memb(#u6)=Nt8.new
522 Bits
.set(Begin
, Begin
+8);
526 case S2_storerh_io
: // memh(Rs32+#s11:1)=Rt32
527 case S2_storerhnew_io
: // memh(Rs32+#s11:1)=Nt8.new
528 case S2_pstorerht_io
: // if (Pv4) memh(Rs32+#u6:1)=Rt32
529 case S2_pstorerhf_io
: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
530 case S4_pstorerhtnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
531 case S4_pstorerhfnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
532 case S2_pstorerhnewt_io
: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
533 case S2_pstorerhnewf_io
: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
534 case S4_pstorerhnewtnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
535 case S4_pstorerhnewfnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
536 case S2_storerh_pi
: // memh(Rx32++#s4:1)=Rt32
537 case S2_storerhnew_pi
: // memh(Rx32++#s4:1)=Nt8.new
538 case S2_pstorerht_pi
: // if (Pv4) memh(Rx32++#s4:1)=Rt32
539 case S2_pstorerhf_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
540 case S2_pstorerhtnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
541 case S2_pstorerhfnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
542 case S2_pstorerhnewt_pi
: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
543 case S2_pstorerhnewf_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
544 case S2_pstorerhnewtnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
545 case S2_pstorerhnewfnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
546 case S4_storerh_ap
: // memh(Re32=#U6)=Rt32
547 case S4_storerhnew_ap
: // memh(Re32=#U6)=Nt8.new
548 case S2_storerh_pr
: // memh(Rx32++Mu2)=Rt32
549 case S2_storerhnew_pr
: // memh(Rx32++Mu2)=Nt8.new
550 case S4_storerh_ur
: // memh(Ru32<<#u2+#U6)=Rt32
551 case S4_storerhnew_ur
: // memh(Ru32<<#u2+#U6)=Nt8.new
552 case S2_storerh_pbr
: // memh(Rx32++Mu2:brev)=Rt32
553 case S2_storerhnew_pbr
: // memh(Rx32++Mu2:brev)=Nt8.new
554 case S2_storerh_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
555 case S2_storerhnew_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
556 case S2_storerh_pcr
: // memh(Rx32++I:circ(Mu2))=Rt32
557 case S2_storerhnew_pcr
: // memh(Rx32++I:circ(Mu2))=Nt8.new
558 case S4_storerh_rr
: // memh(Rs32+Ru32<<#u2)=Rt32
559 case S4_pstorerht_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
560 case S4_pstorerhf_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
561 case S4_pstorerhtnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
562 case S4_pstorerhfnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
563 case S4_storerhnew_rr
: // memh(Rs32+Ru32<<#u2)=Nt8.new
564 case S4_pstorerhnewt_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
565 case S4_pstorerhnewf_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
566 case S4_pstorerhnewtnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
567 case S4_pstorerhnewfnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
568 case S2_storerhgp
: // memh(gp+#u16:1)=Rt32
569 case S2_storerhnewgp
: // memh(gp+#u16:1)=Nt8.new
570 case S4_pstorerht_abs
: // if (Pv4) memh(#u6)=Rt32
571 case S4_pstorerhf_abs
: // if (!Pv4) memh(#u6)=Rt32
572 case S4_pstorerhtnew_abs
: // if (Pv4.new) memh(#u6)=Rt32
573 case S4_pstorerhfnew_abs
: // if (!Pv4.new) memh(#u6)=Rt32
574 case S4_pstorerhnewt_abs
: // if (Pv4) memh(#u6)=Nt8.new
575 case S4_pstorerhnewf_abs
: // if (!Pv4) memh(#u6)=Nt8.new
576 case S4_pstorerhnewtnew_abs
: // if (Pv4.new) memh(#u6)=Nt8.new
577 case S4_pstorerhnewfnew_abs
: // if (!Pv4.new) memh(#u6)=Nt8.new
578 Bits
.set(Begin
, Begin
+16);
582 case S2_storerf_io
: // memh(Rs32+#s11:1)=Rt.H32
583 case S2_pstorerft_io
: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
584 case S2_pstorerff_io
: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
585 case S4_pstorerftnew_io
: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
586 case S4_pstorerffnew_io
: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
587 case S2_storerf_pi
: // memh(Rx32++#s4:1)=Rt.H32
588 case S2_pstorerft_pi
: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
589 case S2_pstorerff_pi
: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
590 case S2_pstorerftnew_pi
: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
591 case S2_pstorerffnew_pi
: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
592 case S4_storerf_ap
: // memh(Re32=#U6)=Rt.H32
593 case S2_storerf_pr
: // memh(Rx32++Mu2)=Rt.H32
594 case S4_storerf_ur
: // memh(Ru32<<#u2+#U6)=Rt.H32
595 case S2_storerf_pbr
: // memh(Rx32++Mu2:brev)=Rt.H32
596 case S2_storerf_pci
: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
597 case S2_storerf_pcr
: // memh(Rx32++I:circ(Mu2))=Rt.H32
598 case S4_storerf_rr
: // memh(Rs32+Ru32<<#u2)=Rt.H32
599 case S4_pstorerft_rr
: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
600 case S4_pstorerff_rr
: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
601 case S4_pstorerftnew_rr
: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
602 case S4_pstorerffnew_rr
: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
603 case S2_storerfgp
: // memh(gp+#u16:1)=Rt.H32
604 case S4_pstorerft_abs
: // if (Pv4) memh(#u6)=Rt.H32
605 case S4_pstorerff_abs
: // if (!Pv4) memh(#u6)=Rt.H32
606 case S4_pstorerftnew_abs
: // if (Pv4.new) memh(#u6)=Rt.H32
607 case S4_pstorerffnew_abs
: // if (!Pv4.new) memh(#u6)=Rt.H32
608 Bits
.set(Begin
+16, Begin
+32);
615 // For an instruction with opcode Opc, calculate the set of bits that it
616 // uses in a register in operand OpN. This only calculates the set of used
617 // bits for cases where it does not depend on any operands (as is the case
618 // in shifts, for example). For concrete instructions from a program, the
619 // operand may be a subregister of a larger register, while Bits would
620 // correspond to the larger register in its entirety. Because of that,
621 // the parameter Begin can be used to indicate which bit of Bits should be
622 // considered the LSB of the operand.
623 bool HexagonBitSimplify::getUsedBits(unsigned Opc
, unsigned OpN
,
624 BitVector
&Bits
, uint16_t Begin
, const HexagonInstrInfo
&HII
) {
625 using namespace Hexagon
;
627 const MCInstrDesc
&D
= HII
.get(Opc
);
629 if (OpN
== D
.getNumOperands()-1)
630 return getUsedBitsInStore(Opc
, Bits
, Begin
);
635 // One register source. Used bits: R1[0-7].
642 Bits
.set(Begin
, Begin
+8);
647 // One register source. Used bits: R1[0-15].
655 Bits
.set(Begin
, Begin
+16);
660 // One register source. Used bits: R1[16-31].
663 Bits
.set(Begin
+16, Begin
+32);
668 // Two register sources. Used bits: R1[0-7], R2[0-7].
673 Bits
.set(Begin
, Begin
+8);
678 // Two register sources. Used bits: R1[0-15], R2[0-15].
683 case A2_addh_h16_sat_ll
:
685 case A2_addh_l16_sat_ll
:
688 case A2_subh_h16_sat_ll
:
690 case A2_subh_l16_sat_ll
:
691 case M2_mpy_acc_ll_s0
:
692 case M2_mpy_acc_ll_s1
:
693 case M2_mpy_acc_sat_ll_s0
:
694 case M2_mpy_acc_sat_ll_s1
:
697 case M2_mpy_nac_ll_s0
:
698 case M2_mpy_nac_ll_s1
:
699 case M2_mpy_nac_sat_ll_s0
:
700 case M2_mpy_nac_sat_ll_s1
:
701 case M2_mpy_rnd_ll_s0
:
702 case M2_mpy_rnd_ll_s1
:
703 case M2_mpy_sat_ll_s0
:
704 case M2_mpy_sat_ll_s1
:
705 case M2_mpy_sat_rnd_ll_s0
:
706 case M2_mpy_sat_rnd_ll_s1
:
707 case M2_mpyd_acc_ll_s0
:
708 case M2_mpyd_acc_ll_s1
:
711 case M2_mpyd_nac_ll_s0
:
712 case M2_mpyd_nac_ll_s1
:
713 case M2_mpyd_rnd_ll_s0
:
714 case M2_mpyd_rnd_ll_s1
:
715 case M2_mpyu_acc_ll_s0
:
716 case M2_mpyu_acc_ll_s1
:
719 case M2_mpyu_nac_ll_s0
:
720 case M2_mpyu_nac_ll_s1
:
721 case M2_mpyud_acc_ll_s0
:
722 case M2_mpyud_acc_ll_s1
:
725 case M2_mpyud_nac_ll_s0
:
726 case M2_mpyud_nac_ll_s1
:
727 if (OpN
== 1 || OpN
== 2) {
728 Bits
.set(Begin
, Begin
+16);
733 // Two register sources. Used bits: R1[0-15], R2[16-31].
735 case A2_addh_h16_sat_lh
:
738 case A2_subh_h16_sat_lh
:
739 case M2_mpy_acc_lh_s0
:
740 case M2_mpy_acc_lh_s1
:
741 case M2_mpy_acc_sat_lh_s0
:
742 case M2_mpy_acc_sat_lh_s1
:
745 case M2_mpy_nac_lh_s0
:
746 case M2_mpy_nac_lh_s1
:
747 case M2_mpy_nac_sat_lh_s0
:
748 case M2_mpy_nac_sat_lh_s1
:
749 case M2_mpy_rnd_lh_s0
:
750 case M2_mpy_rnd_lh_s1
:
751 case M2_mpy_sat_lh_s0
:
752 case M2_mpy_sat_lh_s1
:
753 case M2_mpy_sat_rnd_lh_s0
:
754 case M2_mpy_sat_rnd_lh_s1
:
755 case M2_mpyd_acc_lh_s0
:
756 case M2_mpyd_acc_lh_s1
:
759 case M2_mpyd_nac_lh_s0
:
760 case M2_mpyd_nac_lh_s1
:
761 case M2_mpyd_rnd_lh_s0
:
762 case M2_mpyd_rnd_lh_s1
:
763 case M2_mpyu_acc_lh_s0
:
764 case M2_mpyu_acc_lh_s1
:
767 case M2_mpyu_nac_lh_s0
:
768 case M2_mpyu_nac_lh_s1
:
769 case M2_mpyud_acc_lh_s0
:
770 case M2_mpyud_acc_lh_s1
:
773 case M2_mpyud_nac_lh_s0
:
774 case M2_mpyud_nac_lh_s1
:
775 // These four are actually LH.
777 case A2_addh_l16_sat_hl
:
779 case A2_subh_l16_sat_hl
:
781 Bits
.set(Begin
, Begin
+16);
785 Bits
.set(Begin
+16, Begin
+32);
790 // Two register sources, used bits: R1[16-31], R2[0-15].
792 case A2_addh_h16_sat_hl
:
795 case A2_subh_h16_sat_hl
:
796 case M2_mpy_acc_hl_s0
:
797 case M2_mpy_acc_hl_s1
:
798 case M2_mpy_acc_sat_hl_s0
:
799 case M2_mpy_acc_sat_hl_s1
:
802 case M2_mpy_nac_hl_s0
:
803 case M2_mpy_nac_hl_s1
:
804 case M2_mpy_nac_sat_hl_s0
:
805 case M2_mpy_nac_sat_hl_s1
:
806 case M2_mpy_rnd_hl_s0
:
807 case M2_mpy_rnd_hl_s1
:
808 case M2_mpy_sat_hl_s0
:
809 case M2_mpy_sat_hl_s1
:
810 case M2_mpy_sat_rnd_hl_s0
:
811 case M2_mpy_sat_rnd_hl_s1
:
812 case M2_mpyd_acc_hl_s0
:
813 case M2_mpyd_acc_hl_s1
:
816 case M2_mpyd_nac_hl_s0
:
817 case M2_mpyd_nac_hl_s1
:
818 case M2_mpyd_rnd_hl_s0
:
819 case M2_mpyd_rnd_hl_s1
:
820 case M2_mpyu_acc_hl_s0
:
821 case M2_mpyu_acc_hl_s1
:
824 case M2_mpyu_nac_hl_s0
:
825 case M2_mpyu_nac_hl_s1
:
826 case M2_mpyud_acc_hl_s0
:
827 case M2_mpyud_acc_hl_s1
:
830 case M2_mpyud_nac_hl_s0
:
831 case M2_mpyud_nac_hl_s1
:
833 Bits
.set(Begin
+16, Begin
+32);
837 Bits
.set(Begin
, Begin
+16);
842 // Two register sources, used bits: R1[16-31], R2[16-31].
844 case A2_addh_h16_sat_hh
:
847 case A2_subh_h16_sat_hh
:
848 case M2_mpy_acc_hh_s0
:
849 case M2_mpy_acc_hh_s1
:
850 case M2_mpy_acc_sat_hh_s0
:
851 case M2_mpy_acc_sat_hh_s1
:
854 case M2_mpy_nac_hh_s0
:
855 case M2_mpy_nac_hh_s1
:
856 case M2_mpy_nac_sat_hh_s0
:
857 case M2_mpy_nac_sat_hh_s1
:
858 case M2_mpy_rnd_hh_s0
:
859 case M2_mpy_rnd_hh_s1
:
860 case M2_mpy_sat_hh_s0
:
861 case M2_mpy_sat_hh_s1
:
862 case M2_mpy_sat_rnd_hh_s0
:
863 case M2_mpy_sat_rnd_hh_s1
:
864 case M2_mpyd_acc_hh_s0
:
865 case M2_mpyd_acc_hh_s1
:
868 case M2_mpyd_nac_hh_s0
:
869 case M2_mpyd_nac_hh_s1
:
870 case M2_mpyd_rnd_hh_s0
:
871 case M2_mpyd_rnd_hh_s1
:
872 case M2_mpyu_acc_hh_s0
:
873 case M2_mpyu_acc_hh_s1
:
876 case M2_mpyu_nac_hh_s0
:
877 case M2_mpyu_nac_hh_s1
:
878 case M2_mpyud_acc_hh_s0
:
879 case M2_mpyud_acc_hh_s1
:
882 case M2_mpyud_nac_hh_s0
:
883 case M2_mpyud_nac_hh_s1
:
884 if (OpN
== 1 || OpN
== 2) {
885 Bits
.set(Begin
+16, Begin
+32);
894 // Calculate the register class that matches Reg:Sub. For example, if
895 // %1 is a double register, then %1:isub_hi would match the "int"
897 const TargetRegisterClass
*HexagonBitSimplify::getFinalVRegClass(
898 const BitTracker::RegisterRef
&RR
, MachineRegisterInfo
&MRI
) {
899 if (!TargetRegisterInfo::isVirtualRegister(RR
.Reg
))
901 auto *RC
= MRI
.getRegClass(RR
.Reg
);
904 auto &HRI
= static_cast<const HexagonRegisterInfo
&>(
905 *MRI
.getTargetRegisterInfo());
907 auto VerifySR
= [&HRI
] (const TargetRegisterClass
*RC
, unsigned Sub
) -> void {
909 assert(Sub
== HRI
.getHexagonSubRegIndex(*RC
, Hexagon::ps_sub_lo
) ||
910 Sub
== HRI
.getHexagonSubRegIndex(*RC
, Hexagon::ps_sub_hi
));
913 switch (RC
->getID()) {
914 case Hexagon::DoubleRegsRegClassID
:
915 VerifySR(RC
, RR
.Sub
);
916 return &Hexagon::IntRegsRegClass
;
917 case Hexagon::HvxWRRegClassID
:
918 VerifySR(RC
, RR
.Sub
);
919 return &Hexagon::HvxVRRegClass
;
924 // Check if RD could be replaced with RS at any possible use of RD.
925 // For example a predicate register cannot be replaced with a integer
926 // register, but a 64-bit register with a subregister can be replaced
927 // with a 32-bit register.
928 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef
&RD
,
929 const BitTracker::RegisterRef
&RS
, MachineRegisterInfo
&MRI
) {
930 if (!TargetRegisterInfo::isVirtualRegister(RD
.Reg
) ||
931 !TargetRegisterInfo::isVirtualRegister(RS
.Reg
))
933 // Return false if one (or both) classes are nullptr.
934 auto *DRC
= getFinalVRegClass(RD
, MRI
);
938 return DRC
== getFinalVRegClass(RS
, MRI
);
941 bool HexagonBitSimplify::hasTiedUse(unsigned Reg
, MachineRegisterInfo
&MRI
,
943 if (!PreserveTiedOps
)
945 return llvm::any_of(MRI
.use_operands(Reg
),
946 [NewSub
] (const MachineOperand
&Op
) -> bool {
947 return Op
.getSubReg() != NewSub
&& Op
.isTied();
953 class DeadCodeElimination
{
955 DeadCodeElimination(MachineFunction
&mf
, MachineDominatorTree
&mdt
)
956 : MF(mf
), HII(*MF
.getSubtarget
<HexagonSubtarget
>().getInstrInfo()),
957 MDT(mdt
), MRI(mf
.getRegInfo()) {}
960 return runOnNode(MDT
.getRootNode());
964 bool isDead(unsigned R
) const;
965 bool runOnNode(MachineDomTreeNode
*N
);
968 const HexagonInstrInfo
&HII
;
969 MachineDominatorTree
&MDT
;
970 MachineRegisterInfo
&MRI
;
973 } // end anonymous namespace
975 bool DeadCodeElimination::isDead(unsigned R
) const {
976 for (auto I
= MRI
.use_begin(R
), E
= MRI
.use_end(); I
!= E
; ++I
) {
977 MachineInstr
*UseI
= I
->getParent();
978 if (UseI
->isDebugValue())
981 assert(!UseI
->getOperand(0).getSubReg());
982 unsigned DR
= UseI
->getOperand(0).getReg();
991 bool DeadCodeElimination::runOnNode(MachineDomTreeNode
*N
) {
992 bool Changed
= false;
994 for (auto *DTN
: children
<MachineDomTreeNode
*>(N
))
995 Changed
|= runOnNode(DTN
);
997 MachineBasicBlock
*B
= N
->getBlock();
998 std::vector
<MachineInstr
*> Instrs
;
999 for (auto I
= B
->rbegin(), E
= B
->rend(); I
!= E
; ++I
)
1000 Instrs
.push_back(&*I
);
1002 for (auto MI
: Instrs
) {
1003 unsigned Opc
= MI
->getOpcode();
1004 // Do not touch lifetime markers. This is why the target-independent DCE
1006 if (Opc
== TargetOpcode::LIFETIME_START
||
1007 Opc
== TargetOpcode::LIFETIME_END
)
1010 if (MI
->isInlineAsm())
1012 // Delete PHIs if possible.
1013 if (!MI
->isPHI() && !MI
->isSafeToMove(nullptr, Store
))
1016 bool AllDead
= true;
1017 SmallVector
<unsigned,2> Regs
;
1018 for (auto &Op
: MI
->operands()) {
1019 if (!Op
.isReg() || !Op
.isDef())
1021 unsigned R
= Op
.getReg();
1022 if (!TargetRegisterInfo::isVirtualRegister(R
) || !isDead(R
)) {
1032 for (unsigned i
= 0, n
= Regs
.size(); i
!= n
; ++i
)
1033 MRI
.markUsesInDebugValueAsUndef(Regs
[i
]);
1042 // Eliminate redundant instructions
1044 // This transformation will identify instructions where the output register
1045 // is the same as one of its input registers. This only works on instructions
1046 // that define a single register (unlike post-increment loads, for example).
1047 // The equality check is actually more detailed: the code calculates which
1048 // bits of the output are used, and only compares these bits with the input
1050 // If the output matches an input, the instruction is replaced with COPY.
1051 // The copies will be removed by another transformation.
1052 class RedundantInstrElimination
: public Transformation
{
1054 RedundantInstrElimination(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1055 const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1056 : Transformation(true), HII(hii
), HRI(hri
), MRI(mri
), BT(bt
) {}
1058 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1061 bool isLossyShiftLeft(const MachineInstr
&MI
, unsigned OpN
,
1062 unsigned &LostB
, unsigned &LostE
);
1063 bool isLossyShiftRight(const MachineInstr
&MI
, unsigned OpN
,
1064 unsigned &LostB
, unsigned &LostE
);
1065 bool computeUsedBits(unsigned Reg
, BitVector
&Bits
);
1066 bool computeUsedBits(const MachineInstr
&MI
, unsigned OpN
, BitVector
&Bits
,
1068 bool usedBitsEqual(BitTracker::RegisterRef RD
, BitTracker::RegisterRef RS
);
1070 const HexagonInstrInfo
&HII
;
1071 const HexagonRegisterInfo
&HRI
;
1072 MachineRegisterInfo
&MRI
;
1076 } // end anonymous namespace
1078 // Check if the instruction is a lossy shift left, where the input being
1079 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1080 // of bit indices that are lost.
1081 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr
&MI
,
1082 unsigned OpN
, unsigned &LostB
, unsigned &LostE
) {
1083 using namespace Hexagon
;
1085 unsigned Opc
= MI
.getOpcode();
1086 unsigned ImN
, RegN
, Width
;
1093 case S2_asl_i_p_acc
:
1094 case S2_asl_i_p_and
:
1095 case S2_asl_i_p_nac
:
1097 case S2_asl_i_p_xacc
:
1107 case S2_addasl_rrri
:
1108 case S4_andi_asl_ri
:
1110 case S4_addi_asl_ri
:
1111 case S4_subi_asl_ri
:
1112 case S2_asl_i_r_acc
:
1113 case S2_asl_i_r_and
:
1114 case S2_asl_i_r_nac
:
1116 case S2_asl_i_r_sat
:
1117 case S2_asl_i_r_xacc
:
1129 assert(MI
.getOperand(ImN
).isImm());
1130 unsigned S
= MI
.getOperand(ImN
).getImm();
1138 // Check if the instruction is a lossy shift right, where the input being
1139 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
1140 // of bit indices that are lost.
1141 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr
&MI
,
1142 unsigned OpN
, unsigned &LostB
, unsigned &LostE
) {
1143 using namespace Hexagon
;
1145 unsigned Opc
= MI
.getOpcode();
1153 case S2_asr_i_p_acc
:
1154 case S2_asr_i_p_and
:
1155 case S2_asr_i_p_nac
:
1157 case S2_lsr_i_p_acc
:
1158 case S2_lsr_i_p_and
:
1159 case S2_lsr_i_p_nac
:
1161 case S2_lsr_i_p_xacc
:
1170 case S4_andi_lsr_ri
:
1172 case S4_addi_lsr_ri
:
1173 case S4_subi_lsr_ri
:
1174 case S2_asr_i_r_acc
:
1175 case S2_asr_i_r_and
:
1176 case S2_asr_i_r_nac
:
1178 case S2_lsr_i_r_acc
:
1179 case S2_lsr_i_r_and
:
1180 case S2_lsr_i_r_nac
:
1182 case S2_lsr_i_r_xacc
:
1194 assert(MI
.getOperand(ImN
).isImm());
1195 unsigned S
= MI
.getOperand(ImN
).getImm();
1201 // Calculate the bit vector that corresponds to the used bits of register Reg.
1202 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
1203 // culation fails (i.e. the used bits are unknown), it returns false. Other-
1204 // wise, it returns true and sets the corresponding bits in Bits.
1205 bool RedundantInstrElimination::computeUsedBits(unsigned Reg
, BitVector
&Bits
) {
1206 BitVector
Used(Bits
.size());
1207 RegisterSet Visited
;
1208 std::vector
<unsigned> Pending
;
1209 Pending
.push_back(Reg
);
1211 for (unsigned i
= 0; i
< Pending
.size(); ++i
) {
1212 unsigned R
= Pending
[i
];
1216 for (auto I
= MRI
.use_begin(R
), E
= MRI
.use_end(); I
!= E
; ++I
) {
1217 BitTracker::RegisterRef UR
= *I
;
1219 if (!HBS::getSubregMask(UR
, B
, W
, MRI
))
1221 MachineInstr
&UseI
= *I
->getParent();
1222 if (UseI
.isPHI() || UseI
.isCopy()) {
1223 unsigned DefR
= UseI
.getOperand(0).getReg();
1224 if (!TargetRegisterInfo::isVirtualRegister(DefR
))
1226 Pending
.push_back(DefR
);
1228 if (!computeUsedBits(UseI
, I
.getOperandNo(), Used
, B
))
1237 // Calculate the bits used by instruction MI in a register in operand OpN.
1238 // Return true/false if the calculation succeeds/fails. If is succeeds, set
1239 // used bits in Bits. This function does not reset any bits in Bits, so
1240 // subsequent calls over different instructions will result in the union
1241 // of the used bits in all these instructions.
1242 // The register in question may be used with a sub-register, whereas Bits
1243 // holds the bits for the entire register. To keep track of that, the
1244 // argument Begin indicates where in Bits is the lowest-significant bit
1245 // of the register used in operand OpN. For example, in instruction:
1246 // %1 = S2_lsr_i_r %2:isub_hi, 10
1247 // the operand 1 is a 32-bit register, which happens to be a subregister
1248 // of the 64-bit register %2, and that subregister starts at position 32.
1249 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
1251 bool RedundantInstrElimination::computeUsedBits(const MachineInstr
&MI
,
1252 unsigned OpN
, BitVector
&Bits
, uint16_t Begin
) {
1253 unsigned Opc
= MI
.getOpcode();
1254 BitVector
T(Bits
.size());
1255 bool GotBits
= HBS::getUsedBits(Opc
, OpN
, T
, Begin
, HII
);
1256 // Even if we don't have bits yet, we could still provide some information
1257 // if the instruction is a lossy shift: the lost bits will be marked as
1260 if (isLossyShiftLeft(MI
, OpN
, LB
, LE
) || isLossyShiftRight(MI
, OpN
, LB
, LE
)) {
1261 assert(MI
.getOperand(OpN
).isReg());
1262 BitTracker::RegisterRef RR
= MI
.getOperand(OpN
);
1263 const TargetRegisterClass
*RC
= HBS::getFinalVRegClass(RR
, MRI
);
1264 uint16_t Width
= HRI
.getRegSizeInBits(*RC
);
1267 T
.set(Begin
, Begin
+Width
);
1268 assert(LB
<= LE
&& LB
< Width
&& LE
<= Width
);
1269 T
.reset(Begin
+LB
, Begin
+LE
);
1277 // Calculates the used bits in RD ("defined register"), and checks if these
1278 // bits in RS ("used register") and RD are identical.
1279 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD
,
1280 BitTracker::RegisterRef RS
) {
1281 const BitTracker::RegisterCell
&DC
= BT
.lookup(RD
.Reg
);
1282 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
1285 if (!HBS::getSubregMask(RD
, DB
, DW
, MRI
))
1288 if (!HBS::getSubregMask(RS
, SB
, SW
, MRI
))
1293 BitVector
Used(DC
.width());
1294 if (!computeUsedBits(RD
.Reg
, Used
))
1297 for (unsigned i
= 0; i
!= DW
; ++i
)
1298 if (Used
[i
+DB
] && DC
[DB
+i
] != SC
[SB
+i
])
1303 bool RedundantInstrElimination::processBlock(MachineBasicBlock
&B
,
1304 const RegisterSet
&) {
1305 if (!BT
.reached(&B
))
1307 bool Changed
= false;
1309 for (auto I
= B
.begin(), E
= B
.end(), NextI
= I
; I
!= E
; ++I
) {
1310 NextI
= std::next(I
);
1311 MachineInstr
*MI
= &*I
;
1313 if (MI
->getOpcode() == TargetOpcode::COPY
)
1315 if (MI
->isPHI() || MI
->hasUnmodeledSideEffects() || MI
->isInlineAsm())
1317 unsigned NumD
= MI
->getDesc().getNumDefs();
1321 BitTracker::RegisterRef RD
= MI
->getOperand(0);
1322 if (!BT
.has(RD
.Reg
))
1324 const BitTracker::RegisterCell
&DC
= BT
.lookup(RD
.Reg
);
1325 auto At
= MachineBasicBlock::iterator(MI
);
1327 // Find a source operand that is equal to the result.
1328 for (auto &Op
: MI
->uses()) {
1331 BitTracker::RegisterRef RS
= Op
;
1332 if (!BT
.has(RS
.Reg
))
1334 if (!HBS::isTransparentCopy(RD
, RS
, MRI
))
1338 if (!HBS::getSubregMask(RS
, BN
, BW
, MRI
))
1341 const BitTracker::RegisterCell
&SC
= BT
.lookup(RS
.Reg
);
1342 if (!usedBitsEqual(RD
, RS
) && !HBS::isEqual(DC
, 0, SC
, BN
, BW
))
1345 // If found, replace the instruction with a COPY.
1346 const DebugLoc
&DL
= MI
->getDebugLoc();
1347 const TargetRegisterClass
*FRC
= HBS::getFinalVRegClass(RD
, MRI
);
1348 unsigned NewR
= MRI
.createVirtualRegister(FRC
);
1349 MachineInstr
*CopyI
=
1350 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::COPY
), NewR
)
1351 .addReg(RS
.Reg
, 0, RS
.Sub
);
1352 HBS::replaceSubWithSub(RD
.Reg
, RD
.Sub
, NewR
, 0, MRI
);
1353 // This pass can create copies between registers that don't have the
1354 // exact same values. Updating the tracker has to involve updating
1355 // all dependent cells. Example:
1356 // %1 = inst %2 ; %1 != %2, but used bits are equal
1358 // %3 = copy %2 ; <- inserted
1359 // ... = %3 ; <- replaced from %2
1360 // Indirectly, we can create a "copy" between %1 and %2 even
1361 // though their exact values do not match.
1373 // Recognize instructions that produce constant values known at compile-time.
1374 // Replace them with register definitions that load these constants directly.
1375 class ConstGeneration
: public Transformation
{
1377 ConstGeneration(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1378 MachineRegisterInfo
&mri
)
1379 : Transformation(true), HII(hii
), MRI(mri
), BT(bt
) {}
1381 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1382 static bool isTfrConst(const MachineInstr
&MI
);
1385 unsigned genTfrConst(const TargetRegisterClass
*RC
, int64_t C
,
1386 MachineBasicBlock
&B
, MachineBasicBlock::iterator At
, DebugLoc
&DL
);
1388 const HexagonInstrInfo
&HII
;
1389 MachineRegisterInfo
&MRI
;
1393 } // end anonymous namespace
1395 bool ConstGeneration::isTfrConst(const MachineInstr
&MI
) {
1396 unsigned Opc
= MI
.getOpcode();
1398 case Hexagon::A2_combineii
:
1399 case Hexagon::A4_combineii
:
1400 case Hexagon::A2_tfrsi
:
1401 case Hexagon::A2_tfrpi
:
1402 case Hexagon::PS_true
:
1403 case Hexagon::PS_false
:
1404 case Hexagon::CONST32
:
1405 case Hexagon::CONST64
:
1411 // Generate a transfer-immediate instruction that is appropriate for the
1412 // register class and the actual value being transferred.
1413 unsigned ConstGeneration::genTfrConst(const TargetRegisterClass
*RC
, int64_t C
,
1414 MachineBasicBlock
&B
, MachineBasicBlock::iterator At
, DebugLoc
&DL
) {
1415 unsigned Reg
= MRI
.createVirtualRegister(RC
);
1416 if (RC
== &Hexagon::IntRegsRegClass
) {
1417 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrsi
), Reg
)
1418 .addImm(int32_t(C
));
1422 if (RC
== &Hexagon::DoubleRegsRegClass
) {
1424 BuildMI(B
, At
, DL
, HII
.get(Hexagon::A2_tfrpi
), Reg
)
1429 unsigned Lo
= Lo_32(C
), Hi
= Hi_32(C
);
1430 if (isInt
<8>(Lo
) || isInt
<8>(Hi
)) {
1431 unsigned Opc
= isInt
<8>(Lo
) ? Hexagon::A2_combineii
1432 : Hexagon::A4_combineii
;
1433 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
)
1434 .addImm(int32_t(Hi
))
1435 .addImm(int32_t(Lo
));
1439 BuildMI(B
, At
, DL
, HII
.get(Hexagon::CONST64
), Reg
)
1444 if (RC
== &Hexagon::PredRegsRegClass
) {
1447 Opc
= Hexagon::PS_false
;
1448 else if ((C
& 0xFF) == 0xFF)
1449 Opc
= Hexagon::PS_true
;
1452 BuildMI(B
, At
, DL
, HII
.get(Opc
), Reg
);
1459 bool ConstGeneration::processBlock(MachineBasicBlock
&B
, const RegisterSet
&) {
1460 if (!BT
.reached(&B
))
1462 bool Changed
= false;
1465 for (auto I
= B
.begin(), E
= B
.end(); I
!= E
; ++I
) {
1469 HBS::getInstrDefs(*I
, Defs
);
1470 if (Defs
.count() != 1)
1472 unsigned DR
= Defs
.find_first();
1473 if (!TargetRegisterInfo::isVirtualRegister(DR
))
1476 const BitTracker::RegisterCell
&DRC
= BT
.lookup(DR
);
1477 if (HBS::getConst(DRC
, 0, DRC
.width(), U
)) {
1479 DebugLoc DL
= I
->getDebugLoc();
1480 auto At
= I
->isPHI() ? B
.getFirstNonPHI() : I
;
1481 unsigned ImmReg
= genTfrConst(MRI
.getRegClass(DR
), C
, B
, At
, DL
);
1483 HBS::replaceReg(DR
, ImmReg
, MRI
);
1484 BT
.put(ImmReg
, DRC
);
1494 // Identify pairs of available registers which hold identical values.
1495 // In such cases, only one of them needs to be calculated, the other one
1496 // will be defined as a copy of the first.
1497 class CopyGeneration
: public Transformation
{
1499 CopyGeneration(BitTracker
&bt
, const HexagonInstrInfo
&hii
,
1500 const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1501 : Transformation(true), HII(hii
), HRI(hri
), MRI(mri
), BT(bt
) {}
1503 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1506 bool findMatch(const BitTracker::RegisterRef
&Inp
,
1507 BitTracker::RegisterRef
&Out
, const RegisterSet
&AVs
);
1509 const HexagonInstrInfo
&HII
;
1510 const HexagonRegisterInfo
&HRI
;
1511 MachineRegisterInfo
&MRI
;
1513 RegisterSet Forbidden
;
1516 // Eliminate register copies RD = RS, by replacing the uses of RD with
1518 class CopyPropagation
: public Transformation
{
1520 CopyPropagation(const HexagonRegisterInfo
&hri
, MachineRegisterInfo
&mri
)
1521 : Transformation(false), HRI(hri
), MRI(mri
) {}
1523 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1525 static bool isCopyReg(unsigned Opc
, bool NoConv
);
1528 bool propagateRegCopy(MachineInstr
&MI
);
1530 const HexagonRegisterInfo
&HRI
;
1531 MachineRegisterInfo
&MRI
;
1534 } // end anonymous namespace
1536 /// Check if there is a register in AVs that is identical to Inp. If so,
1537 /// set Out to the found register. The output may be a pair Reg:Sub.
1538 bool CopyGeneration::findMatch(const BitTracker::RegisterRef
&Inp
,
1539 BitTracker::RegisterRef
&Out
, const RegisterSet
&AVs
) {
1540 if (!BT
.has(Inp
.Reg
))
1542 const BitTracker::RegisterCell
&InpRC
= BT
.lookup(Inp
.Reg
);
1543 auto *FRC
= HBS::getFinalVRegClass(Inp
, MRI
);
1545 if (!HBS::getSubregMask(Inp
, B
, W
, MRI
))
1548 for (unsigned R
= AVs
.find_first(); R
; R
= AVs
.find_next(R
)) {
1549 if (!BT
.has(R
) || Forbidden
[R
])
1551 const BitTracker::RegisterCell
&RC
= BT
.lookup(R
);
1552 unsigned RW
= RC
.width();
1554 if (FRC
!= MRI
.getRegClass(R
))
1556 if (!HBS::isTransparentCopy(R
, Inp
, MRI
))
1558 if (!HBS::isEqual(InpRC
, B
, RC
, 0, W
))
1564 // Check if there is a super-register, whose part (with a subregister)
1565 // is equal to the input.
1566 // Only do double registers for now.
1569 if (MRI
.getRegClass(R
) != &Hexagon::DoubleRegsRegClass
)
1572 if (HBS::isEqual(InpRC
, B
, RC
, 0, W
))
1573 Out
.Sub
= Hexagon::isub_lo
;
1574 else if (HBS::isEqual(InpRC
, B
, RC
, W
, W
))
1575 Out
.Sub
= Hexagon::isub_hi
;
1579 if (HBS::isTransparentCopy(Out
, Inp
, MRI
))
1585 bool CopyGeneration::processBlock(MachineBasicBlock
&B
,
1586 const RegisterSet
&AVs
) {
1587 if (!BT
.reached(&B
))
1589 RegisterSet
AVB(AVs
);
1590 bool Changed
= false;
1593 for (auto I
= B
.begin(), E
= B
.end(), NextI
= I
; I
!= E
;
1594 ++I
, AVB
.insert(Defs
)) {
1595 NextI
= std::next(I
);
1597 HBS::getInstrDefs(*I
, Defs
);
1599 unsigned Opc
= I
->getOpcode();
1600 if (CopyPropagation::isCopyReg(Opc
, false) ||
1601 ConstGeneration::isTfrConst(*I
))
1604 DebugLoc DL
= I
->getDebugLoc();
1605 auto At
= I
->isPHI() ? B
.getFirstNonPHI() : I
;
1607 for (unsigned R
= Defs
.find_first(); R
; R
= Defs
.find_next(R
)) {
1608 BitTracker::RegisterRef MR
;
1609 auto *FRC
= HBS::getFinalVRegClass(R
, MRI
);
1611 if (findMatch(R
, MR
, AVB
)) {
1612 unsigned NewR
= MRI
.createVirtualRegister(FRC
);
1613 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::COPY
), NewR
)
1614 .addReg(MR
.Reg
, 0, MR
.Sub
);
1615 BT
.put(BitTracker::RegisterRef(NewR
), BT
.get(MR
));
1616 HBS::replaceReg(R
, NewR
, MRI
);
1617 Forbidden
.insert(R
);
1621 if (FRC
== &Hexagon::DoubleRegsRegClass
||
1622 FRC
== &Hexagon::HvxWRRegClass
) {
1623 // Try to generate REG_SEQUENCE.
1624 unsigned SubLo
= HRI
.getHexagonSubRegIndex(*FRC
, Hexagon::ps_sub_lo
);
1625 unsigned SubHi
= HRI
.getHexagonSubRegIndex(*FRC
, Hexagon::ps_sub_hi
);
1626 BitTracker::RegisterRef TL
= { R
, SubLo
};
1627 BitTracker::RegisterRef TH
= { R
, SubHi
};
1628 BitTracker::RegisterRef ML
, MH
;
1629 if (findMatch(TL
, ML
, AVB
) && findMatch(TH
, MH
, AVB
)) {
1630 auto *FRC
= HBS::getFinalVRegClass(R
, MRI
);
1631 unsigned NewR
= MRI
.createVirtualRegister(FRC
);
1632 BuildMI(B
, At
, DL
, HII
.get(TargetOpcode::REG_SEQUENCE
), NewR
)
1633 .addReg(ML
.Reg
, 0, ML
.Sub
)
1635 .addReg(MH
.Reg
, 0, MH
.Sub
)
1637 BT
.put(BitTracker::RegisterRef(NewR
), BT
.get(R
));
1638 HBS::replaceReg(R
, NewR
, MRI
);
1639 Forbidden
.insert(R
);
1648 bool CopyPropagation::isCopyReg(unsigned Opc
, bool NoConv
) {
1650 case TargetOpcode::COPY
:
1651 case TargetOpcode::REG_SEQUENCE
:
1652 case Hexagon::A4_combineir
:
1653 case Hexagon::A4_combineri
:
1655 case Hexagon::A2_tfr
:
1656 case Hexagon::A2_tfrp
:
1657 case Hexagon::A2_combinew
:
1658 case Hexagon::V6_vcombine
:
1666 bool CopyPropagation::propagateRegCopy(MachineInstr
&MI
) {
1667 bool Changed
= false;
1668 unsigned Opc
= MI
.getOpcode();
1669 BitTracker::RegisterRef RD
= MI
.getOperand(0);
1670 assert(MI
.getOperand(0).getSubReg() == 0);
1673 case TargetOpcode::COPY
:
1674 case Hexagon::A2_tfr
:
1675 case Hexagon::A2_tfrp
: {
1676 BitTracker::RegisterRef RS
= MI
.getOperand(1);
1677 if (!HBS::isTransparentCopy(RD
, RS
, MRI
))
1680 Changed
= HBS::replaceRegWithSub(RD
.Reg
, RS
.Reg
, RS
.Sub
, MRI
);
1682 Changed
= HBS::replaceReg(RD
.Reg
, RS
.Reg
, MRI
);
1685 case TargetOpcode::REG_SEQUENCE
: {
1686 BitTracker::RegisterRef SL
, SH
;
1687 if (HBS::parseRegSequence(MI
, SL
, SH
, MRI
)) {
1688 const TargetRegisterClass
&RC
= *MRI
.getRegClass(RD
.Reg
);
1689 unsigned SubLo
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_lo
);
1690 unsigned SubHi
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_hi
);
1691 Changed
= HBS::replaceSubWithSub(RD
.Reg
, SubLo
, SL
.Reg
, SL
.Sub
, MRI
);
1692 Changed
|= HBS::replaceSubWithSub(RD
.Reg
, SubHi
, SH
.Reg
, SH
.Sub
, MRI
);
1696 case Hexagon::A2_combinew
:
1697 case Hexagon::V6_vcombine
: {
1698 const TargetRegisterClass
&RC
= *MRI
.getRegClass(RD
.Reg
);
1699 unsigned SubLo
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_lo
);
1700 unsigned SubHi
= HRI
.getHexagonSubRegIndex(RC
, Hexagon::ps_sub_hi
);
1701 BitTracker::RegisterRef RH
= MI
.getOperand(1), RL
= MI
.getOperand(2);
1702 Changed
= HBS::replaceSubWithSub(RD
.Reg
, SubLo
, RL
.Reg
, RL
.Sub
, MRI
);
1703 Changed
|= HBS::replaceSubWithSub(RD
.Reg
, SubHi
, RH
.Reg
, RH
.Sub
, MRI
);
1706 case Hexagon::A4_combineir
:
1707 case Hexagon::A4_combineri
: {
1708 unsigned SrcX
= (Opc
== Hexagon::A4_combineir
) ? 2 : 1;
1709 unsigned Sub
= (Opc
== Hexagon::A4_combineir
) ? Hexagon::isub_lo
1711 BitTracker::RegisterRef RS
= MI
.getOperand(SrcX
);
1712 Changed
= HBS::replaceSubWithSub(RD
.Reg
, Sub
, RS
.Reg
, RS
.Sub
, MRI
);
1719 bool CopyPropagation::processBlock(MachineBasicBlock
&B
, const RegisterSet
&) {
1720 std::vector
<MachineInstr
*> Instrs
;
1721 for (auto I
= B
.rbegin(), E
= B
.rend(); I
!= E
; ++I
)
1722 Instrs
.push_back(&*I
);
1724 bool Changed
= false;
1725 for (auto I
: Instrs
) {
1726 unsigned Opc
= I
->getOpcode();
1727 if (!CopyPropagation::isCopyReg(Opc
, true))
1729 Changed
|= propagateRegCopy(*I
);
1737 // Recognize patterns that can be simplified and replace them with the
1739 // This is by no means complete
1740 class BitSimplification
: public Transformation
{
1742 BitSimplification(BitTracker
&bt
, const MachineDominatorTree
&mdt
,
1743 const HexagonInstrInfo
&hii
, const HexagonRegisterInfo
&hri
,
1744 MachineRegisterInfo
&mri
, MachineFunction
&mf
)
1745 : Transformation(true), MDT(mdt
), HII(hii
), HRI(hri
), MRI(mri
),
1748 bool processBlock(MachineBasicBlock
&B
, const RegisterSet
&AVs
) override
;
1751 struct RegHalf
: public BitTracker::RegisterRef
{
1752 bool Low
; // Low/High halfword.
1755 bool matchHalf(unsigned SelfR
, const BitTracker::RegisterCell
&RC
,
1756 unsigned B
, RegHalf
&RH
);
1757 bool validateReg(BitTracker::RegisterRef R
, unsigned Opc
, unsigned OpNum
);
1759 bool matchPackhl(unsigned SelfR
, const BitTracker::RegisterCell
&RC
,
1760 BitTracker::RegisterRef
&Rs
, BitTracker::RegisterRef
&Rt
);
1761 unsigned getCombineOpcode(bool HLow
, bool LLow
);
1763 bool genStoreUpperHalf(MachineInstr
*MI
);
1764 bool genStoreImmediate(MachineInstr
*MI
);
1765 bool genPackhl(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1766 const BitTracker::RegisterCell
&RC
);
1767 bool genExtractHalf(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1768 const BitTracker::RegisterCell
&RC
);
1769 bool genCombineHalf(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1770 const BitTracker::RegisterCell
&RC
);
1771 bool genExtractLow(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1772 const BitTracker::RegisterCell
&RC
);
1773 bool genBitSplit(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1774 const BitTracker::RegisterCell
&RC
, const RegisterSet
&AVs
);
1775 bool simplifyTstbit(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1776 const BitTracker::RegisterCell
&RC
);
1777 bool simplifyExtractLow(MachineInstr
*MI
, BitTracker::RegisterRef RD
,
1778 const BitTracker::RegisterCell
&RC
, const RegisterSet
&AVs
);
1779 bool simplifyRCmp0(MachineInstr
*MI
, BitTracker::RegisterRef RD
);
1781 // Cache of created instructions to avoid creating duplicates.
1782 // XXX Currently only used by genBitSplit.
1783 std::vector
<MachineInstr
*> NewMIs
;
1785 const MachineDominatorTree
&MDT
;
1786 const HexagonInstrInfo
&HII
;
1787 const HexagonRegisterInfo
&HRI
;
1788 MachineRegisterInfo
&MRI
;
1789 MachineFunction
&MF
;
1793 } // end anonymous namespace
1795 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
1796 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
1797 // set the information about the found register in RH.
1798 bool BitSimplification::matchHalf(unsigned SelfR
,
1799 const BitTracker::RegisterCell
&RC
, unsigned B
, RegHalf
&RH
) {
1800 // XXX This could be searching in the set of available registers, in case
1801 // the match is not exact.
1803 // Match 16-bit chunks, where the RC[B..B+15] references exactly one
1804 // register and all the bits B..B+15 match between RC and the register.
1805 // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
1806 // and RC = { [0]:0 [1-15]:v1[1-15]... }.
1809 while (I
< B
+16 && RC
[I
].num())
1814 unsigned Reg
= RC
[I
].RefI
.Reg
;
1815 unsigned P
= RC
[I
].RefI
.Pos
; // The RefI.Pos will be advanced by I-B.
1818 unsigned Pos
= P
- (I
-B
);
1820 if (Reg
== 0 || Reg
== SelfR
) // Don't match "self".
1822 if (!TargetRegisterInfo::isVirtualRegister(Reg
))
1827 const BitTracker::RegisterCell
&SC
= BT
.lookup(Reg
);
1828 if (Pos
+16 > SC
.width())
1831 for (unsigned i
= 0; i
< 16; ++i
) {
1832 const BitTracker::BitValue
&RV
= RC
[i
+B
];
1833 if (RV
.Type
== BitTracker::BitValue::Ref
) {
1834 if (RV
.RefI
.Reg
!= Reg
)
1836 if (RV
.RefI
.Pos
!= i
+Pos
)
1840 if (RC
[i
+B
] != SC
[i
+Pos
])
1847 Sub
= Hexagon::isub_lo
;
1851 Sub
= Hexagon::isub_lo
;
1855 Sub
= Hexagon::isub_hi
;
1859 Sub
= Hexagon::isub_hi
;
1869 // If the subregister is not valid with the register, set it to 0.
1870 if (!HBS::getFinalVRegClass(RH
, MRI
))
1876 bool BitSimplification::validateReg(BitTracker::RegisterRef R
, unsigned Opc
,
1878 auto *OpRC
= HII
.getRegClass(HII
.get(Opc
), OpNum
, &HRI
, MF
);
1879 auto *RRC
= HBS::getFinalVRegClass(R
, MRI
);
1880 return OpRC
->hasSubClassEq(RRC
);
1883 // Check if RC matches the pattern of a S2_packhl. If so, return true and
1884 // set the inputs Rs and Rt.
1885 bool BitSimplification::matchPackhl(unsigned SelfR
,
1886 const BitTracker::RegisterCell
&RC
, BitTracker::RegisterRef
&Rs
,
1887 BitTracker::RegisterRef
&Rt
) {
1888 RegHalf L1
, H1
, L2
, H2
;
1890 if (!matchHalf(SelfR
, RC
, 0, L2
) || !matchHalf(SelfR
, RC
, 16, L1
))
1892 if (!matchHalf(SelfR
, RC
, 32, H2
) || !matchHalf(SelfR
, RC
, 48, H1
))
1895 // Rs = H1.L1, Rt = H2.L2
1896 if (H1
.Reg
!= L1
.Reg
|| H1
.Sub
!= L1
.Sub
|| H1
.Low
|| !L1
.Low
)
1898 if (H2
.Reg
!= L2
.Reg
|| H2
.Sub
!= L2
.Sub
|| H2
.Low
|| !L2
.Low
)
1906 unsigned BitSimplification::getCombineOpcode(bool HLow
, bool LLow
) {
1907 return HLow
? LLow
? Hexagon::A2_combine_ll
1908 : Hexagon::A2_combine_lh
1909 : LLow
? Hexagon::A2_combine_hl
1910 : Hexagon::A2_combine_hh
;
1913 // If MI stores the upper halfword of a register (potentially obtained via
1914 // shifts or extracts), replace it with a storerf instruction. This could
1915 // cause the "extraction" code to become dead.
1916 bool BitSimplification::genStoreUpperHalf(MachineInstr
*MI
) {
1917 unsigned Opc
= MI
->getOpcode();
1918 if (Opc
!= Hexagon::S2_storerh_io
)
1921 MachineOperand
&ValOp
= MI
->getOperand(2);
1922 BitTracker::RegisterRef RS
= ValOp
;
1923 if (!BT
.has(RS
.Reg
))
1925 const BitTracker::RegisterCell
&RC
= BT
.lookup(RS
.Reg
);
1927 if (!matchHalf(0, RC
, 0, H
))
1931 MI
->setDesc(HII
.get(Hexagon::S2_storerf_io
));
1932 ValOp
.setReg(H
.Reg
);
1933 ValOp
.setSubReg(H
.Sub
);
// If MI stores a value known at compile-time, and the value is within a range
// that avoids using constant-extenders, replace it with a store-immediate.
bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  unsigned Align = 0;
  switch (Opc) {
    case Hexagon::S2_storeri_io:
      Align++;
      LLVM_FALLTHROUGH;
    case Hexagon::S2_storerh_io:
      Align++;
      LLVM_FALLTHROUGH;
    case Hexagon::S2_storerb_io:
      break;
    default:
      return false;
  }

  // Avoid stores to frame-indices (due to an unknown offset).
  if (!MI->getOperand(0).isReg())
    return false;
  MachineOperand &OffOp = MI->getOperand(1);
  if (!OffOp.isImm())
    return false;

  int64_t Off = OffOp.getImm();
  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
    return false;
  // Source register:
  BitTracker::RegisterRef RS = MI->getOperand(2);
  if (!BT.has(RS.Reg))
    return false;
  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
  uint64_t U;
  if (!HBS::getConst(RC, 0, RC.width(), U))
    return false;

  // Only consider 8-bit values to avoid constant-extenders.
  int V;
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      V = int8_t(U);
      break;
    case Hexagon::S2_storerh_io:
      V = int16_t(U);
      break;
    case Hexagon::S2_storeri_io:
      V = int32_t(U);
      break;
    default:
      // Opc is already checked above to be one of the three store instructions.
      // This silences a -Wuninitialized false positive on GCC 5.4.
      llvm_unreachable("Unexpected store opcode");
  }
  if (V < -128 || V > 127)
    return false;

  MI->RemoveOperand(2);
  switch (Opc) {
    case Hexagon::S2_storerb_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
      break;
    case Hexagon::S2_storerh_io:
      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
      break;
    case Hexagon::S2_storeri_io:
      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
      break;
  }
  MI->addOperand(MachineOperand::CreateImm(V));
  return true;
}
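
// Illustrative sketch (hypothetical operands): if the bit tracker proves that
// %v is always 17, then
//   S2_storeri_io %base, 8, %v
// can become
//   S4_storeiri_io %base, 8, 17
// provided the offset fits the u6:2 addressing mode and the value fits in
// 8 bits, so that no constant extender is required.
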
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
// dead.
bool BitSimplification::genPackhl(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc == Hexagon::S2_packhl)
    return false;
  BitTracker::RegisterRef Rs, Rt;
  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
    return false;
  if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
      !validateReg(Rt, Hexagon::S2_packhl, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
  DebugLoc DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
      .addReg(Rs.Reg, 0, Rs.Sub)
      .addReg(Rt.Reg, 0, Rt.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
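
// Illustrative sketch (hypothetical registers): if the tracked cell of a
// 64-bit def %d has the shape
//   %d[63:48] = %s.h, %d[47:32] = %t.h, %d[31:16] = %s.l, %d[15:0] = %t.l
// then %d can be generated directly as
//   %d = S2_packhl %s, %t
// and the instructions that assembled %d piecewise may become dead.
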
// If MI produces a halfword of the input in the low half of the output,
// replace it with zero-extend or extractu.
bool BitSimplification::genExtractHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L;
  // Check for halfword in low 16 bits, zeros elsewhere.
  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
    return false;

  unsigned Opc = MI->getOpcode();
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Prefer zxth, since zxth can go in any slot, while extractu only in
  // slots 2 and 3.
  unsigned NewR = 0;
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  if (L.Low && Opc != Hexagon::A2_zxth) {
    if (validateReg(L, Hexagon::A2_zxth, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
          .addReg(L.Reg, 0, L.Sub);
    }
  } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
    if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
      NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
          .addReg(L.Reg, 0, L.Sub)
          .addImm(16);
    }
  }
  if (!NewR)
    return false;
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
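
// Illustrative sketch (hypothetical registers): if %d is known to hold a
// halfword of %s in bits [15:0] and zeros in bits [31:16], its definition
// can be replaced with either
//   %d = A2_zxth %s            ; when the low half of %s is the source
//   %d = S2_lsr_i_r %s, 16     ; when the high half of %s is the source
// depending on which half the tracked bits refer to.
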
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// combine.
bool BitSimplification::genCombineHalf(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  RegHalf L, H;
  // Check for combine h/l
  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
    return false;
  // Do nothing if this is just a reg copy.
  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
    return false;

  unsigned Opc = MI->getOpcode();
  unsigned COpc = getCombineOpcode(H.Low, L.Low);
  if (COpc == Opc)
    return false;
  if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
    return false;

  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  BuildMI(B, At, DL, HII.get(COpc), NewR)
      .addReg(H.Reg, 0, H.Sub)
      .addReg(L.Reg, 0, L.Sub);
  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
  BT.put(BitTracker::RegisterRef(NewR), RC);
  return true;
}
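
// Illustrative sketch (hypothetical registers): if the tracked cell shows
//   %d[31:16] = %a.l and %d[15:0] = %b.h
// then the defining instruction can be replaced with the single combine
//   %d = A2_combine_lh %a, %b
// where the exact A2_combine_* opcode comes from getCombineOpcode above.
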
// If MI resets high bits of a register and keeps the lower ones, replace it
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
bool BitSimplification::genExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A2_zxtb:
    case Hexagon::A2_zxth:
    case Hexagon::S2_extractu:
      return false;
  }
  if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
    int32_t Imm = MI->getOperand(2).getImm();
    if (isInt<10>(Imm))
      return false;
  }

  if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
    return false;
  unsigned W = RC.width();
  while (W > 0 && RC[W-1].is(0))
    W--;
  if (W == 0 || W == RC.width())
    return false;
  unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
                  : (W == 16) ? Hexagon::A2_zxth
                  : (W < 10)  ? Hexagon::A2_andir
                              : Hexagon::S2_extractu;
  MachineBasicBlock &B = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  for (auto &Op : MI->uses()) {
    if (!Op.isReg())
      continue;
    BitTracker::RegisterRef RS = Op;
    if (!BT.has(RS.Reg))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
    unsigned BN, BW;
    if (!HBS::getSubregMask(RS, BN, BW, MRI))
      continue;
    if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
      continue;
    if (!validateReg(RS, NewOpc, 1))
      continue;

    unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
                  .addReg(RS.Reg, 0, RS.Sub);
    if (NewOpc == Hexagon::A2_andir)
      MIB.addImm((1 << W) - 1);
    else if (NewOpc == Hexagon::S2_extractu)
      MIB.addImm(W).addImm(0);
    HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    return true;
  }

  return false;
}
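
// Illustrative summary of the opcode choice above (W is the number of
// significant low bits, the registers are hypothetical):
//   W == 8  : %d = A2_zxtb %s
//   W == 16 : %d = A2_zxth %s
//   W <  10 : %d = A2_andir %s, (1 << W) - 1
//   else    : %d = S2_extractu %s, W, 0
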
bool BitSimplification::genBitSplit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenBitSplit)
    return false;
  if (MaxBitSplit.getNumOccurrences()) {
    if (CountBitSplit >= MaxBitSplit)
      return false;
  }

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case Hexagon::A4_bitsplit:
    case Hexagon::A4_bitspliti:
      return false;
  }

  unsigned W = RC.width();
  if (W != 32)
    return false;

  auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
    unsigned Z = C.width();
    while (Z > 0 && C[Z-1].is(0))
      --Z;
    return C.width() - Z;
  };

  // Count the number of leading zeros in the target RC.
  unsigned Z = ctlz(RC);
  if (Z == 0 || Z == W)
    return false;

  // A simplistic analysis: assume the source register (the one being split)
  // is fully unknown, and that all its bits are self-references.
  const BitTracker::BitValue &B0 = RC[0];
  if (B0.Type != BitTracker::BitValue::Ref)
    return false;

  unsigned SrcR = B0.RefI.Reg;
  unsigned SrcSR = 0;
  unsigned Pos = B0.RefI.Pos;

  // All the non-zero bits should be consecutive bits from the same register.
  for (unsigned i = 1; i < W-Z; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      return false;
    if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
      return false;
  }
  // Now, find the other bitfield among AVs.
  for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
    // The number of leading zeros here should be the number of trailing
    // non-zeros in RC.
    unsigned SRC = MRI.getRegClass(S)->getID();
    if (SRC != Hexagon::IntRegsRegClassID &&
        SRC != Hexagon::DoubleRegsRegClassID)
      continue;
    if (!BT.has(S))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(S);
    if (SC.width() != W || ctlz(SC) != W-Z)
      continue;
    // The Z lower bits should now match SrcR.
    const BitTracker::BitValue &S0 = SC[0];
    if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
      continue;
    unsigned P = S0.RefI.Pos;

    if (Pos <= P && (Pos + W-Z) != P)
      continue;
    if (P < Pos && (P + Z) != Pos)
      continue;
    // The starting bitfield position must be at a subregister boundary.
    if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
      continue;

    unsigned I;
    for (I = 1; I < Z; ++I) {
      const BitTracker::BitValue &V = SC[I];
      if (V.Type != BitTracker::BitValue::Ref)
        break;
      if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
        break;
    }
    if (I != Z)
      continue;

    // Generate bitsplit where S is defined.
    if (MaxBitSplit.getNumOccurrences())
      CountBitSplit++;
    MachineInstr *DefS = MRI.getVRegDef(S);
    assert(DefS != nullptr);
    DebugLoc DL = DefS->getDebugLoc();
    MachineBasicBlock &B = *DefS->getParent();
    auto At = DefS->isPHI() ? B.getFirstNonPHI()
                            : MachineBasicBlock::iterator(DefS);
    if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
      SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
    if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
      continue;
    unsigned ImmOp = Pos <= P ? W-Z : Z;

    // Find an existing bitsplit instruction if one already exists.
    unsigned NewR = 0;
    for (MachineInstr *In : NewMIs) {
      if (In->getOpcode() != Hexagon::A4_bitspliti)
        continue;
      MachineOperand &Op1 = In->getOperand(1);
      if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
        continue;
      if (In->getOperand(2).getImm() != ImmOp)
        continue;
      // Check if the target register is available here.
      MachineOperand &Op0 = In->getOperand(0);
      MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
      assert(DefI != nullptr);
      if (!MDT.dominates(DefI, &*At))
        continue;

      // Found one that can be reused.
      assert(Op0.getSubReg() == 0);
      NewR = Op0.getReg();
      break;
    }
    if (!NewR) {
      NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
      auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
                      .addReg(SrcR, 0, SrcSR)
                      .addImm(ImmOp);
      NewMIs.push_back(NewBS);
    }
    if (Pos <= P) {
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_hi, MRI);
    } else {
      HBS::replaceRegWithSub(S,      NewR, Hexagon::isub_lo, MRI);
      HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
    }
    return true;
  }

  return false;
}
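
// Illustrative sketch (hypothetical registers): if %lo is known to be bits
// [13:0] of some %src (zero-extended) and an available %hi is known to be
// bits [31:14] of the same %src (zero-extended), both can be produced by one
//   %nr = A4_bitspliti %src, 14
// with %lo rewritten to %nr.isub_lo and %hi rewritten to %nr.isub_hi.
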
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
//   %2 = S2_lsr_i_r %1, 5
//   %3 = S2_tstbit_i %2, 0
// can be rewritten as
//   %3 = S2_tstbit_i %1, 5
bool BitSimplification::simplifyTstbit(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::S2_tstbit_i)
    return false;

  unsigned BN = MI->getOperand(2).getImm();
  BitTracker::RegisterRef RS = MI->getOperand(1);
  unsigned F, W;
  DebugLoc DL = MI->getDebugLoc();
  if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
    return false;
  MachineBasicBlock &B = *MI->getParent();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);

  const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
  const BitTracker::BitValue &V = SC[F+BN];
  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
    const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
    // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
    // a double register, need to use a subregister and adjust bit
    // number.
    unsigned P = std::numeric_limits<unsigned>::max();
    BitTracker::RegisterRef RR(V.RefI.Reg, 0);
    if (TC == &Hexagon::DoubleRegsRegClass) {
      P = V.RefI.Pos;
      RR.Sub = Hexagon::isub_lo;
      if (P >= 32) {
        P -= 32;
        RR.Sub = Hexagon::isub_hi;
      }
    } else if (TC == &Hexagon::IntRegsRegClass) {
      P = V.RefI.Pos;
    }
    if (P != std::numeric_limits<unsigned>::max()) {
      unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
          .addReg(RR.Reg, 0, RR.Sub)
          .addImm(P);
      HBS::replaceReg(RD.Reg, NewR, MRI);
      BT.put(BitTracker::RegisterRef(NewR), RC);
      return true;
    }
  } else if (V.is(0) || V.is(1)) {
    unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
    BuildMI(B, At, DL, HII.get(NewOpc), NewR);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    return true;
  }

  return false;
}
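
// Illustrative sketch of the constant case (hypothetical registers): if the
// tested bit is known at compile time, e.g. bit 3 of %r is known to be 1,
//   %p = S2_tstbit_i %r, 3
// is replaced with
//   %p = PS_true
// and with PS_false when the bit is known to be 0.
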
// Detect whether RD is a bitfield extract (sign- or zero-extended) of
// some register from the AVs set. Create a new corresponding instruction
// at the location of MI. The intent is to recognize situations where
// a sequence of instructions performs an operation that is equivalent to
// an extract operation, such as a shift left followed by a shift right.
bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
      const RegisterSet &AVs) {
  if (!GenExtract)
    return false;
  if (MaxExtract.getNumOccurrences()) {
    if (CountExtract >= MaxExtract)
      return false;
    CountExtract++;
  }

  unsigned W = RC.width();
  unsigned RW = W;
  unsigned Len;
  bool Signed;

  // The code is mostly class-independent, except for the part that generates
  // the extract instruction, and establishes the source register (in case it
  // needs to use a subregister).
  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  // If the cell has a form of 00..0xx..x with k zeros and n remaining
  // bits, this could be an extractu of the n bits, but it could also be
  // an extractu of a longer field which happens to have 0s in the top
  // bit positions.
  // The same logic applies to sign-extended fields.
  //
  // Do not check for the extended extracts, since it would expand the
  // search space quite a bit. The search may be expensive as it is.

  const BitTracker::BitValue &TopV = RC[W-1];

  // Eliminate candidates that have self-referential bits, since they
  // cannot be extracts from other registers. Also, skip registers that
  // have compile-time constant values.
  bool IsConst = true;
  for (unsigned I = 0; I != W; ++I) {
    const BitTracker::BitValue &V = RC[I];
    if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
      return false;
    IsConst = IsConst && (V.is(0) || V.is(1));
  }
  if (IsConst)
    return false;

  if (TopV.is(0) || TopV.is(1)) {
    bool S = TopV.is(1);
    for (--W; W > 0 && RC[W-1].is(S); --W)
      ;
    Len = W;
    Signed = S;
    // The sign bit must be a part of the field being extended.
    if (Signed)
      ++Len;
  } else {
    // This could still be a sign-extended extract.
    assert(TopV.Type == BitTracker::BitValue::Ref);
    if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
      return false;
    for (--W; W > 0 && RC[W-1] == TopV; --W)
      ;
    // The top bits of RC are copies of TopV. One occurrence of TopV will
    // be a part of the field.
    Len = W + 1;
    Signed = true;
  }

  // This would be just a copy. It should be handled elsewhere.
  if (Len == RW)
    return false;

  LLVM_DEBUG({
    dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
           << ", MI: " << *MI;
    dbgs() << "Cell: " << RC << '\n';
    dbgs() << "Expected bitfield size: " << Len << " bits, "
           << (Signed ? "sign" : "zero") << "-extended\n";
  });

  bool Changed = false;
  for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
    if (!BT.has(R))
      continue;
    const BitTracker::RegisterCell &SC = BT.lookup(R);
    unsigned SW = SC.width();

    // The source can be longer than the destination, as long as its size is
    // a multiple of the size of the destination. Also, we would need to be
    // able to refer to the subregister in the source that would be of the
    // same size as the destination, but only check the sizes here.
    if (SW < RW || (SW % RW) != 0)
      continue;

    // The field can start at any offset in SC as long as it contains Len
    // bits and does not cross subregister boundary (if the source register
    // is longer than the destination).
    unsigned Off = 0;
    while (Off <= SW-Len) {
      unsigned OE = (Off+Len)/RW;
      if (OE != Off/RW) {
        // The assumption here is that if the source (R) is longer than the
        // destination, then the destination is a sequence of words of
        // size RW, and each such word in R can be accessed via a subregister.
        //
        // If the beginning and the end of the field cross the subregister
        // boundary, advance to the next subregister.
        Off = OE*RW;
        continue;
      }
      if (HBS::isEqual(RC, 0, SC, Off, Len))
        break;
      ++Off;
    }

    if (Off > SW-Len)
      continue;

    // Found a match.
    unsigned ExtOpc = 0;
    if (Off == 0) {
      if (Len == 8)
        ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
      else if (Len == 16)
        ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
      else if (Len < 10 && !Signed)
        ExtOpc = Hexagon::A2_andir;
    }
    if (ExtOpc == 0) {
      ExtOpc =
        Signed ? (RW == 32 ? Hexagon::S4_extract  : Hexagon::S4_extractp)
               : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
    }
    unsigned SR = 0;
    // This only recognizes isub_lo and isub_hi.
    if (RW != SW && RW*2 != SW)
      continue;
    if (RW != SW)
      SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
    Off = Off % RW;

    if (!validateReg({R,SR}, ExtOpc, 1))
      continue;

    // Don't generate the same instruction as the one being optimized.
    if (MI->getOpcode() == ExtOpc) {
      // All possible ExtOpc's have the source in operand(1).
      const MachineOperand &SrcOp = MI->getOperand(1);
      if (SrcOp.getReg() == R)
        continue;
    }

    DebugLoc DL = MI->getDebugLoc();
    MachineBasicBlock &B = *MI->getParent();
    unsigned NewR = MRI.createVirtualRegister(FRC);
    auto At = MI->isPHI() ? B.getFirstNonPHI()
                          : MachineBasicBlock::iterator(MI);
    auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
                  .addReg(R, 0, SR);
    switch (ExtOpc) {
      case Hexagon::A2_sxtb:
      case Hexagon::A2_zxtb:
      case Hexagon::A2_sxth:
      case Hexagon::A2_zxth:
        break;
      case Hexagon::A2_andir:
        MIB.addImm((1u << Len) - 1);
        break;
      case Hexagon::S4_extract:
      case Hexagon::S2_extractu:
      case Hexagon::S4_extractp:
      case Hexagon::S2_extractup:
        MIB.addImm(Len)
           .addImm(Off);
        break;
      default:
        llvm_unreachable("Unexpected opcode");
    }

    HBS::replaceReg(RD.Reg, NewR, MRI);
    BT.put(BitTracker::RegisterRef(NewR), RC);
    Changed = true;
    break;
  }

  return Changed;
}
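
// Illustrative sketch (hypothetical registers): a shift pair that isolates
// a bitfield, e.g.
//   %t = S2_asl_i_r %s, 22
//   %d = S2_lsr_i_r %t, 22
// leaves %d equal to the low 10 bits of %s, zero-extended, which the code
// above regenerates directly as
//   %d = S2_extractu %s, 10, 0
// (or as a zxtb/zxth/andir when the field width permits).
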
bool BitSimplification::simplifyRCmp0(MachineInstr *MI,
      BitTracker::RegisterRef RD) {
  unsigned Opc = MI->getOpcode();
  if (Opc != Hexagon::A4_rcmpeqi && Opc != Hexagon::A4_rcmpneqi)
    return false;
  MachineOperand &CmpOp = MI->getOperand(2);
  if (!CmpOp.isImm() || CmpOp.getImm() != 0)
    return false;

  const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
  if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
    return false;
  assert(RD.Sub == 0);

  MachineBasicBlock &B = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  auto At = MI->isPHI() ? B.getFirstNonPHI()
                        : MachineBasicBlock::iterator(MI);
  bool KnownZ = true;
  bool KnownNZ = false;

  BitTracker::RegisterRef SR = MI->getOperand(1);
  if (!BT.has(SR.Reg))
    return false;
  const BitTracker::RegisterCell &SC = BT.lookup(SR.Reg);
  unsigned F, W;
  if (!HBS::getSubregMask(SR, F, W, MRI))
    return false;

  for (uint16_t I = F; I != F+W; ++I) {
    const BitTracker::BitValue &V = SC[I];
    if (!V.is(0))
      KnownZ = false;
    if (V.is(1))
      KnownNZ = true;
  }

  auto ReplaceWithConst = [&] (int C) {
    unsigned NewR = MRI.createVirtualRegister(FRC);
    BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), NewR)
        .addImm(C);
    HBS::replaceReg(RD.Reg, NewR, MRI);
    BitTracker::RegisterCell NewRC(W);
    for (uint16_t I = 0; I != W; ++I) {
      NewRC[I] = BitTracker::BitValue(C & 1);
      C = unsigned(C) >> 1;
    }
    BT.put(BitTracker::RegisterRef(NewR), NewRC);
    return true;
  };

  auto IsNonZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return true;
    if (Op.isImm())
      return Op.getImm() != 0;
    if (Op.isCImm())
      return !Op.getCImm()->isZero();
    if (Op.isFPImm())
      return !Op.getFPImm()->isZero();
    return false;
  };

  auto IsZero = [] (const MachineOperand &Op) {
    if (Op.isGlobal() || Op.isBlockAddress())
      return false;
    if (Op.isImm())
      return Op.getImm() == 0;
    if (Op.isCImm())
      return Op.getCImm()->isZero();
    if (Op.isFPImm())
      return Op.getFPImm()->isZero();
    return false;
  };
  // If the source register is known to be 0 or non-0, the comparison can
  // be folded to a load of a constant.
  if (KnownZ || KnownNZ) {
    assert(KnownZ != KnownNZ && "Register cannot be both 0 and non-0");
    return ReplaceWithConst(KnownZ == (Opc == Hexagon::A4_rcmpeqi));
  }

  // Special case: if the compare comes from a C2_muxii, then we know the
  // two possible constants that can be the source value.
  MachineInstr *InpDef = MRI.getVRegDef(SR.Reg);
  if (!InpDef)
    return false;
  if (SR.Sub == 0 && InpDef->getOpcode() == Hexagon::C2_muxii) {
    MachineOperand &Src1 = InpDef->getOperand(2);
    MachineOperand &Src2 = InpDef->getOperand(3);
    // Check if both are non-zero.
    bool KnownNZ1 = IsNonZero(Src1), KnownNZ2 = IsNonZero(Src2);
    if (KnownNZ1 && KnownNZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpneqi);
    // Check if both are zero.
    bool KnownZ1 = IsZero(Src1), KnownZ2 = IsZero(Src2);
    if (KnownZ1 && KnownZ2)
      return ReplaceWithConst(Opc == Hexagon::A4_rcmpeqi);

    // If for both operands we know that they are either 0 or non-0,
    // replace the comparison with a C2_muxii, using the same predicate
    // register, but with operands substituted with 0/1 accordingly.
    if ((KnownZ1 || KnownNZ1) && (KnownZ2 || KnownNZ2)) {
      unsigned NewR = MRI.createVirtualRegister(FRC);
      BuildMI(B, At, DL, HII.get(Hexagon::C2_muxii), NewR)
          .addReg(InpDef->getOperand(1).getReg())
          .addImm(KnownZ1 == (Opc == Hexagon::A4_rcmpeqi))
          .addImm(KnownZ2 == (Opc == Hexagon::A4_rcmpeqi));
      HBS::replaceReg(RD.Reg, NewR, MRI);
      // Create a new cell with only the least significant bit unknown.
      BitTracker::RegisterCell NewRC(W);
      NewRC[0] = BitTracker::BitValue::self();
      NewRC.fill(1, W, BitTracker::BitValue::Zero);
      BT.put(BitTracker::RegisterRef(NewR), NewRC);
      return true;
    }
  }

  return false;
}
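
// Illustrative sketch (hypothetical registers): if every bit of %s is known
// to be zero, then
//   %d = A4_rcmpeqi %s, 0
// folds to
//   %d = A2_tfrsi 1
// and the A4_rcmpneqi form folds to a transfer of 0 in the same situation.
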
bool BitSimplification::processBlock(MachineBasicBlock &B,
      const RegisterSet &AVs) {
  if (!BT.reached(&B))
    return false;
  bool Changed = false;
  RegisterSet AVB = AVs;
  RegisterSet Defs;

  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
    MachineInstr *MI = &*I;
    Defs.clear();
    HBS::getInstrDefs(*MI, Defs);

    unsigned Opc = MI->getOpcode();
    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
      continue;

    if (MI->mayStore()) {
      bool T = genStoreUpperHalf(MI);
      T = T || genStoreImmediate(MI);
      Changed |= T;
      continue;
    }

    if (Defs.count() != 1)
      continue;
    const MachineOperand &Op0 = MI->getOperand(0);
    if (!Op0.isReg() || !Op0.isDef())
      continue;
    BitTracker::RegisterRef RD = Op0;
    if (!BT.has(RD.Reg))
      continue;
    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);

    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
      bool T = genPackhl(MI, RD, RC);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
      bool T = genBitSplit(MI, RD, RC, AVB);
      T = T || simplifyExtractLow(MI, RD, RC, AVB);
      T = T || genExtractHalf(MI, RD, RC);
      T = T || genCombineHalf(MI, RD, RC);
      T = T || genExtractLow(MI, RD, RC);
      T = T || simplifyRCmp0(MI, RD);
      Changed |= T;
      continue;
    }

    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
      bool T = simplifyTstbit(MI, RD, RC);
      Changed |= T;
      continue;
    }
  }
  return Changed;
}
bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HRI = *HST.getRegisterInfo();
  auto &HII = *HST.getInstrInfo();

  MDT = &getAnalysis<MachineDominatorTree>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool Changed;

  Changed = DeadCodeElimination(MF, *MDT).run();

  const HexagonEvaluator HE(HRI, MRI, HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();

  MachineBasicBlock &Entry = MF.front();

  RegisterSet AIG;  // Available registers for IG.
  ConstGeneration ImmG(BT, HII, MRI);
  Changed |= visitBlock(Entry, ImmG, AIG);

  RegisterSet ARE;  // Available registers for RIE.
  RedundantInstrElimination RIE(BT, HII, HRI, MRI);
  bool Ried = visitBlock(Entry, RIE, ARE);
  if (Ried) {
    Changed = true;
    BT.run();
  }

  RegisterSet ACG;  // Available registers for CG.
  CopyGeneration CopyG(BT, HII, HRI, MRI);
  Changed |= visitBlock(Entry, CopyG, ACG);

  RegisterSet ACP;  // Available registers for CP.
  CopyPropagation CopyP(HRI, MRI);
  Changed |= visitBlock(Entry, CopyP, ACP);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  BT.run();
  RegisterSet ABS;  // Available registers for BS.
  BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
  Changed |= visitBlock(Entry, BitS, ABS);

  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;

  if (Changed) {
    for (auto &B : MF)
      for (auto &I : B)
        I.clearKillInfo();
    DeadCodeElimination(MF, *MDT).run();
  }
  return Changed;
}
//===----------------------------------------------------------------------===//
// Recognize loops where the code at the end of the loop matches the code
// before the entry of the loop, and the matching code is such that it can
// be simplified. This pass relies on the bit simplification above and only
// prepares code in a way that can be handled by the bit simplification.
//
// This is the motivating testcase (and explanation):
//
// {
//   loop0(.LBB0_2, r1)      // %for.body.preheader
//   r5:4 = memd(r0++#8)
// }
// {
//   r3 = lsr(r4, #16)
//   r7:6 = combine(r5, r5)
// }
// {
//   r3 = insert(r5, #16, #16)
//   r7:6 = vlsrw(r7:6, #16)
// }
// .LBB0_2:
// {
//   ...
//   memh(r2+#6) = r6        # R6 is really R5.H
// }
// {
//   ...
//   memh(r2+#2) = r3        # R3 is really R4.H
// }
// {
//   r5:4 = memd(r0++#8)
// }
// {   # "Shuffling" code that sets up R3 and R6
//   r3 = lsr(r4, #16)       # so that their halves can be stored in the
//   r7:6 = combine(r5, r5)  # next iteration. This could be folded into
// }   # the stores if the code was at the beginning
// {   # of the loop iteration. Since the same code
//   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
//   r7:6 = vlsrw(r7:6, #16)     # there.
// }:endloop0
//
// The outcome:
//
// {
//   loop0(.LBB0_2, r1)
//   r5:4 = memd(r0++#8)
// }
// .LBB0_2:
// {
//   ...
//   memh(r2+#6) = r5.h
// }
// {
//   ...
//   memh(r2+#2) = r4.h
// }
// {
//   r5:4 = memd(r0++#8)
// }:endloop0
//===----------------------------------------------------------------------===//

namespace llvm {

  FunctionPass *createHexagonLoopRescheduling();
  void initializeHexagonLoopReschedulingPass(PassRegistry&);

} // end namespace llvm
namespace {

  class HexagonLoopRescheduling : public MachineFunctionPass {
  public:
    static char ID;

    HexagonLoopRescheduling() : MachineFunctionPass(ID) {
      initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

  private:
    const HexagonInstrInfo *HII = nullptr;
    const HexagonRegisterInfo *HRI = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    BitTracker *BTP = nullptr;

    struct LoopCand {
      LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
            MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}

      MachineBasicBlock *LB, *PB, *EB;
    };
    using InstrList = std::vector<MachineInstr *>;
    struct InstrGroup {
      BitTracker::RegisterRef Inp, Out;
      InstrList Ins;
    };
    struct PhiInfo {
      PhiInfo(MachineInstr &P, MachineBasicBlock &B);

      unsigned DefR;
      BitTracker::RegisterRef LR, PR;   // Loop Register, Preheader Register
      MachineBasicBlock *LB, *PB;       // Loop Block, Preheader Block
    };

    static unsigned getDefReg(const MachineInstr *MI);
    bool isConst(unsigned Reg) const;
    bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
    bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
    bool isShuffleOf(unsigned OutR, unsigned InpR) const;
    bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
        unsigned &InpR2) const;
    void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
        MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
    bool processLoop(LoopCand &C);
  };

} // end anonymous namespace
char HexagonLoopRescheduling::ID = 0;

INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
  "Hexagon Loop Rescheduling", false, false)
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
      MachineBasicBlock &B) {
  DefR = HexagonLoopRescheduling::getDefReg(&P);
  LB = &B;
  PB = nullptr;
  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
    const MachineOperand &OpB = P.getOperand(i+1);
    if (OpB.getMBB() == &B) {
      LR = P.getOperand(i);
      continue;
    }
    PB = OpB.getMBB();
    PR = P.getOperand(i);
  }
}
unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
  RegisterSet Defs;
  HBS::getInstrDefs(*MI, Defs);
  if (Defs.count() != 1)
    return 0;
  return Defs.find_first();
}
bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
  if (!BTP->has(Reg))
    return false;
  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = RC[i];
    if (!V.is(0) && !V.is(1))
      return false;
  }
  return true;
}
bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
      unsigned DefR) const {
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
    case TargetOpcode::COPY:
    case Hexagon::S2_lsr_i_r:
    case Hexagon::S2_asr_i_r:
    case Hexagon::S2_asl_i_r:
    case Hexagon::S2_lsr_i_p:
    case Hexagon::S2_asr_i_p:
    case Hexagon::S2_asl_i_p:
    case Hexagon::S2_insert:
    case Hexagon::A2_or:
    case Hexagon::A2_orp:
    case Hexagon::A2_and:
    case Hexagon::A2_andp:
    case Hexagon::A2_combinew:
    case Hexagon::A4_combineri:
    case Hexagon::A4_combineir:
    case Hexagon::A2_combineii:
    case Hexagon::A4_combineii:
    case Hexagon::A2_combine_ll:
    case Hexagon::A2_combine_lh:
    case Hexagon::A2_combine_hl:
    case Hexagon::A2_combine_hh:
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
      unsigned InpR) const {
  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
    const MachineOperand &Op = MI->getOperand(i);
    if (!Op.isReg())
      continue;
    if (Op.getReg() == InpR)
      return true;
  }
  return false;
}
bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
  if (!BTP->has(OutR) || !BTP->has(InpR))
    return false;
  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
    const BitTracker::BitValue &V = OutC[i];
    if (V.Type != BitTracker::BitValue::Ref)
      continue;
    if (V.RefI.Reg != InpR)
      return false;
  }
  return true;
}
bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
      unsigned OutR2, unsigned &InpR2) const {
  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
    return false;
  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
  unsigned W = OutC1.width();
  unsigned MatchR = 0;
  if (W != OutC2.width())
    return false;
  for (unsigned i = 0; i < W; ++i) {
    const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
    if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
      return false;
    if (V1.Type != BitTracker::BitValue::Ref)
      continue;
    if (V1.RefI.Pos != V2.RefI.Pos)
      return false;
    if (V1.RefI.Reg != InpR1)
      return false;
    if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
      return false;
    if (!MatchR)
      MatchR = V2.RefI.Reg;
    else if (V2.RefI.Reg != MatchR)
      return false;
  }
  InpR2 = MatchR;
  return true;
}
void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
      unsigned NewPredR) {
  DenseMap<unsigned,unsigned> RegMap;

  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
  unsigned PhiR = MRI->createVirtualRegister(PhiRC);
  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
      .addReg(NewPredR)
      .addMBB(&PB)
      .addReg(G.Inp.Reg)
      .addMBB(&LB);
  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));

  for (unsigned i = G.Ins.size(); i > 0; --i) {
    const MachineInstr *SI = G.Ins[i-1];
    unsigned DR = getDefReg(SI);
    const TargetRegisterClass *RC = MRI->getRegClass(DR);
    unsigned NewDR = MRI->createVirtualRegister(RC);
    DebugLoc DL = SI->getDebugLoc();

    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
    for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
      const MachineOperand &Op = SI->getOperand(j);
      if (!Op.isReg()) {
        MIB.add(Op);
        continue;
      }
      if (!Op.isUse())
        continue;
      unsigned UseR = RegMap[Op.getReg()];
      MIB.addReg(UseR, 0, Op.getSubReg());
    }
    RegMap.insert(std::make_pair(DR, NewDR));
  }

  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
}
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
  LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
                    << "\n");
  std::vector<PhiInfo> Phis;
  for (auto &I : *C.LB) {
    if (!I.isPHI())
      break;
    unsigned PR = getDefReg(&I);
    if (PR == 0)
      continue;
    bool BadUse = false, GoodUse = false;
    for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
      MachineInstr *UseI = UI->getParent();
      if (UseI->getParent() != C.LB) {
        BadUse = true;
        break;
      }
      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
        GoodUse = true;
    }
    if (BadUse || !GoodUse)
      continue;
    Phis.push_back(PhiInfo(I, *C.LB));
  }

  LLVM_DEBUG({
.DefR
, HRI
) << "=phi("
3137 << printReg(I
.PR
.Reg
, HRI
, I
.PR
.Sub
) << ":b" << I
.PB
->getNumber()
3138 << ',' << printReg(I
.LR
.Reg
, HRI
, I
.LR
.Sub
) << ":b"
3139 << I
.LB
->getNumber() << ')';
3147 bool Changed
= false;
3150 // Go backwards in the block: for each bit shuffling instruction, check
3151 // if that instruction could potentially be moved to the front of the loop:
3152 // the output of the loop cannot be used in a non-shuffling instruction
3154 for (auto I
= C
.LB
->rbegin(), E
= C
.LB
->rend(); I
!= E
; ++I
) {
3155 if (I
->isTerminator())
3161 HBS::getInstrDefs(*I
, Defs
);
3162 if (Defs
.count() != 1)
3164 unsigned DefR
= Defs
.find_first();
3165 if (!TargetRegisterInfo::isVirtualRegister(DefR
))
3167 if (!isBitShuffle(&*I
, DefR
))
3170 bool BadUse
= false;
3171 for (auto UI
= MRI
->use_begin(DefR
), UE
= MRI
->use_end(); UI
!= UE
; ++UI
) {
3172 MachineInstr
*UseI
= UI
->getParent();
3173 if (UseI
->getParent() == C
.LB
) {
3174 if (UseI
->isPHI()) {
3175 // If the use is in a phi node in this loop, then it should be
3176 // the value corresponding to the back edge.
3177 unsigned Idx
= UI
.getOperandNo();
3178 if (UseI
->getOperand(Idx
+1).getMBB() != C
.LB
)
3181 auto F
= find(ShufIns
, UseI
);
3182 if (F
== ShufIns
.end())
3186 // There is a use outside of the loop, but there is no epilog block
3187 // suitable for a copy-out.
3188 if (C
.EB
== nullptr)
3197 ShufIns
.push_back(&*I
);
3200 // Partition the list of shuffling instructions into instruction groups,
3201 // where each group has to be moved as a whole (i.e. a group is a chain of
3202 // dependent instructions). A group produces a single live output register,
3203 // which is meant to be the input of the loop phi node (although this is
3204 // not checked here yet). It also uses a single register as its input,
3205 // which is some value produced in the loop body. After moving the group
3206 // to the beginning of the loop, that input register would need to be
3207 // the loop-carried register (through a phi node) instead of the (currently
3208 // loop-carried) output register.
3209 using InstrGroupList
= std::vector
<InstrGroup
>;
3210 InstrGroupList Groups
;
3212 for (unsigned i
= 0, n
= ShufIns
.size(); i
< n
; ++i
) {
3213 MachineInstr
*SI
= ShufIns
[i
];
3218 G
.Ins
.push_back(SI
);
3219 G
.Out
.Reg
= getDefReg(SI
);
3221 HBS::getInstrUses(*SI
, Inputs
);
3223 for (unsigned j
= i
+1; j
< n
; ++j
) {
3224 MachineInstr
*MI
= ShufIns
[j
];
3228 HBS::getInstrDefs(*MI
, Defs
);
3229 // If this instruction does not define any pending inputs, skip it.
3230 if (!Defs
.intersects(Inputs
))
3232 // Otherwise, add it to the current group and remove the inputs that
3233 // are defined by MI.
3234 G
.Ins
.push_back(MI
);
3235 Inputs
.remove(Defs
);
3236 // Then add all registers used by MI.
3237 HBS::getInstrUses(*MI
, Inputs
);
3238 ShufIns
[j
] = nullptr;
3241 // Only add a group if it requires at most one register.
3242 if (Inputs
.count() > 1)
3244 auto LoopInpEq
= [G
] (const PhiInfo
&P
) -> bool {
3245 return G
.Out
.Reg
== P
.LR
.Reg
;
3247 if (llvm::find_if(Phis
, LoopInpEq
) == Phis
.end())
3250 G
.Inp
.Reg
= Inputs
.find_first();
3251 Groups
.push_back(G
);
3255 for (unsigned i
= 0, n
= Groups
.size(); i
< n
; ++i
) {
3256 InstrGroup
&G
= Groups
[i
];
3257 dbgs() << "Group[" << i
<< "] inp: "
3258 << printReg(G
.Inp
.Reg
, HRI
, G
.Inp
.Sub
)
3259 << " out: " << printReg(G
.Out
.Reg
, HRI
, G
.Out
.Sub
) << "\n";
3260 for (unsigned j
= 0, m
= G
.Ins
.size(); j
< m
; ++j
)
3261 dbgs() << " " << *G
.Ins
[j
];
  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
    InstrGroup &G = Groups[i];
    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
      continue;
    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
      return G.Out.Reg == P.LR.Reg;
    };
    auto F = llvm::find_if(Phis, LoopInpEq);
    if (F == Phis.end())
      continue;
    unsigned PrehR = 0;
    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
      const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
      unsigned Opc = DefPrehR->getOpcode();
      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
        continue;
      if (!DefPrehR->getOperand(1).isImm())
        continue;
      if (DefPrehR->getOperand(1).getImm() != 0)
        continue;
      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
      if (RC != MRI->getRegClass(F->PR.Reg)) {
        PrehR = MRI->createVirtualRegister(RC);
        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
                                                          : Hexagon::A2_tfrpi;
        auto T = C.PB->getFirstTerminator();
        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
        BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
            .addImm(0);
      } else {
        PrehR = F->PR.Reg;
      }
    }
    // isSameShuffle could match with PrehR being of a wider class than
    // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
    // it would match for the input being a 32-bit register, and PrehR
    // being a 64-bit register (where the low 32 bits match). This could
    // be handled, but for now skip these cases.
    if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
      continue;
    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
    Changed = true;
  }

  return Changed;
}
bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();
  MRI = &MF.getRegInfo();
  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
  BitTracker BT(HE, MF);
  LLVM_DEBUG(BT.trace(true));
  BT.run();
  BTP = &BT;

  std::vector<LoopCand> Cand;

  for (auto &B : MF) {
    if (B.pred_size() != 2 || B.succ_size() != 2)
      continue;
    MachineBasicBlock *PB = nullptr;
    bool IsLoop = false;
    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
      if (*PI != &B)
        PB = *PI;
      else
        IsLoop = true;
    }
    if (!IsLoop)
      continue;

    MachineBasicBlock *EB = nullptr;
    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
      if (*SI == &B)
        continue;
      // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
      // edge from B to EP is non-critical).
      if ((*SI)->pred_size() == 1)
        EB = *SI;
      break;
    }

    Cand.push_back(LoopCand(&B, PB, EB));
  }

  bool Changed = false;
  for (auto &C : Cand)
    Changed |= processLoop(C);

  return Changed;
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createHexagonLoopRescheduling() {
  return new HexagonLoopRescheduling();
}

FunctionPass *llvm::createHexagonBitSimplify() {
  return new HexagonBitSimplify();
}