1 //===- HexagonSplitDouble.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonInstrInfo.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/ADT/BitVector.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/MachineMemOperand.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetRegisterInfo.h"
26 #include "llvm/Config/llvm-config.h"
27 #include "llvm/IR/DebugLoc.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Compiler.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
43 #define DEBUG_TYPE "hsdr"
// Forward declarations of the pass factory and of the pass-registry
// initializer for this pass.
// NOTE(review): the line opening the enclosing namespace is missing from this
// listing; only its closing brace is visible below.
49 FunctionPass
*createHexagonSplitDoubleRegs();
50 void initializeHexagonSplitDoubleRegsPass(PassRegistry
&);
52 } // end namespace llvm
// Hidden command-line options for this pass.
//
// -max-hsdr: integer limit, default -1. runOnMachineFunction compares it with
// the pass-wide Counter and only applies the limit when it is non-negative.
54 static cl::opt
<int> MaxHSDR("max-hsdr", cl::Hidden
, cl::init(-1),
55 cl::desc("Maximum number of split partitions"));
// -hsdr-no-mem: default true. Consulted by isFixedInstr for instructions that
// may load or store.
56 static cl::opt
<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden
, cl::init(true),
57 cl::desc("Do not split loads or stores"));
// -hsdr-split-all: default false.
// NOTE(review): no use of SplitAll is visible in this listing; presumably it
// overrides the profitability check -- confirm against the full source.
58 static cl::opt
<bool> SplitAll("hsdr-split-all", cl::Hidden
, cl::init(false),
59 cl::desc("Split all partitions"));
// Machine-function pass that partitions the virtual registers of class
// DoubleRC and, for partitions judged profitable, replaces each such register
// with a pair of IntRegs virtual registers (see splitPartition).
// NOTE(review): this listing drops some original lines (visible as gaps in
// the embedded numbering), including access specifiers, closing braces and
// the in-class declarations of ID and Counter, which are defined after the
// class.
63 class HexagonSplitDoubleRegs
: public MachineFunctionPass
{
67 HexagonSplitDoubleRegs() : MachineFunctionPass(ID
) {}
// Human-readable pass name.
69 StringRef
getPassName() const override
{
70 return "Hexagon Split Double Registers";
// Requires MachineLoopInfo and declares it preserved.
73 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
74 AU
.addRequired
<MachineLoopInfo
>();
75 AU
.addPreserved
<MachineLoopInfo
>();
76 MachineFunctionPass::getAnalysisUsage(AU
);
79 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Register class of the registers considered for splitting; defined after
// the class as &Hexagon::DoubleRegsRegClass.
82 static const TargetRegisterClass
*const DoubleRC
;
// Target hooks and analyses; all four are assigned at the start of
// runOnMachineFunction.
84 const HexagonRegisterInfo
*TRI
= nullptr;
85 const HexagonInstrInfo
*TII
= nullptr;
86 const MachineLoopInfo
*MLI
;
87 MachineRegisterInfo
*MRI
;
// Container aliases:
//   USet       - set of register numbers,
//   UUSetMap   - number -> register set (partition number -> registers),
//   UUPair     - pair of register numbers; built as (LoR, HiR) in
//                splitPartition,
//   UUPairMap  - double register -> its UUPair,
//   LoopRegMap - loop -> register set (induction registers per loop).
89 using USet
= std::set
<unsigned>;
90 using UUSetMap
= std::map
<unsigned, USet
>;
91 using UUPair
= std::pair
<unsigned, unsigned>;
92 using UUPairMap
= std::map
<unsigned, UUPair
>;
93 using LoopRegMap
= std::map
<const MachineLoop
*, USet
>;
// Analysis helpers: classification of instructions, partitioning of the
// double registers and profitability estimation.
95 bool isInduction(unsigned Reg
, LoopRegMap
&IRM
) const;
96 bool isVolatileInstr(const MachineInstr
*MI
) const;
97 bool isFixedInstr(const MachineInstr
*MI
) const;
98 void partitionRegisters(UUSetMap
&P2Rs
);
99 int32_t profit(const MachineInstr
*MI
) const;
100 int32_t profit(Register Reg
) const;
101 bool isProfitable(const USet
&Part
, LoopRegMap
&IRM
) const;
// Collection of candidate loop induction registers.
103 void collectIndRegsForLoop(const MachineLoop
*L
, USet
&Rs
);
104 void collectIndRegs(LoopRegMap
&IRM
);
// Transformation helpers: each split* routine emits the replacement
// instructions for one kind of instruction; splitInstr dispatches to them.
106 void createHalfInstr(unsigned Opc
, MachineInstr
*MI
,
107 const UUPairMap
&PairMap
, unsigned SubR
);
108 void splitMemRef(MachineInstr
*MI
, const UUPairMap
&PairMap
);
109 void splitImmediate(MachineInstr
*MI
, const UUPairMap
&PairMap
);
110 void splitCombine(MachineInstr
*MI
, const UUPairMap
&PairMap
);
111 void splitExt(MachineInstr
*MI
, const UUPairMap
&PairMap
);
112 void splitShift(MachineInstr
*MI
, const UUPairMap
&PairMap
);
113 void splitAslOr(MachineInstr
*MI
, const UUPairMap
&PairMap
);
114 bool splitInstr(MachineInstr
*MI
, const UUPairMap
&PairMap
);
115 void replaceSubregUses(MachineInstr
*MI
, const UUPairMap
&PairMap
);
116 void collapseRegPairs(MachineInstr
*MI
, const UUPairMap
&PairMap
);
117 bool splitPartition(const USet
&Part
);
// Debug printer for a register set.
121 static void dump_partition(raw_ostream
&, const USet
&,
122 const TargetRegisterInfo
&);
125 } // end anonymous namespace
// Out-of-class definitions of the pass's static members, followed by the
// pass registration.
// ID is passed to the MachineFunctionPass base by the constructor.
127 char HexagonSplitDoubleRegs::ID
;
// Counter is compared against the -max-hsdr limit in runOnMachineFunction.
// NOTE(review): the statement that increments it is not visible in this
// listing.
128 int HexagonSplitDoubleRegs::Counter
= 0;
// The register class whose virtual registers are candidates for splitting.
129 const TargetRegisterClass
*const HexagonSplitDoubleRegs::DoubleRC
=
130 &Hexagon::DoubleRegsRegClass
;
// Registers the pass under the command-line name "hexagon-split-double".
132 INITIALIZE_PASS(HexagonSplitDoubleRegs
, "hexagon-split-double",
133 "Hexagon Split Double Registers", false, false)
// Debug helper: prints the registers of Part, each preceded by a space, using
// printReg with the given TRI. Only compiled in asserts builds or when
// LLVM_ENABLE_DUMP is defined.
// NOTE(review): the visible print statement writes to dbgs(), not to the `os`
// parameter. The loop header, the closing brace and the matching #endif are
// missing from this listing.
135 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
136 LLVM_DUMP_METHOD
void HexagonSplitDoubleRegs::dump_partition(raw_ostream
&os
,
137 const USet
&Part
, const TargetRegisterInfo
&TRI
) {
140 dbgs() << ' ' << printReg(I
, &TRI
);
// Looks Reg up in the register sets stored as the mapped values of IRM.
// NOTE(review): the loop header and the return statements are missing from
// this listing; presumably the result is true when any loop's set contains
// Reg and false otherwise -- confirm against the full source.
145 bool HexagonSplitDoubleRegs::isInduction(unsigned Reg
, LoopRegMap
&IRM
) const {
147 const USet
&Rs
= I
.second
;
148 if (Rs
.find(Reg
) != Rs
.end())
// Checks every memory operand of MI for being volatile or atomic.
// NOTE(review): the return statements are missing from this listing;
// presumably true on the first such operand and false when there is none.
154 bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr
*MI
) const {
155 for (auto &MO
: MI
->memoperands())
156 if (MO
->isVolatile() || MO
->isAtomic())
// Classifies MI as "fixed" (not to be split) or splittable. The visible
// checks, in order:
//   - instructions that may load or store, when -hsdr-no-mem is set or the
//     access is volatile/atomic;
//   - debug instructions;
//   - the opcode, matched against the list of opcodes handled by splitInstr;
//   - finally, the register operands of MI.
// NOTE(review): the `switch` line, all return/break statements and the body
// of the operand loop are missing from this listing, so the outcome of each
// check is not visible here.
161 bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr
*MI
) const {
162 if (MI
->mayLoadOrStore())
163 if (MemRefsFixed
|| isVolatileInstr(MI
))
165 if (MI
->isDebugInstr())
168 unsigned Opc
= MI
->getOpcode();
173 case TargetOpcode::PHI
:
174 case TargetOpcode::COPY
:
177 case Hexagon::L2_loadrd_io
:
178 // Not handling stack loads (only reg-based addresses).
179 if (MI
->getOperand(1).isReg())
182 case Hexagon::S2_storerd_io
:
183 // Not handling stack stores (only reg-based addresses).
184 if (MI
->getOperand(0).isReg())
187 case Hexagon::L2_loadrd_pi
:
188 case Hexagon::S2_storerd_pi
:
190 case Hexagon::A2_tfrpi
:
191 case Hexagon::A2_combineii
:
192 case Hexagon::A4_combineir
:
193 case Hexagon::A4_combineii
:
194 case Hexagon::A4_combineri
:
195 case Hexagon::A2_combinew
:
196 case Hexagon::CONST64
:
198 case Hexagon::A2_sxtw
:
200 case Hexagon::A2_andp
:
201 case Hexagon::A2_orp
:
202 case Hexagon::A2_xorp
:
203 case Hexagon::S2_asl_i_p_or
:
204 case Hexagon::S2_asl_i_p
:
205 case Hexagon::S2_asr_i_p
:
206 case Hexagon::S2_lsr_i_p
:
// Inspect the operands of an instruction with a splittable opcode.
210 for (auto &Op
: MI
->operands()) {
213 Register R
= Op
.getReg();
// Fills P2Rs with a partition-number -> register-set map covering the
// virtual registers of class DoubleRC. Visible steps:
//   1. Record every DoubleRC virtual register in the DoubleRegs bit vector.
//   2. Test each one for "no defining instruction, or defined by a fixed
//      instruction" (these feed FixedRegs).
//   3. For every use of a double register, associate it (symmetrically, via
//      AssocMap) with the other whole-register DoubleRC operands of the using
//      instruction.
//   4. Walk the association graph with a work queue; a register whose
//      FixedRegs bit is set starts partition 0, any other starts a fresh
//      partition number.
//   5. Invert the resulting assignment into P2Rs.
// NOTE(review): several lines are missing from this listing (declarations of
// AssocMap, Visited, WorkQ, NextP and the register->partition map, plus the
// set/continue statements), so the details of steps 2-4 are only partly
// visible.
220 void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap
&P2Rs
) {
221 using UUMap
= std::map
<unsigned, unsigned>;
222 using UVect
= std::vector
<unsigned>;
// Step 1: collect the indices of all DoubleRC virtual registers.
224 unsigned NumRegs
= MRI
->getNumVirtRegs();
225 BitVector
DoubleRegs(NumRegs
);
226 for (unsigned i
= 0; i
< NumRegs
; ++i
) {
227 Register R
= Register::index2VirtReg(i
);
228 if (MRI
->getRegClass(R
) == DoubleRC
)
// Step 2: find the registers that must stay untouched.
232 BitVector
FixedRegs(NumRegs
);
233 for (int x
= DoubleRegs
.find_first(); x
>= 0; x
= DoubleRegs
.find_next(x
)) {
234 Register R
= Register::index2VirtReg(x
);
235 MachineInstr
*DefI
= MRI
->getVRegDef(R
);
236 // In some cases a register may exist, but never be defined or used.
237 // It should never appear anywhere, but mark it as "fixed", just to be
// safe.
239 if (!DefI
|| isFixedInstr(DefI
))
// Step 3: build the association map from the non-debug uses.
244 for (int x
= DoubleRegs
.find_first(); x
>= 0; x
= DoubleRegs
.find_next(x
)) {
247 Register R
= Register::index2VirtReg(x
);
248 LLVM_DEBUG(dbgs() << printReg(R
, TRI
) << " ~~");
249 USet
&Asc
= AssocMap
[R
];
250 for (auto U
= MRI
->use_nodbg_begin(R
), Z
= MRI
->use_nodbg_end();
252 MachineOperand
&Op
= *U
;
253 MachineInstr
*UseI
= Op
.getParent();
254 if (isFixedInstr(UseI
))
256 for (unsigned i
= 0, n
= UseI
->getNumOperands(); i
< n
; ++i
) {
257 MachineOperand
&MO
= UseI
->getOperand(i
);
258 // Skip non-registers or registers with subregisters.
259 if (&MO
== &Op
|| !MO
.isReg() || MO
.getSubReg())
261 Register T
= MO
.getReg();
262 if (!T
.isVirtual()) {
266 if (MRI
->getRegClass(T
) != DoubleRC
)
268 unsigned u
= Register::virtReg2Index(T
);
271 LLVM_DEBUG(dbgs() << ' ' << printReg(T
, TRI
));
273 // Make it symmetric.
274 AssocMap
[T
].insert(R
);
277 LLVM_DEBUG(dbgs() << '\n');
// Step 4: assign partition numbers by traversing associated registers.
283 for (int x
= DoubleRegs
.find_first(); x
>= 0; x
= DoubleRegs
.find_next(x
)) {
284 Register R
= Register::index2VirtReg(x
);
285 if (Visited
.count(R
))
287 // Create a new partition for R.
288 unsigned ThisP
= FixedRegs
[x
] ? 0 : NextP
++;
// WorkQ grows while it is being scanned, hence the index-based loop.
291 for (unsigned i
= 0; i
< WorkQ
.size(); ++i
) {
292 unsigned T
= WorkQ
[i
];
293 if (Visited
.count(T
))
297 // Add all registers associated with T.
298 USet
&Asc
= AssocMap
[T
];
299 append_range(WorkQ
, Asc
);
// Step 5: record each (register, partition) entry under its partition.
304 P2Rs
[I
.second
].insert(I
.first
);
// Profit contribution of a single 32-bit immediate. The values 0 and
// 0xFFFFFFFF are singled out by the visible condition.
// NOTE(review): the local declaration and the return statements are missing
// from this listing, so the actual amounts are not visible here.
307 static inline int32_t profitImm(unsigned Imm
) {
309 if (Imm
== 0 || Imm
== 0xFFFFFFFF)
// Estimates the gain (as a signed score) of splitting the single instruction
// MI, by opcode. Visible pieces:
//   - A2_tfrpi / CONST64: split the 64-bit immediate into low and high
//     32-bit halves and sum profitImm of each.
//   - A2_combineii / A4_combineii: sum profitImm over the immediate operands
//     (a non-immediate operand contributes 0).
//   - A2_andp / A2_orp / A2_xorp: sum profit(Register) of the two sources.
//   - shifts: the shift amounts 0 and 32 are special-cased.
// NOTE(review): the `switch` line and most return statements are missing from
// this listing; the scores of the remaining cases are not visible. Callers in
// isProfitable compare the result with std::numeric_limits<int>::min().
314 int32_t HexagonSplitDoubleRegs::profit(const MachineInstr
*MI
) const {
316 unsigned Opc
= MI
->getOpcode();
318 case TargetOpcode::PHI
:
319 for (const auto &Op
: MI
->operands())
323 case TargetOpcode::COPY
:
324 if (MI
->getOperand(1).getSubReg() != 0)
328 case Hexagon::L2_loadrd_io
:
329 case Hexagon::S2_storerd_io
:
331 case Hexagon::L2_loadrd_pi
:
332 case Hexagon::S2_storerd_pi
:
335 case Hexagon::A2_tfrpi
:
336 case Hexagon::CONST64
: {
337 uint64_t D
= MI
->getOperand(1).getImm();
338 unsigned Lo
= D
& 0xFFFFFFFFULL
;
339 unsigned Hi
= D
>> 32;
340 return profitImm(Lo
) + profitImm(Hi
);
342 case Hexagon::A2_combineii
:
343 case Hexagon::A4_combineii
: {
344 const MachineOperand
&Op1
= MI
->getOperand(1);
345 const MachineOperand
&Op2
= MI
->getOperand(2);
346 int32_t Prof1
= Op1
.isImm() ? profitImm(Op1
.getImm()) : 0;
347 int32_t Prof2
= Op2
.isImm() ? profitImm(Op2
.getImm()) : 0;
348 return Prof1
+ Prof2
;
350 case Hexagon::A4_combineri
:
352 // Fall through into A4_combineir.
354 case Hexagon::A4_combineir
: {
// ImmX is the index of the immediate operand (its assignment is not visible
// in this listing).
356 const MachineOperand
&OpX
= MI
->getOperand(ImmX
);
358 int64_t V
= OpX
.getImm();
359 if (V
== 0 || V
== -1)
362 // Fall through into A2_combinew.
365 case Hexagon::A2_combinew
:
368 case Hexagon::A2_sxtw
:
371 case Hexagon::A2_andp
:
372 case Hexagon::A2_orp
:
373 case Hexagon::A2_xorp
: {
374 Register Rs
= MI
->getOperand(1).getReg();
375 Register Rt
= MI
->getOperand(2).getReg();
376 return profit(Rs
) + profit(Rt
);
379 case Hexagon::S2_asl_i_p_or
: {
380 unsigned S
= MI
->getOperand(3).getImm();
381 if (S
== 0 || S
== 32)
385 case Hexagon::S2_asl_i_p
:
386 case Hexagon::S2_asr_i_p
:
387 case Hexagon::S2_lsr_i_p
:
388 unsigned S
= MI
->getOperand(2).getImm();
389 if (S
== 0 || S
== 32)
// Profit contribution of a source register: looks at the instruction that
// defines Reg and singles out the immediate-transfer and combine opcodes
// listed below. Reg must be a virtual register (asserted).
// NOTE(review): the statements following the case labels are missing from
// this listing; presumably these opcodes delegate to profit(DefI) and
// everything else scores 0 -- confirm against the full source.
// NOTE(review): the result of getVRegDef is dereferenced without a null
// check, while partitionRegisters documents that a register may exist without
// a definition.
401 int32_t HexagonSplitDoubleRegs::profit(Register Reg
) const {
402 assert(Reg
.isVirtual());
404 const MachineInstr
*DefI
= MRI
->getVRegDef(Reg
);
405 switch (DefI
->getOpcode()) {
406 case Hexagon::A2_tfrpi
:
407 case Hexagon::CONST64
:
408 case Hexagon::A2_combineii
:
409 case Hexagon::A4_combineii
:
410 case Hexagon::A4_combineri
:
411 case Hexagon::A4_combineir
:
412 case Hexagon::A2_combinew
:
// Decides whether splitting the partition Part is worthwhile. For every
// register in Part it scores the defining instruction with profit(), applies
// an adjustment for induction registers (looked up in IRM), and then visits
// every non-debug use:
//   - uses in fixed instructions are counted and charged for the
//     REG_SEQUENCE instructions they will need;
//   - uses located in the header block of a loop are checked separately
//     (these feed the LoopPhiNum counter);
//   - other uses are scored with profit().
// When both fixed uses and loop-phi uses were seen, 20 per loop phi is
// subtracted from the total.
// NOTE(review): the accumulation statements, the early returns taken when
// profit() yields std::numeric_limits<int>::min(), and the final return are
// missing from this listing.
420 bool HexagonSplitDoubleRegs::isProfitable(const USet
&Part
, LoopRegMap
&IRM
)
422 unsigned FixedNum
= 0, LoopPhiNum
= 0;
425 for (unsigned DR
: Part
) {
426 MachineInstr
*DefI
= MRI
->getVRegDef(DR
);
427 int32_t P
= profit(DefI
);
428 if (P
== std::numeric_limits
<int>::min())
431 // Reduce the profitability of splitting induction registers.
432 if (isInduction(DR
, IRM
))
435 for (auto U
= MRI
->use_nodbg_begin(DR
), W
= MRI
->use_nodbg_end();
437 MachineInstr
*UseI
= U
->getParent();
438 if (isFixedInstr(UseI
)) {
440 // Calculate the cost of generating REG_SEQUENCE instructions.
441 for (auto &Op
: UseI
->operands()) {
442 if (Op
.isReg() && Part
.count(Op
.getReg()))
448 // If a register from this partition is used in a fixed instruction,
449 // and there is also a register in this partition that is used in
450 // a loop phi node, then decrease the splitting profit as this can
451 // confuse the modulo scheduler.
453 const MachineBasicBlock
*PB
= UseI
->getParent();
454 const MachineLoop
*L
= MLI
->getLoopFor(PB
);
455 if (L
&& L
->getHeader() == PB
)
458 // Splittable instruction.
459 int32_t P
= profit(UseI
);
460 if (P
== std::numeric_limits
<int>::min())
466 if (FixedNum
> 0 && LoopPhiNum
> 0)
467 TotalP
-= 20*LoopPhiNum
;
469 LLVM_DEBUG(dbgs() << "Partition profit: " << TotalP
<< '\n');
// Collects into Rs the DoubleRC registers of loop L that look like induction
// registers. Visible steps:
//   1. Analyze the branch of the loop latch; require an analyzable
//      jump-conditional whose target or fall-through is the loop header.
//   2. Follow the predicate register of that branch to its defining compare
//      (skipping C2_not) and obtain the compare's input registers.
//   3. Keep only the compare inputs of class DoubleRC.
//   4. Gather the DoubleRC registers defined at operand 0 of instructions in
//      the header block, then drop those for which the NoIndOp predicate
//      holds; the rest is inserted into Rs.
// NOTE(review): the second line of the signature, the early returns, the
// declaration of DP and parts of the header loop and of the lambda are
// missing from this listing.
475 void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop
*L
,
477 const MachineBasicBlock
*HB
= L
->getHeader();
478 const MachineBasicBlock
*LB
= L
->getLoopLatch();
482 // Examine the latch branch. Expect it to be a conditional branch to
483 // the header (either "br-cond header" or "br-cond exit; br header").
484 MachineBasicBlock
*TB
= nullptr, *FB
= nullptr;
// analyzeBranch takes a non-const block, hence the const_cast.
485 MachineBasicBlock
*TmpLB
= const_cast<MachineBasicBlock
*>(LB
);
486 SmallVector
<MachineOperand
,2> Cond
;
487 bool BadLB
= TII
->analyzeBranch(*TmpLB
, TB
, FB
, Cond
, false);
488 // Only analyzable conditional branches. HII::analyzeBranch will put
489 // the branch opcode as the first element of Cond, and the predicate
490 // operand as the second.
491 if (BadLB
|| Cond
.size() != 2)
493 // Only simple jump-conditional (with or without negation).
494 if (!TII
->PredOpcodeHasJMP_c(Cond
[0].getImm()))
496 // Must go to the header.
497 if (TB
!= HB
&& FB
!= HB
)
499 assert(Cond
[1].isReg() && "Unexpected Cond vector from analyzeBranch");
500 // Expect a predicate register.
501 Register PR
= Cond
[1].getReg();
502 assert(MRI
->getRegClass(PR
) == &Hexagon::PredRegsRegClass
);
504 // Get the registers on which the loop controlling compare instruction
// depends.
506 Register CmpR1
, CmpR2
;
507 const MachineInstr
*CmpI
= MRI
->getVRegDef(PR
);
// Look through any chain of predicate negations.
508 while (CmpI
->getOpcode() == Hexagon::C2_not
)
509 CmpI
= MRI
->getVRegDef(CmpI
->getOperand(1).getReg());
511 int64_t Mask
= 0, Val
= 0;
512 bool OkCI
= TII
->analyzeCompare(*CmpI
, CmpR1
, CmpR2
, Mask
, Val
);
515 // Eliminate non-double input registers.
516 if (CmpR1
&& MRI
->getRegClass(CmpR1
) != DoubleRC
)
518 if (CmpR2
&& MRI
->getRegClass(CmpR2
) != DoubleRC
)
520 if (!CmpR1
&& !CmpR2
)
523 // Now examine the top of the loop: the phi nodes that could poten-
524 // tially define loop induction registers. The registers defined by
525 // such a phi node would be used in a 64-bit add, which then would
526 // be used in the loop compare instruction.
528 // Get the set of all double registers defined by phi nodes in the
// loop header.
530 using UVect
= std::vector
<unsigned>;
533 for (auto &MI
: *HB
) {
536 const MachineOperand
&MD
= MI
.getOperand(0);
537 Register R
= MD
.getReg();
538 if (MRI
->getRegClass(R
) == DoubleRC
)
// Predicate used with remove_if below: registers for which it returns true
// are removed from the candidate list.
544 auto NoIndOp
= [this, CmpR1
, CmpR2
] (unsigned R
) -> bool {
545 for (auto I
= MRI
->use_nodbg_begin(R
), E
= MRI
->use_nodbg_end();
547 const MachineInstr
*UseI
= I
->getParent();
548 if (UseI
->getOpcode() != Hexagon::A2_addp
)
550 // Get the output from the add. If it is one of the inputs to the
551 // loop-controlling compare instruction, then R is likely an induc-
// tion register.
553 Register T
= UseI
->getOperand(0).getReg();
554 if (T
== CmpR1
|| T
== CmpR2
)
// Keep only the candidates that survive the predicate.
559 UVect::iterator End
= llvm::remove_if(DP
, NoIndOp
);
560 Rs
.insert(DP
.begin(), End
);
565 dbgs() << "For loop at " << printMBBReference(*HB
) << " ind regs: ";
566 dump_partition(dbgs(), Rs
, *TRI
);
// Runs collectIndRegsForLoop on every loop known to MLI and stores the
// results in IRM, keyed by loop. The work queue is seeded with the loops
// obtained by iterating *MLI and then extended, while it is being scanned,
// with the loops obtained by iterating each queued loop.
// NOTE(review): the declarations of WorkQ and Rs and any condition guarding
// the insertion are missing from this listing.
571 void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap
&IRM
) {
572 using LoopVector
= std::vector
<MachineLoop
*>;
576 append_range(WorkQ
, *MLI
);
// Index-based loop because WorkQ grows during the traversal.
577 for (unsigned i
= 0; i
< WorkQ
.size(); ++i
)
578 append_range(WorkQ
, *WorkQ
[i
]);
581 for (MachineLoop
*L
: WorkQ
) {
583 collectIndRegsForLoop(L
, Rs
);
585 IRM
.insert(std::make_pair(L
, Rs
));
// Emits, in front of MI, a new instruction with opcode Opc that carries MI's
// operands translated to one half of each split register.
//   Opc     - opcode of the new instruction.
//   MI      - instruction whose operands are translated; it is not modified
//             in the visible code.
//   PairMap - double register -> (first, second) replacement registers.
//   SubR    - Hexagon::isub_lo selects the first register of a pair, any
//             other value selects the second.
// For DoubleRC virtual-register operands found in PairMap the register is
// replaced by the selected half; the remaining operand flags are copied from
// the original operand.
// NOTE(review): the condition guarding the verbatim addOperand call, the body
// of the "not in PairMap" branch and any adjustment of SR/isKill are missing
// from this listing.
589 void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc
, MachineInstr
*MI
,
590 const UUPairMap
&PairMap
, unsigned SubR
) {
591 MachineBasicBlock
&B
= *MI
->getParent();
592 DebugLoc DL
= MI
->getDebugLoc();
593 MachineInstr
*NewI
= BuildMI(B
, MI
, DL
, TII
->get(Opc
));
595 for (auto &Op
: MI
->operands()) {
597 NewI
->addOperand(Op
);
600 // For register operands, set the subregister.
601 Register R
= Op
.getReg();
602 unsigned SR
= Op
.getSubReg();
603 bool isVirtReg
= R
.isVirtual();
604 bool isKill
= Op
.isKill();
605 if (isVirtReg
&& MRI
->getRegClass(R
) == DoubleRC
) {
607 UUPairMap::const_iterator F
= PairMap
.find(R
);
608 if (F
== PairMap
.end()) {
611 const UUPair
&P
= F
->second
;
612 R
= (SubR
== Hexagon::isub_lo
) ? P
.first
: P
.second
;
// Rebuild the operand with the (possibly replaced) register and the flags of
// the original operand.
616 auto CO
= MachineOperand::CreateReg(R
, Op
.isDef(), Op
.isImplicit(), isKill
,
617 Op
.isDead(), Op
.isUndef(), Op
.isEarlyClobber(), SR
, Op
.isDebug(),
618 Op
.isInternalRead());
619 NewI
->addOperand(CO
);
// Replaces a 64-bit load or store MI by two instructions operating on the
// halves recorded in PairMap for the value register:
//   - loads become two L2_loadri_io defining P.first and P.second;
//   - stores become two S2_storeri_io.
// For the post-increment opcodes (L2_loadrd_pi / S2_storerd_pi) an A2_addi
// into a fresh register is also emitted, and all references to the original
// updated-address register are redirected to it. Finally, for each memory
// operand of MI, 4-byte memory operands are created and attached to the low
// and high instructions.
// The new instructions are inserted before MI; MI itself is left in place
// (see the "will be deleted later" comment below).
// NOTE(review): the if/else lines separating the load and store paths, the
// offset operands of the new instructions and part of the operand-index
// selection are missing from this listing.
623 void HexagonSplitDoubleRegs::splitMemRef(MachineInstr
*MI
,
624 const UUPairMap
&PairMap
) {
625 bool Load
= MI
->mayLoad();
626 unsigned OrigOpc
= MI
->getOpcode();
627 bool PostInc
= (OrigOpc
== Hexagon::L2_loadrd_pi
||
628 OrigOpc
== Hexagon::S2_storerd_pi
);
629 MachineInstr
*LowI
, *HighI
;
630 MachineBasicBlock
&B
= *MI
->getParent();
631 DebugLoc DL
= MI
->getDebugLoc();
633 // Index of the base-address-register operand.
634 unsigned AdrX
= PostInc
? (Load
? 2 : 1)
636 MachineOperand
&AdrOp
= MI
->getOperand(AdrX
);
637 unsigned RSA
= getRegState(AdrOp
);
// The value operand is operand 0 for loads; for stores it is operand 3
// (post-increment) or operand 2.
638 MachineOperand
&ValOp
= Load
? MI
->getOperand(0)
639 : (PostInc
? MI
->getOperand(3)
640 : MI
->getOperand(2));
641 UUPairMap::const_iterator F
= PairMap
.find(ValOp
.getReg());
642 assert(F
!= PairMap
.end());
// Load path. The kill flag is stripped from the address register in the new
// instructions built here.
645 const UUPair
&P
= F
->second
;
646 int64_t Off
= PostInc
? 0 : MI
->getOperand(2).getImm();
647 LowI
= BuildMI(B
, MI
, DL
, TII
->get(Hexagon::L2_loadri_io
), P
.first
)
648 .addReg(AdrOp
.getReg(), RSA
& ~RegState::Kill
, AdrOp
.getSubReg())
650 HighI
= BuildMI(B
, MI
, DL
, TII
->get(Hexagon::L2_loadri_io
), P
.second
)
651 .addReg(AdrOp
.getReg(), RSA
& ~RegState::Kill
, AdrOp
.getSubReg())
// Store path.
654 const UUPair
&P
= F
->second
;
655 int64_t Off
= PostInc
? 0 : MI
->getOperand(1).getImm();
656 LowI
= BuildMI(B
, MI
, DL
, TII
->get(Hexagon::S2_storeri_io
))
657 .addReg(AdrOp
.getReg(), RSA
& ~RegState::Kill
, AdrOp
.getSubReg())
660 HighI
= BuildMI(B
, MI
, DL
, TII
->get(Hexagon::S2_storeri_io
))
661 .addReg(AdrOp
.getReg(), RSA
& ~RegState::Kill
, AdrOp
.getSubReg())
667 // Create the increment of the address register.
668 int64_t Inc
= Load
? MI
->getOperand(3).getImm()
669 : MI
->getOperand(2).getImm();
670 MachineOperand
&UpdOp
= Load
? MI
->getOperand(1) : MI
->getOperand(0);
671 const TargetRegisterClass
*RC
= MRI
->getRegClass(UpdOp
.getReg());
672 Register NewR
= MRI
->createVirtualRegister(RC
);
673 assert(!UpdOp
.getSubReg() && "Def operand with subreg");
// This read of the address register keeps the original register state (RSA),
// including any kill flag.
674 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::A2_addi
), NewR
)
675 .addReg(AdrOp
.getReg(), RSA
)
677 MRI
->replaceRegWith(UpdOp
.getReg(), NewR
);
678 // The original instruction will be deleted later.
681 // Generate a new pair of memory-operands.
682 MachineFunction
&MF
= *B
.getParent();
683 for (auto &MO
: MI
->memoperands()) {
684 const MachinePointerInfo
&Ptr
= MO
->getPointerInfo();
685 MachineMemOperand::Flags F
= MO
->getFlags();
686 Align A
= MO
->getAlign();
// The low half keeps the original alignment; the high half is given at most
// 4-byte alignment.
688 auto *Tmp1
= MF
.getMachineMemOperand(Ptr
, F
, 4 /*size*/, A
);
689 LowI
->addMemOperand(MF
, Tmp1
);
691 MF
.getMachineMemOperand(Ptr
, F
, 4 /*size*/, std::min(A
, Align(4)));
692 HighI
->addMemOperand(MF
, Tmp2
);
// Splits a 64-bit immediate transfer: operand 0 of MI is the destination
// double register and operand 1 the immediate (both asserted). Two A2_tfrsi
// instructions are emitted before MI, loading the low 32 bits into P.first
// and the high 32 bits into P.second, where P is the PairMap entry of the
// destination. The destination must be present in PairMap (asserted).
696 void HexagonSplitDoubleRegs::splitImmediate(MachineInstr
*MI
,
697 const UUPairMap
&PairMap
) {
698 MachineOperand
&Op0
= MI
->getOperand(0);
699 MachineOperand
&Op1
= MI
->getOperand(1);
700 assert(Op0
.isReg() && Op1
.isImm());
701 uint64_t V
= Op1
.getImm();
703 MachineBasicBlock
&B
= *MI
->getParent();
704 DebugLoc DL
= MI
->getDebugLoc();
705 UUPairMap::const_iterator F
= PairMap
.find(Op0
.getReg());
706 assert(F
!= PairMap
.end());
707 const UUPair
&P
= F
->second
;
709 // The operand to A2_tfrsi can only have 32 significant bits. Immediate
710 // values in MachineOperand are stored as 64-bit integers, and so the
711 // value -1 may be represented either as 64-bit -1, or 4294967295. Both
712 // will have the 32 higher bits truncated in the end, but -1 will remain
713 // as -1, while the latter may appear to be a large unsigned value
714 // requiring a constant extender. The casting to int32_t will select the
715 // former representation. (The same reasoning applies to all 32-bit
// values.)
717 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::A2_tfrsi
), P
.first
)
718 .addImm(int32_t(V
& 0xFFFFFFFFULL
));
719 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::A2_tfrsi
), P
.second
)
720 .addImm(int32_t(V
>> 32));
// Splits a combine instruction: operand 1 of MI supplies the value of the
// second (high) register of the destination's pair and operand 2 the value of
// the first (low) register. Each half is produced either by A2_tfrsi or by a
// COPY from the source register operand, inserted before MI.
// NOTE(review): the conditions choosing between A2_tfrsi and COPY and the
// operand added to A2_tfrsi are missing from this listing; presumably the
// choice depends on whether the source operand is an immediate.
723 void HexagonSplitDoubleRegs::splitCombine(MachineInstr
*MI
,
724 const UUPairMap
&PairMap
) {
725 MachineOperand
&Op0
= MI
->getOperand(0);
726 MachineOperand
&Op1
= MI
->getOperand(1);
727 MachineOperand
&Op2
= MI
->getOperand(2);
730 MachineBasicBlock
&B
= *MI
->getParent();
731 DebugLoc DL
= MI
->getDebugLoc();
732 UUPairMap::const_iterator F
= PairMap
.find(Op0
.getReg());
733 assert(F
!= PairMap
.end());
734 const UUPair
&P
= F
->second
;
// High half, taken from operand 1.
737 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::A2_tfrsi
), P
.second
)
740 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), P
.second
)
741 .addReg(Op1
.getReg(), getRegState(Op1
), Op1
.getSubReg());
// Low half, taken from operand 2.
745 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::A2_tfrsi
), P
.first
)
748 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), P
.first
)
749 .addReg(Op2
.getReg(), getRegState(Op2
), Op2
.getSubReg());
// Splits an extension instruction with a register destination (operand 0)
// and a register source (operand 1), both asserted. Before MI it emits:
//   - a COPY of the source into the first (low) register of the pair;
//   - an S2_asr_i_r of the source into the second (high) register.
// The kill flag is removed from the source in the COPY because the source is
// read again by the shift that follows.
// NOTE(review): the shift-amount operand of S2_asr_i_r is missing from this
// listing.
753 void HexagonSplitDoubleRegs::splitExt(MachineInstr
*MI
,
754 const UUPairMap
&PairMap
) {
755 MachineOperand
&Op0
= MI
->getOperand(0);
756 MachineOperand
&Op1
= MI
->getOperand(1);
757 assert(Op0
.isReg() && Op1
.isReg());
759 MachineBasicBlock
&B
= *MI
->getParent();
760 DebugLoc DL
= MI
->getDebugLoc();
761 UUPairMap::const_iterator F
= PairMap
.find(Op0
.getReg());
762 assert(F
!= PairMap
.end());
763 const UUPair
&P
= F
->second
;
764 unsigned RS
= getRegState(Op1
);
766 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), P
.first
)
767 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, Op1
.getSubReg());
768 BuildMI(B
, MI
, DL
, TII
->get(Hexagon::S2_asr_i_r
), P
.second
)
769 .addReg(Op1
.getReg(), RS
, Op1
.getSubReg())
// Splits a 64-bit shift by an immediate (operand 0: destination register,
// operand 1: source register, operand 2: shift amount in [0, 64), all
// asserted) into instructions on the low/high halves, inserted before MI.
// LoR/HiR are the replacement registers of the destination; the source is
// read through its isub_lo / isub_hi subregisters. The visible code
// distinguishes these cases by shift amount S:
//   - no shift: two subregister copies;
//   - a shift within one half: shift one half, move the bits that cross the
//     half boundary with S2_extractu / S2_insert via a temporary, and combine
//     (see the recipe in the comments below);
//   - S == 32: one half is a plain copy of the other source half, the other
//     half is filled with A2_tfrsi or, for a right shift, S2_asr_i_r;
//   - larger amounts: a single shift of one source half plus a fill of the
//     other destination half.
// S == 16 uses A2_aslh / A2_asrh instead of the generic shift opcodes.
// In each emitted sequence only the last read of the source keeps the
// original register state; earlier reads have the kill flag removed.
// NOTE(review): the definitions of S and Left, the if/else skeleton and most
// immediate operands are missing from this listing.
773 void HexagonSplitDoubleRegs::splitShift(MachineInstr
*MI
,
774 const UUPairMap
&PairMap
) {
775 using namespace Hexagon
;
777 MachineOperand
&Op0
= MI
->getOperand(0);
778 MachineOperand
&Op1
= MI
->getOperand(1);
779 MachineOperand
&Op2
= MI
->getOperand(2);
780 assert(Op0
.isReg() && Op1
.isReg() && Op2
.isImm());
781 int64_t Sh64
= Op2
.getImm();
782 assert(Sh64
>= 0 && Sh64
< 64);
785 UUPairMap::const_iterator F
= PairMap
.find(Op0
.getReg());
786 assert(F
!= PairMap
.end());
787 const UUPair
&P
= F
->second
;
788 Register LoR
= P
.first
;
789 Register HiR
= P
.second
;
791 unsigned Opc
= MI
->getOpcode();
792 bool Right
= (Opc
== S2_lsr_i_p
|| Opc
== S2_asr_i_p
);
794 bool Signed
= (Opc
== S2_asr_i_p
);
796 MachineBasicBlock
&B
= *MI
->getParent();
797 DebugLoc DL
= MI
->getDebugLoc();
798 unsigned RS
= getRegState(Op1
);
// 32-bit shift opcode matching the direction and signedness of MI.
799 unsigned ShiftOpc
= Left
? S2_asl_i_r
800 : (Signed
? S2_asr_i_r
: S2_lsr_i_r
);
801 unsigned LoSR
= isub_lo
;
802 unsigned HiSR
= isub_hi
;
805 // No shift, subregister copy.
806 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), LoR
)
807 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
);
808 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), HiR
)
809 .addReg(Op1
.getReg(), RS
, HiSR
);
811 const TargetRegisterClass
*IntRC
= &IntRegsRegClass
;
812 Register TmpR
= MRI
->createVirtualRegister(IntRC
);
814 // Shift left: DR = shl R, #s
815 // LoR = shl R.lo, #s
816 // TmpR = extractu R.lo, #s, #32-s
817 // HiR = or (TmpR, asl(R.hi, #s))
818 // Shift right: DR = shr R, #s
819 // HiR = shr R.hi, #s
820 // TmpR = shr R.lo, #s
821 // LoR = insert TmpR, R.hi, #s, #32-s
824 // LoR = shl R.lo, #s
826 // TmpR = shr R.lo, #s
828 // Make a special case for A2_aslh and A2_asrh (they are predicable as
829 // opposed to S2_asl_i_r/S2_asr_i_r).
831 BuildMI(B
, MI
, DL
, TII
->get(A2_aslh
), LoR
)
832 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
);
833 else if (S
== 16 && Signed
)
834 BuildMI(B
, MI
, DL
, TII
->get(A2_asrh
), TmpR
)
835 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
);
837 BuildMI(B
, MI
, DL
, TII
->get(ShiftOpc
), (Left
? LoR
: TmpR
))
838 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
)
842 // TmpR = extractu R.lo, #s, #32-s
843 BuildMI(B
, MI
, DL
, TII
->get(S2_extractu
), TmpR
)
844 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
)
847 // HiR = or (TmpR, asl(R.hi, #s))
848 BuildMI(B
, MI
, DL
, TII
->get(S2_asl_i_r_or
), HiR
)
850 .addReg(Op1
.getReg(), RS
, HiSR
)
853 // HiR = shr R.hi, #s
854 BuildMI(B
, MI
, DL
, TII
->get(ShiftOpc
), HiR
)
855 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, HiSR
)
857 // LoR = insert TmpR, R.hi, #s, #32-s
858 BuildMI(B
, MI
, DL
, TII
->get(S2_insert
), LoR
)
860 .addReg(Op1
.getReg(), RS
, HiSR
)
// Shift by exactly 32: one destination half is a copy of the opposite source
// half.
864 } else if (S
== 32) {
865 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), (Left
? HiR
: LoR
))
866 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, (Left
? LoSR
: HiSR
));
868 BuildMI(B
, MI
, DL
, TII
->get(A2_tfrsi
), (Left
? LoR
: HiR
))
870 else // Must be right shift.
871 BuildMI(B
, MI
, DL
, TII
->get(S2_asr_i_r
), HiR
)
872 .addReg(Op1
.getReg(), RS
, HiSR
)
// Remaining shift amounts: shift a single source half into the opposite
// destination half, then fill the other destination half.
877 BuildMI(B
, MI
, DL
, TII
->get(A2_aslh
), HiR
)
878 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, LoSR
);
879 else if (S
== 16 && Signed
)
880 BuildMI(B
, MI
, DL
, TII
->get(A2_asrh
), LoR
)
881 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, HiSR
);
883 BuildMI(B
, MI
, DL
, TII
->get(ShiftOpc
), (Left
? HiR
: LoR
))
884 .addReg(Op1
.getReg(), RS
& ~RegState::Kill
, (Left
? LoSR
: HiSR
))
888 BuildMI(B
, MI
, DL
, TII
->get(S2_asr_i_r
), HiR
)
889 .addReg(Op1
.getReg(), RS
, HiSR
)
892 BuildMI(B
, MI
, DL
, TII
->get(A2_tfrsi
), (Left
? LoR
: HiR
))
// Splits S2_asl_i_p_or, i.e. Op0 = or(Op1, asl(Op2, Op3)) on 64-bit values
// (operands 0-2 are registers, operand 3 is a shift amount in [0, 64); all
// asserted), into instructions on the low/high halves, inserted before MI.
// LoR/HiR are the replacement registers of the destination; the sources are
// read through their isub_lo / isub_hi subregisters. The visible code
// distinguishes these cases by shift amount S:
//   - no shift: two A2_or, one per half;
//   - a shift within one half: S2_asl_i_r_or for the low half, then
//     S2_extractu + A2_or + S2_asl_i_r_or through two temporaries for the
//     high half;
//   - S == 32: the low half is a copy of R1.lo, the high half is
//     or(R1.hi, R2.lo);
//   - larger amounts: the low half is a copy of R1.lo, the high half is
//     S2_asl_i_r_or of R1.hi with R2.lo.
// NOTE(review): the definition of S, the if/else skeleton and the immediate
// operands are missing from this listing.
897 void HexagonSplitDoubleRegs::splitAslOr(MachineInstr
*MI
,
898 const UUPairMap
&PairMap
) {
899 using namespace Hexagon
;
901 MachineOperand
&Op0
= MI
->getOperand(0);
902 MachineOperand
&Op1
= MI
->getOperand(1);
903 MachineOperand
&Op2
= MI
->getOperand(2);
904 MachineOperand
&Op3
= MI
->getOperand(3);
905 assert(Op0
.isReg() && Op1
.isReg() && Op2
.isReg() && Op3
.isImm());
906 int64_t Sh64
= Op3
.getImm();
907 assert(Sh64
>= 0 && Sh64
< 64);
910 UUPairMap::const_iterator F
= PairMap
.find(Op0
.getReg());
911 assert(F
!= PairMap
.end());
912 const UUPair
&P
= F
->second
;
913 unsigned LoR
= P
.first
;
914 unsigned HiR
= P
.second
;
916 MachineBasicBlock
&B
= *MI
->getParent();
917 DebugLoc DL
= MI
->getDebugLoc();
918 unsigned RS1
= getRegState(Op1
);
919 unsigned RS2
= getRegState(Op2
);
920 const TargetRegisterClass
*IntRC
= &IntRegsRegClass
;
922 unsigned LoSR
= isub_lo
;
923 unsigned HiSR
= isub_hi
;
925 // Op0 = S2_asl_i_p_or Op1, Op2, Op3
926 // means: Op0 = or (Op1, asl(Op2, Op3))
929 // DR = or (R1, asl(R2, #s))
931 // LoR = or (R1.lo, asl(R2.lo, #s))
932 // Tmp1 = extractu R2.lo, #s, #32-s
933 // Tmp2 = or R1.hi, Tmp1
934 // HiR = or (Tmp2, asl(R2.hi, #s))
937 // DR = or (R1, asl(R2, #0))
939 // i.e. LoR = or R1.lo, R2.lo
940 // HiR = or R1.hi, R2.hi
941 BuildMI(B
, MI
, DL
, TII
->get(A2_or
), LoR
)
942 .addReg(Op1
.getReg(), RS1
& ~RegState::Kill
, LoSR
)
943 .addReg(Op2
.getReg(), RS2
& ~RegState::Kill
, LoSR
);
944 BuildMI(B
, MI
, DL
, TII
->get(A2_or
), HiR
)
945 .addReg(Op1
.getReg(), RS1
, HiSR
)
946 .addReg(Op2
.getReg(), RS2
, HiSR
);
// Shift within one half: low half first, then the high half via temporaries.
948 BuildMI(B
, MI
, DL
, TII
->get(S2_asl_i_r_or
), LoR
)
949 .addReg(Op1
.getReg(), RS1
& ~RegState::Kill
, LoSR
)
950 .addReg(Op2
.getReg(), RS2
& ~RegState::Kill
, LoSR
)
952 Register TmpR1
= MRI
->createVirtualRegister(IntRC
);
953 BuildMI(B
, MI
, DL
, TII
->get(S2_extractu
), TmpR1
)
954 .addReg(Op2
.getReg(), RS2
& ~RegState::Kill
, LoSR
)
957 Register TmpR2
= MRI
->createVirtualRegister(IntRC
);
958 BuildMI(B
, MI
, DL
, TII
->get(A2_or
), TmpR2
)
959 .addReg(Op1
.getReg(), RS1
, HiSR
)
961 BuildMI(B
, MI
, DL
, TII
->get(S2_asl_i_r_or
), HiR
)
963 .addReg(Op2
.getReg(), RS2
, HiSR
)
965 } else if (S
== 32) {
966 // DR = or (R1, asl(R2, #32))
969 // HiR = or R1.hi, R2.lo
970 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), LoR
)
971 .addReg(Op1
.getReg(), RS1
& ~RegState::Kill
, LoSR
);
972 BuildMI(B
, MI
, DL
, TII
->get(A2_or
), HiR
)
973 .addReg(Op1
.getReg(), RS1
, HiSR
)
974 .addReg(Op2
.getReg(), RS2
, LoSR
);
976 // DR = or (R1, asl(R2, #s))
979 // HiR = or (R1:hi, asl(R2:lo, #s-32))
981 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::COPY
), LoR
)
982 .addReg(Op1
.getReg(), RS1
& ~RegState::Kill
, LoSR
);
983 BuildMI(B
, MI
, DL
, TII
->get(S2_asl_i_r_or
), HiR
)
984 .addReg(Op1
.getReg(), RS1
, HiSR
)
985 .addReg(Op2
.getReg(), RS2
, LoSR
)
// Dispatches on MI's opcode to the routine that emits its split form:
//   - PHI / COPY whose destination is of class DoubleRC: two instructions of
//     the same opcode, one per half, via createHalfInstr;
//   - 64-bit and/or/xor: A2_and / A2_or / A2_xor on each half;
//   - the remaining kinds go to splitMemRef, splitImmediate, splitCombine,
//     splitExt, splitShift and splitAslOr.
// An opcode with no handler hits llvm_unreachable.
// NOTE(review): the `switch` line, most case labels and the return statements
// are missing from this listing, so the returned value is not visible here.
990 bool HexagonSplitDoubleRegs::splitInstr(MachineInstr
*MI
,
991 const UUPairMap
&PairMap
) {
992 using namespace Hexagon
;
994 LLVM_DEBUG(dbgs() << "Splitting: " << *MI
);
996 unsigned Opc
= MI
->getOpcode();
999 case TargetOpcode::PHI
:
1000 case TargetOpcode::COPY
: {
1001 Register DstR
= MI
->getOperand(0).getReg();
1002 if (MRI
->getRegClass(DstR
) == DoubleRC
) {
1003 createHalfInstr(Opc
, MI
, PairMap
, isub_lo
);
1004 createHalfInstr(Opc
, MI
, PairMap
, isub_hi
);
1010 createHalfInstr(A2_and
, MI
, PairMap
, isub_lo
);
1011 createHalfInstr(A2_and
, MI
, PairMap
, isub_hi
);
1015 createHalfInstr(A2_or
, MI
, PairMap
, isub_lo
);
1016 createHalfInstr(A2_or
, MI
, PairMap
, isub_hi
);
1020 createHalfInstr(A2_xor
, MI
, PairMap
, isub_lo
);
1021 createHalfInstr(A2_xor
, MI
, PairMap
, isub_hi
);
1029 splitMemRef(MI
, PairMap
);
1035 splitImmediate(MI
, PairMap
);
1044 splitCombine(MI
, PairMap
);
1049 splitExt(MI
, PairMap
);
1056 splitShift(MI
, PairMap
);
1061 splitAslOr(MI
, PairMap
);
1066 llvm_unreachable("Instruction not splitable");
// Rewrites the use operands of MI that read a subregister of a split double
// register so that they refer to the corresponding replacement register from
// PairMap instead. Operands that are not register uses, carry no subregister
// index, or whose register is not in PairMap are passed over.
// NOTE(review): the continue statements, the body of the isub_lo case and any
// clearing of the subregister index are missing from this listing; only the
// isub_hi -> P.second replacement is visible.
1073 void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr
*MI
,
1074 const UUPairMap
&PairMap
) {
1075 for (auto &Op
: MI
->operands()) {
1076 if (!Op
.isReg() || !Op
.isUse() || !Op
.getSubReg())
1078 Register R
= Op
.getReg();
1079 UUPairMap::const_iterator F
= PairMap
.find(R
);
1080 if (F
== PairMap
.end())
1082 const UUPair
&P
= F
->second
;
1083 switch (Op
.getSubReg()) {
1084 case Hexagon::isub_lo
:
1087 case Hexagon::isub_hi
:
1088 Op
.setReg(P
.second
);
// For an instruction that is not being split: for every whole-register use of
// a DoubleRC register found in PairMap, creates a fresh DoubleRC virtual
// register and emits, before MI, a REG_SEQUENCE that assembles it from the
// pair's registers with the isub_lo and isub_hi subregister indices.
// NOTE(review): the continue statements, the register operands of the
// REG_SEQUENCE and the rewrite of MI's operand to the new register are
// missing from this listing.
1095 void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr
*MI
,
1096 const UUPairMap
&PairMap
) {
1097 MachineBasicBlock
&B
= *MI
->getParent();
1098 DebugLoc DL
= MI
->getDebugLoc();
1100 for (auto &Op
: MI
->operands()) {
1101 if (!Op
.isReg() || !Op
.isUse())
1103 Register R
= Op
.getReg();
1106 if (MRI
->getRegClass(R
) != DoubleRC
|| Op
.getSubReg())
1108 UUPairMap::const_iterator F
= PairMap
.find(R
);
1109 if (F
== PairMap
.end())
1111 const UUPair
&Pr
= F
->second
;
1112 Register NewDR
= MRI
->createVirtualRegister(DoubleRC
);
1113 BuildMI(B
, MI
, DL
, TII
->get(TargetOpcode::REG_SEQUENCE
), NewDR
)
1115 .addImm(Hexagon::isub_lo
)
1117 .addImm(Hexagon::isub_hi
);
// Splits every double register of the partition Part. Visible steps:
//   1. For each register, collect its defining instruction and all non-debug
//      using instructions, and create two IntRegs virtual registers recorded
//      in PairMap as (LoR, HiR).
//   2. Visit the collected instructions: fixed ones get REG_SEQUENCEs via
//      collapseRegPairs, the others are handed to splitInstr.
//   3. For each register, gather its remaining using instructions and rewrite
//      their subregister uses with replaceSubregUses.
//   4. Walk the instructions queued in Erase.
// NOTE(review): the declarations of SplitIns, Erase, PairMap and Uses, the
// handling of splitInstr's result, the erase call and the return statement
// are missing from this listing.
1122 bool HexagonSplitDoubleRegs::splitPartition(const USet
&Part
) {
1123 using MISet
= std::set
<MachineInstr
*>;
1125 const TargetRegisterClass
*IntRC
= &Hexagon::IntRegsRegClass
;
1126 bool Changed
= false;
1128 LLVM_DEBUG(dbgs() << "Splitting partition: ";
1129 dump_partition(dbgs(), Part
, *TRI
); dbgs() << '\n');
1134 for (unsigned DR
: Part
) {
1135 MachineInstr
*DefI
= MRI
->getVRegDef(DR
);
1136 SplitIns
.insert(DefI
);
1138 // Collect all instructions, including fixed ones. We won't split them,
1139 // but we need to visit them again to insert the REG_SEQUENCE instructions.
1140 for (auto U
= MRI
->use_nodbg_begin(DR
), W
= MRI
->use_nodbg_end();
1142 SplitIns
.insert(U
->getParent());
1144 Register LoR
= MRI
->createVirtualRegister(IntRC
);
1145 Register HiR
= MRI
->createVirtualRegister(IntRC
);
1146 LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR
, TRI
) << " -> "
1147 << printReg(HiR
, TRI
) << ':' << printReg(LoR
, TRI
)
1149 PairMap
.insert(std::make_pair(DR
, UUPair(LoR
, HiR
)));
1153 for (auto MI
: SplitIns
) {
1154 if (isFixedInstr(MI
)) {
1155 collapseRegPairs(MI
, PairMap
);
1157 bool Done
= splitInstr(MI
, PairMap
);
1164 for (unsigned DR
: Part
) {
1165 // Before erasing "double" instructions, revisit all uses of the double
1166 // registers in this partition, and replace all uses of them with subre-
1167 // gisters, with the corresponding single registers.
1169 for (auto U
= MRI
->use_nodbg_begin(DR
), W
= MRI
->use_nodbg_end();
1171 Uses
.insert(U
->getParent());
1173 replaceSubregUses(M
, PairMap
);
1176 for (auto MI
: Erase
) {
1177 MachineBasicBlock
*B
= MI
->getParent();
// Pass entry point. After the skipFunction check it caches the subtarget's
// register and instruction info, the function's register info and the
// MachineLoopInfo analysis, then:
//   1. collects induction registers per loop (collectIndRegs);
//   2. partitions the double registers (partitionRegisters);
//   3. walks the partitions, checks the -max-hsdr limit against Counter (only
//      when the limit is non-negative), and calls splitPartition on the
//      partitions for which isProfitable holds.
// Changed accumulates the results of splitPartition.
// NOTE(review): the declarations of IRM and P2Rs, any skipping of the fixed
// partition #0, the update of Counter and the return statements are missing
// from this listing.
1184 bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction
&MF
) {
1185 if (skipFunction(MF
.getFunction()))
1188 LLVM_DEBUG(dbgs() << "Splitting double registers in function: "
1189 << MF
.getName() << '\n');
1191 auto &ST
= MF
.getSubtarget
<HexagonSubtarget
>();
1192 TRI
= ST
.getRegisterInfo();
1193 TII
= ST
.getInstrInfo();
1194 MRI
= &MF
.getRegInfo();
1195 MLI
= &getAnalysis
<MachineLoopInfo
>();
1200 collectIndRegs(IRM
);
1201 partitionRegisters(P2Rs
);
// Debug dump of the computed partitions.
1204 dbgs() << "Register partitioning: (partition #0 is fixed)\n";
1205 for (UUSetMap::iterator I
= P2Rs
.begin(), E
= P2Rs
.end(); I
!= E
; ++I
) {
1206 dbgs() << '#' << I
->first
<< " -> ";
1207 dump_partition(dbgs(), I
->second
, *TRI
);
1212 bool Changed
= false;
1213 int Limit
= MaxHSDR
;
1215 for (UUSetMap::iterator I
= P2Rs
.begin(), E
= P2Rs
.end(); I
!= E
; ++I
) {
1218 if (Limit
>= 0 && Counter
>= Limit
)
1220 USet
&Part
= I
->second
;
1221 LLVM_DEBUG(dbgs() << "Calculating profit for partition #" << I
->first
1223 if (!isProfitable(Part
, IRM
))
1226 Changed
|= splitPartition(Part
);
// Factory function: returns a newly allocated HexagonSplitDoubleRegs pass.
// NOTE(review): the caller presumably takes ownership of the returned
// pointer -- confirm against the pass-manager conventions.
1232 FunctionPass
*llvm::createHexagonSplitDoubleRegs() {
1233 return new HexagonSplitDoubleRegs();