1 //===- HexagonConstExtenders.cpp ------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonInstrInfo.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/CodeGen/MachineDominators.h"
14 #include "llvm/CodeGen/MachineFunctionPass.h"
15 #include "llvm/CodeGen/MachineInstrBuilder.h"
16 #include "llvm/CodeGen/MachineRegisterInfo.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include "llvm/Pass.h"
25 #define DEBUG_TYPE "hexagon-cext-opt"
29 static cl::opt
<unsigned> CountThreshold("hexagon-cext-threshold",
30 cl::init(3), cl::Hidden
, cl::ZeroOrMore
,
31 cl::desc("Minimum number of extenders to trigger replacement"));
33 static cl::opt
<unsigned> ReplaceLimit("hexagon-cext-limit", cl::init(0),
34 cl::Hidden
, cl::ZeroOrMore
, cl::desc("Maximum number of replacements"));
37 void initializeHexagonConstExtendersPass(PassRegistry
&);
38 FunctionPass
*createHexagonConstExtenders();
41 static int32_t adjustUp(int32_t V
, uint8_t A
, uint8_t O
) {
42 assert(isPowerOf2_32(A
));
43 int32_t U
= (V
& -A
) + O
;
44 return U
>= V
? U
: U
+A
;
47 static int32_t adjustDown(int32_t V
, uint8_t A
, uint8_t O
) {
48 assert(isPowerOf2_32(A
));
49 int32_t U
= (V
& -A
) + O
;
50 return U
<= V
? U
: U
-A
;
55 // The range of values between Min and Max that are of form Align*N+Offset,
56 // for some integer N. Min and Max are required to be of that form as well,
57 // except in the case of an empty range.
58 int32_t Min
= INT_MIN
, Max
= INT_MAX
;
62 OffsetRange() = default;
63 OffsetRange(int32_t L
, int32_t H
, uint8_t A
, uint8_t O
= 0)
64 : Min(L
), Max(H
), Align(A
), Offset(O
) {}
65 OffsetRange
&intersect(OffsetRange A
) {
70 if (Offset
>= A
.Offset
&& (Offset
- A
.Offset
) % A
.Align
== 0) {
71 Min
= adjustUp(std::max(Min
, A
.Min
), Align
, Offset
);
72 Max
= adjustDown(std::min(Max
, A
.Max
), Align
, Offset
);
74 // Make an empty range.
78 // Canonicalize empty ranges.
80 std::tie(Min
, Max
, Align
) = std::make_tuple(0, -1, 1);
83 OffsetRange
&shift(int32_t S
) {
86 Offset
= (Offset
+S
) % Align
;
89 OffsetRange
&extendBy(int32_t D
) {
90 // If D < 0, extend Min, otherwise extend Max.
91 assert(D
% Align
== 0);
93 Min
= (INT_MIN
-D
< Min
) ? Min
+D
: INT_MIN
;
95 Max
= (INT_MAX
-D
> Max
) ? Max
+D
: INT_MAX
;
101 bool contains(int32_t V
) const {
102 return Min
<= V
&& V
<= Max
&& (V
-Offset
) % Align
== 0;
104 bool operator==(const OffsetRange
&R
) const {
105 return Min
== R
.Min
&& Max
== R
.Max
&& Align
== R
.Align
;
107 bool operator!=(const OffsetRange
&R
) const {
108 return !operator==(R
);
110 bool operator<(const OffsetRange
&R
) const {
115 return Align
< R
.Align
;
117 static OffsetRange
zero() { return {0, 0, 1}; }
122 Node(const OffsetRange
&R
) : MaxEnd(R
.Max
), Range(R
) {}
126 const OffsetRange
&Range
;
127 Node
*Left
= nullptr, *Right
= nullptr;
130 Node
*Root
= nullptr;
132 void add(const OffsetRange
&R
) {
135 void erase(const Node
*N
) {
136 Root
= remove(Root
, N
);
139 void order(SmallVectorImpl
<Node
*> &Seq
) const {
142 SmallVector
<Node
*,8> nodesWith(int32_t P
, bool CheckAlign
= true) {
143 SmallVector
<Node
*,8> Nodes
;
144 nodesWith(Root
, P
, CheckAlign
, Nodes
);
149 SmallVector
<Node
*,8> Nodes
;
151 for (Node
*N
: Nodes
)
156 void dump(const Node
*N
) const;
157 void order(Node
*N
, SmallVectorImpl
<Node
*> &Seq
) const;
158 void nodesWith(Node
*N
, int32_t P
, bool CheckA
,
159 SmallVectorImpl
<Node
*> &Seq
) const;
161 Node
*add(Node
*N
, const OffsetRange
&R
);
162 Node
*remove(Node
*N
, const Node
*D
);
163 Node
*rotateLeft(Node
*Lower
, Node
*Higher
);
164 Node
*rotateRight(Node
*Lower
, Node
*Higher
);
165 unsigned height(Node
*N
) {
166 return N
!= nullptr ? N
->Height
: 0;
168 Node
*update(Node
*N
) {
169 assert(N
!= nullptr);
170 N
->Height
= 1 + std::max(height(N
->Left
), height(N
->Right
));
172 N
->MaxEnd
= std::max(N
->MaxEnd
, N
->Left
->MaxEnd
);
174 N
->MaxEnd
= std::max(N
->MaxEnd
, N
->Right
->MaxEnd
);
177 Node
*rebalance(Node
*N
) {
178 assert(N
!= nullptr);
179 int32_t Balance
= height(N
->Right
) - height(N
->Left
);
181 return rotateRight(N
->Left
, N
);
183 return rotateLeft(N
->Right
, N
);
189 MachineBasicBlock
*Block
= nullptr;
190 MachineBasicBlock::iterator At
;
192 Loc(MachineBasicBlock
*B
, MachineBasicBlock::iterator It
)
194 if (B
->end() == It
) {
197 assert(It
->getParent() == B
);
198 Pos
= std::distance(B
->begin(), It
);
201 bool operator<(Loc A
) const {
202 if (Block
!= A
.Block
)
203 return Block
->getNumber() < A
.Block
->getNumber();
206 return Pos
!= -1 && Pos
< A
.Pos
;
212 struct HexagonConstExtenders
: public MachineFunctionPass
{
214 HexagonConstExtenders() : MachineFunctionPass(ID
) {}
216 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
217 AU
.addRequired
<MachineDominatorTree
>();
218 AU
.addPreserved
<MachineDominatorTree
>();
219 MachineFunctionPass::getAnalysisUsage(AU
);
222 StringRef
getPassName() const override
{
223 return "Hexagon constant-extender optimization";
225 bool runOnMachineFunction(MachineFunction
&MF
) override
;
229 Register() = default;
230 Register(unsigned R
, unsigned S
) : Reg(R
), Sub(S
) {}
231 Register(const MachineOperand
&Op
)
232 : Reg(Op
.getReg()), Sub(Op
.getSubReg()) {}
233 Register
&operator=(const MachineOperand
&Op
) {
236 Sub
= Op
.getSubReg();
237 } else if (Op
.isFI()) {
238 Reg
= TargetRegisterInfo::index2StackSlot(Op
.getIndex());
242 bool isVReg() const {
243 return Reg
!= 0 && !TargetRegisterInfo::isStackSlot(Reg
) &&
244 TargetRegisterInfo::isVirtualRegister(Reg
);
246 bool isSlot() const {
247 return Reg
!= 0 && TargetRegisterInfo::isStackSlot(Reg
);
249 operator MachineOperand() const {
251 return MachineOperand::CreateReg(Reg
, /*Def*/false, /*Imp*/false,
252 /*Kill*/false, /*Dead*/false, /*Undef*/false,
253 /*EarlyClobber*/false, Sub
);
254 if (TargetRegisterInfo::isStackSlot(Reg
)) {
255 int FI
= TargetRegisterInfo::stackSlot2Index(Reg
);
256 return MachineOperand::CreateFI(FI
);
258 llvm_unreachable("Cannot create MachineOperand");
260 bool operator==(Register R
) const { return Reg
== R
.Reg
&& Sub
== R
.Sub
; }
261 bool operator!=(Register R
) const { return !operator==(R
); }
262 bool operator<(Register R
) const {
264 return Reg
< R
.Reg
|| (Reg
== R
.Reg
&& Sub
< R
.Sub
);
266 unsigned Reg
= 0, Sub
= 0;
270 // A subexpression in which the extender is used. In general, this
271 // represents an expression where adding D to the extender will be
272 // equivalent to adding D to the expression as a whole. In other
273 // words, expr(add(##V,D) = add(expr(##V),D).
275 // The original motivation for this are the io/ur addressing modes,
276 // where the offset is extended. Consider the io example:
277 // In memw(Rs+##V), the ##V could be replaced by a register Rt to
278 // form the rr mode: memw(Rt+Rs<<0). In such case, however, the
279 // register Rt must have exactly the value of ##V. If there was
280 // another instruction memw(Rs+##V+4), it would need a different Rt.
281 // Now, if Rt was initialized as "##V+Rs<<0", both of these
282 // instructions could use the same Rt, just with different offsets.
283 // Here it's clear that "initializer+4" should be the same as if
284 // the offset 4 was added to the ##V in the initializer.
286 // The only kinds of expressions that support the requirement of
287 // commuting with addition are addition and subtraction from ##V.
288 // Include shifting the Rs to account for the ur addressing mode:
296 ExtExpr(Register RS
, bool NG
, unsigned SH
) : Rs(RS
), S(SH
), Neg(NG
) {}
297 // Expression is trivial if it does not modify the extender.
298 bool trivial() const {
301 bool operator==(const ExtExpr
&Ex
) const {
302 return Rs
== Ex
.Rs
&& S
== Ex
.S
&& Neg
== Ex
.Neg
;
304 bool operator!=(const ExtExpr
&Ex
) const {
305 return !operator==(Ex
);
307 bool operator<(const ExtExpr
&Ex
) const {
312 return !Neg
&& Ex
.Neg
;
317 MachineInstr
*UseMI
= nullptr;
318 unsigned OpNum
= -1u;
319 // The subexpression in which the extender is used (e.g. address
322 // Optional register that is assigned the value of Expr.
324 // Def means that the output of the instruction may differ from the
325 // original by a constant c, and that the difference can be corrected
326 // by adding/subtracting c in all users of the defined register.
329 MachineOperand
&getOp() {
330 return UseMI
->getOperand(OpNum
);
332 const MachineOperand
&getOp() const {
333 return UseMI
->getOperand(OpNum
);
339 const ConstantFP
*CFP
; // MO_FPImmediate
340 const char *SymbolName
; // MO_ExternalSymbol
341 const GlobalValue
*GV
; // MO_GlobalAddress
342 const BlockAddress
*BA
; // MO_BlockAddress
343 int64_t ImmVal
; // MO_Immediate, MO_TargetIndex,
344 // and MO_ConstantPoolIndex
346 unsigned Kind
; // Same as in MachineOperand.
347 unsigned char TF
; // TargetFlags.
349 ExtRoot(const MachineOperand
&Op
);
350 bool operator==(const ExtRoot
&ER
) const {
351 return Kind
== ER
.Kind
&& V
.ImmVal
== ER
.V
.ImmVal
;
353 bool operator!=(const ExtRoot
&ER
) const {
354 return !operator==(ER
);
356 bool operator<(const ExtRoot
&ER
) const;
359 struct ExtValue
: public ExtRoot
{
362 ExtValue(const MachineOperand
&Op
);
363 ExtValue(const ExtDesc
&ED
) : ExtValue(ED
.getOp()) {}
364 ExtValue(const ExtRoot
&ER
, int32_t Off
) : ExtRoot(ER
), Offset(Off
) {}
365 bool operator<(const ExtValue
&EV
) const;
366 bool operator==(const ExtValue
&EV
) const {
367 return ExtRoot(*this) == ExtRoot(EV
) && Offset
== EV
.Offset
;
369 bool operator!=(const ExtValue
&EV
) const {
370 return !operator==(EV
);
372 explicit operator MachineOperand() const;
375 using IndexList
= SetVector
<unsigned>;
376 using ExtenderInit
= std::pair
<ExtValue
, ExtExpr
>;
377 using AssignmentMap
= std::map
<ExtenderInit
, IndexList
>;
378 using LocDefList
= std::vector
<std::pair
<Loc
, IndexList
>>;
380 const HexagonInstrInfo
*HII
= nullptr;
381 const HexagonRegisterInfo
*HRI
= nullptr;
382 MachineDominatorTree
*MDT
= nullptr;
383 MachineRegisterInfo
*MRI
= nullptr;
384 std::vector
<ExtDesc
> Extenders
;
385 std::vector
<unsigned> NewRegs
;
387 bool isStoreImmediate(unsigned Opc
) const;
388 bool isRegOffOpcode(unsigned ExtOpc
) const ;
389 unsigned getRegOffOpcode(unsigned ExtOpc
) const;
390 unsigned getDirectRegReplacement(unsigned ExtOpc
) const;
391 OffsetRange
getOffsetRange(Register R
, const MachineInstr
&MI
) const;
392 OffsetRange
getOffsetRange(const ExtDesc
&ED
) const;
393 OffsetRange
getOffsetRange(Register Rd
) const;
395 void recordExtender(MachineInstr
&MI
, unsigned OpNum
);
396 void collectInstr(MachineInstr
&MI
);
397 void collect(MachineFunction
&MF
);
398 void assignInits(const ExtRoot
&ER
, unsigned Begin
, unsigned End
,
399 AssignmentMap
&IMap
);
400 void calculatePlacement(const ExtenderInit
&ExtI
, const IndexList
&Refs
,
402 Register
insertInitializer(Loc DefL
, const ExtenderInit
&ExtI
);
403 bool replaceInstrExact(const ExtDesc
&ED
, Register ExtR
);
404 bool replaceInstrExpr(const ExtDesc
&ED
, const ExtenderInit
&ExtI
,
405 Register ExtR
, int32_t &Diff
);
406 bool replaceInstr(unsigned Idx
, Register ExtR
, const ExtenderInit
&ExtI
);
407 bool replaceExtenders(const AssignmentMap
&IMap
);
409 unsigned getOperandIndex(const MachineInstr
&MI
,
410 const MachineOperand
&Op
) const;
411 const MachineOperand
&getPredicateOp(const MachineInstr
&MI
) const;
412 const MachineOperand
&getLoadResultOp(const MachineInstr
&MI
) const;
413 const MachineOperand
&getStoredValueOp(const MachineInstr
&MI
) const;
415 friend struct PrintRegister
;
416 friend struct PrintExpr
;
417 friend struct PrintInit
;
418 friend struct PrintIMap
;
419 friend raw_ostream
&operator<< (raw_ostream
&OS
,
420 const struct PrintRegister
&P
);
421 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintExpr
&P
);
422 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintInit
&P
);
423 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtDesc
&ED
);
424 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtRoot
&ER
);
425 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtValue
&EV
);
426 friend raw_ostream
&operator<< (raw_ostream
&OS
, const OffsetRange
&OR
);
427 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintIMap
&P
);
430 using HCE
= HexagonConstExtenders
;
432 LLVM_ATTRIBUTE_UNUSED
433 raw_ostream
&operator<< (raw_ostream
&OS
, const OffsetRange
&OR
) {
436 OS
<< '[' << OR
.Min
<< ',' << OR
.Max
<< "]a" << unsigned(OR
.Align
)
437 << '+' << unsigned(OR
.Offset
);
441 struct PrintRegister
{
442 PrintRegister(HCE::Register R
, const HexagonRegisterInfo
&I
)
445 const HexagonRegisterInfo
&HRI
;
448 LLVM_ATTRIBUTE_UNUSED
449 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegister
&P
) {
451 OS
<< printReg(P
.Rs
.Reg
, &P
.HRI
, P
.Rs
.Sub
);
458 PrintExpr(const HCE::ExtExpr
&E
, const HexagonRegisterInfo
&I
)
460 const HCE::ExtExpr
&Ex
;
461 const HexagonRegisterInfo
&HRI
;
464 LLVM_ATTRIBUTE_UNUSED
465 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintExpr
&P
) {
466 OS
<< "## " << (P
.Ex
.Neg
? "- " : "+ ");
467 if (P
.Ex
.Rs
.Reg
!= 0)
468 OS
<< printReg(P
.Ex
.Rs
.Reg
, &P
.HRI
, P
.Ex
.Rs
.Sub
);
471 OS
<< " << " << P
.Ex
.S
;
476 PrintInit(const HCE::ExtenderInit
&EI
, const HexagonRegisterInfo
&I
)
477 : ExtI(EI
), HRI(I
) {}
478 const HCE::ExtenderInit
&ExtI
;
479 const HexagonRegisterInfo
&HRI
;
482 LLVM_ATTRIBUTE_UNUSED
483 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintInit
&P
) {
484 OS
<< '[' << P
.ExtI
.first
<< ", "
485 << PrintExpr(P
.ExtI
.second
, P
.HRI
) << ']';
489 LLVM_ATTRIBUTE_UNUSED
490 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtDesc
&ED
) {
491 assert(ED
.OpNum
!= -1u);
492 const MachineBasicBlock
&MBB
= *ED
.getOp().getParent()->getParent();
493 const MachineFunction
&MF
= *MBB
.getParent();
494 const auto &HRI
= *MF
.getSubtarget
<HexagonSubtarget
>().getRegisterInfo();
495 OS
<< "bb#" << MBB
.getNumber() << ": ";
497 OS
<< printReg(ED
.Rd
.Reg
, &HRI
, ED
.Rd
.Sub
);
500 OS
<< " = " << PrintExpr(ED
.Expr
, HRI
);
506 LLVM_ATTRIBUTE_UNUSED
507 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtRoot
&ER
) {
509 case MachineOperand::MO_Immediate
:
510 OS
<< "imm:" << ER
.V
.ImmVal
;
512 case MachineOperand::MO_FPImmediate
:
513 OS
<< "fpi:" << *ER
.V
.CFP
;
515 case MachineOperand::MO_ExternalSymbol
:
516 OS
<< "sym:" << *ER
.V
.SymbolName
;
518 case MachineOperand::MO_GlobalAddress
:
519 OS
<< "gad:" << ER
.V
.GV
->getName();
521 case MachineOperand::MO_BlockAddress
:
522 OS
<< "blk:" << *ER
.V
.BA
;
524 case MachineOperand::MO_TargetIndex
:
525 OS
<< "tgi:" << ER
.V
.ImmVal
;
527 case MachineOperand::MO_ConstantPoolIndex
:
528 OS
<< "cpi:" << ER
.V
.ImmVal
;
530 case MachineOperand::MO_JumpTableIndex
:
531 OS
<< "jti:" << ER
.V
.ImmVal
;
534 OS
<< "???:" << ER
.V
.ImmVal
;
540 LLVM_ATTRIBUTE_UNUSED
541 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtValue
&EV
) {
542 OS
<< HCE::ExtRoot(EV
) << " off:" << EV
.Offset
;
547 PrintIMap(const HCE::AssignmentMap
&M
, const HexagonRegisterInfo
&I
)
549 const HCE::AssignmentMap
&IMap
;
550 const HexagonRegisterInfo
&HRI
;
553 LLVM_ATTRIBUTE_UNUSED
554 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintIMap
&P
) {
556 for (const std::pair
<HCE::ExtenderInit
,HCE::IndexList
> &Q
: P
.IMap
) {
557 OS
<< " " << PrintInit(Q
.first
, P
.HRI
) << " -> {";
558 for (unsigned I
: Q
.second
)
567 INITIALIZE_PASS_BEGIN(HexagonConstExtenders
, "hexagon-cext-opt",
568 "Hexagon constant-extender optimization", false, false)
569 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
570 INITIALIZE_PASS_END(HexagonConstExtenders
, "hexagon-cext-opt",
571 "Hexagon constant-extender optimization", false, false)
573 static unsigned ReplaceCounter
= 0;
578 LLVM_DUMP_METHOD
void RangeTree::dump() const {
579 dbgs() << "Root: " << Root
<< '\n';
584 LLVM_DUMP_METHOD
void RangeTree::dump(const Node
*N
) const {
585 dbgs() << "Node: " << N
<< '\n';
586 dbgs() << " Height: " << N
->Height
<< '\n';
587 dbgs() << " Count: " << N
->Count
<< '\n';
588 dbgs() << " MaxEnd: " << N
->MaxEnd
<< '\n';
589 dbgs() << " Range: " << N
->Range
<< '\n';
590 dbgs() << " Left: " << N
->Left
<< '\n';
591 dbgs() << " Right: " << N
->Right
<< "\n\n";
600 void RangeTree::order(Node
*N
, SmallVectorImpl
<Node
*> &Seq
) const {
605 order(N
->Right
, Seq
);
608 void RangeTree::nodesWith(Node
*N
, int32_t P
, bool CheckA
,
609 SmallVectorImpl
<Node
*> &Seq
) const {
610 if (N
== nullptr || N
->MaxEnd
< P
)
612 nodesWith(N
->Left
, P
, CheckA
, Seq
);
613 if (N
->Range
.Min
<= P
) {
614 if ((CheckA
&& N
->Range
.contains(P
)) || (!CheckA
&& P
<= N
->Range
.Max
))
616 nodesWith(N
->Right
, P
, CheckA
, Seq
);
620 RangeTree::Node
*RangeTree::add(Node
*N
, const OffsetRange
&R
) {
630 N
->Left
= add(N
->Left
, R
);
632 N
->Right
= add(N
->Right
, R
);
633 return rebalance(update(N
));
636 RangeTree::Node
*RangeTree::remove(Node
*N
, const Node
*D
) {
637 assert(N
!= nullptr);
640 assert(N
->Range
!= D
->Range
&& "N and D should not be equal");
641 if (D
->Range
< N
->Range
)
642 N
->Left
= remove(N
->Left
, D
);
644 N
->Right
= remove(N
->Right
, D
);
645 return rebalance(update(N
));
648 // We got to the node we need to remove. If any of its children are
649 // missing, simply replace it with the other child.
650 if (N
->Left
== nullptr || N
->Right
== nullptr)
651 return (N
->Left
== nullptr) ? N
->Right
: N
->Left
;
653 // Find the rightmost child of N->Left, remove it and plug it in place
658 M
->Left
= remove(N
->Left
, M
);
660 return rebalance(update(M
));
663 RangeTree::Node
*RangeTree::rotateLeft(Node
*Lower
, Node
*Higher
) {
664 assert(Higher
->Right
== Lower
);
665 // The Lower node is on the right from Higher. Make sure that Lower's
666 // balance is greater to the right. Otherwise the rotation will create
667 // an unbalanced tree again.
668 if (height(Lower
->Left
) > height(Lower
->Right
))
669 Lower
= rotateRight(Lower
->Left
, Lower
);
670 assert(height(Lower
->Left
) <= height(Lower
->Right
));
671 Higher
->Right
= Lower
->Left
;
673 Lower
->Left
= Higher
;
678 RangeTree::Node
*RangeTree::rotateRight(Node
*Lower
, Node
*Higher
) {
679 assert(Higher
->Left
== Lower
);
680 // The Lower node is on the left from Higher. Make sure that Lower's
681 // balance is greater to the left. Otherwise the rotation will create
682 // an unbalanced tree again.
683 if (height(Lower
->Left
) < height(Lower
->Right
))
684 Lower
= rotateLeft(Lower
->Right
, Lower
);
685 assert(height(Lower
->Left
) >= height(Lower
->Right
));
686 Higher
->Left
= Lower
->Right
;
688 Lower
->Right
= Higher
;
694 HCE::ExtRoot::ExtRoot(const MachineOperand
&Op
) {
695 // Always store ImmVal, since it's the field used for comparisons.
698 ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root).
699 else if (Op
.isFPImm())
700 V
.CFP
= Op
.getFPImm();
701 else if (Op
.isSymbol())
702 V
.SymbolName
= Op
.getSymbolName();
703 else if (Op
.isGlobal())
704 V
.GV
= Op
.getGlobal();
705 else if (Op
.isBlockAddress())
706 V
.BA
= Op
.getBlockAddress();
707 else if (Op
.isCPI() || Op
.isTargetIndex() || Op
.isJTI())
708 V
.ImmVal
= Op
.getIndex();
710 llvm_unreachable("Unexpected operand type");
713 TF
= Op
.getTargetFlags();
716 bool HCE::ExtRoot::operator< (const HCE::ExtRoot
&ER
) const {
718 return Kind
< ER
.Kind
;
720 case MachineOperand::MO_Immediate
:
721 case MachineOperand::MO_TargetIndex
:
722 case MachineOperand::MO_ConstantPoolIndex
:
723 case MachineOperand::MO_JumpTableIndex
:
724 return V
.ImmVal
< ER
.V
.ImmVal
;
725 case MachineOperand::MO_FPImmediate
: {
726 const APFloat
&ThisF
= V
.CFP
->getValueAPF();
727 const APFloat
&OtherF
= ER
.V
.CFP
->getValueAPF();
728 return ThisF
.bitcastToAPInt().ult(OtherF
.bitcastToAPInt());
730 case MachineOperand::MO_ExternalSymbol
:
731 return StringRef(V
.SymbolName
) < StringRef(ER
.V
.SymbolName
);
732 case MachineOperand::MO_GlobalAddress
:
733 // Do not use GUIDs, since they depend on the source path. Moving the
734 // source file to a different directory could cause different GUID
735 // values for a pair of given symbols. These symbols could then compare
736 // "less" in one directory, but "greater" in another.
737 assert(!V
.GV
->getName().empty() && !ER
.V
.GV
->getName().empty());
738 return V
.GV
->getName() < ER
.V
.GV
->getName();
739 case MachineOperand::MO_BlockAddress
: {
740 const BasicBlock
*ThisB
= V
.BA
->getBasicBlock();
741 const BasicBlock
*OtherB
= ER
.V
.BA
->getBasicBlock();
742 assert(ThisB
->getParent() == OtherB
->getParent());
743 const Function
&F
= *ThisB
->getParent();
744 return std::distance(F
.begin(), ThisB
->getIterator()) <
745 std::distance(F
.begin(), OtherB
->getIterator());
748 return V
.ImmVal
< ER
.V
.ImmVal
;
751 HCE::ExtValue::ExtValue(const MachineOperand
&Op
) : ExtRoot(Op
) {
753 Offset
= Op
.getImm();
754 else if (Op
.isFPImm() || Op
.isJTI())
756 else if (Op
.isSymbol() || Op
.isGlobal() || Op
.isBlockAddress() ||
757 Op
.isCPI() || Op
.isTargetIndex())
758 Offset
= Op
.getOffset();
760 llvm_unreachable("Unexpected operand type");
763 bool HCE::ExtValue::operator< (const HCE::ExtValue
&EV
) const {
764 const ExtRoot
&ER
= *this;
765 if (!(ER
== ExtRoot(EV
)))
767 return Offset
< EV
.Offset
;
770 HCE::ExtValue::operator MachineOperand() const {
772 case MachineOperand::MO_Immediate
:
773 return MachineOperand::CreateImm(V
.ImmVal
+ Offset
);
774 case MachineOperand::MO_FPImmediate
:
776 return MachineOperand::CreateFPImm(V
.CFP
);
777 case MachineOperand::MO_ExternalSymbol
:
779 return MachineOperand::CreateES(V
.SymbolName
, TF
);
780 case MachineOperand::MO_GlobalAddress
:
781 return MachineOperand::CreateGA(V
.GV
, Offset
, TF
);
782 case MachineOperand::MO_BlockAddress
:
783 return MachineOperand::CreateBA(V
.BA
, Offset
, TF
);
784 case MachineOperand::MO_TargetIndex
:
785 return MachineOperand::CreateTargetIndex(V
.ImmVal
, Offset
, TF
);
786 case MachineOperand::MO_ConstantPoolIndex
:
787 return MachineOperand::CreateCPI(V
.ImmVal
, Offset
, TF
);
788 case MachineOperand::MO_JumpTableIndex
:
790 return MachineOperand::CreateJTI(V
.ImmVal
, TF
);
792 llvm_unreachable("Unhandled kind");
796 bool HCE::isStoreImmediate(unsigned Opc
) const {
798 case Hexagon::S4_storeirbt_io
:
799 case Hexagon::S4_storeirbf_io
:
800 case Hexagon::S4_storeirht_io
:
801 case Hexagon::S4_storeirhf_io
:
802 case Hexagon::S4_storeirit_io
:
803 case Hexagon::S4_storeirif_io
:
804 case Hexagon::S4_storeirb_io
:
805 case Hexagon::S4_storeirh_io
:
806 case Hexagon::S4_storeiri_io
:
814 bool HCE::isRegOffOpcode(unsigned Opc
) const {
816 case Hexagon::L2_loadrub_io
:
817 case Hexagon::L2_loadrb_io
:
818 case Hexagon::L2_loadruh_io
:
819 case Hexagon::L2_loadrh_io
:
820 case Hexagon::L2_loadri_io
:
821 case Hexagon::L2_loadrd_io
:
822 case Hexagon::L2_loadbzw2_io
:
823 case Hexagon::L2_loadbzw4_io
:
824 case Hexagon::L2_loadbsw2_io
:
825 case Hexagon::L2_loadbsw4_io
:
826 case Hexagon::L2_loadalignh_io
:
827 case Hexagon::L2_loadalignb_io
:
828 case Hexagon::L2_ploadrubt_io
:
829 case Hexagon::L2_ploadrubf_io
:
830 case Hexagon::L2_ploadrbt_io
:
831 case Hexagon::L2_ploadrbf_io
:
832 case Hexagon::L2_ploadruht_io
:
833 case Hexagon::L2_ploadruhf_io
:
834 case Hexagon::L2_ploadrht_io
:
835 case Hexagon::L2_ploadrhf_io
:
836 case Hexagon::L2_ploadrit_io
:
837 case Hexagon::L2_ploadrif_io
:
838 case Hexagon::L2_ploadrdt_io
:
839 case Hexagon::L2_ploadrdf_io
:
840 case Hexagon::S2_storerb_io
:
841 case Hexagon::S2_storerh_io
:
842 case Hexagon::S2_storerf_io
:
843 case Hexagon::S2_storeri_io
:
844 case Hexagon::S2_storerd_io
:
845 case Hexagon::S2_pstorerbt_io
:
846 case Hexagon::S2_pstorerbf_io
:
847 case Hexagon::S2_pstorerht_io
:
848 case Hexagon::S2_pstorerhf_io
:
849 case Hexagon::S2_pstorerft_io
:
850 case Hexagon::S2_pstorerff_io
:
851 case Hexagon::S2_pstorerit_io
:
852 case Hexagon::S2_pstorerif_io
:
853 case Hexagon::S2_pstorerdt_io
:
854 case Hexagon::S2_pstorerdf_io
:
855 case Hexagon::A2_addi
:
863 unsigned HCE::getRegOffOpcode(unsigned ExtOpc
) const {
864 // If there exists an instruction that takes a register and offset,
865 // that corresponds to the ExtOpc, return it, otherwise return 0.
866 using namespace Hexagon
;
868 case A2_tfrsi
: return A2_addi
;
872 const MCInstrDesc
&D
= HII
->get(ExtOpc
);
873 if (D
.mayLoad() || D
.mayStore()) {
874 uint64_t F
= D
.TSFlags
;
875 unsigned AM
= (F
>> HexagonII::AddrModePos
) & HexagonII::AddrModeMask
;
877 case HexagonII::Absolute
:
878 case HexagonII::AbsoluteSet
:
879 case HexagonII::BaseLongOffset
:
883 case L4_loadrub_ur
: return L2_loadrub_io
;
886 case L4_loadrb_ur
: return L2_loadrb_io
;
889 case L4_loadruh_ur
: return L2_loadruh_io
;
892 case L4_loadrh_ur
: return L2_loadrh_io
;
895 case L4_loadri_ur
: return L2_loadri_io
;
898 case L4_loadrd_ur
: return L2_loadrd_io
;
900 case L4_loadbzw2_ur
: return L2_loadbzw2_io
;
902 case L4_loadbzw4_ur
: return L2_loadbzw4_io
;
904 case L4_loadbsw2_ur
: return L2_loadbsw2_io
;
906 case L4_loadbsw4_ur
: return L2_loadbsw4_io
;
907 case L4_loadalignh_ap
:
908 case L4_loadalignh_ur
: return L2_loadalignh_io
;
909 case L4_loadalignb_ap
:
910 case L4_loadalignb_ur
: return L2_loadalignb_io
;
911 case L4_ploadrubt_abs
: return L2_ploadrubt_io
;
912 case L4_ploadrubf_abs
: return L2_ploadrubf_io
;
913 case L4_ploadrbt_abs
: return L2_ploadrbt_io
;
914 case L4_ploadrbf_abs
: return L2_ploadrbf_io
;
915 case L4_ploadruht_abs
: return L2_ploadruht_io
;
916 case L4_ploadruhf_abs
: return L2_ploadruhf_io
;
917 case L4_ploadrht_abs
: return L2_ploadrht_io
;
918 case L4_ploadrhf_abs
: return L2_ploadrhf_io
;
919 case L4_ploadrit_abs
: return L2_ploadrit_io
;
920 case L4_ploadrif_abs
: return L2_ploadrif_io
;
921 case L4_ploadrdt_abs
: return L2_ploadrdt_io
;
922 case L4_ploadrdf_abs
: return L2_ploadrdf_io
;
925 case S4_storerb_ur
: return S2_storerb_io
;
928 case S4_storerh_ur
: return S2_storerh_io
;
931 case S4_storerf_ur
: return S2_storerf_io
;
934 case S4_storeri_ur
: return S2_storeri_io
;
937 case S4_storerd_ur
: return S2_storerd_io
;
938 case S4_pstorerbt_abs
: return S2_pstorerbt_io
;
939 case S4_pstorerbf_abs
: return S2_pstorerbf_io
;
940 case S4_pstorerht_abs
: return S2_pstorerht_io
;
941 case S4_pstorerhf_abs
: return S2_pstorerhf_io
;
942 case S4_pstorerft_abs
: return S2_pstorerft_io
;
943 case S4_pstorerff_abs
: return S2_pstorerff_io
;
944 case S4_pstorerit_abs
: return S2_pstorerit_io
;
945 case S4_pstorerif_abs
: return S2_pstorerif_io
;
946 case S4_pstorerdt_abs
: return S2_pstorerdt_io
;
947 case S4_pstorerdf_abs
: return S2_pstorerdf_io
;
952 case HexagonII::BaseImmOffset
:
953 if (!isStoreImmediate(ExtOpc
))
963 unsigned HCE::getDirectRegReplacement(unsigned ExtOpc
) const {
965 case Hexagon::A2_addi
: return Hexagon::A2_add
;
966 case Hexagon::A2_andir
: return Hexagon::A2_and
;
967 case Hexagon::A2_combineii
: return Hexagon::A4_combineri
;
968 case Hexagon::A2_orir
: return Hexagon::A2_or
;
969 case Hexagon::A2_paddif
: return Hexagon::A2_paddf
;
970 case Hexagon::A2_paddit
: return Hexagon::A2_paddt
;
971 case Hexagon::A2_subri
: return Hexagon::A2_sub
;
972 case Hexagon::A2_tfrsi
: return TargetOpcode::COPY
;
973 case Hexagon::A4_cmpbeqi
: return Hexagon::A4_cmpbeq
;
974 case Hexagon::A4_cmpbgti
: return Hexagon::A4_cmpbgt
;
975 case Hexagon::A4_cmpbgtui
: return Hexagon::A4_cmpbgtu
;
976 case Hexagon::A4_cmpheqi
: return Hexagon::A4_cmpheq
;
977 case Hexagon::A4_cmphgti
: return Hexagon::A4_cmphgt
;
978 case Hexagon::A4_cmphgtui
: return Hexagon::A4_cmphgtu
;
979 case Hexagon::A4_combineii
: return Hexagon::A4_combineir
;
980 case Hexagon::A4_combineir
: return TargetOpcode::REG_SEQUENCE
;
981 case Hexagon::A4_combineri
: return TargetOpcode::REG_SEQUENCE
;
982 case Hexagon::A4_rcmpeqi
: return Hexagon::A4_rcmpeq
;
983 case Hexagon::A4_rcmpneqi
: return Hexagon::A4_rcmpneq
;
984 case Hexagon::C2_cmoveif
: return Hexagon::A2_tfrpf
;
985 case Hexagon::C2_cmoveit
: return Hexagon::A2_tfrpt
;
986 case Hexagon::C2_cmpeqi
: return Hexagon::C2_cmpeq
;
987 case Hexagon::C2_cmpgti
: return Hexagon::C2_cmpgt
;
988 case Hexagon::C2_cmpgtui
: return Hexagon::C2_cmpgtu
;
989 case Hexagon::C2_muxii
: return Hexagon::C2_muxir
;
990 case Hexagon::C2_muxir
: return Hexagon::C2_mux
;
991 case Hexagon::C2_muxri
: return Hexagon::C2_mux
;
992 case Hexagon::C4_cmpltei
: return Hexagon::C4_cmplte
;
993 case Hexagon::C4_cmplteui
: return Hexagon::C4_cmplteu
;
994 case Hexagon::C4_cmpneqi
: return Hexagon::C4_cmpneq
;
995 case Hexagon::M2_accii
: return Hexagon::M2_acci
; // T -> T
997 case Hexagon::M2_macsip
: return Hexagon::M2_maci
; // T -> T
998 case Hexagon::M2_mpysin
: return Hexagon::M2_mpyi
;
999 case Hexagon::M2_mpysip
: return Hexagon::M2_mpyi
;
1000 case Hexagon::M2_mpysmi
: return Hexagon::M2_mpyi
;
1001 case Hexagon::M2_naccii
: return Hexagon::M2_nacci
; // T -> T
1002 case Hexagon::M4_mpyri_addi
: return Hexagon::M4_mpyri_addr
;
1003 case Hexagon::M4_mpyri_addr
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1004 case Hexagon::M4_mpyrr_addi
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1005 case Hexagon::S4_addaddi
: return Hexagon::M2_acci
; // _ -> T
1006 case Hexagon::S4_addi_asl_ri
: return Hexagon::S2_asl_i_r_acc
; // T -> T
1007 case Hexagon::S4_addi_lsr_ri
: return Hexagon::S2_lsr_i_r_acc
; // T -> T
1008 case Hexagon::S4_andi_asl_ri
: return Hexagon::S2_asl_i_r_and
; // T -> T
1009 case Hexagon::S4_andi_lsr_ri
: return Hexagon::S2_lsr_i_r_and
; // T -> T
1010 case Hexagon::S4_ori_asl_ri
: return Hexagon::S2_asl_i_r_or
; // T -> T
1011 case Hexagon::S4_ori_lsr_ri
: return Hexagon::S2_lsr_i_r_or
; // T -> T
1012 case Hexagon::S4_subaddi
: return Hexagon::M2_subacc
; // _ -> T
1013 case Hexagon::S4_subi_asl_ri
: return Hexagon::S2_asl_i_r_nac
; // T -> T
1014 case Hexagon::S4_subi_lsr_ri
: return Hexagon::S2_lsr_i_r_nac
; // T -> T
1016 // Store-immediates:
1017 case Hexagon::S4_storeirbf_io
: return Hexagon::S2_pstorerbf_io
;
1018 case Hexagon::S4_storeirb_io
: return Hexagon::S2_storerb_io
;
1019 case Hexagon::S4_storeirbt_io
: return Hexagon::S2_pstorerbt_io
;
1020 case Hexagon::S4_storeirhf_io
: return Hexagon::S2_pstorerhf_io
;
1021 case Hexagon::S4_storeirh_io
: return Hexagon::S2_storerh_io
;
1022 case Hexagon::S4_storeirht_io
: return Hexagon::S2_pstorerht_io
;
1023 case Hexagon::S4_storeirif_io
: return Hexagon::S2_pstorerif_io
;
1024 case Hexagon::S4_storeiri_io
: return Hexagon::S2_storeri_io
;
1025 case Hexagon::S4_storeirit_io
: return Hexagon::S2_pstorerit_io
;
1033 // Return the allowable deviation from the current value of Rb (i.e. the
1034 // range of values that can be added to the current value) which the
1035 // instruction MI can accommodate.
1036 // The instruction MI is a user of register Rb, which is defined via an
1037 // extender. It may be possible for MI to be tweaked to work for a register
1038 // defined with a slightly different value. For example
1039 // ... = L2_loadrub_io Rb, 1
1040 // can be modifed to be
1041 // ... = L2_loadrub_io Rb', 0
1043 // The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
1044 // for L2_loadrub with offset 0. That means that Rb could be replaced with
1045 // Rc, where Rc-Rb belongs to [Min+1, Max+1].
1046 OffsetRange
HCE::getOffsetRange(Register Rb
, const MachineInstr
&MI
) const {
1047 unsigned Opc
= MI
.getOpcode();
1048 // Instructions that are constant-extended may be replaced with something
1049 // else that no longer offers the same range as the original.
1050 if (!isRegOffOpcode(Opc
) || HII
->isConstExtended(MI
))
1051 return OffsetRange::zero();
1053 if (Opc
== Hexagon::A2_addi
) {
1054 const MachineOperand
&Op1
= MI
.getOperand(1), &Op2
= MI
.getOperand(2);
1055 if (Rb
!= Register(Op1
) || !Op2
.isImm())
1056 return OffsetRange::zero();
1057 OffsetRange R
= { -(1<<15)+1, (1<<15)-1, 1 };
1058 return R
.shift(Op2
.getImm());
1061 // HII::getBaseAndOffsetPosition returns the increment position as "offset".
1062 if (HII
->isPostIncrement(MI
))
1063 return OffsetRange::zero();
1065 const MCInstrDesc
&D
= HII
->get(Opc
);
1066 assert(D
.mayLoad() || D
.mayStore());
1068 unsigned BaseP
, OffP
;
1069 if (!HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
) ||
1070 Rb
!= Register(MI
.getOperand(BaseP
)) ||
1071 !MI
.getOperand(OffP
).isImm())
1072 return OffsetRange::zero();
1074 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1075 HexagonII::MemAccesSizeMask
;
1076 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1077 unsigned L
= Log2_32(A
);
1078 unsigned S
= 10+L
; // sint11_L
1079 int32_t Min
= -alignDown((1<<S
)-1, A
);
1081 // The range will be shifted by Off. To prefer non-negative offsets,
1082 // adjust Max accordingly.
1083 int32_t Off
= MI
.getOperand(OffP
).getImm();
1084 int32_t Max
= Off
>= 0 ? 0 : -Off
;
1086 OffsetRange R
= { Min
, Max
, A
};
1087 return R
.shift(Off
);
1090 // Return the allowable deviation from the current value of the extender ED,
1091 // for which the instruction corresponding to ED can be modified without
1092 // using an extender.
1093 // The instruction uses the extender directly. It will be replaced with
1094 // another instruction, say MJ, where the extender will be replaced with a
1095 // register. MJ can allow some variability with respect to the value of
1096 // that register, as is the case with indexed memory instructions.
1097 OffsetRange
HCE::getOffsetRange(const ExtDesc
&ED
) const {
1098 // The only way that there can be a non-zero range available is if
1099 // the instruction using ED will be converted to an indexed memory
1101 unsigned IdxOpc
= getRegOffOpcode(ED
.UseMI
->getOpcode());
1104 return OffsetRange::zero();
1105 case Hexagon::A2_addi
: // s16
1106 return { -32767, 32767, 1 };
1107 case Hexagon::A2_subri
: // s10
1108 return { -511, 511, 1 };
1111 if (!ED
.UseMI
->mayLoad() && !ED
.UseMI
->mayStore())
1112 return OffsetRange::zero();
1113 const MCInstrDesc
&D
= HII
->get(IdxOpc
);
1114 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1115 HexagonII::MemAccesSizeMask
;
1116 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1117 unsigned L
= Log2_32(A
);
1118 unsigned S
= 10+L
; // sint11_L
1119 int32_t Min
= -alignDown((1<<S
)-1, A
);
1120 int32_t Max
= 0; // Force non-negative offsets.
1121 return { Min
, Max
, A
};
1124 // Get the allowable deviation from the current value of Rd by checking
1126 OffsetRange
HCE::getOffsetRange(Register Rd
) const {
1128 for (const MachineOperand
&Op
: MRI
->use_operands(Rd
.Reg
)) {
1129 // Make sure that the register being used by this operand is identical
1130 // to the register that was defined: using a different subregister
1131 // precludes any non-trivial range.
1132 if (Rd
!= Register(Op
))
1133 return OffsetRange::zero();
1134 Range
.intersect(getOffsetRange(Rd
, *Op
.getParent()));
1139 void HCE::recordExtender(MachineInstr
&MI
, unsigned OpNum
) {
1140 unsigned Opc
= MI
.getOpcode();
1144 bool IsLoad
= MI
.mayLoad();
1145 bool IsStore
= MI
.mayStore();
1147 // Fixed stack slots have negative indexes, and they cannot be used
1148 // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
1149 // unfortunate, but should not be a frequent thing.
1150 for (MachineOperand
&Op
: MI
.operands())
1151 if (Op
.isFI() && Op
.getIndex() < 0)
1154 if (IsLoad
|| IsStore
) {
1155 unsigned AM
= HII
->getAddrMode(MI
);
1157 // (Re: ##Off + Rb<<S) = Rd: ##Val
1158 case HexagonII::Absolute
: // (__: ## + __<<_)
1160 case HexagonII::AbsoluteSet
: // (Rd: ## + __<<_)
1161 ED
.Rd
= MI
.getOperand(OpNum
-1);
1164 case HexagonII::BaseImmOffset
: // (__: ## + Rs<<0)
1165 // Store-immediates are treated as non-memory operations, since
1166 // it's the value being stored that is extended (as opposed to
1167 // a part of the address).
1168 if (!isStoreImmediate(Opc
))
1169 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1171 case HexagonII::BaseLongOffset
: // (__: ## + Rs<<S)
1172 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-2);
1173 ED
.Expr
.S
= MI
.getOperand(OpNum
-1).getImm();
1176 llvm_unreachable("Unhandled memory instruction");
1180 case Hexagon::A2_tfrsi
: // (Rd: ## + __<<_)
1181 ED
.Rd
= MI
.getOperand(0);
1184 case Hexagon::A2_combineii
: // (Rd: ## + __<<_)
1185 case Hexagon::A4_combineir
:
1186 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_hi
};
1189 case Hexagon::A4_combineri
: // (Rd: ## + __<<_)
1190 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_lo
};
1193 case Hexagon::A2_addi
: // (Rd: ## + Rs<<0)
1194 ED
.Rd
= MI
.getOperand(0);
1195 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1197 case Hexagon::M2_accii
: // (__: ## + Rs<<0)
1198 case Hexagon::M2_naccii
:
1199 case Hexagon::S4_addaddi
:
1200 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1202 case Hexagon::A2_subri
: // (Rd: ## - Rs<<0)
1203 ED
.Rd
= MI
.getOperand(0);
1204 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1207 case Hexagon::S4_subaddi
: // (__: ## - Rs<<0)
1208 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1211 default: // (__: ## + __<<_)
1218 // Ignore unnamed globals.
1219 ExtRoot
ER(ED
.getOp());
1220 if (ER
.Kind
== MachineOperand::MO_GlobalAddress
)
1221 if (ER
.V
.GV
->getName().empty())
1223 Extenders
.push_back(ED
);
1226 void HCE::collectInstr(MachineInstr
&MI
) {
1227 if (!HII
->isConstExtended(MI
))
1230 // Skip some non-convertible instructions.
1231 unsigned Opc
= MI
.getOpcode();
1233 case Hexagon::M2_macsin
: // There is no Rx -= mpyi(Rs,Rt).
1234 case Hexagon::C4_addipc
:
1235 case Hexagon::S4_or_andi
:
1236 case Hexagon::S4_or_andix
:
1237 case Hexagon::S4_or_ori
:
1240 recordExtender(MI
, HII
->getCExtOpNum(MI
));
1243 void HCE::collect(MachineFunction
&MF
) {
1245 for (MachineBasicBlock
&MBB
: MF
) {
1246 // Skip unreachable blocks.
1247 if (MBB
.getNumber() == -1)
1249 for (MachineInstr
&MI
: MBB
)
1254 void HCE::assignInits(const ExtRoot
&ER
, unsigned Begin
, unsigned End
,
1255 AssignmentMap
&IMap
) {
1256 // Sanity check: make sure that all extenders in the range [Begin..End)
1257 // share the same root ER.
1258 for (unsigned I
= Begin
; I
!= End
; ++I
)
1259 assert(ER
== ExtRoot(Extenders
[I
].getOp()));
1261 // Construct the list of ranges, such that for each P in Ranges[I],
1262 // a register Reg = ER+P can be used in place of Extender[I]. If the
1263 // instruction allows, uses in the form of Reg+Off are considered
1264 // (here, Off = required_value - P).
1265 std::vector
<OffsetRange
> Ranges(End
-Begin
);
1267 // For each extender that is a def, visit all uses of the defined register,
1268 // and produce an offset range that works for all uses. The def doesn't
1269 // have to be checked, because it can become dead if all uses can be updated
1270 // to use a different reg/offset.
1271 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1272 const ExtDesc
&ED
= Extenders
[I
];
1276 LLVM_DEBUG(dbgs() << " =" << I
<< ". " << EV
<< " " << ED
<< '\n');
1277 assert(ED
.Rd
.Reg
!= 0);
1278 Ranges
[I
-Begin
] = getOffsetRange(ED
.Rd
).shift(EV
.Offset
);
1279 // A2_tfrsi is a special case: it will be replaced with A2_addi, which
1280 // has a 16-bit signed offset. This means that A2_tfrsi not only has a
1281 // range coming from its uses, but also from the fact that its replacement
1282 // has a range as well.
1283 if (ED
.UseMI
->getOpcode() == Hexagon::A2_tfrsi
) {
1284 int32_t D
= alignDown(32767, Ranges
[I
-Begin
].Align
); // XXX hardcoded
1285 Ranges
[I
-Begin
].extendBy(-D
).extendBy(D
);
1289 // Visit all non-def extenders. For each one, determine the offset range
1290 // available for it.
1291 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1292 const ExtDesc
&ED
= Extenders
[I
];
1296 LLVM_DEBUG(dbgs() << " " << I
<< ". " << EV
<< " " << ED
<< '\n');
1297 OffsetRange Dev
= getOffsetRange(ED
);
1298 Ranges
[I
-Begin
].intersect(Dev
.shift(EV
.Offset
));
1301 // Here for each I there is a corresponding Range[I]. Construct the
1302 // inverse map, that to each range will assign the set of indexes in
1303 // [Begin..End) that this range corresponds to.
1304 std::map
<OffsetRange
, IndexList
> RangeMap
;
1305 for (unsigned I
= Begin
; I
!= End
; ++I
)
1306 RangeMap
[Ranges
[I
-Begin
]].insert(I
);
1309 dbgs() << "Ranges\n";
1310 for (unsigned I
= Begin
; I
!= End
; ++I
)
1311 dbgs() << " " << I
<< ". " << Ranges
[I
-Begin
] << '\n';
1312 dbgs() << "RangeMap\n";
1313 for (auto &P
: RangeMap
) {
1314 dbgs() << " " << P
.first
<< " ->";
1315 for (unsigned I
: P
.second
)
1321 // Select the definition points, and generate the assignment between
1322 // these points and the uses.
1324 // For each candidate offset, keep a pair CandData consisting of
1325 // the total number of ranges containing that candidate, and the
1326 // vector of corresponding RangeTree nodes.
1327 using CandData
= std::pair
<unsigned, SmallVector
<RangeTree::Node
*,8>>;
1328 std::map
<int32_t, CandData
> CandMap
;
1331 for (const OffsetRange
&R
: Ranges
)
1333 SmallVector
<RangeTree::Node
*,8> Nodes
;
1336 auto MaxAlign
= [](const SmallVectorImpl
<RangeTree::Node
*> &Nodes
,
1337 uint8_t Align
, uint8_t Offset
) {
1338 for (RangeTree::Node
*N
: Nodes
) {
1339 if (N
->Range
.Align
<= Align
|| N
->Range
.Offset
< Offset
)
1341 if ((N
->Range
.Offset
- Offset
) % Align
!= 0)
1343 Align
= N
->Range
.Align
;
1344 Offset
= N
->Range
.Offset
;
1346 return std::make_pair(Align
, Offset
);
1349 // Construct the set of all potential definition points from the endpoints
1350 // of the ranges. If a given endpoint also belongs to a different range,
1351 // but with a higher alignment, also consider the more-highly-aligned
1352 // value of this endpoint.
1353 std::set
<int32_t> CandSet
;
1354 for (RangeTree::Node
*N
: Nodes
) {
1355 const OffsetRange
&R
= N
->Range
;
1356 auto P0
= MaxAlign(Tree
.nodesWith(R
.Min
, false), R
.Align
, R
.Offset
);
1357 CandSet
.insert(R
.Min
);
1358 if (R
.Align
< P0
.first
)
1359 CandSet
.insert(adjustUp(R
.Min
, P0
.first
, P0
.second
));
1360 auto P1
= MaxAlign(Tree
.nodesWith(R
.Max
, false), R
.Align
, R
.Offset
);
1361 CandSet
.insert(R
.Max
);
1362 if (R
.Align
< P1
.first
)
1363 CandSet
.insert(adjustDown(R
.Max
, P1
.first
, P1
.second
));
1366 // Build the assignment map: candidate C -> { list of extender indexes }.
1367 // This has to be done iteratively:
1368 // - pick the candidate that covers the maximum number of extenders,
1369 // - add the candidate to the map,
1370 // - remove the extenders from the pool.
1372 using CMap
= std::map
<int32_t,unsigned>;
1374 for (auto It
= CandSet
.begin(), Et
= CandSet
.end(); It
!= Et
; ) {
1375 auto &&V
= Tree
.nodesWith(*It
);
1376 unsigned N
= std::accumulate(V
.begin(), V
.end(), 0u,
1377 [](unsigned Acc
, const RangeTree::Node
*N
) {
1378 return Acc
+ N
->Count
;
1381 Counts
.insert({*It
, N
});
1382 It
= (N
!= 0) ? std::next(It
) : CandSet
.erase(It
);
1387 // Find the best candidate with respect to the number of extenders covered.
1388 auto BestIt
= std::max_element(Counts
.begin(), Counts
.end(),
1389 [](const CMap::value_type
&A
, const CMap::value_type
&B
) {
1390 return A
.second
< B
.second
||
1391 (A
.second
== B
.second
&& A
< B
);
1393 int32_t Best
= BestIt
->first
;
1394 ExtValue
BestV(ER
, Best
);
1395 for (RangeTree::Node
*N
: Tree
.nodesWith(Best
)) {
1396 for (unsigned I
: RangeMap
[N
->Range
])
1397 IMap
[{BestV
,Extenders
[I
].Expr
}].insert(I
);
1402 LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap
, *HRI
));
1404 // There is some ambiguity in what initializer should be used, if the
1405 // descriptor's subexpression is non-trivial: it can be the entire
1406 // subexpression (which is what has been done so far), or it can be
1407 // the extender's value itself, if all corresponding extenders have the
1408 // exact value of the initializer (i.e. require offset of 0).
1410 // To reduce the number of initializers, merge such special cases.
1411 for (std::pair
<const ExtenderInit
,IndexList
> &P
: IMap
) {
1412 // Skip trivial initializers.
1413 if (P
.first
.second
.trivial())
1415 // If the corresponding trivial initializer does not exist, skip this
1417 const ExtValue
&EV
= P
.first
.first
;
1418 AssignmentMap::iterator F
= IMap
.find({EV
, ExtExpr()});
1419 if (F
== IMap
.end())
1422 // Finally, check if all extenders have the same value as the initializer.
1423 // Make sure that extenders that are a part of a stack address are not
1424 // merged with those that aren't. Stack addresses need an offset field
1425 // (to be used by frame index elimination), while non-stack expressions
1426 // can be replaced with forms (such as rr) that do not have such a field.
1429 // Collected 3 extenders
1430 // =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def
1431 // 0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0
1432 // 1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0
1434 // 0. [-756,267]a1+0
1435 // 1. [-756,267]a1+0
1436 // 2. [201,65735]a1+0
1438 // [-756,267]a1+0 -> 0 1
1439 // [201,65735]a1+0 -> 2
1440 // IMap (before fixup) = {
1441 // [imm:0 off:267, ## + __ << 0] -> { 2 }
1442 // [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 }
1444 // IMap (after fixup) = {
1445 // [imm:0 off:267, ## + __ << 0] -> { 2 0 1 }
1446 // [imm:0 off:267, ## + SS#1 << 0] -> { }
1448 // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0]
1449 // %12:intregs = A2_tfrsi 267
1452 // %12:intregs = A2_tfrsi 267
1453 // S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
1456 // if (p0.new) memb(r0+r29<<#4) = r2
1458 bool IsStack
= any_of(F
->second
, [this](unsigned I
) {
1459 return Extenders
[I
].Expr
.Rs
.isSlot();
1461 auto SameValue
= [&EV
,this,IsStack
](unsigned I
) {
1462 const ExtDesc
&ED
= Extenders
[I
];
1463 return ED
.Expr
.Rs
.isSlot() == IsStack
&&
1464 ExtValue(ED
).Offset
== EV
.Offset
;
1466 if (all_of(P
.second
, SameValue
)) {
1467 F
->second
.insert(P
.second
.begin(), P
.second
.end());
1472 LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap
, *HRI
));
1475 void HCE::calculatePlacement(const ExtenderInit
&ExtI
, const IndexList
&Refs
,
1480 // The placement calculation is somewhat simple right now: it finds a
1481 // single location for the def that dominates all refs. Since this may
1482 // place the def far from the uses, producing several locations for
1483 // defs that collectively dominate all refs could be better.
1484 // For now only do the single one.
1485 DenseSet
<MachineBasicBlock
*> Blocks
;
1486 DenseSet
<MachineInstr
*> RefMIs
;
1487 const ExtDesc
&ED0
= Extenders
[Refs
[0]];
1488 MachineBasicBlock
*DomB
= ED0
.UseMI
->getParent();
1489 RefMIs
.insert(ED0
.UseMI
);
1490 Blocks
.insert(DomB
);
1491 for (unsigned i
= 1, e
= Refs
.size(); i
!= e
; ++i
) {
1492 const ExtDesc
&ED
= Extenders
[Refs
[i
]];
1493 MachineBasicBlock
*MBB
= ED
.UseMI
->getParent();
1494 RefMIs
.insert(ED
.UseMI
);
1495 DomB
= MDT
->findNearestCommonDominator(DomB
, MBB
);
1500 // The block DomB should be dominated by the def of each register used
1501 // in the initializer.
1502 Register Rs
= ExtI
.second
.Rs
; // Only one reg allowed now.
1503 const MachineInstr
*DefI
= Rs
.isVReg() ? MRI
->getVRegDef(Rs
.Reg
) : nullptr;
1505 // This should be guaranteed given that the entire expression is used
1506 // at each instruction in Refs. Add an assertion just in case.
1507 assert(!DefI
|| MDT
->dominates(DefI
->getParent(), DomB
));
1510 MachineBasicBlock::iterator It
;
1511 if (Blocks
.count(DomB
)) {
1512 // Try to find the latest possible location for the def.
1513 MachineBasicBlock::iterator End
= DomB
->end();
1514 for (It
= DomB
->begin(); It
!= End
; ++It
)
1515 if (RefMIs
.count(&*It
))
1517 assert(It
!= End
&& "Should have found a ref in DomB");
1519 // DomB does not contain any refs.
1520 It
= DomB
->getFirstTerminator();
1522 Loc
DefLoc(DomB
, It
);
1523 Defs
.emplace_back(DefLoc
, Refs
);
1526 HCE::Register
HCE::insertInitializer(Loc DefL
, const ExtenderInit
&ExtI
) {
1527 unsigned DefR
= MRI
->createVirtualRegister(&Hexagon::IntRegsRegClass
);
1528 MachineBasicBlock
&MBB
= *DefL
.Block
;
1529 MachineBasicBlock::iterator At
= DefL
.At
;
1530 DebugLoc dl
= DefL
.Block
->findDebugLoc(DefL
.At
);
1531 const ExtValue
&EV
= ExtI
.first
;
1532 MachineOperand
ExtOp(EV
);
1534 const ExtExpr
&Ex
= ExtI
.second
;
1535 const MachineInstr
*InitI
= nullptr;
1537 if (Ex
.Rs
.isSlot()) {
1538 assert(Ex
.S
== 0 && "Cannot have a shift of a stack slot");
1539 assert(!Ex
.Neg
&& "Cannot subtract a stack slot");
1540 // DefR = PS_fi Rb,##EV
1541 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::PS_fi
), DefR
)
1542 .add(MachineOperand(Ex
.Rs
))
1545 assert((Ex
.Rs
.Reg
== 0 || Ex
.Rs
.isVReg()) && "Expecting virtual register");
1548 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_tfrsi
), DefR
)
1550 } else if (Ex
.S
== 0) {
1552 // DefR = sub(##EV,Rb)
1553 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_subri
), DefR
)
1555 .add(MachineOperand(Ex
.Rs
));
1557 // DefR = add(Rb,##EV)
1558 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_addi
), DefR
)
1559 .add(MachineOperand(Ex
.Rs
))
1563 unsigned NewOpc
= Ex
.Neg
? Hexagon::S4_subi_asl_ri
1564 : Hexagon::S4_addi_asl_ri
;
1565 // DefR = add(##EV,asl(Rb,S))
1566 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
), DefR
)
1568 .add(MachineOperand(Ex
.Rs
))
1575 LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB
.getNumber()
1576 << " for initializer: " << PrintInit(ExtI
, *HRI
) << "\n "
1581 // Replace the extender at index Idx with the register ExtR.
1582 bool HCE::replaceInstrExact(const ExtDesc
&ED
, Register ExtR
) {
1583 MachineInstr
&MI
= *ED
.UseMI
;
1584 MachineBasicBlock
&MBB
= *MI
.getParent();
1585 MachineBasicBlock::iterator At
= MI
.getIterator();
1586 DebugLoc dl
= MI
.getDebugLoc();
1587 unsigned ExtOpc
= MI
.getOpcode();
1589 // With a few exceptions, direct replacement amounts to creating an
1590 // instruction with a corresponding register opcode, with all operands
1591 // the same, except for the register used in place of the extender.
1592 unsigned RegOpc
= getDirectRegReplacement(ExtOpc
);
1594 if (RegOpc
== TargetOpcode::REG_SEQUENCE
) {
1595 if (ExtOpc
== Hexagon::A4_combineri
)
1596 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1597 .add(MI
.getOperand(0))
1598 .add(MI
.getOperand(1))
1599 .addImm(Hexagon::isub_hi
)
1600 .add(MachineOperand(ExtR
))
1601 .addImm(Hexagon::isub_lo
);
1602 else if (ExtOpc
== Hexagon::A4_combineir
)
1603 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1604 .add(MI
.getOperand(0))
1605 .add(MachineOperand(ExtR
))
1606 .addImm(Hexagon::isub_hi
)
1607 .add(MI
.getOperand(2))
1608 .addImm(Hexagon::isub_lo
);
1610 llvm_unreachable("Unexpected opcode became REG_SEQUENCE");
1614 if (ExtOpc
== Hexagon::C2_cmpgei
|| ExtOpc
== Hexagon::C2_cmpgeui
) {
1615 unsigned NewOpc
= ExtOpc
== Hexagon::C2_cmpgei
? Hexagon::C2_cmplt
1616 : Hexagon::C2_cmpltu
;
1617 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1618 .add(MI
.getOperand(0))
1619 .add(MachineOperand(ExtR
))
1620 .add(MI
.getOperand(1));
1626 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1627 unsigned RegN
= ED
.OpNum
;
1628 // Copy all operands except the one that has the extender.
1629 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1631 MIB
.add(MI
.getOperand(i
));
1633 MIB
.add(MachineOperand(ExtR
));
1635 MIB
.cloneMemRefs(MI
);
1640 if ((MI
.mayLoad() || MI
.mayStore()) && !isStoreImmediate(ExtOpc
)) {
1641 // For memory instructions, there is an asymmetry in the addressing
1642 // modes. Addressing modes allowing extenders can be replaced with
1643 // addressing modes that use registers, but the order of operands
1644 // (or even their number) may be different.
1646 // BaseImmOffset (io) -> BaseRegOffset (rr)
1647 // BaseLongOffset (ur) -> BaseRegOffset (rr)
1648 unsigned RegOpc
, Shift
;
1649 unsigned AM
= HII
->getAddrMode(MI
);
1650 if (AM
== HexagonII::BaseImmOffset
) {
1651 RegOpc
= HII
->changeAddrMode_io_rr(ExtOpc
);
1653 } else if (AM
== HexagonII::BaseLongOffset
) {
1654 // Loads: Rd = L4_loadri_ur Rs, S, ##
1655 // Stores: S4_storeri_ur Rs, S, ##, Rt
1656 RegOpc
= HII
->changeAddrMode_ur_rr(ExtOpc
);
1657 Shift
= MI
.getOperand(MI
.mayLoad() ? 2 : 1).getImm();
1659 llvm_unreachable("Unexpected addressing mode");
1662 if (RegOpc
== -1u) {
1663 dbgs() << "\nExtOpc: " << HII
->getName(ExtOpc
) << " has no rr version\n";
1664 llvm_unreachable("No corresponding rr instruction");
1668 unsigned BaseP
, OffP
;
1669 HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
);
1671 // Build an rr instruction: (RegOff + RegBase<<0)
1672 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1673 // First, add the def for loads.
1675 MIB
.add(getLoadResultOp(MI
));
1676 // Handle possible predication.
1677 if (HII
->isPredicated(MI
))
1678 MIB
.add(getPredicateOp(MI
));
1679 // Build the address.
1680 MIB
.add(MachineOperand(ExtR
)); // RegOff
1681 MIB
.add(MI
.getOperand(BaseP
)); // RegBase
1682 MIB
.addImm(Shift
); // << Shift
1683 // Add the stored value for stores.
1685 MIB
.add(getStoredValueOp(MI
));
1686 MIB
.cloneMemRefs(MI
);
1692 dbgs() << '\n' << MI
;
1694 llvm_unreachable("Unhandled exact replacement");
1698 // Replace the extender ED with a form corresponding to the initializer ExtI.
1699 bool HCE::replaceInstrExpr(const ExtDesc
&ED
, const ExtenderInit
&ExtI
,
1700 Register ExtR
, int32_t &Diff
) {
1701 MachineInstr
&MI
= *ED
.UseMI
;
1702 MachineBasicBlock
&MBB
= *MI
.getParent();
1703 MachineBasicBlock::iterator At
= MI
.getIterator();
1704 DebugLoc dl
= MI
.getDebugLoc();
1705 unsigned ExtOpc
= MI
.getOpcode();
1707 if (ExtOpc
== Hexagon::A2_tfrsi
) {
1708 // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces
1709 // another range. One range is the one that's common to all tfrsi's uses,
1710 // this one is the range of immediates in A2_addi. When calculating ranges,
1711 // the addi's 16-bit argument was included, so now we need to make it such
1712 // that the produced value is in the range for the uses alone.
1713 // Most of the time, simply adding Diff will make the addi produce exact
1714 // result, but if Diff is outside of the 16-bit range, some adjustment
1716 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1717 assert(IdxOpc
== Hexagon::A2_addi
);
1719 // Clamp Diff to the 16 bit range.
1720 int32_t D
= isInt
<16>(Diff
) ? Diff
: (Diff
> 0 ? 32767 : -32768);
1722 // Split Diff into two values: one that is close to min/max int16,
1723 // and the other being the rest, and such that both have the same
1724 // "alignment" as Diff.
1726 OffsetRange R
= getOffsetRange(MI
.getOperand(0));
1727 uint32_t A
= std::min
<uint32_t>(R
.Align
, 1u << countTrailingZeros(UD
));
1730 BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
))
1731 .add(MI
.getOperand(0))
1732 .add(MachineOperand(ExtR
))
1736 // Make sure the output is within allowable range for uses.
1737 // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
1738 // not DefV - Ext, as the getOffsetRange would calculate.
1739 OffsetRange Uses
= getOffsetRange(MI
.getOperand(0));
1740 if (!Uses
.contains(-Diff
))
1741 dbgs() << "Diff: " << -Diff
<< " out of range " << Uses
1743 assert(Uses
.contains(-Diff
));
1749 const ExtValue
&EV
= ExtI
.first
; (void)EV
;
1750 const ExtExpr
&Ex
= ExtI
.second
; (void)Ex
;
1752 if (ExtOpc
== Hexagon::A2_addi
|| ExtOpc
== Hexagon::A2_subri
) {
1753 // If addi/subri are replaced with the exactly matching initializer,
1754 // they amount to COPY.
1755 // Check that the initializer is an exact match (for simplicity).
1757 bool IsAddi
= ExtOpc
== Hexagon::A2_addi
;
1758 const MachineOperand
&RegOp
= MI
.getOperand(IsAddi
? 1 : 2);
1759 const MachineOperand
&ImmOp
= MI
.getOperand(IsAddi
? 2 : 1);
1760 assert(Ex
.Rs
== RegOp
&& EV
== ImmOp
&& Ex
.Neg
!= IsAddi
&&
1761 "Initializer mismatch");
1763 BuildMI(MBB
, At
, dl
, HII
->get(TargetOpcode::COPY
))
1764 .add(MI
.getOperand(0))
1765 .add(MachineOperand(ExtR
));
1770 if (ExtOpc
== Hexagon::M2_accii
|| ExtOpc
== Hexagon::M2_naccii
||
1771 ExtOpc
== Hexagon::S4_addaddi
|| ExtOpc
== Hexagon::S4_subaddi
) {
1772 // M2_accii: add(Rt,add(Rs,V)) (tied)
1773 // M2_naccii: sub(Rt,add(Rs,V))
1774 // S4_addaddi: add(Rt,add(Rs,V))
1775 // S4_subaddi: add(Rt,sub(V,Rs))
1776 // Check that Rs and V match the initializer expression. The Rs+V is the
1777 // combination that is considered "subexpression" for V, although Rx+V
1778 // would also be valid.
1780 bool IsSub
= ExtOpc
== Hexagon::S4_subaddi
;
1781 Register Rs
= MI
.getOperand(IsSub
? 3 : 2);
1782 ExtValue V
= MI
.getOperand(IsSub
? 2 : 3);
1783 assert(EV
== V
&& Rs
== Ex
.Rs
&& IsSub
== Ex
.Neg
&& "Initializer mismatch");
1785 unsigned NewOpc
= ExtOpc
== Hexagon::M2_naccii
? Hexagon::A2_sub
1787 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1788 .add(MI
.getOperand(0))
1789 .add(MI
.getOperand(1))
1790 .add(MachineOperand(ExtR
));
1795 if (MI
.mayLoad() || MI
.mayStore()) {
1796 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1797 assert(IdxOpc
&& "Expecting indexed opcode");
1798 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
));
1799 // Construct the new indexed instruction.
1800 // First, add the def for loads.
1802 MIB
.add(getLoadResultOp(MI
));
1803 // Handle possible predication.
1804 if (HII
->isPredicated(MI
))
1805 MIB
.add(getPredicateOp(MI
));
1806 // Build the address.
1807 MIB
.add(MachineOperand(ExtR
));
1809 // Add the stored value for stores.
1811 MIB
.add(getStoredValueOp(MI
));
1812 MIB
.cloneMemRefs(MI
);
1818 dbgs() << '\n' << PrintInit(ExtI
, *HRI
) << " " << MI
;
1820 llvm_unreachable("Unhandled expr replacement");
1824 bool HCE::replaceInstr(unsigned Idx
, Register ExtR
, const ExtenderInit
&ExtI
) {
1825 if (ReplaceLimit
.getNumOccurrences()) {
1826 if (ReplaceLimit
<= ReplaceCounter
)
1830 const ExtDesc
&ED
= Extenders
[Idx
];
1831 assert((!ED
.IsDef
|| ED
.Rd
.Reg
!= 0) && "Missing Rd for def");
1832 const ExtValue
&DefV
= ExtI
.first
;
1833 assert(ExtRoot(ExtValue(ED
)) == ExtRoot(DefV
) && "Extender root mismatch");
1834 const ExtExpr
&DefEx
= ExtI
.second
;
1837 int32_t Diff
= EV
.Offset
- DefV
.Offset
;
1838 const MachineInstr
&MI
= *ED
.UseMI
;
1839 LLVM_DEBUG(dbgs() << __func__
<< " Idx:" << Idx
<< " ExtR:"
1840 << PrintRegister(ExtR
, *HRI
) << " Diff:" << Diff
<< '\n');
1842 // These two addressing modes must be converted into indexed forms
1843 // regardless of what the initializer looks like.
1844 bool IsAbs
= false, IsAbsSet
= false;
1845 if (MI
.mayLoad() || MI
.mayStore()) {
1846 unsigned AM
= HII
->getAddrMode(MI
);
1847 IsAbs
= AM
== HexagonII::Absolute
;
1848 IsAbsSet
= AM
== HexagonII::AbsoluteSet
;
1851 // If it's a def, remember all operands that need to be updated.
1852 // If ED is a def, and Diff is not 0, then all uses of the register Rd
1853 // defined by ED must be in the form (Rd, imm), i.e. the immediate offset
1854 // must follow the Rd in the operand list.
1855 std::vector
<std::pair
<MachineInstr
*,unsigned>> RegOps
;
1856 if (ED
.IsDef
&& Diff
!= 0) {
1857 for (MachineOperand
&Op
: MRI
->use_operands(ED
.Rd
.Reg
)) {
1858 MachineInstr
&UI
= *Op
.getParent();
1859 RegOps
.push_back({&UI
, getOperandIndex(UI
, Op
)});
1863 // Replace the instruction.
1864 bool Replaced
= false;
1865 if (Diff
== 0 && DefEx
.trivial() && !IsAbs
&& !IsAbsSet
)
1866 Replaced
= replaceInstrExact(ED
, ExtR
);
1868 Replaced
= replaceInstrExpr(ED
, ExtI
, ExtR
, Diff
);
1870 if (Diff
!= 0 && Replaced
&& ED
.IsDef
) {
1871 // Update offsets of the def's uses.
1872 for (std::pair
<MachineInstr
*,unsigned> P
: RegOps
) {
1873 unsigned J
= P
.second
;
1874 assert(P
.first
->getNumOperands() > J
+1 &&
1875 P
.first
->getOperand(J
+1).isImm());
1876 MachineOperand
&ImmOp
= P
.first
->getOperand(J
+1);
1877 ImmOp
.setImm(ImmOp
.getImm() + Diff
);
1879 // If it was an absolute-set instruction, the "set" part has been removed.
1880 // ExtR will now be the register with the extended value, and since all
1881 // users of Rd have been updated, all that needs to be done is to replace
1884 assert(ED
.Rd
.Sub
== 0 && ExtR
.Sub
== 0);
1885 MRI
->replaceRegWith(ED
.Rd
.Reg
, ExtR
.Reg
);
1892 bool HCE::replaceExtenders(const AssignmentMap
&IMap
) {
1894 bool Changed
= false;
1896 for (const std::pair
<ExtenderInit
,IndexList
> &P
: IMap
) {
1897 const IndexList
&Idxs
= P
.second
;
1898 if (Idxs
.size() < CountThreshold
)
1902 calculatePlacement(P
.first
, Idxs
, Defs
);
1903 for (const std::pair
<Loc
,IndexList
> &Q
: Defs
) {
1904 Register DefR
= insertInitializer(Q
.first
, P
.first
);
1905 NewRegs
.push_back(DefR
.Reg
);
1906 for (unsigned I
: Q
.second
)
1907 Changed
|= replaceInstr(I
, DefR
, P
.first
);
1913 unsigned HCE::getOperandIndex(const MachineInstr
&MI
,
1914 const MachineOperand
&Op
) const {
1915 for (unsigned i
= 0, n
= MI
.getNumOperands(); i
!= n
; ++i
)
1916 if (&MI
.getOperand(i
) == &Op
)
1918 llvm_unreachable("Not an operand of MI");
1921 const MachineOperand
&HCE::getPredicateOp(const MachineInstr
&MI
) const {
1922 assert(HII
->isPredicated(MI
));
1923 for (const MachineOperand
&Op
: MI
.operands()) {
1924 if (!Op
.isReg() || !Op
.isUse() ||
1925 MRI
->getRegClass(Op
.getReg()) != &Hexagon::PredRegsRegClass
)
1927 assert(Op
.getSubReg() == 0 && "Predicate register with a subregister");
1930 llvm_unreachable("Predicate operand not found");
1933 const MachineOperand
&HCE::getLoadResultOp(const MachineInstr
&MI
) const {
1934 assert(MI
.mayLoad());
1935 return MI
.getOperand(0);
1938 const MachineOperand
&HCE::getStoredValueOp(const MachineInstr
&MI
) const {
1939 assert(MI
.mayStore());
1940 return MI
.getOperand(MI
.getNumExplicitOperands()-1);
1943 bool HCE::runOnMachineFunction(MachineFunction
&MF
) {
1944 if (skipFunction(MF
.getFunction()))
1946 if (MF
.getFunction().hasPersonalityFn()) {
1947 LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF
.getName()
1948 << " due to exception handling\n");
1951 LLVM_DEBUG(MF
.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
1953 HII
= MF
.getSubtarget
<HexagonSubtarget
>().getInstrInfo();
1954 HRI
= MF
.getSubtarget
<HexagonSubtarget
>().getRegisterInfo();
1955 MDT
= &getAnalysis
<MachineDominatorTree
>();
1956 MRI
= &MF
.getRegInfo();
1960 llvm::sort(Extenders
, [this](const ExtDesc
&A
, const ExtDesc
&B
) {
1961 ExtValue
VA(A
), VB(B
);
1964 const MachineInstr
*MA
= A
.UseMI
;
1965 const MachineInstr
*MB
= B
.UseMI
;
1967 // If it's the same instruction, compare operand numbers.
1968 return A
.OpNum
< B
.OpNum
;
1971 const MachineBasicBlock
*BA
= MA
->getParent();
1972 const MachineBasicBlock
*BB
= MB
->getParent();
1973 assert(BA
->getNumber() != -1 && BB
->getNumber() != -1);
1975 return BA
->getNumber() < BB
->getNumber();
1976 return MDT
->dominates(MA
, MB
);
1979 bool Changed
= false;
1980 LLVM_DEBUG(dbgs() << "Collected " << Extenders
.size() << " extenders\n");
1981 for (unsigned I
= 0, E
= Extenders
.size(); I
!= E
; ) {
1983 const ExtRoot
&T
= Extenders
[B
].getOp();
1984 while (I
!= E
&& ExtRoot(Extenders
[I
].getOp()) == T
)
1988 assignInits(T
, B
, I
, IMap
);
1989 Changed
|= replaceExtenders(IMap
);
1994 MF
.print(dbgs() << "After " << getPassName() << '\n', nullptr);
1996 dbgs() << "No changes\n";
2001 FunctionPass
*llvm::createHexagonConstExtenders() {
2002 return new HexagonConstExtenders();