1 //===- HexagonConstExtenders.cpp ------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonInstrInfo.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/CodeGen/MachineDominators.h"
15 #include "llvm/CodeGen/MachineFunctionPass.h"
16 #include "llvm/CodeGen/MachineInstrBuilder.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/Register.h"
19 #include "llvm/InitializePasses.h"
20 #include "llvm/Pass.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/raw_ostream.h"
28 #define DEBUG_TYPE "hexagon-cext-opt"
32 static cl::opt
<unsigned> CountThreshold(
33 "hexagon-cext-threshold", cl::init(3), cl::Hidden
,
34 cl::desc("Minimum number of extenders to trigger replacement"));
36 static cl::opt
<unsigned>
37 ReplaceLimit("hexagon-cext-limit", cl::init(0), cl::Hidden
,
38 cl::desc("Maximum number of replacements"));
41 void initializeHexagonConstExtendersPass(PassRegistry
&);
42 FunctionPass
*createHexagonConstExtenders();
45 static int32_t adjustUp(int32_t V
, uint8_t A
, uint8_t O
) {
46 assert(isPowerOf2_32(A
));
47 int32_t U
= (V
& -A
) + O
;
48 return U
>= V
? U
: U
+A
;
51 static int32_t adjustDown(int32_t V
, uint8_t A
, uint8_t O
) {
52 assert(isPowerOf2_32(A
));
53 int32_t U
= (V
& -A
) + O
;
54 return U
<= V
? U
: U
-A
;
59 // The range of values between Min and Max that are of form Align*N+Offset,
60 // for some integer N. Min and Max are required to be of that form as well,
61 // except in the case of an empty range.
62 int32_t Min
= INT_MIN
, Max
= INT_MAX
;
66 OffsetRange() = default;
67 OffsetRange(int32_t L
, int32_t H
, uint8_t A
, uint8_t O
= 0)
68 : Min(L
), Max(H
), Align(A
), Offset(O
) {}
69 OffsetRange
&intersect(OffsetRange A
) {
74 if (Offset
>= A
.Offset
&& (Offset
- A
.Offset
) % A
.Align
== 0) {
75 Min
= adjustUp(std::max(Min
, A
.Min
), Align
, Offset
);
76 Max
= adjustDown(std::min(Max
, A
.Max
), Align
, Offset
);
78 // Make an empty range.
82 // Canonicalize empty ranges.
84 std::tie(Min
, Max
, Align
) = std::make_tuple(0, -1, 1);
87 OffsetRange
&shift(int32_t S
) {
90 Offset
= (Offset
+S
) % Align
;
93 OffsetRange
&extendBy(int32_t D
) {
94 // If D < 0, extend Min, otherwise extend Max.
95 assert(D
% Align
== 0);
97 Min
= (INT_MIN
-D
< Min
) ? Min
+D
: INT_MIN
;
99 Max
= (INT_MAX
-D
> Max
) ? Max
+D
: INT_MAX
;
105 bool contains(int32_t V
) const {
106 return Min
<= V
&& V
<= Max
&& (V
-Offset
) % Align
== 0;
108 bool operator==(const OffsetRange
&R
) const {
109 return Min
== R
.Min
&& Max
== R
.Max
&& Align
== R
.Align
;
111 bool operator!=(const OffsetRange
&R
) const {
112 return !operator==(R
);
114 bool operator<(const OffsetRange
&R
) const {
119 return Align
< R
.Align
;
121 static OffsetRange
zero() { return {0, 0, 1}; }
126 Node(const OffsetRange
&R
) : MaxEnd(R
.Max
), Range(R
) {}
130 const OffsetRange
&Range
;
131 Node
*Left
= nullptr, *Right
= nullptr;
134 Node
*Root
= nullptr;
136 void add(const OffsetRange
&R
) {
139 void erase(const Node
*N
) {
140 Root
= remove(Root
, N
);
143 void order(SmallVectorImpl
<Node
*> &Seq
) const {
146 SmallVector
<Node
*,8> nodesWith(int32_t P
, bool CheckAlign
= true) {
147 SmallVector
<Node
*,8> Nodes
;
148 nodesWith(Root
, P
, CheckAlign
, Nodes
);
153 SmallVector
<Node
*,8> Nodes
;
155 for (Node
*N
: Nodes
)
160 void dump(const Node
*N
) const;
161 void order(Node
*N
, SmallVectorImpl
<Node
*> &Seq
) const;
162 void nodesWith(Node
*N
, int32_t P
, bool CheckA
,
163 SmallVectorImpl
<Node
*> &Seq
) const;
165 Node
*add(Node
*N
, const OffsetRange
&R
);
166 Node
*remove(Node
*N
, const Node
*D
);
167 Node
*rotateLeft(Node
*Lower
, Node
*Higher
);
168 Node
*rotateRight(Node
*Lower
, Node
*Higher
);
169 unsigned height(Node
*N
) {
170 return N
!= nullptr ? N
->Height
: 0;
172 Node
*update(Node
*N
) {
173 assert(N
!= nullptr);
174 N
->Height
= 1 + std::max(height(N
->Left
), height(N
->Right
));
176 N
->MaxEnd
= std::max(N
->MaxEnd
, N
->Left
->MaxEnd
);
178 N
->MaxEnd
= std::max(N
->MaxEnd
, N
->Right
->MaxEnd
);
181 Node
*rebalance(Node
*N
) {
182 assert(N
!= nullptr);
183 int32_t Balance
= height(N
->Right
) - height(N
->Left
);
185 return rotateRight(N
->Left
, N
);
187 return rotateLeft(N
->Right
, N
);
193 MachineBasicBlock
*Block
= nullptr;
194 MachineBasicBlock::iterator At
;
196 Loc(MachineBasicBlock
*B
, MachineBasicBlock::iterator It
)
198 if (B
->end() == It
) {
201 assert(It
->getParent() == B
);
202 Pos
= std::distance(B
->begin(), It
);
205 bool operator<(Loc A
) const {
206 if (Block
!= A
.Block
)
207 return Block
->getNumber() < A
.Block
->getNumber();
210 return Pos
!= -1 && Pos
< A
.Pos
;
216 struct HexagonConstExtenders
: public MachineFunctionPass
{
218 HexagonConstExtenders() : MachineFunctionPass(ID
) {}
220 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
221 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
222 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
223 MachineFunctionPass::getAnalysisUsage(AU
);
226 StringRef
getPassName() const override
{
227 return "Hexagon constant-extender optimization";
229 bool runOnMachineFunction(MachineFunction
&MF
) override
;
233 Register() = default;
234 Register(llvm::Register R
, unsigned S
) : Reg(R
), Sub(S
) {}
235 Register(const MachineOperand
&Op
)
236 : Reg(Op
.getReg()), Sub(Op
.getSubReg()) {}
237 Register
&operator=(const MachineOperand
&Op
) {
240 Sub
= Op
.getSubReg();
241 } else if (Op
.isFI()) {
242 Reg
= llvm::Register::index2StackSlot(Op
.getIndex());
246 bool isVReg() const {
247 return Reg
!= 0 && !Reg
.isStack() && Reg
.isVirtual();
249 bool isSlot() const { return Reg
!= 0 && Reg
.isStack(); }
250 operator MachineOperand() const {
252 return MachineOperand::CreateReg(Reg
, /*Def*/false, /*Imp*/false,
253 /*Kill*/false, /*Dead*/false, /*Undef*/false,
254 /*EarlyClobber*/false, Sub
);
256 int FI
= llvm::Register::stackSlot2Index(Reg
);
257 return MachineOperand::CreateFI(FI
);
259 llvm_unreachable("Cannot create MachineOperand");
261 bool operator==(Register R
) const { return Reg
== R
.Reg
&& Sub
== R
.Sub
; }
262 bool operator!=(Register R
) const { return !operator==(R
); }
263 bool operator<(Register R
) const {
265 return Reg
< R
.Reg
|| (Reg
== R
.Reg
&& Sub
< R
.Sub
);
272 // A subexpression in which the extender is used. In general, this
273 // represents an expression where adding D to the extender will be
274 // equivalent to adding D to the expression as a whole. In other
275 // words, expr(add(##V,D) = add(expr(##V),D).
277 // The original motivation for this are the io/ur addressing modes,
278 // where the offset is extended. Consider the io example:
279 // In memw(Rs+##V), the ##V could be replaced by a register Rt to
280 // form the rr mode: memw(Rt+Rs<<0). In such case, however, the
281 // register Rt must have exactly the value of ##V. If there was
282 // another instruction memw(Rs+##V+4), it would need a different Rt.
283 // Now, if Rt was initialized as "##V+Rs<<0", both of these
284 // instructions could use the same Rt, just with different offsets.
285 // Here it's clear that "initializer+4" should be the same as if
286 // the offset 4 was added to the ##V in the initializer.
288 // The only kinds of expressions that support the requirement of
289 // commuting with addition are addition and subtraction from ##V.
290 // Include shifting the Rs to account for the ur addressing mode:
298 ExtExpr(Register RS
, bool NG
, unsigned SH
) : Rs(RS
), S(SH
), Neg(NG
) {}
299 // Expression is trivial if it does not modify the extender.
300 bool trivial() const {
303 bool operator==(const ExtExpr
&Ex
) const {
304 return Rs
== Ex
.Rs
&& S
== Ex
.S
&& Neg
== Ex
.Neg
;
306 bool operator!=(const ExtExpr
&Ex
) const {
307 return !operator==(Ex
);
309 bool operator<(const ExtExpr
&Ex
) const {
314 return !Neg
&& Ex
.Neg
;
319 MachineInstr
*UseMI
= nullptr;
320 unsigned OpNum
= -1u;
321 // The subexpression in which the extender is used (e.g. address
324 // Optional register that is assigned the value of Expr.
326 // Def means that the output of the instruction may differ from the
327 // original by a constant c, and that the difference can be corrected
328 // by adding/subtracting c in all users of the defined register.
331 MachineOperand
&getOp() {
332 return UseMI
->getOperand(OpNum
);
334 const MachineOperand
&getOp() const {
335 return UseMI
->getOperand(OpNum
);
341 const ConstantFP
*CFP
; // MO_FPImmediate
342 const char *SymbolName
; // MO_ExternalSymbol
343 const GlobalValue
*GV
; // MO_GlobalAddress
344 const BlockAddress
*BA
; // MO_BlockAddress
345 int64_t ImmVal
; // MO_Immediate, MO_TargetIndex,
346 // and MO_ConstantPoolIndex
348 unsigned Kind
; // Same as in MachineOperand.
349 unsigned char TF
; // TargetFlags.
351 ExtRoot(const MachineOperand
&Op
);
352 bool operator==(const ExtRoot
&ER
) const {
353 return Kind
== ER
.Kind
&& V
.ImmVal
== ER
.V
.ImmVal
;
355 bool operator!=(const ExtRoot
&ER
) const {
356 return !operator==(ER
);
358 bool operator<(const ExtRoot
&ER
) const;
361 struct ExtValue
: public ExtRoot
{
364 ExtValue(const MachineOperand
&Op
);
365 ExtValue(const ExtDesc
&ED
) : ExtValue(ED
.getOp()) {}
366 ExtValue(const ExtRoot
&ER
, int32_t Off
) : ExtRoot(ER
), Offset(Off
) {}
367 bool operator<(const ExtValue
&EV
) const;
368 bool operator==(const ExtValue
&EV
) const {
369 return ExtRoot(*this) == ExtRoot(EV
) && Offset
== EV
.Offset
;
371 bool operator!=(const ExtValue
&EV
) const {
372 return !operator==(EV
);
374 explicit operator MachineOperand() const;
377 using IndexList
= SetVector
<unsigned>;
378 using ExtenderInit
= std::pair
<ExtValue
, ExtExpr
>;
379 using AssignmentMap
= std::map
<ExtenderInit
, IndexList
>;
380 using LocDefList
= std::vector
<std::pair
<Loc
, IndexList
>>;
382 const HexagonSubtarget
*HST
= nullptr;
383 const HexagonInstrInfo
*HII
= nullptr;
384 const HexagonRegisterInfo
*HRI
= nullptr;
385 MachineDominatorTree
*MDT
= nullptr;
386 MachineRegisterInfo
*MRI
= nullptr;
387 std::vector
<ExtDesc
> Extenders
;
388 std::vector
<unsigned> NewRegs
;
390 bool isStoreImmediate(unsigned Opc
) const;
391 bool isRegOffOpcode(unsigned ExtOpc
) const ;
392 unsigned getRegOffOpcode(unsigned ExtOpc
) const;
393 unsigned getDirectRegReplacement(unsigned ExtOpc
) const;
394 OffsetRange
getOffsetRange(Register R
, const MachineInstr
&MI
) const;
395 OffsetRange
getOffsetRange(const ExtDesc
&ED
) const;
396 OffsetRange
getOffsetRange(Register Rd
) const;
398 void recordExtender(MachineInstr
&MI
, unsigned OpNum
);
399 void collectInstr(MachineInstr
&MI
);
400 void collect(MachineFunction
&MF
);
401 void assignInits(const ExtRoot
&ER
, unsigned Begin
, unsigned End
,
402 AssignmentMap
&IMap
);
403 void calculatePlacement(const ExtenderInit
&ExtI
, const IndexList
&Refs
,
405 Register
insertInitializer(Loc DefL
, const ExtenderInit
&ExtI
);
406 bool replaceInstrExact(const ExtDesc
&ED
, Register ExtR
);
407 bool replaceInstrExpr(const ExtDesc
&ED
, const ExtenderInit
&ExtI
,
408 Register ExtR
, int32_t &Diff
);
409 bool replaceInstr(unsigned Idx
, Register ExtR
, const ExtenderInit
&ExtI
);
410 bool replaceExtenders(const AssignmentMap
&IMap
);
412 unsigned getOperandIndex(const MachineInstr
&MI
,
413 const MachineOperand
&Op
) const;
414 const MachineOperand
&getPredicateOp(const MachineInstr
&MI
) const;
415 const MachineOperand
&getLoadResultOp(const MachineInstr
&MI
) const;
416 const MachineOperand
&getStoredValueOp(const MachineInstr
&MI
) const;
418 friend struct PrintRegister
;
419 friend struct PrintExpr
;
420 friend struct PrintInit
;
421 friend struct PrintIMap
;
422 friend raw_ostream
&operator<< (raw_ostream
&OS
,
423 const struct PrintRegister
&P
);
424 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintExpr
&P
);
425 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintInit
&P
);
426 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtDesc
&ED
);
427 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtRoot
&ER
);
428 friend raw_ostream
&operator<< (raw_ostream
&OS
, const ExtValue
&EV
);
429 friend raw_ostream
&operator<< (raw_ostream
&OS
, const OffsetRange
&OR
);
430 friend raw_ostream
&operator<< (raw_ostream
&OS
, const struct PrintIMap
&P
);
433 using HCE
= HexagonConstExtenders
;
435 LLVM_ATTRIBUTE_UNUSED
436 raw_ostream
&operator<< (raw_ostream
&OS
, const OffsetRange
&OR
) {
439 OS
<< '[' << OR
.Min
<< ',' << OR
.Max
<< "]a" << unsigned(OR
.Align
)
440 << '+' << unsigned(OR
.Offset
);
444 struct PrintRegister
{
445 PrintRegister(HCE::Register R
, const HexagonRegisterInfo
&I
)
448 const HexagonRegisterInfo
&HRI
;
451 LLVM_ATTRIBUTE_UNUSED
452 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegister
&P
) {
454 OS
<< printReg(P
.Rs
.Reg
, &P
.HRI
, P
.Rs
.Sub
);
461 PrintExpr(const HCE::ExtExpr
&E
, const HexagonRegisterInfo
&I
)
463 const HCE::ExtExpr
&Ex
;
464 const HexagonRegisterInfo
&HRI
;
467 LLVM_ATTRIBUTE_UNUSED
468 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintExpr
&P
) {
469 OS
<< "## " << (P
.Ex
.Neg
? "- " : "+ ");
470 if (P
.Ex
.Rs
.Reg
!= 0)
471 OS
<< printReg(P
.Ex
.Rs
.Reg
, &P
.HRI
, P
.Ex
.Rs
.Sub
);
474 OS
<< " << " << P
.Ex
.S
;
479 PrintInit(const HCE::ExtenderInit
&EI
, const HexagonRegisterInfo
&I
)
480 : ExtI(EI
), HRI(I
) {}
481 const HCE::ExtenderInit
&ExtI
;
482 const HexagonRegisterInfo
&HRI
;
485 LLVM_ATTRIBUTE_UNUSED
486 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintInit
&P
) {
487 OS
<< '[' << P
.ExtI
.first
<< ", "
488 << PrintExpr(P
.ExtI
.second
, P
.HRI
) << ']';
492 LLVM_ATTRIBUTE_UNUSED
493 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtDesc
&ED
) {
494 assert(ED
.OpNum
!= -1u);
495 const MachineBasicBlock
&MBB
= *ED
.getOp().getParent()->getParent();
496 const MachineFunction
&MF
= *MBB
.getParent();
497 const auto &HRI
= *MF
.getSubtarget
<HexagonSubtarget
>().getRegisterInfo();
498 OS
<< "bb#" << MBB
.getNumber() << ": ";
500 OS
<< printReg(ED
.Rd
.Reg
, &HRI
, ED
.Rd
.Sub
);
503 OS
<< " = " << PrintExpr(ED
.Expr
, HRI
);
509 LLVM_ATTRIBUTE_UNUSED
510 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtRoot
&ER
) {
512 case MachineOperand::MO_Immediate
:
513 OS
<< "imm:" << ER
.V
.ImmVal
;
515 case MachineOperand::MO_FPImmediate
:
516 OS
<< "fpi:" << *ER
.V
.CFP
;
518 case MachineOperand::MO_ExternalSymbol
:
519 OS
<< "sym:" << *ER
.V
.SymbolName
;
521 case MachineOperand::MO_GlobalAddress
:
522 OS
<< "gad:" << ER
.V
.GV
->getName();
524 case MachineOperand::MO_BlockAddress
:
525 OS
<< "blk:" << *ER
.V
.BA
;
527 case MachineOperand::MO_TargetIndex
:
528 OS
<< "tgi:" << ER
.V
.ImmVal
;
530 case MachineOperand::MO_ConstantPoolIndex
:
531 OS
<< "cpi:" << ER
.V
.ImmVal
;
533 case MachineOperand::MO_JumpTableIndex
:
534 OS
<< "jti:" << ER
.V
.ImmVal
;
537 OS
<< "???:" << ER
.V
.ImmVal
;
543 LLVM_ATTRIBUTE_UNUSED
544 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtValue
&EV
) {
545 OS
<< HCE::ExtRoot(EV
) << " off:" << EV
.Offset
;
550 PrintIMap(const HCE::AssignmentMap
&M
, const HexagonRegisterInfo
&I
)
552 const HCE::AssignmentMap
&IMap
;
553 const HexagonRegisterInfo
&HRI
;
556 LLVM_ATTRIBUTE_UNUSED
557 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintIMap
&P
) {
559 for (const std::pair
<const HCE::ExtenderInit
, HCE::IndexList
> &Q
: P
.IMap
) {
560 OS
<< " " << PrintInit(Q
.first
, P
.HRI
) << " -> {";
561 for (unsigned I
: Q
.second
)
570 INITIALIZE_PASS_BEGIN(HexagonConstExtenders
, "hexagon-cext-opt",
571 "Hexagon constant-extender optimization", false, false)
572 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass
)
573 INITIALIZE_PASS_END(HexagonConstExtenders
, "hexagon-cext-opt",
574 "Hexagon constant-extender optimization", false, false)
576 static unsigned ReplaceCounter
= 0;
581 LLVM_DUMP_METHOD
void RangeTree::dump() const {
582 dbgs() << "Root: " << Root
<< '\n';
587 LLVM_DUMP_METHOD
void RangeTree::dump(const Node
*N
) const {
588 dbgs() << "Node: " << N
<< '\n';
589 dbgs() << " Height: " << N
->Height
<< '\n';
590 dbgs() << " Count: " << N
->Count
<< '\n';
591 dbgs() << " MaxEnd: " << N
->MaxEnd
<< '\n';
592 dbgs() << " Range: " << N
->Range
<< '\n';
593 dbgs() << " Left: " << N
->Left
<< '\n';
594 dbgs() << " Right: " << N
->Right
<< "\n\n";
603 void RangeTree::order(Node
*N
, SmallVectorImpl
<Node
*> &Seq
) const {
608 order(N
->Right
, Seq
);
611 void RangeTree::nodesWith(Node
*N
, int32_t P
, bool CheckA
,
612 SmallVectorImpl
<Node
*> &Seq
) const {
613 if (N
== nullptr || N
->MaxEnd
< P
)
615 nodesWith(N
->Left
, P
, CheckA
, Seq
);
616 if (N
->Range
.Min
<= P
) {
617 if ((CheckA
&& N
->Range
.contains(P
)) || (!CheckA
&& P
<= N
->Range
.Max
))
619 nodesWith(N
->Right
, P
, CheckA
, Seq
);
623 RangeTree::Node
*RangeTree::add(Node
*N
, const OffsetRange
&R
) {
633 N
->Left
= add(N
->Left
, R
);
635 N
->Right
= add(N
->Right
, R
);
636 return rebalance(update(N
));
639 RangeTree::Node
*RangeTree::remove(Node
*N
, const Node
*D
) {
640 assert(N
!= nullptr);
643 assert(N
->Range
!= D
->Range
&& "N and D should not be equal");
644 if (D
->Range
< N
->Range
)
645 N
->Left
= remove(N
->Left
, D
);
647 N
->Right
= remove(N
->Right
, D
);
648 return rebalance(update(N
));
651 // We got to the node we need to remove. If any of its children are
652 // missing, simply replace it with the other child.
653 if (N
->Left
== nullptr || N
->Right
== nullptr)
654 return (N
->Left
== nullptr) ? N
->Right
: N
->Left
;
656 // Find the rightmost child of N->Left, remove it and plug it in place
661 M
->Left
= remove(N
->Left
, M
);
663 return rebalance(update(M
));
666 RangeTree::Node
*RangeTree::rotateLeft(Node
*Lower
, Node
*Higher
) {
667 assert(Higher
->Right
== Lower
);
668 // The Lower node is on the right from Higher. Make sure that Lower's
669 // balance is greater to the right. Otherwise the rotation will create
670 // an unbalanced tree again.
671 if (height(Lower
->Left
) > height(Lower
->Right
))
672 Lower
= rotateRight(Lower
->Left
, Lower
);
673 assert(height(Lower
->Left
) <= height(Lower
->Right
));
674 Higher
->Right
= Lower
->Left
;
676 Lower
->Left
= Higher
;
681 RangeTree::Node
*RangeTree::rotateRight(Node
*Lower
, Node
*Higher
) {
682 assert(Higher
->Left
== Lower
);
683 // The Lower node is on the left from Higher. Make sure that Lower's
684 // balance is greater to the left. Otherwise the rotation will create
685 // an unbalanced tree again.
686 if (height(Lower
->Left
) < height(Lower
->Right
))
687 Lower
= rotateLeft(Lower
->Right
, Lower
);
688 assert(height(Lower
->Left
) >= height(Lower
->Right
));
689 Higher
->Left
= Lower
->Right
;
691 Lower
->Right
= Higher
;
697 HCE::ExtRoot::ExtRoot(const MachineOperand
&Op
) {
698 // Always store ImmVal, since it's the field used for comparisons.
701 ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root).
702 else if (Op
.isFPImm())
703 V
.CFP
= Op
.getFPImm();
704 else if (Op
.isSymbol())
705 V
.SymbolName
= Op
.getSymbolName();
706 else if (Op
.isGlobal())
707 V
.GV
= Op
.getGlobal();
708 else if (Op
.isBlockAddress())
709 V
.BA
= Op
.getBlockAddress();
710 else if (Op
.isCPI() || Op
.isTargetIndex() || Op
.isJTI())
711 V
.ImmVal
= Op
.getIndex();
713 llvm_unreachable("Unexpected operand type");
716 TF
= Op
.getTargetFlags();
719 bool HCE::ExtRoot::operator< (const HCE::ExtRoot
&ER
) const {
721 return Kind
< ER
.Kind
;
723 case MachineOperand::MO_Immediate
:
724 case MachineOperand::MO_TargetIndex
:
725 case MachineOperand::MO_ConstantPoolIndex
:
726 case MachineOperand::MO_JumpTableIndex
:
727 return V
.ImmVal
< ER
.V
.ImmVal
;
728 case MachineOperand::MO_FPImmediate
: {
729 const APFloat
&ThisF
= V
.CFP
->getValueAPF();
730 const APFloat
&OtherF
= ER
.V
.CFP
->getValueAPF();
731 return ThisF
.bitcastToAPInt().ult(OtherF
.bitcastToAPInt());
733 case MachineOperand::MO_ExternalSymbol
:
734 return StringRef(V
.SymbolName
) < StringRef(ER
.V
.SymbolName
);
735 case MachineOperand::MO_GlobalAddress
:
736 // Do not use GUIDs, since they depend on the source path. Moving the
737 // source file to a different directory could cause different GUID
738 // values for a pair of given symbols. These symbols could then compare
739 // "less" in one directory, but "greater" in another.
740 assert(!V
.GV
->getName().empty() && !ER
.V
.GV
->getName().empty());
741 return V
.GV
->getName() < ER
.V
.GV
->getName();
742 case MachineOperand::MO_BlockAddress
: {
743 const BasicBlock
*ThisB
= V
.BA
->getBasicBlock();
744 const BasicBlock
*OtherB
= ER
.V
.BA
->getBasicBlock();
745 assert(ThisB
->getParent() == OtherB
->getParent());
746 const Function
&F
= *ThisB
->getParent();
747 return std::distance(F
.begin(), ThisB
->getIterator()) <
748 std::distance(F
.begin(), OtherB
->getIterator());
751 return V
.ImmVal
< ER
.V
.ImmVal
;
754 HCE::ExtValue::ExtValue(const MachineOperand
&Op
) : ExtRoot(Op
) {
756 Offset
= Op
.getImm();
757 else if (Op
.isFPImm() || Op
.isJTI())
759 else if (Op
.isSymbol() || Op
.isGlobal() || Op
.isBlockAddress() ||
760 Op
.isCPI() || Op
.isTargetIndex())
761 Offset
= Op
.getOffset();
763 llvm_unreachable("Unexpected operand type");
766 bool HCE::ExtValue::operator< (const HCE::ExtValue
&EV
) const {
767 const ExtRoot
&ER
= *this;
768 if (!(ER
== ExtRoot(EV
)))
770 return Offset
< EV
.Offset
;
773 HCE::ExtValue::operator MachineOperand() const {
775 case MachineOperand::MO_Immediate
:
776 return MachineOperand::CreateImm(V
.ImmVal
+ Offset
);
777 case MachineOperand::MO_FPImmediate
:
779 return MachineOperand::CreateFPImm(V
.CFP
);
780 case MachineOperand::MO_ExternalSymbol
:
782 return MachineOperand::CreateES(V
.SymbolName
, TF
);
783 case MachineOperand::MO_GlobalAddress
:
784 return MachineOperand::CreateGA(V
.GV
, Offset
, TF
);
785 case MachineOperand::MO_BlockAddress
:
786 return MachineOperand::CreateBA(V
.BA
, Offset
, TF
);
787 case MachineOperand::MO_TargetIndex
:
788 return MachineOperand::CreateTargetIndex(V
.ImmVal
, Offset
, TF
);
789 case MachineOperand::MO_ConstantPoolIndex
:
790 return MachineOperand::CreateCPI(V
.ImmVal
, Offset
, TF
);
791 case MachineOperand::MO_JumpTableIndex
:
793 return MachineOperand::CreateJTI(V
.ImmVal
, TF
);
795 llvm_unreachable("Unhandled kind");
799 bool HCE::isStoreImmediate(unsigned Opc
) const {
801 case Hexagon::S4_storeirbt_io
:
802 case Hexagon::S4_storeirbf_io
:
803 case Hexagon::S4_storeirht_io
:
804 case Hexagon::S4_storeirhf_io
:
805 case Hexagon::S4_storeirit_io
:
806 case Hexagon::S4_storeirif_io
:
807 case Hexagon::S4_storeirb_io
:
808 case Hexagon::S4_storeirh_io
:
809 case Hexagon::S4_storeiri_io
:
817 bool HCE::isRegOffOpcode(unsigned Opc
) const {
819 case Hexagon::L2_loadrub_io
:
820 case Hexagon::L2_loadrb_io
:
821 case Hexagon::L2_loadruh_io
:
822 case Hexagon::L2_loadrh_io
:
823 case Hexagon::L2_loadri_io
:
824 case Hexagon::L2_loadrd_io
:
825 case Hexagon::L2_loadbzw2_io
:
826 case Hexagon::L2_loadbzw4_io
:
827 case Hexagon::L2_loadbsw2_io
:
828 case Hexagon::L2_loadbsw4_io
:
829 case Hexagon::L2_loadalignh_io
:
830 case Hexagon::L2_loadalignb_io
:
831 case Hexagon::L2_ploadrubt_io
:
832 case Hexagon::L2_ploadrubf_io
:
833 case Hexagon::L2_ploadrbt_io
:
834 case Hexagon::L2_ploadrbf_io
:
835 case Hexagon::L2_ploadruht_io
:
836 case Hexagon::L2_ploadruhf_io
:
837 case Hexagon::L2_ploadrht_io
:
838 case Hexagon::L2_ploadrhf_io
:
839 case Hexagon::L2_ploadrit_io
:
840 case Hexagon::L2_ploadrif_io
:
841 case Hexagon::L2_ploadrdt_io
:
842 case Hexagon::L2_ploadrdf_io
:
843 case Hexagon::S2_storerb_io
:
844 case Hexagon::S2_storerh_io
:
845 case Hexagon::S2_storerf_io
:
846 case Hexagon::S2_storeri_io
:
847 case Hexagon::S2_storerd_io
:
848 case Hexagon::S2_pstorerbt_io
:
849 case Hexagon::S2_pstorerbf_io
:
850 case Hexagon::S2_pstorerht_io
:
851 case Hexagon::S2_pstorerhf_io
:
852 case Hexagon::S2_pstorerft_io
:
853 case Hexagon::S2_pstorerff_io
:
854 case Hexagon::S2_pstorerit_io
:
855 case Hexagon::S2_pstorerif_io
:
856 case Hexagon::S2_pstorerdt_io
:
857 case Hexagon::S2_pstorerdf_io
:
858 case Hexagon::A2_addi
:
866 unsigned HCE::getRegOffOpcode(unsigned ExtOpc
) const {
867 // If there exists an instruction that takes a register and offset,
868 // that corresponds to the ExtOpc, return it, otherwise return 0.
869 using namespace Hexagon
;
871 case A2_tfrsi
: return A2_addi
;
875 const MCInstrDesc
&D
= HII
->get(ExtOpc
);
876 if (D
.mayLoad() || D
.mayStore()) {
877 uint64_t F
= D
.TSFlags
;
878 unsigned AM
= (F
>> HexagonII::AddrModePos
) & HexagonII::AddrModeMask
;
880 case HexagonII::Absolute
:
881 case HexagonII::AbsoluteSet
:
882 case HexagonII::BaseLongOffset
:
886 case L4_loadrub_ur
: return L2_loadrub_io
;
889 case L4_loadrb_ur
: return L2_loadrb_io
;
892 case L4_loadruh_ur
: return L2_loadruh_io
;
895 case L4_loadrh_ur
: return L2_loadrh_io
;
898 case L4_loadri_ur
: return L2_loadri_io
;
901 case L4_loadrd_ur
: return L2_loadrd_io
;
903 case L4_loadbzw2_ur
: return L2_loadbzw2_io
;
905 case L4_loadbzw4_ur
: return L2_loadbzw4_io
;
907 case L4_loadbsw2_ur
: return L2_loadbsw2_io
;
909 case L4_loadbsw4_ur
: return L2_loadbsw4_io
;
910 case L4_loadalignh_ap
:
911 case L4_loadalignh_ur
: return L2_loadalignh_io
;
912 case L4_loadalignb_ap
:
913 case L4_loadalignb_ur
: return L2_loadalignb_io
;
914 case L4_ploadrubt_abs
: return L2_ploadrubt_io
;
915 case L4_ploadrubf_abs
: return L2_ploadrubf_io
;
916 case L4_ploadrbt_abs
: return L2_ploadrbt_io
;
917 case L4_ploadrbf_abs
: return L2_ploadrbf_io
;
918 case L4_ploadruht_abs
: return L2_ploadruht_io
;
919 case L4_ploadruhf_abs
: return L2_ploadruhf_io
;
920 case L4_ploadrht_abs
: return L2_ploadrht_io
;
921 case L4_ploadrhf_abs
: return L2_ploadrhf_io
;
922 case L4_ploadrit_abs
: return L2_ploadrit_io
;
923 case L4_ploadrif_abs
: return L2_ploadrif_io
;
924 case L4_ploadrdt_abs
: return L2_ploadrdt_io
;
925 case L4_ploadrdf_abs
: return L2_ploadrdf_io
;
928 case S4_storerb_ur
: return S2_storerb_io
;
931 case S4_storerh_ur
: return S2_storerh_io
;
934 case S4_storerf_ur
: return S2_storerf_io
;
937 case S4_storeri_ur
: return S2_storeri_io
;
940 case S4_storerd_ur
: return S2_storerd_io
;
941 case S4_pstorerbt_abs
: return S2_pstorerbt_io
;
942 case S4_pstorerbf_abs
: return S2_pstorerbf_io
;
943 case S4_pstorerht_abs
: return S2_pstorerht_io
;
944 case S4_pstorerhf_abs
: return S2_pstorerhf_io
;
945 case S4_pstorerft_abs
: return S2_pstorerft_io
;
946 case S4_pstorerff_abs
: return S2_pstorerff_io
;
947 case S4_pstorerit_abs
: return S2_pstorerit_io
;
948 case S4_pstorerif_abs
: return S2_pstorerif_io
;
949 case S4_pstorerdt_abs
: return S2_pstorerdt_io
;
950 case S4_pstorerdf_abs
: return S2_pstorerdf_io
;
955 case HexagonII::BaseImmOffset
:
956 if (!isStoreImmediate(ExtOpc
))
966 unsigned HCE::getDirectRegReplacement(unsigned ExtOpc
) const {
968 case Hexagon::A2_addi
: return Hexagon::A2_add
;
969 case Hexagon::A2_andir
: return Hexagon::A2_and
;
970 case Hexagon::A2_combineii
: return Hexagon::A4_combineri
;
971 case Hexagon::A2_orir
: return Hexagon::A2_or
;
972 case Hexagon::A2_paddif
: return Hexagon::A2_paddf
;
973 case Hexagon::A2_paddit
: return Hexagon::A2_paddt
;
974 case Hexagon::A2_subri
: return Hexagon::A2_sub
;
975 case Hexagon::A2_tfrsi
: return TargetOpcode::COPY
;
976 case Hexagon::A4_cmpbeqi
: return Hexagon::A4_cmpbeq
;
977 case Hexagon::A4_cmpbgti
: return Hexagon::A4_cmpbgt
;
978 case Hexagon::A4_cmpbgtui
: return Hexagon::A4_cmpbgtu
;
979 case Hexagon::A4_cmpheqi
: return Hexagon::A4_cmpheq
;
980 case Hexagon::A4_cmphgti
: return Hexagon::A4_cmphgt
;
981 case Hexagon::A4_cmphgtui
: return Hexagon::A4_cmphgtu
;
982 case Hexagon::A4_combineii
: return Hexagon::A4_combineir
;
983 case Hexagon::A4_combineir
: return TargetOpcode::REG_SEQUENCE
;
984 case Hexagon::A4_combineri
: return TargetOpcode::REG_SEQUENCE
;
985 case Hexagon::A4_rcmpeqi
: return Hexagon::A4_rcmpeq
;
986 case Hexagon::A4_rcmpneqi
: return Hexagon::A4_rcmpneq
;
987 case Hexagon::C2_cmoveif
: return Hexagon::A2_tfrpf
;
988 case Hexagon::C2_cmoveit
: return Hexagon::A2_tfrpt
;
989 case Hexagon::C2_cmpeqi
: return Hexagon::C2_cmpeq
;
990 case Hexagon::C2_cmpgti
: return Hexagon::C2_cmpgt
;
991 case Hexagon::C2_cmpgtui
: return Hexagon::C2_cmpgtu
;
992 case Hexagon::C2_muxii
: return Hexagon::C2_muxir
;
993 case Hexagon::C2_muxir
: return Hexagon::C2_mux
;
994 case Hexagon::C2_muxri
: return Hexagon::C2_mux
;
995 case Hexagon::C4_cmpltei
: return Hexagon::C4_cmplte
;
996 case Hexagon::C4_cmplteui
: return Hexagon::C4_cmplteu
;
997 case Hexagon::C4_cmpneqi
: return Hexagon::C4_cmpneq
;
998 case Hexagon::M2_accii
: return Hexagon::M2_acci
; // T -> T
1000 case Hexagon::M2_macsip
: return Hexagon::M2_maci
; // T -> T
1001 case Hexagon::M2_mpysin
: return Hexagon::M2_mpyi
;
1002 case Hexagon::M2_mpysip
: return Hexagon::M2_mpyi
;
1003 case Hexagon::M2_mpysmi
: return Hexagon::M2_mpyi
;
1004 case Hexagon::M2_naccii
: return Hexagon::M2_nacci
; // T -> T
1005 case Hexagon::M4_mpyri_addi
: return Hexagon::M4_mpyri_addr
;
1006 case Hexagon::M4_mpyri_addr
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1007 case Hexagon::M4_mpyrr_addi
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1008 case Hexagon::S4_addaddi
: return Hexagon::M2_acci
; // _ -> T
1009 case Hexagon::S4_addi_asl_ri
: return Hexagon::S2_asl_i_r_acc
; // T -> T
1010 case Hexagon::S4_addi_lsr_ri
: return Hexagon::S2_lsr_i_r_acc
; // T -> T
1011 case Hexagon::S4_andi_asl_ri
: return Hexagon::S2_asl_i_r_and
; // T -> T
1012 case Hexagon::S4_andi_lsr_ri
: return Hexagon::S2_lsr_i_r_and
; // T -> T
1013 case Hexagon::S4_ori_asl_ri
: return Hexagon::S2_asl_i_r_or
; // T -> T
1014 case Hexagon::S4_ori_lsr_ri
: return Hexagon::S2_lsr_i_r_or
; // T -> T
1015 case Hexagon::S4_subaddi
: return Hexagon::M2_subacc
; // _ -> T
1016 case Hexagon::S4_subi_asl_ri
: return Hexagon::S2_asl_i_r_nac
; // T -> T
1017 case Hexagon::S4_subi_lsr_ri
: return Hexagon::S2_lsr_i_r_nac
; // T -> T
1019 // Store-immediates:
1020 case Hexagon::S4_storeirbf_io
: return Hexagon::S2_pstorerbf_io
;
1021 case Hexagon::S4_storeirb_io
: return Hexagon::S2_storerb_io
;
1022 case Hexagon::S4_storeirbt_io
: return Hexagon::S2_pstorerbt_io
;
1023 case Hexagon::S4_storeirhf_io
: return Hexagon::S2_pstorerhf_io
;
1024 case Hexagon::S4_storeirh_io
: return Hexagon::S2_storerh_io
;
1025 case Hexagon::S4_storeirht_io
: return Hexagon::S2_pstorerht_io
;
1026 case Hexagon::S4_storeirif_io
: return Hexagon::S2_pstorerif_io
;
1027 case Hexagon::S4_storeiri_io
: return Hexagon::S2_storeri_io
;
1028 case Hexagon::S4_storeirit_io
: return Hexagon::S2_pstorerit_io
;
1036 // Return the allowable deviation from the current value of Rb (i.e. the
1037 // range of values that can be added to the current value) which the
1038 // instruction MI can accommodate.
1039 // The instruction MI is a user of register Rb, which is defined via an
1040 // extender. It may be possible for MI to be tweaked to work for a register
1041 // defined with a slightly different value. For example
1042 // ... = L2_loadrub_io Rb, 1
1043 // can be modifed to be
1044 // ... = L2_loadrub_io Rb', 0
1046 // The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
1047 // for L2_loadrub with offset 0. That means that Rb could be replaced with
1048 // Rc, where Rc-Rb belongs to [Min+1, Max+1].
1049 OffsetRange
HCE::getOffsetRange(Register Rb
, const MachineInstr
&MI
) const {
1050 unsigned Opc
= MI
.getOpcode();
1051 // Instructions that are constant-extended may be replaced with something
1052 // else that no longer offers the same range as the original.
1053 if (!isRegOffOpcode(Opc
) || HII
->isConstExtended(MI
))
1054 return OffsetRange::zero();
1056 if (Opc
== Hexagon::A2_addi
) {
1057 const MachineOperand
&Op1
= MI
.getOperand(1), &Op2
= MI
.getOperand(2);
1058 if (Rb
!= Register(Op1
) || !Op2
.isImm())
1059 return OffsetRange::zero();
1060 OffsetRange R
= { -(1<<15)+1, (1<<15)-1, 1 };
1061 return R
.shift(Op2
.getImm());
1064 // HII::getBaseAndOffsetPosition returns the increment position as "offset".
1065 if (HII
->isPostIncrement(MI
))
1066 return OffsetRange::zero();
1068 const MCInstrDesc
&D
= HII
->get(Opc
);
1069 assert(D
.mayLoad() || D
.mayStore());
1071 unsigned BaseP
, OffP
;
1072 if (!HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
) ||
1073 Rb
!= Register(MI
.getOperand(BaseP
)) ||
1074 !MI
.getOperand(OffP
).isImm())
1075 return OffsetRange::zero();
1077 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1078 HexagonII::MemAccesSizeMask
;
1079 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1080 unsigned L
= Log2_32(A
);
1081 unsigned S
= 10+L
; // sint11_L
1082 int32_t Min
= -alignDown((1<<S
)-1, A
);
1084 // The range will be shifted by Off. To prefer non-negative offsets,
1085 // adjust Max accordingly.
1086 int32_t Off
= MI
.getOperand(OffP
).getImm();
1087 int32_t Max
= Off
>= 0 ? 0 : -Off
;
1089 OffsetRange R
= { Min
, Max
, A
};
1090 return R
.shift(Off
);
1093 // Return the allowable deviation from the current value of the extender ED,
1094 // for which the instruction corresponding to ED can be modified without
1095 // using an extender.
1096 // The instruction uses the extender directly. It will be replaced with
1097 // another instruction, say MJ, where the extender will be replaced with a
1098 // register. MJ can allow some variability with respect to the value of
1099 // that register, as is the case with indexed memory instructions.
1100 OffsetRange
HCE::getOffsetRange(const ExtDesc
&ED
) const {
1101 // The only way that there can be a non-zero range available is if
1102 // the instruction using ED will be converted to an indexed memory
1104 unsigned IdxOpc
= getRegOffOpcode(ED
.UseMI
->getOpcode());
1107 return OffsetRange::zero();
1108 case Hexagon::A2_addi
: // s16
1109 return { -32767, 32767, 1 };
1110 case Hexagon::A2_subri
: // s10
1111 return { -511, 511, 1 };
1114 if (!ED
.UseMI
->mayLoad() && !ED
.UseMI
->mayStore())
1115 return OffsetRange::zero();
1116 const MCInstrDesc
&D
= HII
->get(IdxOpc
);
1117 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1118 HexagonII::MemAccesSizeMask
;
1119 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1120 unsigned L
= Log2_32(A
);
1121 unsigned S
= 10+L
; // sint11_L
1122 int32_t Min
= -alignDown((1<<S
)-1, A
);
1123 int32_t Max
= 0; // Force non-negative offsets.
1124 return { Min
, Max
, A
};
1127 // Get the allowable deviation from the current value of Rd by checking
1129 OffsetRange
HCE::getOffsetRange(Register Rd
) const {
1131 for (const MachineOperand
&Op
: MRI
->use_operands(Rd
.Reg
)) {
1132 // Make sure that the register being used by this operand is identical
1133 // to the register that was defined: using a different subregister
1134 // precludes any non-trivial range.
1135 if (Rd
!= Register(Op
))
1136 return OffsetRange::zero();
1137 Range
.intersect(getOffsetRange(Rd
, *Op
.getParent()));
1142 void HCE::recordExtender(MachineInstr
&MI
, unsigned OpNum
) {
1143 unsigned Opc
= MI
.getOpcode();
1147 bool IsLoad
= MI
.mayLoad();
1148 bool IsStore
= MI
.mayStore();
1150 // Fixed stack slots have negative indexes, and they cannot be used
1151 // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
1152 // unfortunate, but should not be a frequent thing.
1153 for (MachineOperand
&Op
: MI
.operands())
1154 if (Op
.isFI() && Op
.getIndex() < 0)
1157 if (IsLoad
|| IsStore
) {
1158 unsigned AM
= HII
->getAddrMode(MI
);
1160 // (Re: ##Off + Rb<<S) = Rd: ##Val
1161 case HexagonII::Absolute
: // (__: ## + __<<_)
1163 case HexagonII::AbsoluteSet
: // (Rd: ## + __<<_)
1164 ED
.Rd
= MI
.getOperand(OpNum
-1);
1167 case HexagonII::BaseImmOffset
: // (__: ## + Rs<<0)
1168 // Store-immediates are treated as non-memory operations, since
1169 // it's the value being stored that is extended (as opposed to
1170 // a part of the address).
1171 if (!isStoreImmediate(Opc
))
1172 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1174 case HexagonII::BaseLongOffset
: // (__: ## + Rs<<S)
1175 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-2);
1176 ED
.Expr
.S
= MI
.getOperand(OpNum
-1).getImm();
1179 llvm_unreachable("Unhandled memory instruction");
1183 case Hexagon::A2_tfrsi
: // (Rd: ## + __<<_)
1184 ED
.Rd
= MI
.getOperand(0);
1187 case Hexagon::A2_combineii
: // (Rd: ## + __<<_)
1188 case Hexagon::A4_combineir
:
1189 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_hi
};
1192 case Hexagon::A4_combineri
: // (Rd: ## + __<<_)
1193 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_lo
};
1196 case Hexagon::A2_addi
: // (Rd: ## + Rs<<0)
1197 ED
.Rd
= MI
.getOperand(0);
1198 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1200 case Hexagon::M2_accii
: // (__: ## + Rs<<0)
1201 case Hexagon::M2_naccii
:
1202 case Hexagon::S4_addaddi
:
1203 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1205 case Hexagon::A2_subri
: // (Rd: ## - Rs<<0)
1206 ED
.Rd
= MI
.getOperand(0);
1207 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1210 case Hexagon::S4_subaddi
: // (__: ## - Rs<<0)
1211 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1214 default: // (__: ## + __<<_)
1221 // Ignore unnamed globals.
1222 ExtRoot
ER(ED
.getOp());
1223 if (ER
.Kind
== MachineOperand::MO_GlobalAddress
)
1224 if (ER
.V
.GV
->getName().empty())
1226 // Ignore block address that points to block in another function
1227 if (ER
.Kind
== MachineOperand::MO_BlockAddress
)
1228 if (ER
.V
.BA
->getFunction() != &(MI
.getMF()->getFunction()))
1230 Extenders
.push_back(ED
);
1233 void HCE::collectInstr(MachineInstr
&MI
) {
1234 if (!HII
->isConstExtended(MI
))
1237 // Skip some non-convertible instructions.
1238 unsigned Opc
= MI
.getOpcode();
1240 case Hexagon::M2_macsin
: // There is no Rx -= mpyi(Rs,Rt).
1241 case Hexagon::C4_addipc
:
1242 case Hexagon::S4_or_andi
:
1243 case Hexagon::S4_or_andix
:
1244 case Hexagon::S4_or_ori
:
1247 recordExtender(MI
, HII
->getCExtOpNum(MI
));
1250 void HCE::collect(MachineFunction
&MF
) {
1252 for (MachineBasicBlock
&MBB
: MF
) {
1253 // Skip unreachable blocks.
1254 if (MBB
.getNumber() == -1)
1256 for (MachineInstr
&MI
: MBB
)
1261 void HCE::assignInits(const ExtRoot
&ER
, unsigned Begin
, unsigned End
,
1262 AssignmentMap
&IMap
) {
1263 // Basic correctness: make sure that all extenders in the range [Begin..End)
1264 // share the same root ER.
1265 for (unsigned I
= Begin
; I
!= End
; ++I
)
1266 assert(ER
== ExtRoot(Extenders
[I
].getOp()));
1268 // Construct the list of ranges, such that for each P in Ranges[I],
1269 // a register Reg = ER+P can be used in place of Extender[I]. If the
1270 // instruction allows, uses in the form of Reg+Off are considered
1271 // (here, Off = required_value - P).
1272 std::vector
<OffsetRange
> Ranges(End
-Begin
);
1274 // For each extender that is a def, visit all uses of the defined register,
1275 // and produce an offset range that works for all uses. The def doesn't
1276 // have to be checked, because it can become dead if all uses can be updated
1277 // to use a different reg/offset.
1278 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1279 const ExtDesc
&ED
= Extenders
[I
];
1283 LLVM_DEBUG(dbgs() << " =" << I
<< ". " << EV
<< " " << ED
<< '\n');
1284 assert(ED
.Rd
.Reg
!= 0);
1285 Ranges
[I
-Begin
] = getOffsetRange(ED
.Rd
).shift(EV
.Offset
);
1286 // A2_tfrsi is a special case: it will be replaced with A2_addi, which
1287 // has a 16-bit signed offset. This means that A2_tfrsi not only has a
1288 // range coming from its uses, but also from the fact that its replacement
1289 // has a range as well.
1290 if (ED
.UseMI
->getOpcode() == Hexagon::A2_tfrsi
) {
1291 int32_t D
= alignDown(32767, Ranges
[I
-Begin
].Align
); // XXX hardcoded
1292 Ranges
[I
-Begin
].extendBy(-D
).extendBy(D
);
1296 // Visit all non-def extenders. For each one, determine the offset range
1297 // available for it.
1298 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1299 const ExtDesc
&ED
= Extenders
[I
];
1303 LLVM_DEBUG(dbgs() << " " << I
<< ". " << EV
<< " " << ED
<< '\n');
1304 OffsetRange Dev
= getOffsetRange(ED
);
1305 Ranges
[I
-Begin
].intersect(Dev
.shift(EV
.Offset
));
1308 // Here for each I there is a corresponding Range[I]. Construct the
1309 // inverse map, that to each range will assign the set of indexes in
1310 // [Begin..End) that this range corresponds to.
1311 std::map
<OffsetRange
, IndexList
> RangeMap
;
1312 for (unsigned I
= Begin
; I
!= End
; ++I
)
1313 RangeMap
[Ranges
[I
-Begin
]].insert(I
);
1316 dbgs() << "Ranges\n";
1317 for (unsigned I
= Begin
; I
!= End
; ++I
)
1318 dbgs() << " " << I
<< ". " << Ranges
[I
-Begin
] << '\n';
1319 dbgs() << "RangeMap\n";
1320 for (auto &P
: RangeMap
) {
1321 dbgs() << " " << P
.first
<< " ->";
1322 for (unsigned I
: P
.second
)
1328 // Select the definition points, and generate the assignment between
1329 // these points and the uses.
1331 // For each candidate offset, keep a pair CandData consisting of
1332 // the total number of ranges containing that candidate, and the
1333 // vector of corresponding RangeTree nodes.
1334 using CandData
= std::pair
<unsigned, SmallVector
<RangeTree::Node
*,8>>;
1335 std::map
<int32_t, CandData
> CandMap
;
1338 for (const OffsetRange
&R
: Ranges
)
1340 SmallVector
<RangeTree::Node
*,8> Nodes
;
1343 auto MaxAlign
= [](const SmallVectorImpl
<RangeTree::Node
*> &Nodes
,
1344 uint8_t Align
, uint8_t Offset
) {
1345 for (RangeTree::Node
*N
: Nodes
) {
1346 if (N
->Range
.Align
<= Align
|| N
->Range
.Offset
< Offset
)
1348 if ((N
->Range
.Offset
- Offset
) % Align
!= 0)
1350 Align
= N
->Range
.Align
;
1351 Offset
= N
->Range
.Offset
;
1353 return std::make_pair(Align
, Offset
);
1356 // Construct the set of all potential definition points from the endpoints
1357 // of the ranges. If a given endpoint also belongs to a different range,
1358 // but with a higher alignment, also consider the more-highly-aligned
1359 // value of this endpoint.
1360 std::set
<int32_t> CandSet
;
1361 for (RangeTree::Node
*N
: Nodes
) {
1362 const OffsetRange
&R
= N
->Range
;
1363 auto P0
= MaxAlign(Tree
.nodesWith(R
.Min
, false), R
.Align
, R
.Offset
);
1364 CandSet
.insert(R
.Min
);
1365 if (R
.Align
< P0
.first
)
1366 CandSet
.insert(adjustUp(R
.Min
, P0
.first
, P0
.second
));
1367 auto P1
= MaxAlign(Tree
.nodesWith(R
.Max
, false), R
.Align
, R
.Offset
);
1368 CandSet
.insert(R
.Max
);
1369 if (R
.Align
< P1
.first
)
1370 CandSet
.insert(adjustDown(R
.Max
, P1
.first
, P1
.second
));
1373 // Build the assignment map: candidate C -> { list of extender indexes }.
1374 // This has to be done iteratively:
1375 // - pick the candidate that covers the maximum number of extenders,
1376 // - add the candidate to the map,
1377 // - remove the extenders from the pool.
1379 using CMap
= std::map
<int32_t,unsigned>;
1381 for (auto It
= CandSet
.begin(), Et
= CandSet
.end(); It
!= Et
; ) {
1382 auto &&V
= Tree
.nodesWith(*It
);
1383 unsigned N
= std::accumulate(V
.begin(), V
.end(), 0u,
1384 [](unsigned Acc
, const RangeTree::Node
*N
) {
1385 return Acc
+ N
->Count
;
1388 Counts
.insert({*It
, N
});
1389 It
= (N
!= 0) ? std::next(It
) : CandSet
.erase(It
);
1394 // Find the best candidate with respect to the number of extenders covered.
1395 auto BestIt
= llvm::max_element(
1396 Counts
, [](const CMap::value_type
&A
, const CMap::value_type
&B
) {
1397 return A
.second
< B
.second
|| (A
.second
== B
.second
&& A
< B
);
1399 int32_t Best
= BestIt
->first
;
1400 ExtValue
BestV(ER
, Best
);
1401 for (RangeTree::Node
*N
: Tree
.nodesWith(Best
)) {
1402 for (unsigned I
: RangeMap
[N
->Range
])
1403 IMap
[{BestV
,Extenders
[I
].Expr
}].insert(I
);
1408 LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap
, *HRI
));
1410 // There is some ambiguity in what initializer should be used, if the
1411 // descriptor's subexpression is non-trivial: it can be the entire
1412 // subexpression (which is what has been done so far), or it can be
1413 // the extender's value itself, if all corresponding extenders have the
1414 // exact value of the initializer (i.e. require offset of 0).
1416 // To reduce the number of initializers, merge such special cases.
1417 for (std::pair
<const ExtenderInit
,IndexList
> &P
: IMap
) {
1418 // Skip trivial initializers.
1419 if (P
.first
.second
.trivial())
1421 // If the corresponding trivial initializer does not exist, skip this
1423 const ExtValue
&EV
= P
.first
.first
;
1424 AssignmentMap::iterator F
= IMap
.find({EV
, ExtExpr()});
1425 if (F
== IMap
.end())
1428 // Finally, check if all extenders have the same value as the initializer.
1429 // Make sure that extenders that are a part of a stack address are not
1430 // merged with those that aren't. Stack addresses need an offset field
1431 // (to be used by frame index elimination), while non-stack expressions
1432 // can be replaced with forms (such as rr) that do not have such a field.
1435 // Collected 3 extenders
1436 // =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def
1437 // 0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0
1438 // 1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0
1440 // 0. [-756,267]a1+0
1441 // 1. [-756,267]a1+0
1442 // 2. [201,65735]a1+0
1444 // [-756,267]a1+0 -> 0 1
1445 // [201,65735]a1+0 -> 2
1446 // IMap (before fixup) = {
1447 // [imm:0 off:267, ## + __ << 0] -> { 2 }
1448 // [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 }
1450 // IMap (after fixup) = {
1451 // [imm:0 off:267, ## + __ << 0] -> { 2 0 1 }
1452 // [imm:0 off:267, ## + SS#1 << 0] -> { }
1454 // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0]
1455 // %12:intregs = A2_tfrsi 267
1458 // %12:intregs = A2_tfrsi 267
1459 // S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
1462 // if (p0.new) memb(r0+r29<<#4) = r2
1464 bool IsStack
= any_of(F
->second
, [this](unsigned I
) {
1465 return Extenders
[I
].Expr
.Rs
.isSlot();
1467 auto SameValue
= [&EV
,this,IsStack
](unsigned I
) {
1468 const ExtDesc
&ED
= Extenders
[I
];
1469 return ED
.Expr
.Rs
.isSlot() == IsStack
&&
1470 ExtValue(ED
).Offset
== EV
.Offset
;
1472 if (all_of(P
.second
, SameValue
)) {
1473 F
->second
.insert(P
.second
.begin(), P
.second
.end());
1478 LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap
, *HRI
));
1481 void HCE::calculatePlacement(const ExtenderInit
&ExtI
, const IndexList
&Refs
,
1486 // The placement calculation is somewhat simple right now: it finds a
1487 // single location for the def that dominates all refs. Since this may
1488 // place the def far from the uses, producing several locations for
1489 // defs that collectively dominate all refs could be better.
1490 // For now only do the single one.
1491 DenseSet
<MachineBasicBlock
*> Blocks
;
1492 DenseSet
<MachineInstr
*> RefMIs
;
1493 const ExtDesc
&ED0
= Extenders
[Refs
[0]];
1494 MachineBasicBlock
*DomB
= ED0
.UseMI
->getParent();
1495 RefMIs
.insert(ED0
.UseMI
);
1496 Blocks
.insert(DomB
);
1497 for (unsigned i
= 1, e
= Refs
.size(); i
!= e
; ++i
) {
1498 const ExtDesc
&ED
= Extenders
[Refs
[i
]];
1499 MachineBasicBlock
*MBB
= ED
.UseMI
->getParent();
1500 RefMIs
.insert(ED
.UseMI
);
1501 DomB
= MDT
->findNearestCommonDominator(DomB
, MBB
);
1506 // The block DomB should be dominated by the def of each register used
1507 // in the initializer.
1508 Register Rs
= ExtI
.second
.Rs
; // Only one reg allowed now.
1509 const MachineInstr
*DefI
= Rs
.isVReg() ? MRI
->getVRegDef(Rs
.Reg
) : nullptr;
1511 // This should be guaranteed given that the entire expression is used
1512 // at each instruction in Refs. Add an assertion just in case.
1513 assert(!DefI
|| MDT
->dominates(DefI
->getParent(), DomB
));
1516 MachineBasicBlock::iterator It
;
1517 if (Blocks
.count(DomB
)) {
1518 // Try to find the latest possible location for the def.
1519 MachineBasicBlock::iterator End
= DomB
->end();
1520 for (It
= DomB
->begin(); It
!= End
; ++It
)
1521 if (RefMIs
.count(&*It
))
1523 assert(It
!= End
&& "Should have found a ref in DomB");
1525 // DomB does not contain any refs.
1526 It
= DomB
->getFirstTerminator();
1528 Loc
DefLoc(DomB
, It
);
1529 Defs
.emplace_back(DefLoc
, Refs
);
1532 HCE::Register
HCE::insertInitializer(Loc DefL
, const ExtenderInit
&ExtI
) {
1533 llvm::Register DefR
= MRI
->createVirtualRegister(&Hexagon::IntRegsRegClass
);
1534 MachineBasicBlock
&MBB
= *DefL
.Block
;
1535 MachineBasicBlock::iterator At
= DefL
.At
;
1536 DebugLoc dl
= DefL
.Block
->findDebugLoc(DefL
.At
);
1537 const ExtValue
&EV
= ExtI
.first
;
1538 MachineOperand
ExtOp(EV
);
1540 const ExtExpr
&Ex
= ExtI
.second
;
1541 const MachineInstr
*InitI
= nullptr;
1543 if (Ex
.Rs
.isSlot()) {
1544 assert(Ex
.S
== 0 && "Cannot have a shift of a stack slot");
1545 assert(!Ex
.Neg
&& "Cannot subtract a stack slot");
1546 // DefR = PS_fi Rb,##EV
1547 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::PS_fi
), DefR
)
1548 .add(MachineOperand(Ex
.Rs
))
1551 assert((Ex
.Rs
.Reg
== 0 || Ex
.Rs
.isVReg()) && "Expecting virtual register");
1554 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_tfrsi
), DefR
)
1556 } else if (Ex
.S
== 0) {
1558 // DefR = sub(##EV,Rb)
1559 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_subri
), DefR
)
1561 .add(MachineOperand(Ex
.Rs
));
1563 // DefR = add(Rb,##EV)
1564 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_addi
), DefR
)
1565 .add(MachineOperand(Ex
.Rs
))
1569 if (HST
->useCompound()) {
1570 unsigned NewOpc
= Ex
.Neg
? Hexagon::S4_subi_asl_ri
1571 : Hexagon::S4_addi_asl_ri
;
1572 // DefR = add(##EV,asl(Rb,S))
1573 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
), DefR
)
1575 .add(MachineOperand(Ex
.Rs
))
1578 // No compounds are available. It is not clear whether we should
1579 // even process such extenders where the initializer cannot be
1580 // a single instruction, but do it for now.
1581 llvm::Register TmpR
= MRI
->createVirtualRegister(&Hexagon::IntRegsRegClass
);
1582 BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::S2_asl_i_r
), TmpR
)
1583 .add(MachineOperand(Ex
.Rs
))
1586 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_subri
), DefR
)
1588 .add(MachineOperand(Register(TmpR
, 0)));
1590 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_addi
), DefR
)
1591 .add(MachineOperand(Register(TmpR
, 0)))
1599 LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB
.getNumber()
1600 << " for initializer: " << PrintInit(ExtI
, *HRI
) << "\n "
1605 // Replace the extender at index Idx with the register ExtR.
1606 bool HCE::replaceInstrExact(const ExtDesc
&ED
, Register ExtR
) {
1607 MachineInstr
&MI
= *ED
.UseMI
;
1608 MachineBasicBlock
&MBB
= *MI
.getParent();
1609 MachineBasicBlock::iterator At
= MI
.getIterator();
1610 DebugLoc dl
= MI
.getDebugLoc();
1611 unsigned ExtOpc
= MI
.getOpcode();
1613 // With a few exceptions, direct replacement amounts to creating an
1614 // instruction with a corresponding register opcode, with all operands
1615 // the same, except for the register used in place of the extender.
1616 unsigned RegOpc
= getDirectRegReplacement(ExtOpc
);
1618 if (RegOpc
== TargetOpcode::REG_SEQUENCE
) {
1619 if (ExtOpc
== Hexagon::A4_combineri
)
1620 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1621 .add(MI
.getOperand(0))
1622 .add(MI
.getOperand(1))
1623 .addImm(Hexagon::isub_hi
)
1624 .add(MachineOperand(ExtR
))
1625 .addImm(Hexagon::isub_lo
);
1626 else if (ExtOpc
== Hexagon::A4_combineir
)
1627 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1628 .add(MI
.getOperand(0))
1629 .add(MachineOperand(ExtR
))
1630 .addImm(Hexagon::isub_hi
)
1631 .add(MI
.getOperand(2))
1632 .addImm(Hexagon::isub_lo
);
1634 llvm_unreachable("Unexpected opcode became REG_SEQUENCE");
1638 if (ExtOpc
== Hexagon::C2_cmpgei
|| ExtOpc
== Hexagon::C2_cmpgeui
) {
1639 unsigned NewOpc
= ExtOpc
== Hexagon::C2_cmpgei
? Hexagon::C2_cmplt
1640 : Hexagon::C2_cmpltu
;
1641 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1642 .add(MI
.getOperand(0))
1643 .add(MachineOperand(ExtR
))
1644 .add(MI
.getOperand(1));
1650 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1651 unsigned RegN
= ED
.OpNum
;
1652 // Copy all operands except the one that has the extender.
1653 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1655 MIB
.add(MI
.getOperand(i
));
1657 MIB
.add(MachineOperand(ExtR
));
1659 MIB
.cloneMemRefs(MI
);
1664 if (MI
.mayLoadOrStore() && !isStoreImmediate(ExtOpc
)) {
1665 // For memory instructions, there is an asymmetry in the addressing
1666 // modes. Addressing modes allowing extenders can be replaced with
1667 // addressing modes that use registers, but the order of operands
1668 // (or even their number) may be different.
1670 // BaseImmOffset (io) -> BaseRegOffset (rr)
1671 // BaseLongOffset (ur) -> BaseRegOffset (rr)
1672 unsigned RegOpc
, Shift
;
1673 unsigned AM
= HII
->getAddrMode(MI
);
1674 if (AM
== HexagonII::BaseImmOffset
) {
1675 RegOpc
= HII
->changeAddrMode_io_rr(ExtOpc
);
1677 } else if (AM
== HexagonII::BaseLongOffset
) {
1678 // Loads: Rd = L4_loadri_ur Rs, S, ##
1679 // Stores: S4_storeri_ur Rs, S, ##, Rt
1680 RegOpc
= HII
->changeAddrMode_ur_rr(ExtOpc
);
1681 Shift
= MI
.getOperand(MI
.mayLoad() ? 2 : 1).getImm();
1683 llvm_unreachable("Unexpected addressing mode");
1686 if (RegOpc
== -1u) {
1687 dbgs() << "\nExtOpc: " << HII
->getName(ExtOpc
) << " has no rr version\n";
1688 llvm_unreachable("No corresponding rr instruction");
1692 unsigned BaseP
, OffP
;
1693 HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
);
1695 // Build an rr instruction: (RegOff + RegBase<<0)
1696 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1697 // First, add the def for loads.
1699 MIB
.add(getLoadResultOp(MI
));
1700 // Handle possible predication.
1701 if (HII
->isPredicated(MI
))
1702 MIB
.add(getPredicateOp(MI
));
1703 // Build the address.
1704 MIB
.add(MachineOperand(ExtR
)); // RegOff
1705 MIB
.add(MI
.getOperand(BaseP
)); // RegBase
1706 MIB
.addImm(Shift
); // << Shift
1707 // Add the stored value for stores.
1709 MIB
.add(getStoredValueOp(MI
));
1710 MIB
.cloneMemRefs(MI
);
1716 dbgs() << '\n' << MI
;
1718 llvm_unreachable("Unhandled exact replacement");
1722 // Replace the extender ED with a form corresponding to the initializer ExtI.
1723 bool HCE::replaceInstrExpr(const ExtDesc
&ED
, const ExtenderInit
&ExtI
,
1724 Register ExtR
, int32_t &Diff
) {
1725 MachineInstr
&MI
= *ED
.UseMI
;
1726 MachineBasicBlock
&MBB
= *MI
.getParent();
1727 MachineBasicBlock::iterator At
= MI
.getIterator();
1728 DebugLoc dl
= MI
.getDebugLoc();
1729 unsigned ExtOpc
= MI
.getOpcode();
1731 if (ExtOpc
== Hexagon::A2_tfrsi
) {
1732 // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces
1733 // another range. One range is the one that's common to all tfrsi's uses,
1734 // this one is the range of immediates in A2_addi. When calculating ranges,
1735 // the addi's 16-bit argument was included, so now we need to make it such
1736 // that the produced value is in the range for the uses alone.
1737 // Most of the time, simply adding Diff will make the addi produce exact
1738 // result, but if Diff is outside of the 16-bit range, some adjustment
1740 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1741 assert(IdxOpc
== Hexagon::A2_addi
);
1743 // Clamp Diff to the 16 bit range.
1744 int32_t D
= isInt
<16>(Diff
) ? Diff
: (Diff
> 0 ? 32767 : -32768);
1746 // Split Diff into two values: one that is close to min/max int16,
1747 // and the other being the rest, and such that both have the same
1748 // "alignment" as Diff.
1750 OffsetRange R
= getOffsetRange(MI
.getOperand(0));
1751 uint32_t A
= std::min
<uint32_t>(R
.Align
, 1u << llvm::countr_zero(UD
));
1754 BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
))
1755 .add(MI
.getOperand(0))
1756 .add(MachineOperand(ExtR
))
1760 // Make sure the output is within allowable range for uses.
1761 // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
1762 // not DefV - Ext, as the getOffsetRange would calculate.
1763 OffsetRange Uses
= getOffsetRange(MI
.getOperand(0));
1764 if (!Uses
.contains(-Diff
))
1765 dbgs() << "Diff: " << -Diff
<< " out of range " << Uses
1767 assert(Uses
.contains(-Diff
));
1773 const ExtValue
&EV
= ExtI
.first
; (void)EV
;
1774 const ExtExpr
&Ex
= ExtI
.second
; (void)Ex
;
1776 if (ExtOpc
== Hexagon::A2_addi
|| ExtOpc
== Hexagon::A2_subri
) {
1777 // If addi/subri are replaced with the exactly matching initializer,
1778 // they amount to COPY.
1779 // Check that the initializer is an exact match (for simplicity).
1781 bool IsAddi
= ExtOpc
== Hexagon::A2_addi
;
1782 const MachineOperand
&RegOp
= MI
.getOperand(IsAddi
? 1 : 2);
1783 const MachineOperand
&ImmOp
= MI
.getOperand(IsAddi
? 2 : 1);
1784 assert(Ex
.Rs
== RegOp
&& EV
== ImmOp
&& Ex
.Neg
!= IsAddi
&&
1785 "Initializer mismatch");
1787 BuildMI(MBB
, At
, dl
, HII
->get(TargetOpcode::COPY
))
1788 .add(MI
.getOperand(0))
1789 .add(MachineOperand(ExtR
));
1794 if (ExtOpc
== Hexagon::M2_accii
|| ExtOpc
== Hexagon::M2_naccii
||
1795 ExtOpc
== Hexagon::S4_addaddi
|| ExtOpc
== Hexagon::S4_subaddi
) {
1796 // M2_accii: add(Rt,add(Rs,V)) (tied)
1797 // M2_naccii: sub(Rt,add(Rs,V))
1798 // S4_addaddi: add(Rt,add(Rs,V))
1799 // S4_subaddi: add(Rt,sub(V,Rs))
1800 // Check that Rs and V match the initializer expression. The Rs+V is the
1801 // combination that is considered "subexpression" for V, although Rx+V
1802 // would also be valid.
1804 bool IsSub
= ExtOpc
== Hexagon::S4_subaddi
;
1805 Register Rs
= MI
.getOperand(IsSub
? 3 : 2);
1806 ExtValue V
= MI
.getOperand(IsSub
? 2 : 3);
1807 assert(EV
== V
&& Rs
== Ex
.Rs
&& IsSub
== Ex
.Neg
&& "Initializer mismatch");
1809 unsigned NewOpc
= ExtOpc
== Hexagon::M2_naccii
? Hexagon::A2_sub
1811 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1812 .add(MI
.getOperand(0))
1813 .add(MI
.getOperand(1))
1814 .add(MachineOperand(ExtR
));
1819 if (MI
.mayLoadOrStore()) {
1820 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1821 assert(IdxOpc
&& "Expecting indexed opcode");
1822 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
));
1823 // Construct the new indexed instruction.
1824 // First, add the def for loads.
1826 MIB
.add(getLoadResultOp(MI
));
1827 // Handle possible predication.
1828 if (HII
->isPredicated(MI
))
1829 MIB
.add(getPredicateOp(MI
));
1830 // Build the address.
1831 MIB
.add(MachineOperand(ExtR
));
1833 // Add the stored value for stores.
1835 MIB
.add(getStoredValueOp(MI
));
1836 MIB
.cloneMemRefs(MI
);
1842 dbgs() << '\n' << PrintInit(ExtI
, *HRI
) << " " << MI
;
1844 llvm_unreachable("Unhandled expr replacement");
1848 bool HCE::replaceInstr(unsigned Idx
, Register ExtR
, const ExtenderInit
&ExtI
) {
1849 if (ReplaceLimit
.getNumOccurrences()) {
1850 if (ReplaceLimit
<= ReplaceCounter
)
1854 const ExtDesc
&ED
= Extenders
[Idx
];
1855 assert((!ED
.IsDef
|| ED
.Rd
.Reg
!= 0) && "Missing Rd for def");
1856 const ExtValue
&DefV
= ExtI
.first
;
1857 assert(ExtRoot(ExtValue(ED
)) == ExtRoot(DefV
) && "Extender root mismatch");
1858 const ExtExpr
&DefEx
= ExtI
.second
;
1861 int32_t Diff
= EV
.Offset
- DefV
.Offset
;
1862 const MachineInstr
&MI
= *ED
.UseMI
;
1863 LLVM_DEBUG(dbgs() << __func__
<< " Idx:" << Idx
<< " ExtR:"
1864 << PrintRegister(ExtR
, *HRI
) << " Diff:" << Diff
<< '\n');
1866 // These two addressing modes must be converted into indexed forms
1867 // regardless of what the initializer looks like.
1868 bool IsAbs
= false, IsAbsSet
= false;
1869 if (MI
.mayLoadOrStore()) {
1870 unsigned AM
= HII
->getAddrMode(MI
);
1871 IsAbs
= AM
== HexagonII::Absolute
;
1872 IsAbsSet
= AM
== HexagonII::AbsoluteSet
;
1875 // If it's a def, remember all operands that need to be updated.
1876 // If ED is a def, and Diff is not 0, then all uses of the register Rd
1877 // defined by ED must be in the form (Rd, imm), i.e. the immediate offset
1878 // must follow the Rd in the operand list.
1879 std::vector
<std::pair
<MachineInstr
*,unsigned>> RegOps
;
1880 if (ED
.IsDef
&& Diff
!= 0) {
1881 for (MachineOperand
&Op
: MRI
->use_operands(ED
.Rd
.Reg
)) {
1882 MachineInstr
&UI
= *Op
.getParent();
1883 RegOps
.push_back({&UI
, getOperandIndex(UI
, Op
)});
1887 // Replace the instruction.
1888 bool Replaced
= false;
1889 if (Diff
== 0 && DefEx
.trivial() && !IsAbs
&& !IsAbsSet
)
1890 Replaced
= replaceInstrExact(ED
, ExtR
);
1892 Replaced
= replaceInstrExpr(ED
, ExtI
, ExtR
, Diff
);
1894 if (Diff
!= 0 && Replaced
&& ED
.IsDef
) {
1895 // Update offsets of the def's uses.
1896 for (std::pair
<MachineInstr
*,unsigned> P
: RegOps
) {
1897 unsigned J
= P
.second
;
1898 assert(P
.first
->getNumOperands() > J
+1 &&
1899 P
.first
->getOperand(J
+1).isImm());
1900 MachineOperand
&ImmOp
= P
.first
->getOperand(J
+1);
1901 ImmOp
.setImm(ImmOp
.getImm() + Diff
);
1903 // If it was an absolute-set instruction, the "set" part has been removed.
1904 // ExtR will now be the register with the extended value, and since all
1905 // users of Rd have been updated, all that needs to be done is to replace
1908 assert(ED
.Rd
.Sub
== 0 && ExtR
.Sub
== 0);
1909 MRI
->replaceRegWith(ED
.Rd
.Reg
, ExtR
.Reg
);
1916 bool HCE::replaceExtenders(const AssignmentMap
&IMap
) {
1918 bool Changed
= false;
1920 for (const std::pair
<const ExtenderInit
, IndexList
> &P
: IMap
) {
1921 const IndexList
&Idxs
= P
.second
;
1922 if (Idxs
.size() < CountThreshold
)
1926 calculatePlacement(P
.first
, Idxs
, Defs
);
1927 for (const std::pair
<Loc
,IndexList
> &Q
: Defs
) {
1928 Register DefR
= insertInitializer(Q
.first
, P
.first
);
1929 NewRegs
.push_back(DefR
.Reg
);
1930 for (unsigned I
: Q
.second
)
1931 Changed
|= replaceInstr(I
, DefR
, P
.first
);
1937 unsigned HCE::getOperandIndex(const MachineInstr
&MI
,
1938 const MachineOperand
&Op
) const {
1939 for (unsigned i
= 0, n
= MI
.getNumOperands(); i
!= n
; ++i
)
1940 if (&MI
.getOperand(i
) == &Op
)
1942 llvm_unreachable("Not an operand of MI");
1945 const MachineOperand
&HCE::getPredicateOp(const MachineInstr
&MI
) const {
1946 assert(HII
->isPredicated(MI
));
1947 for (const MachineOperand
&Op
: MI
.operands()) {
1948 if (!Op
.isReg() || !Op
.isUse() ||
1949 MRI
->getRegClass(Op
.getReg()) != &Hexagon::PredRegsRegClass
)
1951 assert(Op
.getSubReg() == 0 && "Predicate register with a subregister");
1954 llvm_unreachable("Predicate operand not found");
1957 const MachineOperand
&HCE::getLoadResultOp(const MachineInstr
&MI
) const {
1958 assert(MI
.mayLoad());
1959 return MI
.getOperand(0);
1962 const MachineOperand
&HCE::getStoredValueOp(const MachineInstr
&MI
) const {
1963 assert(MI
.mayStore());
1964 return MI
.getOperand(MI
.getNumExplicitOperands()-1);
1967 bool HCE::runOnMachineFunction(MachineFunction
&MF
) {
1968 if (skipFunction(MF
.getFunction()))
1970 if (MF
.getFunction().hasPersonalityFn()) {
1971 LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF
.getName()
1972 << " due to exception handling\n");
1975 LLVM_DEBUG(MF
.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
1977 HST
= &MF
.getSubtarget
<HexagonSubtarget
>();
1978 HII
= HST
->getInstrInfo();
1979 HRI
= HST
->getRegisterInfo();
1980 MDT
= &getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
1981 MRI
= &MF
.getRegInfo();
1985 llvm::sort(Extenders
, [this](const ExtDesc
&A
, const ExtDesc
&B
) {
1986 ExtValue
VA(A
), VB(B
);
1989 const MachineInstr
*MA
= A
.UseMI
;
1990 const MachineInstr
*MB
= B
.UseMI
;
1992 // If it's the same instruction, compare operand numbers.
1993 return A
.OpNum
< B
.OpNum
;
1996 const MachineBasicBlock
*BA
= MA
->getParent();
1997 const MachineBasicBlock
*BB
= MB
->getParent();
1998 assert(BA
->getNumber() != -1 && BB
->getNumber() != -1);
2000 return BA
->getNumber() < BB
->getNumber();
2001 return MDT
->dominates(MA
, MB
);
2004 bool Changed
= false;
2005 LLVM_DEBUG(dbgs() << "Collected " << Extenders
.size() << " extenders\n");
2006 for (unsigned I
= 0, E
= Extenders
.size(); I
!= E
; ) {
2008 const ExtRoot
&T
= Extenders
[B
].getOp();
2009 while (I
!= E
&& ExtRoot(Extenders
[I
].getOp()) == T
)
2013 assignInits(T
, B
, I
, IMap
);
2014 Changed
|= replaceExtenders(IMap
);
2019 MF
.print(dbgs() << "After " << getPassName() << '\n', nullptr);
2021 dbgs() << "No changes\n";
2026 FunctionPass
*llvm::createHexagonConstExtenders() {
2027 return new HexagonConstExtenders();