//===- HexagonConstExtenders.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
#include <cstdint>
#include <map>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-cext-opt"

using namespace llvm;
static cl::opt<unsigned> CountThreshold(
    "hexagon-cext-threshold", cl::init(3), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Minimum number of extenders to trigger replacement"));

static cl::opt<unsigned> ReplaceLimit(
    "hexagon-cext-limit", cl::init(0), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Maximum number of replacements"));
namespace llvm {
  void initializeHexagonConstExtendersPass(PassRegistry&);
  FunctionPass *createHexagonConstExtenders();
}
static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) {
  assert(isPowerOf2_32(A));
  int32_t U = (V & -A) + O;
  return U >= V ? U : U+A;
}

static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) {
  assert(isPowerOf2_32(A));
  int32_t U = (V & -A) + O;
  return U <= V ? U : U-A;
}
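
// For illustration: with A = 4 and O = 1, adjustUp rounds V up to the nearest
// value of the form 4*N+1, and adjustDown rounds it down. E.g. adjustUp(6,4,1)
// computes U = (6 & -4) + 1 = 5, and since 5 < 6 it returns 5+4 = 9, the
// smallest value >= 6 congruent to 1 (mod 4); adjustDown(6,4,1) returns 5.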
namespace {
  struct OffsetRange {
    // The range of values between Min and Max that are of form Align*N+Offset,
    // for some integer N. Min and Max are required to be of that form as well,
    // except in the case of an empty range.
    int32_t Min = INT_MIN, Max = INT_MAX;
    uint8_t Align = 1;
    uint8_t Offset = 0;

    OffsetRange() = default;
    OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0)
      : Min(L), Max(H), Align(A), Offset(O) {}
    OffsetRange &intersect(OffsetRange A) {
      if (Align < A.Align)
        std::swap(*this, A);

      // Align >= A.Align.
      if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) {
        Min = adjustUp(std::max(Min, A.Min), Align, Offset);
        Max = adjustDown(std::min(Max, A.Max), Align, Offset);
      } else {
        // Make an empty range.
        Min = 0;
        Max = -1;
      }
      // Canonicalize empty ranges.
      if (Min > Max)
        std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1);
      return *this;
    }
    OffsetRange &shift(int32_t S) {
      Min += S;
      Max += S;
      Offset = (Offset+S) % Align;
      return *this;
    }

    OffsetRange &extendBy(int32_t D) {
      // If D < 0, extend Min, otherwise extend Max.
      assert(D % Align == 0);
      if (D < 0)
        Min = (INT_MIN-D < Min) ? Min+D : INT_MIN;
      else
        Max = (INT_MAX-D > Max) ? Max+D : INT_MAX;
      return *this;
    }

    bool contains(int32_t V) const {
      return Min <= V && V <= Max && (V-Offset) % Align == 0;
    }

    bool operator==(const OffsetRange &R) const {
      return Min == R.Min && Max == R.Max && Align == R.Align;
    }
    bool operator!=(const OffsetRange &R) const {
      return !operator==(R);
    }
    bool operator<(const OffsetRange &R) const {
      if (Min != R.Min)
        return Min < R.Min;
      if (Max != R.Max)
        return Max < R.Max;
      return Align < R.Align;
    }
    static OffsetRange zero() { return {0, 0, 1}; }
  };
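
  // For illustration of the OffsetRange semantics: OffsetRange(4, 36, 8, 4)
  // describes {4, 12, 20, 28, 36}, i.e. the values congruent to 4 (mod 8)
  // within [4,36]; contains(20) is true, contains(16) is false.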
  struct RangeTree {
    struct Node {
      Node(const OffsetRange &R) : MaxEnd(R.Max), Range(R) {}
      unsigned Height = 1;
      unsigned Count = 1;
      int32_t MaxEnd;
      const OffsetRange &Range;
      Node *Left = nullptr, *Right = nullptr;
    };

    Node *Root = nullptr;

    void add(const OffsetRange &R) {
      Root = add(Root, R);
    }

    void erase(const Node *N) {
      Root = remove(Root, N);
      delete N;
    }

    void order(SmallVectorImpl<Node*> &Seq) const {
      order(Root, Seq);
    }

    SmallVector<Node*,8> nodesWith(int32_t P, bool CheckAlign = true) {
      SmallVector<Node*,8> Nodes;
      nodesWith(Root, P, CheckAlign, Nodes);
      return Nodes;
    }

    void dump() const;

    ~RangeTree() {
      SmallVector<Node*,8> Nodes;
      order(Root, Nodes);
      for (Node *N : Nodes)
        delete N;
    }
    void dump(const Node *N) const;
    void order(Node *N, SmallVectorImpl<Node*> &Seq) const;
    void nodesWith(Node *N, int32_t P, bool CheckA,
                   SmallVectorImpl<Node*> &Seq) const;

    Node *add(Node *N, const OffsetRange &R);
    Node *remove(Node *N, const Node *D);
    Node *rotateLeft(Node *Lower, Node *Higher);
    Node *rotateRight(Node *Lower, Node *Higher);

    unsigned height(Node *N) {
      return N != nullptr ? N->Height : 0;
    }

    Node *update(Node *N) {
      assert(N != nullptr);
      N->Height = 1 + std::max(height(N->Left), height(N->Right));
      if (N->Left)
        N->MaxEnd = std::max(N->MaxEnd, N->Left->MaxEnd);
      if (N->Right)
        N->MaxEnd = std::max(N->MaxEnd, N->Right->MaxEnd);
      return N;
    }

    Node *rebalance(Node *N) {
      assert(N != nullptr);
      int32_t Balance = height(N->Right) - height(N->Left);
      if (Balance < -1)
        return rotateRight(N->Left, N);
      if (Balance > 1)
        return rotateLeft(N->Right, N);
      return N;
    }
  };
  struct Loc {
    MachineBasicBlock *Block = nullptr;
    MachineBasicBlock::iterator At;

    Loc(MachineBasicBlock *B, MachineBasicBlock::iterator It)
      : Block(B), At(It) {
      if (B->end() == It) {
        Pos = -1;
      } else {
        assert(It->getParent() == B);
        Pos = std::distance(B->begin(), It);
      }
    }

    bool operator<(Loc A) const {
      if (Block != A.Block)
        return Block->getNumber() < A.Block->getNumber();
      if (A.Pos == -1)
        return Pos != -1;
      return Pos != -1 && Pos < A.Pos;
    }

    int Pos = 0;
  };
  struct HexagonConstExtenders : public MachineFunctionPass {
    static char ID;

    HexagonConstExtenders() : MachineFunctionPass(ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    StringRef getPassName() const override {
      return "Hexagon constant-extender optimization";
    }

    bool runOnMachineFunction(MachineFunction &MF) override;
  private:
    struct Register {
      Register() = default;
      Register(unsigned R, unsigned S) : Reg(R), Sub(S) {}
      Register(const MachineOperand &Op)
        : Reg(Op.getReg()), Sub(Op.getSubReg()) {}
      Register &operator=(const MachineOperand &Op) {
        if (Op.isReg()) {
          Reg = Op.getReg();
          Sub = Op.getSubReg();
        } else if (Op.isFI()) {
          Reg = llvm::Register::index2StackSlot(Op.getIndex());
        }
        return *this;
      }
      bool isVReg() const {
        return Reg != 0 && !llvm::Register::isStackSlot(Reg) &&
               llvm::Register::isVirtualRegister(Reg);
      }
      bool isSlot() const {
        return Reg != 0 && llvm::Register::isStackSlot(Reg);
      }
      operator MachineOperand() const {
        if (isVReg())
          return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false,
                          /*Kill*/false, /*Dead*/false, /*Undef*/false,
                          /*EarlyClobber*/false, Sub);
        if (llvm::Register::isStackSlot(Reg)) {
          int FI = llvm::Register::stackSlot2Index(Reg);
          return MachineOperand::CreateFI(FI);
        }
        llvm_unreachable("Cannot create MachineOperand");
      }
      bool operator==(Register R) const { return Reg == R.Reg && Sub == R.Sub; }
      bool operator!=(Register R) const { return !operator==(R); }
      bool operator<(Register R) const {
        return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
      }
      unsigned Reg = 0, Sub = 0;
    };
    struct ExtExpr {
      // A subexpression in which the extender is used. In general, this
      // represents an expression where adding D to the extender will be
      // equivalent to adding D to the expression as a whole. In other
      // words, expr(add(##V,D)) = add(expr(##V),D).
      //
      // The original motivation for this are the io/ur addressing modes,
      // where the offset is extended. Consider the io example:
      // In memw(Rs+##V), the ##V could be replaced by a register Rt to
      // form the rr mode: memw(Rt+Rs<<0). In such case, however, the
      // register Rt must have exactly the value of ##V. If there was
      // another instruction memw(Rs+##V+4), it would need a different Rt.
      // Now, if Rt was initialized as "##V+Rs<<0", both of these
      // instructions could use the same Rt, just with different offsets.
      // Here it's clear that "initializer+4" should be the same as if
      // the offset 4 was added to the ##V in the initializer.
      //
      // The only kinds of expressions that support the requirement of
      // commuting with addition are addition and subtraction from ##V.
      // Include shifting the Rs to account for the ur addressing mode:
      //   ##V + Rs << S
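      //
      // For illustration: with uses memw(Rs+##foo) and memw(Rs+##foo+4), a
      // single initializer Rt = ##foo + Rs<<0 can serve both, rewritten as
      // memw(Rt+0) and memw(Rt+4), leaving only one constant extender.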
      Register Rs;
      unsigned S = 0;
      bool Neg = false;

      ExtExpr() = default;
      ExtExpr(Register RS, bool NG, unsigned SH) : Rs(RS), S(SH), Neg(NG) {}
      // Expression is trivial if it does not modify the extender.
      bool trivial() const {
        return Rs.Reg == 0;
      }
      bool operator==(const ExtExpr &Ex) const {
        return Rs == Ex.Rs && S == Ex.S && Neg == Ex.Neg;
      }
      bool operator!=(const ExtExpr &Ex) const {
        return !operator==(Ex);
      }
      bool operator<(const ExtExpr &Ex) const {
        if (Rs != Ex.Rs)
          return Rs < Ex.Rs;
        if (S != Ex.S)
          return S < Ex.S;
        return !Neg && Ex.Neg;
      }
    };
    struct ExtDesc {
      MachineInstr *UseMI = nullptr;
      unsigned OpNum = -1u;
      // The subexpression in which the extender is used (e.g. address
      // computation).
      ExtExpr Expr;
      // Optional register that is assigned the value of Expr.
      Register Rd;
      // Def means that the output of the instruction may differ from the
      // original by a constant c, and that the difference can be corrected
      // by adding/subtracting c in all users of the defined register.
      bool IsDef = false;

      MachineOperand &getOp() {
        return UseMI->getOperand(OpNum);
      }
      const MachineOperand &getOp() const {
        return UseMI->getOperand(OpNum);
      }
    };
    struct ExtRoot {
      union {
        const ConstantFP *CFP;  // MO_FPImmediate
        const char *SymbolName; // MO_ExternalSymbol
        const GlobalValue *GV;  // MO_GlobalAddress
        const BlockAddress *BA; // MO_BlockAddress
        int64_t ImmVal;         // MO_Immediate, MO_TargetIndex,
                                // and MO_ConstantPoolIndex
      } V;
      unsigned Kind;            // Same as in MachineOperand.
      unsigned char TF;         // TargetFlags.

      ExtRoot(const MachineOperand &Op);
      bool operator==(const ExtRoot &ER) const {
        return Kind == ER.Kind && V.ImmVal == ER.V.ImmVal;
      }
      bool operator!=(const ExtRoot &ER) const {
        return !operator==(ER);
      }
      bool operator<(const ExtRoot &ER) const;
    };
    struct ExtValue : public ExtRoot {
      int32_t Offset = 0;

      ExtValue(const MachineOperand &Op);
      ExtValue(const ExtDesc &ED) : ExtValue(ED.getOp()) {}
      ExtValue(const ExtRoot &ER, int32_t Off) : ExtRoot(ER), Offset(Off) {}
      bool operator<(const ExtValue &EV) const;
      bool operator==(const ExtValue &EV) const {
        return ExtRoot(*this) == ExtRoot(EV) && Offset == EV.Offset;
      }
      bool operator!=(const ExtValue &EV) const {
        return !operator==(EV);
      }
      explicit operator MachineOperand() const;
    };
    using IndexList = SetVector<unsigned>;
    using ExtenderInit = std::pair<ExtValue, ExtExpr>;
    using AssignmentMap = std::map<ExtenderInit, IndexList>;
    using LocDefList = std::vector<std::pair<Loc, IndexList>>;

    const HexagonInstrInfo *HII = nullptr;
    const HexagonRegisterInfo *HRI = nullptr;
    MachineDominatorTree *MDT = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    std::vector<ExtDesc> Extenders;
    std::vector<unsigned> NewRegs;
    bool isStoreImmediate(unsigned Opc) const;
    bool isRegOffOpcode(unsigned ExtOpc) const;
    unsigned getRegOffOpcode(unsigned ExtOpc) const;
    unsigned getDirectRegReplacement(unsigned ExtOpc) const;
    OffsetRange getOffsetRange(Register R, const MachineInstr &MI) const;
    OffsetRange getOffsetRange(const ExtDesc &ED) const;
    OffsetRange getOffsetRange(Register Rd) const;

    void recordExtender(MachineInstr &MI, unsigned OpNum);
    void collectInstr(MachineInstr &MI);
    void collect(MachineFunction &MF);
    void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
                     AssignmentMap &IMap);
    void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
                            LocDefList &Defs);
    Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
    bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
    bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
                          Register ExtR, int32_t &Diff);
    bool replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI);
    bool replaceExtenders(const AssignmentMap &IMap);

    unsigned getOperandIndex(const MachineInstr &MI,
                             const MachineOperand &Op) const;
    const MachineOperand &getPredicateOp(const MachineInstr &MI) const;
    const MachineOperand &getLoadResultOp(const MachineInstr &MI) const;
    const MachineOperand &getStoredValueOp(const MachineInstr &MI) const;
    friend struct PrintRegister;
    friend struct PrintExpr;
    friend struct PrintInit;
    friend struct PrintIMap;
    friend raw_ostream &operator<< (raw_ostream &OS,
                                    const struct PrintRegister &P);
    friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintExpr &P);
    friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintInit &P);
    friend raw_ostream &operator<< (raw_ostream &OS, const ExtDesc &ED);
    friend raw_ostream &operator<< (raw_ostream &OS, const ExtRoot &ER);
    friend raw_ostream &operator<< (raw_ostream &OS, const ExtValue &EV);
    friend raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR);
    friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintIMap &P);
  };

  using HCE = HexagonConstExtenders;
  LLVM_ATTRIBUTE_UNUSED
  raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {
    OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)
       << '+' << unsigned(OR.Offset);
    return OS;
  }
442 struct PrintRegister
{
443 PrintRegister(HCE::Register R
, const HexagonRegisterInfo
&I
)
446 const HexagonRegisterInfo
&HRI
;
449 LLVM_ATTRIBUTE_UNUSED
450 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintRegister
&P
) {
452 OS
<< printReg(P
.Rs
.Reg
, &P
.HRI
, P
.Rs
.Sub
);
459 PrintExpr(const HCE::ExtExpr
&E
, const HexagonRegisterInfo
&I
)
461 const HCE::ExtExpr
&Ex
;
462 const HexagonRegisterInfo
&HRI
;
465 LLVM_ATTRIBUTE_UNUSED
466 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintExpr
&P
) {
467 OS
<< "## " << (P
.Ex
.Neg
? "- " : "+ ");
468 if (P
.Ex
.Rs
.Reg
!= 0)
469 OS
<< printReg(P
.Ex
.Rs
.Reg
, &P
.HRI
, P
.Ex
.Rs
.Sub
);
472 OS
<< " << " << P
.Ex
.S
;
477 PrintInit(const HCE::ExtenderInit
&EI
, const HexagonRegisterInfo
&I
)
478 : ExtI(EI
), HRI(I
) {}
479 const HCE::ExtenderInit
&ExtI
;
480 const HexagonRegisterInfo
&HRI
;
483 LLVM_ATTRIBUTE_UNUSED
484 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintInit
&P
) {
485 OS
<< '[' << P
.ExtI
.first
<< ", "
486 << PrintExpr(P
.ExtI
.second
, P
.HRI
) << ']';
490 LLVM_ATTRIBUTE_UNUSED
491 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtDesc
&ED
) {
492 assert(ED
.OpNum
!= -1u);
493 const MachineBasicBlock
&MBB
= *ED
.getOp().getParent()->getParent();
494 const MachineFunction
&MF
= *MBB
.getParent();
495 const auto &HRI
= *MF
.getSubtarget
<HexagonSubtarget
>().getRegisterInfo();
496 OS
<< "bb#" << MBB
.getNumber() << ": ";
498 OS
<< printReg(ED
.Rd
.Reg
, &HRI
, ED
.Rd
.Sub
);
501 OS
<< " = " << PrintExpr(ED
.Expr
, HRI
);
507 LLVM_ATTRIBUTE_UNUSED
508 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtRoot
&ER
) {
510 case MachineOperand::MO_Immediate
:
511 OS
<< "imm:" << ER
.V
.ImmVal
;
513 case MachineOperand::MO_FPImmediate
:
514 OS
<< "fpi:" << *ER
.V
.CFP
;
516 case MachineOperand::MO_ExternalSymbol
:
517 OS
<< "sym:" << *ER
.V
.SymbolName
;
519 case MachineOperand::MO_GlobalAddress
:
520 OS
<< "gad:" << ER
.V
.GV
->getName();
522 case MachineOperand::MO_BlockAddress
:
523 OS
<< "blk:" << *ER
.V
.BA
;
525 case MachineOperand::MO_TargetIndex
:
526 OS
<< "tgi:" << ER
.V
.ImmVal
;
528 case MachineOperand::MO_ConstantPoolIndex
:
529 OS
<< "cpi:" << ER
.V
.ImmVal
;
531 case MachineOperand::MO_JumpTableIndex
:
532 OS
<< "jti:" << ER
.V
.ImmVal
;
535 OS
<< "???:" << ER
.V
.ImmVal
;
541 LLVM_ATTRIBUTE_UNUSED
542 raw_ostream
&operator<< (raw_ostream
&OS
, const HCE::ExtValue
&EV
) {
543 OS
<< HCE::ExtRoot(EV
) << " off:" << EV
.Offset
;
548 PrintIMap(const HCE::AssignmentMap
&M
, const HexagonRegisterInfo
&I
)
550 const HCE::AssignmentMap
&IMap
;
551 const HexagonRegisterInfo
&HRI
;
554 LLVM_ATTRIBUTE_UNUSED
555 raw_ostream
&operator<< (raw_ostream
&OS
, const PrintIMap
&P
) {
557 for (const std::pair
<HCE::ExtenderInit
,HCE::IndexList
> &Q
: P
.IMap
) {
558 OS
<< " " << PrintInit(Q
.first
, P
.HRI
) << " -> {";
559 for (unsigned I
: Q
.second
)
568 INITIALIZE_PASS_BEGIN(HexagonConstExtenders
, "hexagon-cext-opt",
569 "Hexagon constant-extender optimization", false, false)
570 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree
)
571 INITIALIZE_PASS_END(HexagonConstExtenders
, "hexagon-cext-opt",
572 "Hexagon constant-extender optimization", false, false)
574 static unsigned ReplaceCounter
= 0;
579 LLVM_DUMP_METHOD
void RangeTree::dump() const {
580 dbgs() << "Root: " << Root
<< '\n';
585 LLVM_DUMP_METHOD
void RangeTree::dump(const Node
*N
) const {
586 dbgs() << "Node: " << N
<< '\n';
587 dbgs() << " Height: " << N
->Height
<< '\n';
588 dbgs() << " Count: " << N
->Count
<< '\n';
589 dbgs() << " MaxEnd: " << N
->MaxEnd
<< '\n';
590 dbgs() << " Range: " << N
->Range
<< '\n';
591 dbgs() << " Left: " << N
->Left
<< '\n';
592 dbgs() << " Right: " << N
->Right
<< "\n\n";
601 void RangeTree::order(Node
*N
, SmallVectorImpl
<Node
*> &Seq
) const {
606 order(N
->Right
, Seq
);
609 void RangeTree::nodesWith(Node
*N
, int32_t P
, bool CheckA
,
610 SmallVectorImpl
<Node
*> &Seq
) const {
611 if (N
== nullptr || N
->MaxEnd
< P
)
613 nodesWith(N
->Left
, P
, CheckA
, Seq
);
614 if (N
->Range
.Min
<= P
) {
615 if ((CheckA
&& N
->Range
.contains(P
)) || (!CheckA
&& P
<= N
->Range
.Max
))
617 nodesWith(N
->Right
, P
, CheckA
, Seq
);
621 RangeTree::Node
*RangeTree::add(Node
*N
, const OffsetRange
&R
) {
631 N
->Left
= add(N
->Left
, R
);
633 N
->Right
= add(N
->Right
, R
);
634 return rebalance(update(N
));
637 RangeTree::Node
*RangeTree::remove(Node
*N
, const Node
*D
) {
638 assert(N
!= nullptr);
641 assert(N
->Range
!= D
->Range
&& "N and D should not be equal");
642 if (D
->Range
< N
->Range
)
643 N
->Left
= remove(N
->Left
, D
);
645 N
->Right
= remove(N
->Right
, D
);
646 return rebalance(update(N
));
649 // We got to the node we need to remove. If any of its children are
650 // missing, simply replace it with the other child.
651 if (N
->Left
== nullptr || N
->Right
== nullptr)
652 return (N
->Left
== nullptr) ? N
->Right
: N
->Left
;
654 // Find the rightmost child of N->Left, remove it and plug it in place
659 M
->Left
= remove(N
->Left
, M
);
661 return rebalance(update(M
));
664 RangeTree::Node
*RangeTree::rotateLeft(Node
*Lower
, Node
*Higher
) {
665 assert(Higher
->Right
== Lower
);
666 // The Lower node is on the right from Higher. Make sure that Lower's
667 // balance is greater to the right. Otherwise the rotation will create
668 // an unbalanced tree again.
669 if (height(Lower
->Left
) > height(Lower
->Right
))
670 Lower
= rotateRight(Lower
->Left
, Lower
);
671 assert(height(Lower
->Left
) <= height(Lower
->Right
));
672 Higher
->Right
= Lower
->Left
;
674 Lower
->Left
= Higher
;
679 RangeTree::Node
*RangeTree::rotateRight(Node
*Lower
, Node
*Higher
) {
680 assert(Higher
->Left
== Lower
);
681 // The Lower node is on the left from Higher. Make sure that Lower's
682 // balance is greater to the left. Otherwise the rotation will create
683 // an unbalanced tree again.
684 if (height(Lower
->Left
) < height(Lower
->Right
))
685 Lower
= rotateLeft(Lower
->Right
, Lower
);
686 assert(height(Lower
->Left
) >= height(Lower
->Right
));
687 Higher
->Left
= Lower
->Right
;
689 Lower
->Right
= Higher
;
695 HCE::ExtRoot::ExtRoot(const MachineOperand
&Op
) {
696 // Always store ImmVal, since it's the field used for comparisons.
699 ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root).
700 else if (Op
.isFPImm())
701 V
.CFP
= Op
.getFPImm();
702 else if (Op
.isSymbol())
703 V
.SymbolName
= Op
.getSymbolName();
704 else if (Op
.isGlobal())
705 V
.GV
= Op
.getGlobal();
706 else if (Op
.isBlockAddress())
707 V
.BA
= Op
.getBlockAddress();
708 else if (Op
.isCPI() || Op
.isTargetIndex() || Op
.isJTI())
709 V
.ImmVal
= Op
.getIndex();
711 llvm_unreachable("Unexpected operand type");
714 TF
= Op
.getTargetFlags();
717 bool HCE::ExtRoot::operator< (const HCE::ExtRoot
&ER
) const {
719 return Kind
< ER
.Kind
;
721 case MachineOperand::MO_Immediate
:
722 case MachineOperand::MO_TargetIndex
:
723 case MachineOperand::MO_ConstantPoolIndex
:
724 case MachineOperand::MO_JumpTableIndex
:
725 return V
.ImmVal
< ER
.V
.ImmVal
;
726 case MachineOperand::MO_FPImmediate
: {
727 const APFloat
&ThisF
= V
.CFP
->getValueAPF();
728 const APFloat
&OtherF
= ER
.V
.CFP
->getValueAPF();
729 return ThisF
.bitcastToAPInt().ult(OtherF
.bitcastToAPInt());
731 case MachineOperand::MO_ExternalSymbol
:
732 return StringRef(V
.SymbolName
) < StringRef(ER
.V
.SymbolName
);
733 case MachineOperand::MO_GlobalAddress
:
734 // Do not use GUIDs, since they depend on the source path. Moving the
735 // source file to a different directory could cause different GUID
736 // values for a pair of given symbols. These symbols could then compare
737 // "less" in one directory, but "greater" in another.
738 assert(!V
.GV
->getName().empty() && !ER
.V
.GV
->getName().empty());
739 return V
.GV
->getName() < ER
.V
.GV
->getName();
740 case MachineOperand::MO_BlockAddress
: {
741 const BasicBlock
*ThisB
= V
.BA
->getBasicBlock();
742 const BasicBlock
*OtherB
= ER
.V
.BA
->getBasicBlock();
743 assert(ThisB
->getParent() == OtherB
->getParent());
744 const Function
&F
= *ThisB
->getParent();
745 return std::distance(F
.begin(), ThisB
->getIterator()) <
746 std::distance(F
.begin(), OtherB
->getIterator());
749 return V
.ImmVal
< ER
.V
.ImmVal
;
752 HCE::ExtValue::ExtValue(const MachineOperand
&Op
) : ExtRoot(Op
) {
754 Offset
= Op
.getImm();
755 else if (Op
.isFPImm() || Op
.isJTI())
757 else if (Op
.isSymbol() || Op
.isGlobal() || Op
.isBlockAddress() ||
758 Op
.isCPI() || Op
.isTargetIndex())
759 Offset
= Op
.getOffset();
761 llvm_unreachable("Unexpected operand type");
764 bool HCE::ExtValue::operator< (const HCE::ExtValue
&EV
) const {
765 const ExtRoot
&ER
= *this;
766 if (!(ER
== ExtRoot(EV
)))
768 return Offset
< EV
.Offset
;
771 HCE::ExtValue::operator MachineOperand() const {
773 case MachineOperand::MO_Immediate
:
774 return MachineOperand::CreateImm(V
.ImmVal
+ Offset
);
775 case MachineOperand::MO_FPImmediate
:
777 return MachineOperand::CreateFPImm(V
.CFP
);
778 case MachineOperand::MO_ExternalSymbol
:
780 return MachineOperand::CreateES(V
.SymbolName
, TF
);
781 case MachineOperand::MO_GlobalAddress
:
782 return MachineOperand::CreateGA(V
.GV
, Offset
, TF
);
783 case MachineOperand::MO_BlockAddress
:
784 return MachineOperand::CreateBA(V
.BA
, Offset
, TF
);
785 case MachineOperand::MO_TargetIndex
:
786 return MachineOperand::CreateTargetIndex(V
.ImmVal
, Offset
, TF
);
787 case MachineOperand::MO_ConstantPoolIndex
:
788 return MachineOperand::CreateCPI(V
.ImmVal
, Offset
, TF
);
789 case MachineOperand::MO_JumpTableIndex
:
791 return MachineOperand::CreateJTI(V
.ImmVal
, TF
);
793 llvm_unreachable("Unhandled kind");
797 bool HCE::isStoreImmediate(unsigned Opc
) const {
799 case Hexagon::S4_storeirbt_io
:
800 case Hexagon::S4_storeirbf_io
:
801 case Hexagon::S4_storeirht_io
:
802 case Hexagon::S4_storeirhf_io
:
803 case Hexagon::S4_storeirit_io
:
804 case Hexagon::S4_storeirif_io
:
805 case Hexagon::S4_storeirb_io
:
806 case Hexagon::S4_storeirh_io
:
807 case Hexagon::S4_storeiri_io
:
815 bool HCE::isRegOffOpcode(unsigned Opc
) const {
817 case Hexagon::L2_loadrub_io
:
818 case Hexagon::L2_loadrb_io
:
819 case Hexagon::L2_loadruh_io
:
820 case Hexagon::L2_loadrh_io
:
821 case Hexagon::L2_loadri_io
:
822 case Hexagon::L2_loadrd_io
:
823 case Hexagon::L2_loadbzw2_io
:
824 case Hexagon::L2_loadbzw4_io
:
825 case Hexagon::L2_loadbsw2_io
:
826 case Hexagon::L2_loadbsw4_io
:
827 case Hexagon::L2_loadalignh_io
:
828 case Hexagon::L2_loadalignb_io
:
829 case Hexagon::L2_ploadrubt_io
:
830 case Hexagon::L2_ploadrubf_io
:
831 case Hexagon::L2_ploadrbt_io
:
832 case Hexagon::L2_ploadrbf_io
:
833 case Hexagon::L2_ploadruht_io
:
834 case Hexagon::L2_ploadruhf_io
:
835 case Hexagon::L2_ploadrht_io
:
836 case Hexagon::L2_ploadrhf_io
:
837 case Hexagon::L2_ploadrit_io
:
838 case Hexagon::L2_ploadrif_io
:
839 case Hexagon::L2_ploadrdt_io
:
840 case Hexagon::L2_ploadrdf_io
:
841 case Hexagon::S2_storerb_io
:
842 case Hexagon::S2_storerh_io
:
843 case Hexagon::S2_storerf_io
:
844 case Hexagon::S2_storeri_io
:
845 case Hexagon::S2_storerd_io
:
846 case Hexagon::S2_pstorerbt_io
:
847 case Hexagon::S2_pstorerbf_io
:
848 case Hexagon::S2_pstorerht_io
:
849 case Hexagon::S2_pstorerhf_io
:
850 case Hexagon::S2_pstorerft_io
:
851 case Hexagon::S2_pstorerff_io
:
852 case Hexagon::S2_pstorerit_io
:
853 case Hexagon::S2_pstorerif_io
:
854 case Hexagon::S2_pstorerdt_io
:
855 case Hexagon::S2_pstorerdf_io
:
856 case Hexagon::A2_addi
:
864 unsigned HCE::getRegOffOpcode(unsigned ExtOpc
) const {
865 // If there exists an instruction that takes a register and offset,
866 // that corresponds to the ExtOpc, return it, otherwise return 0.
867 using namespace Hexagon
;
869 case A2_tfrsi
: return A2_addi
;
873 const MCInstrDesc
&D
= HII
->get(ExtOpc
);
874 if (D
.mayLoad() || D
.mayStore()) {
875 uint64_t F
= D
.TSFlags
;
876 unsigned AM
= (F
>> HexagonII::AddrModePos
) & HexagonII::AddrModeMask
;
878 case HexagonII::Absolute
:
879 case HexagonII::AbsoluteSet
:
880 case HexagonII::BaseLongOffset
:
884 case L4_loadrub_ur
: return L2_loadrub_io
;
887 case L4_loadrb_ur
: return L2_loadrb_io
;
890 case L4_loadruh_ur
: return L2_loadruh_io
;
893 case L4_loadrh_ur
: return L2_loadrh_io
;
896 case L4_loadri_ur
: return L2_loadri_io
;
899 case L4_loadrd_ur
: return L2_loadrd_io
;
901 case L4_loadbzw2_ur
: return L2_loadbzw2_io
;
903 case L4_loadbzw4_ur
: return L2_loadbzw4_io
;
905 case L4_loadbsw2_ur
: return L2_loadbsw2_io
;
907 case L4_loadbsw4_ur
: return L2_loadbsw4_io
;
908 case L4_loadalignh_ap
:
909 case L4_loadalignh_ur
: return L2_loadalignh_io
;
910 case L4_loadalignb_ap
:
911 case L4_loadalignb_ur
: return L2_loadalignb_io
;
912 case L4_ploadrubt_abs
: return L2_ploadrubt_io
;
913 case L4_ploadrubf_abs
: return L2_ploadrubf_io
;
914 case L4_ploadrbt_abs
: return L2_ploadrbt_io
;
915 case L4_ploadrbf_abs
: return L2_ploadrbf_io
;
916 case L4_ploadruht_abs
: return L2_ploadruht_io
;
917 case L4_ploadruhf_abs
: return L2_ploadruhf_io
;
918 case L4_ploadrht_abs
: return L2_ploadrht_io
;
919 case L4_ploadrhf_abs
: return L2_ploadrhf_io
;
920 case L4_ploadrit_abs
: return L2_ploadrit_io
;
921 case L4_ploadrif_abs
: return L2_ploadrif_io
;
922 case L4_ploadrdt_abs
: return L2_ploadrdt_io
;
923 case L4_ploadrdf_abs
: return L2_ploadrdf_io
;
926 case S4_storerb_ur
: return S2_storerb_io
;
929 case S4_storerh_ur
: return S2_storerh_io
;
932 case S4_storerf_ur
: return S2_storerf_io
;
935 case S4_storeri_ur
: return S2_storeri_io
;
938 case S4_storerd_ur
: return S2_storerd_io
;
939 case S4_pstorerbt_abs
: return S2_pstorerbt_io
;
940 case S4_pstorerbf_abs
: return S2_pstorerbf_io
;
941 case S4_pstorerht_abs
: return S2_pstorerht_io
;
942 case S4_pstorerhf_abs
: return S2_pstorerhf_io
;
943 case S4_pstorerft_abs
: return S2_pstorerft_io
;
944 case S4_pstorerff_abs
: return S2_pstorerff_io
;
945 case S4_pstorerit_abs
: return S2_pstorerit_io
;
946 case S4_pstorerif_abs
: return S2_pstorerif_io
;
947 case S4_pstorerdt_abs
: return S2_pstorerdt_io
;
948 case S4_pstorerdf_abs
: return S2_pstorerdf_io
;
953 case HexagonII::BaseImmOffset
:
954 if (!isStoreImmediate(ExtOpc
))
964 unsigned HCE::getDirectRegReplacement(unsigned ExtOpc
) const {
966 case Hexagon::A2_addi
: return Hexagon::A2_add
;
967 case Hexagon::A2_andir
: return Hexagon::A2_and
;
968 case Hexagon::A2_combineii
: return Hexagon::A4_combineri
;
969 case Hexagon::A2_orir
: return Hexagon::A2_or
;
970 case Hexagon::A2_paddif
: return Hexagon::A2_paddf
;
971 case Hexagon::A2_paddit
: return Hexagon::A2_paddt
;
972 case Hexagon::A2_subri
: return Hexagon::A2_sub
;
973 case Hexagon::A2_tfrsi
: return TargetOpcode::COPY
;
974 case Hexagon::A4_cmpbeqi
: return Hexagon::A4_cmpbeq
;
975 case Hexagon::A4_cmpbgti
: return Hexagon::A4_cmpbgt
;
976 case Hexagon::A4_cmpbgtui
: return Hexagon::A4_cmpbgtu
;
977 case Hexagon::A4_cmpheqi
: return Hexagon::A4_cmpheq
;
978 case Hexagon::A4_cmphgti
: return Hexagon::A4_cmphgt
;
979 case Hexagon::A4_cmphgtui
: return Hexagon::A4_cmphgtu
;
980 case Hexagon::A4_combineii
: return Hexagon::A4_combineir
;
981 case Hexagon::A4_combineir
: return TargetOpcode::REG_SEQUENCE
;
982 case Hexagon::A4_combineri
: return TargetOpcode::REG_SEQUENCE
;
983 case Hexagon::A4_rcmpeqi
: return Hexagon::A4_rcmpeq
;
984 case Hexagon::A4_rcmpneqi
: return Hexagon::A4_rcmpneq
;
985 case Hexagon::C2_cmoveif
: return Hexagon::A2_tfrpf
;
986 case Hexagon::C2_cmoveit
: return Hexagon::A2_tfrpt
;
987 case Hexagon::C2_cmpeqi
: return Hexagon::C2_cmpeq
;
988 case Hexagon::C2_cmpgti
: return Hexagon::C2_cmpgt
;
989 case Hexagon::C2_cmpgtui
: return Hexagon::C2_cmpgtu
;
990 case Hexagon::C2_muxii
: return Hexagon::C2_muxir
;
991 case Hexagon::C2_muxir
: return Hexagon::C2_mux
;
992 case Hexagon::C2_muxri
: return Hexagon::C2_mux
;
993 case Hexagon::C4_cmpltei
: return Hexagon::C4_cmplte
;
994 case Hexagon::C4_cmplteui
: return Hexagon::C4_cmplteu
;
995 case Hexagon::C4_cmpneqi
: return Hexagon::C4_cmpneq
;
996 case Hexagon::M2_accii
: return Hexagon::M2_acci
; // T -> T
998 case Hexagon::M2_macsip
: return Hexagon::M2_maci
; // T -> T
999 case Hexagon::M2_mpysin
: return Hexagon::M2_mpyi
;
1000 case Hexagon::M2_mpysip
: return Hexagon::M2_mpyi
;
1001 case Hexagon::M2_mpysmi
: return Hexagon::M2_mpyi
;
1002 case Hexagon::M2_naccii
: return Hexagon::M2_nacci
; // T -> T
1003 case Hexagon::M4_mpyri_addi
: return Hexagon::M4_mpyri_addr
;
1004 case Hexagon::M4_mpyri_addr
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1005 case Hexagon::M4_mpyrr_addi
: return Hexagon::M4_mpyrr_addr
; // _ -> T
1006 case Hexagon::S4_addaddi
: return Hexagon::M2_acci
; // _ -> T
1007 case Hexagon::S4_addi_asl_ri
: return Hexagon::S2_asl_i_r_acc
; // T -> T
1008 case Hexagon::S4_addi_lsr_ri
: return Hexagon::S2_lsr_i_r_acc
; // T -> T
1009 case Hexagon::S4_andi_asl_ri
: return Hexagon::S2_asl_i_r_and
; // T -> T
1010 case Hexagon::S4_andi_lsr_ri
: return Hexagon::S2_lsr_i_r_and
; // T -> T
1011 case Hexagon::S4_ori_asl_ri
: return Hexagon::S2_asl_i_r_or
; // T -> T
1012 case Hexagon::S4_ori_lsr_ri
: return Hexagon::S2_lsr_i_r_or
; // T -> T
1013 case Hexagon::S4_subaddi
: return Hexagon::M2_subacc
; // _ -> T
1014 case Hexagon::S4_subi_asl_ri
: return Hexagon::S2_asl_i_r_nac
; // T -> T
1015 case Hexagon::S4_subi_lsr_ri
: return Hexagon::S2_lsr_i_r_nac
; // T -> T
1017 // Store-immediates:
1018 case Hexagon::S4_storeirbf_io
: return Hexagon::S2_pstorerbf_io
;
1019 case Hexagon::S4_storeirb_io
: return Hexagon::S2_storerb_io
;
1020 case Hexagon::S4_storeirbt_io
: return Hexagon::S2_pstorerbt_io
;
1021 case Hexagon::S4_storeirhf_io
: return Hexagon::S2_pstorerhf_io
;
1022 case Hexagon::S4_storeirh_io
: return Hexagon::S2_storerh_io
;
1023 case Hexagon::S4_storeirht_io
: return Hexagon::S2_pstorerht_io
;
1024 case Hexagon::S4_storeirif_io
: return Hexagon::S2_pstorerif_io
;
1025 case Hexagon::S4_storeiri_io
: return Hexagon::S2_storeri_io
;
1026 case Hexagon::S4_storeirit_io
: return Hexagon::S2_pstorerit_io
;
// Return the allowable deviation from the current value of Rb (i.e. the
// range of values that can be added to the current value) which the
// instruction MI can accommodate.
// The instruction MI is a user of register Rb, which is defined via an
// extender. It may be possible for MI to be tweaked to work for a register
// defined with a slightly different value. For example
//   ... = L2_loadrub_io Rb, 1
// can be modified to be
//   ... = L2_loadrub_io Rb', 0
//
// The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
// for L2_loadrub with offset 0. That means that Rb could be replaced with
// Rc, where Rc-Rb belongs to [Min+1, Max+1].
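//
// Illustration (the numbers are made up, not taken from the ISA tables): if
// the io form of this load accepted immediate offsets in [-256, 255], the
// code below would return that range shifted by the current offset of 1,
// i.e. [-255, 256], as the set of deviations the base register may take.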
1047 OffsetRange
HCE::getOffsetRange(Register Rb
, const MachineInstr
&MI
) const {
1048 unsigned Opc
= MI
.getOpcode();
1049 // Instructions that are constant-extended may be replaced with something
1050 // else that no longer offers the same range as the original.
1051 if (!isRegOffOpcode(Opc
) || HII
->isConstExtended(MI
))
1052 return OffsetRange::zero();
1054 if (Opc
== Hexagon::A2_addi
) {
1055 const MachineOperand
&Op1
= MI
.getOperand(1), &Op2
= MI
.getOperand(2);
1056 if (Rb
!= Register(Op1
) || !Op2
.isImm())
1057 return OffsetRange::zero();
1058 OffsetRange R
= { -(1<<15)+1, (1<<15)-1, 1 };
1059 return R
.shift(Op2
.getImm());
1062 // HII::getBaseAndOffsetPosition returns the increment position as "offset".
1063 if (HII
->isPostIncrement(MI
))
1064 return OffsetRange::zero();
1066 const MCInstrDesc
&D
= HII
->get(Opc
);
1067 assert(D
.mayLoad() || D
.mayStore());
1069 unsigned BaseP
, OffP
;
1070 if (!HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
) ||
1071 Rb
!= Register(MI
.getOperand(BaseP
)) ||
1072 !MI
.getOperand(OffP
).isImm())
1073 return OffsetRange::zero();
1075 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1076 HexagonII::MemAccesSizeMask
;
1077 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1078 unsigned L
= Log2_32(A
);
1079 unsigned S
= 10+L
; // sint11_L
1080 int32_t Min
= -alignDown((1<<S
)-1, A
);
1082 // The range will be shifted by Off. To prefer non-negative offsets,
1083 // adjust Max accordingly.
1084 int32_t Off
= MI
.getOperand(OffP
).getImm();
1085 int32_t Max
= Off
>= 0 ? 0 : -Off
;
1087 OffsetRange R
= { Min
, Max
, A
};
1088 return R
.shift(Off
);
// Return the allowable deviation from the current value of the extender ED,
// for which the instruction corresponding to ED can be modified without
// using an extender.
// The instruction uses the extender directly. It will be replaced with
// another instruction, say MJ, where the extender will be replaced with a
// register. MJ can allow some variability with respect to the value of
// that register, as is the case with indexed memory instructions.
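// For example, an absolute-addressed load rewritten to its _io counterpart
// gains an immediate offset field, so the register that replaces the
// extender does not need to hold the extended value exactly; a nearby value
// can be compensated for through that offset.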
1098 OffsetRange
HCE::getOffsetRange(const ExtDesc
&ED
) const {
1099 // The only way that there can be a non-zero range available is if
1100 // the instruction using ED will be converted to an indexed memory
1102 unsigned IdxOpc
= getRegOffOpcode(ED
.UseMI
->getOpcode());
1105 return OffsetRange::zero();
1106 case Hexagon::A2_addi
: // s16
1107 return { -32767, 32767, 1 };
1108 case Hexagon::A2_subri
: // s10
1109 return { -511, 511, 1 };
1112 if (!ED
.UseMI
->mayLoad() && !ED
.UseMI
->mayStore())
1113 return OffsetRange::zero();
1114 const MCInstrDesc
&D
= HII
->get(IdxOpc
);
1115 uint64_t F
= (D
.TSFlags
>> HexagonII::MemAccessSizePos
) &
1116 HexagonII::MemAccesSizeMask
;
1117 uint8_t A
= HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F
));
1118 unsigned L
= Log2_32(A
);
1119 unsigned S
= 10+L
; // sint11_L
1120 int32_t Min
= -alignDown((1<<S
)-1, A
);
1121 int32_t Max
= 0; // Force non-negative offsets.
1122 return { Min
, Max
, A
};
1125 // Get the allowable deviation from the current value of Rd by checking
1127 OffsetRange
HCE::getOffsetRange(Register Rd
) const {
1129 for (const MachineOperand
&Op
: MRI
->use_operands(Rd
.Reg
)) {
1130 // Make sure that the register being used by this operand is identical
1131 // to the register that was defined: using a different subregister
1132 // precludes any non-trivial range.
1133 if (Rd
!= Register(Op
))
1134 return OffsetRange::zero();
1135 Range
.intersect(getOffsetRange(Rd
, *Op
.getParent()));
1140 void HCE::recordExtender(MachineInstr
&MI
, unsigned OpNum
) {
1141 unsigned Opc
= MI
.getOpcode();
1145 bool IsLoad
= MI
.mayLoad();
1146 bool IsStore
= MI
.mayStore();
1148 // Fixed stack slots have negative indexes, and they cannot be used
1149 // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
1150 // unfortunate, but should not be a frequent thing.
1151 for (MachineOperand
&Op
: MI
.operands())
1152 if (Op
.isFI() && Op
.getIndex() < 0)
1155 if (IsLoad
|| IsStore
) {
1156 unsigned AM
= HII
->getAddrMode(MI
);
1158 // (Re: ##Off + Rb<<S) = Rd: ##Val
1159 case HexagonII::Absolute
: // (__: ## + __<<_)
1161 case HexagonII::AbsoluteSet
: // (Rd: ## + __<<_)
1162 ED
.Rd
= MI
.getOperand(OpNum
-1);
1165 case HexagonII::BaseImmOffset
: // (__: ## + Rs<<0)
1166 // Store-immediates are treated as non-memory operations, since
1167 // it's the value being stored that is extended (as opposed to
1168 // a part of the address).
1169 if (!isStoreImmediate(Opc
))
1170 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1172 case HexagonII::BaseLongOffset
: // (__: ## + Rs<<S)
1173 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-2);
1174 ED
.Expr
.S
= MI
.getOperand(OpNum
-1).getImm();
1177 llvm_unreachable("Unhandled memory instruction");
1181 case Hexagon::A2_tfrsi
: // (Rd: ## + __<<_)
1182 ED
.Rd
= MI
.getOperand(0);
1185 case Hexagon::A2_combineii
: // (Rd: ## + __<<_)
1186 case Hexagon::A4_combineir
:
1187 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_hi
};
1190 case Hexagon::A4_combineri
: // (Rd: ## + __<<_)
1191 ED
.Rd
= { MI
.getOperand(0).getReg(), Hexagon::isub_lo
};
1194 case Hexagon::A2_addi
: // (Rd: ## + Rs<<0)
1195 ED
.Rd
= MI
.getOperand(0);
1196 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1198 case Hexagon::M2_accii
: // (__: ## + Rs<<0)
1199 case Hexagon::M2_naccii
:
1200 case Hexagon::S4_addaddi
:
1201 ED
.Expr
.Rs
= MI
.getOperand(OpNum
-1);
1203 case Hexagon::A2_subri
: // (Rd: ## - Rs<<0)
1204 ED
.Rd
= MI
.getOperand(0);
1205 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1208 case Hexagon::S4_subaddi
: // (__: ## - Rs<<0)
1209 ED
.Expr
.Rs
= MI
.getOperand(OpNum
+1);
1212 default: // (__: ## + __<<_)
1219 // Ignore unnamed globals.
1220 ExtRoot
ER(ED
.getOp());
1221 if (ER
.Kind
== MachineOperand::MO_GlobalAddress
)
1222 if (ER
.V
.GV
->getName().empty())
1224 Extenders
.push_back(ED
);
1227 void HCE::collectInstr(MachineInstr
&MI
) {
1228 if (!HII
->isConstExtended(MI
))
1231 // Skip some non-convertible instructions.
1232 unsigned Opc
= MI
.getOpcode();
1234 case Hexagon::M2_macsin
: // There is no Rx -= mpyi(Rs,Rt).
1235 case Hexagon::C4_addipc
:
1236 case Hexagon::S4_or_andi
:
1237 case Hexagon::S4_or_andix
:
1238 case Hexagon::S4_or_ori
:
1241 recordExtender(MI
, HII
->getCExtOpNum(MI
));
1244 void HCE::collect(MachineFunction
&MF
) {
1246 for (MachineBasicBlock
&MBB
: MF
) {
1247 // Skip unreachable blocks.
1248 if (MBB
.getNumber() == -1)
1250 for (MachineInstr
&MI
: MBB
)
1255 void HCE::assignInits(const ExtRoot
&ER
, unsigned Begin
, unsigned End
,
1256 AssignmentMap
&IMap
) {
1257 // Sanity check: make sure that all extenders in the range [Begin..End)
1258 // share the same root ER.
1259 for (unsigned I
= Begin
; I
!= End
; ++I
)
1260 assert(ER
== ExtRoot(Extenders
[I
].getOp()));
1262 // Construct the list of ranges, such that for each P in Ranges[I],
1263 // a register Reg = ER+P can be used in place of Extender[I]. If the
1264 // instruction allows, uses in the form of Reg+Off are considered
1265 // (here, Off = required_value - P).
1266 std::vector
<OffsetRange
> Ranges(End
-Begin
);
1268 // For each extender that is a def, visit all uses of the defined register,
1269 // and produce an offset range that works for all uses. The def doesn't
1270 // have to be checked, because it can become dead if all uses can be updated
1271 // to use a different reg/offset.
1272 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1273 const ExtDesc
&ED
= Extenders
[I
];
1277 LLVM_DEBUG(dbgs() << " =" << I
<< ". " << EV
<< " " << ED
<< '\n');
1278 assert(ED
.Rd
.Reg
!= 0);
1279 Ranges
[I
-Begin
] = getOffsetRange(ED
.Rd
).shift(EV
.Offset
);
1280 // A2_tfrsi is a special case: it will be replaced with A2_addi, which
1281 // has a 16-bit signed offset. This means that A2_tfrsi not only has a
1282 // range coming from its uses, but also from the fact that its replacement
1283 // has a range as well.
1284 if (ED
.UseMI
->getOpcode() == Hexagon::A2_tfrsi
) {
1285 int32_t D
= alignDown(32767, Ranges
[I
-Begin
].Align
); // XXX hardcoded
1286 Ranges
[I
-Begin
].extendBy(-D
).extendBy(D
);
1290 // Visit all non-def extenders. For each one, determine the offset range
1291 // available for it.
1292 for (unsigned I
= Begin
; I
!= End
; ++I
) {
1293 const ExtDesc
&ED
= Extenders
[I
];
1297 LLVM_DEBUG(dbgs() << " " << I
<< ". " << EV
<< " " << ED
<< '\n');
1298 OffsetRange Dev
= getOffsetRange(ED
);
1299 Ranges
[I
-Begin
].intersect(Dev
.shift(EV
.Offset
));
1302 // Here for each I there is a corresponding Range[I]. Construct the
1303 // inverse map, that to each range will assign the set of indexes in
1304 // [Begin..End) that this range corresponds to.
1305 std::map
<OffsetRange
, IndexList
> RangeMap
;
1306 for (unsigned I
= Begin
; I
!= End
; ++I
)
1307 RangeMap
[Ranges
[I
-Begin
]].insert(I
);
1310 dbgs() << "Ranges\n";
1311 for (unsigned I
= Begin
; I
!= End
; ++I
)
1312 dbgs() << " " << I
<< ". " << Ranges
[I
-Begin
] << '\n';
1313 dbgs() << "RangeMap\n";
1314 for (auto &P
: RangeMap
) {
1315 dbgs() << " " << P
.first
<< " ->";
1316 for (unsigned I
: P
.second
)
1322 // Select the definition points, and generate the assignment between
1323 // these points and the uses.
1325 // For each candidate offset, keep a pair CandData consisting of
1326 // the total number of ranges containing that candidate, and the
1327 // vector of corresponding RangeTree nodes.
1328 using CandData
= std::pair
<unsigned, SmallVector
<RangeTree::Node
*,8>>;
1329 std::map
<int32_t, CandData
> CandMap
;
1332 for (const OffsetRange
&R
: Ranges
)
1334 SmallVector
<RangeTree::Node
*,8> Nodes
;
1337 auto MaxAlign
= [](const SmallVectorImpl
<RangeTree::Node
*> &Nodes
,
1338 uint8_t Align
, uint8_t Offset
) {
1339 for (RangeTree::Node
*N
: Nodes
) {
1340 if (N
->Range
.Align
<= Align
|| N
->Range
.Offset
< Offset
)
1342 if ((N
->Range
.Offset
- Offset
) % Align
!= 0)
1344 Align
= N
->Range
.Align
;
1345 Offset
= N
->Range
.Offset
;
1347 return std::make_pair(Align
, Offset
);
1350 // Construct the set of all potential definition points from the endpoints
1351 // of the ranges. If a given endpoint also belongs to a different range,
1352 // but with a higher alignment, also consider the more-highly-aligned
1353 // value of this endpoint.
1354 std::set
<int32_t> CandSet
;
1355 for (RangeTree::Node
*N
: Nodes
) {
1356 const OffsetRange
&R
= N
->Range
;
1357 auto P0
= MaxAlign(Tree
.nodesWith(R
.Min
, false), R
.Align
, R
.Offset
);
1358 CandSet
.insert(R
.Min
);
1359 if (R
.Align
< P0
.first
)
1360 CandSet
.insert(adjustUp(R
.Min
, P0
.first
, P0
.second
));
1361 auto P1
= MaxAlign(Tree
.nodesWith(R
.Max
, false), R
.Align
, R
.Offset
);
1362 CandSet
.insert(R
.Max
);
1363 if (R
.Align
< P1
.first
)
1364 CandSet
.insert(adjustDown(R
.Max
, P1
.first
, P1
.second
));
  // Build the assignment map: candidate C -> { list of extender indexes }.
  // This has to be done iteratively:
  // - pick the candidate that covers the maximum number of extenders,
  // - add the candidate to the map,
  // - remove the extenders from the pool.
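  // In other words, this is a greedy covering: a candidate that covers, say,
  // three extenders is committed before a candidate that covers only one of
  // those same three, and the latter is then reconsidered against whatever
  // extenders remain. (The counts here are illustrative.)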
1373 using CMap
= std::map
<int32_t,unsigned>;
1375 for (auto It
= CandSet
.begin(), Et
= CandSet
.end(); It
!= Et
; ) {
1376 auto &&V
= Tree
.nodesWith(*It
);
1377 unsigned N
= std::accumulate(V
.begin(), V
.end(), 0u,
1378 [](unsigned Acc
, const RangeTree::Node
*N
) {
1379 return Acc
+ N
->Count
;
1382 Counts
.insert({*It
, N
});
1383 It
= (N
!= 0) ? std::next(It
) : CandSet
.erase(It
);
1388 // Find the best candidate with respect to the number of extenders covered.
1389 auto BestIt
= std::max_element(Counts
.begin(), Counts
.end(),
1390 [](const CMap::value_type
&A
, const CMap::value_type
&B
) {
1391 return A
.second
< B
.second
||
1392 (A
.second
== B
.second
&& A
< B
);
1394 int32_t Best
= BestIt
->first
;
1395 ExtValue
BestV(ER
, Best
);
1396 for (RangeTree::Node
*N
: Tree
.nodesWith(Best
)) {
1397 for (unsigned I
: RangeMap
[N
->Range
])
1398 IMap
[{BestV
,Extenders
[I
].Expr
}].insert(I
);
1403 LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap
, *HRI
));
  // There is some ambiguity in what initializer should be used, if the
  // descriptor's subexpression is non-trivial: it can be the entire
  // subexpression (which is what has been done so far), or it can be
  // the extender's value itself, if all corresponding extenders have the
  // exact value of the initializer (i.e. require offset of 0).
  //
  // To reduce the number of initializers, merge such special cases.
1412 for (std::pair
<const ExtenderInit
,IndexList
> &P
: IMap
) {
1413 // Skip trivial initializers.
1414 if (P
.first
.second
.trivial())
1416 // If the corresponding trivial initializer does not exist, skip this
1418 const ExtValue
&EV
= P
.first
.first
;
1419 AssignmentMap::iterator F
= IMap
.find({EV
, ExtExpr()});
1420 if (F
== IMap
.end())
    // Finally, check if all extenders have the same value as the initializer.
    // Make sure that extenders that are a part of a stack address are not
    // merged with those that aren't. Stack addresses need an offset field
    // (to be used by frame index elimination), while non-stack expressions
    // can be replaced with forms (such as rr) that do not have such a field.
    //
    // Example:
    // Collected 3 extenders
    //  =2. imm:0  off:32968  bb#2: %7 = ## + __ << 0, def
    //   0. imm:0  off:267  bb#0: __ = ## + SS#1 << 0
    //   1. imm:0  off:267  bb#1: __ = ## + SS#1 << 0
    // Ranges
    //   0. [-756,267]a1+0
    //   1. [-756,267]a1+0
    //   2. [201,65735]a1+0
    // RangeMap
    //   [-756,267]a1+0 -> 0 1
    //   [201,65735]a1+0 -> 2
    // IMap (before fixup) = {
    //   [imm:0  off:267, ## + __ << 0] -> { 2 }
    //   [imm:0  off:267, ## + SS#1 << 0] -> { 0 1 }
    // }
    // IMap (after fixup) = {
    //   [imm:0  off:267, ## + __ << 0] -> { 2 0 1 }
    //   [imm:0  off:267, ## + SS#1 << 0] -> { }
    // }
    // Inserted def in bb#0 for initializer: [imm:0  off:267, ## + __ << 0]
    //   %12:intregs = A2_tfrsi 267
    //
    //   %12:intregs = A2_tfrsi 267
    //   S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
    //
    //   if (p0.new) memb(r0+r29<<#4) = r2
1459 bool IsStack
= any_of(F
->second
, [this](unsigned I
) {
1460 return Extenders
[I
].Expr
.Rs
.isSlot();
1462 auto SameValue
= [&EV
,this,IsStack
](unsigned I
) {
1463 const ExtDesc
&ED
= Extenders
[I
];
1464 return ED
.Expr
.Rs
.isSlot() == IsStack
&&
1465 ExtValue(ED
).Offset
== EV
.Offset
;
1467 if (all_of(P
.second
, SameValue
)) {
1468 F
->second
.insert(P
.second
.begin(), P
.second
.end());
1473 LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap
, *HRI
));
1476 void HCE::calculatePlacement(const ExtenderInit
&ExtI
, const IndexList
&Refs
,
  // The placement calculation is somewhat simple right now: it finds a
  // single location for the def that dominates all refs. Since this may
  // place the def far from the uses, producing several locations for
  // defs that collectively dominate all refs could be better.
  // For now only do the single one.
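  // Concretely, the code below collects the blocks of all referenced
  // instructions, folds them into a common block DomB with
  // MDT->findNearestCommonDominator, and then places the def either right
  // before the first reference inside DomB, or at DomB's first terminator
  // if DomB contains no references.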
1486 DenseSet
<MachineBasicBlock
*> Blocks
;
1487 DenseSet
<MachineInstr
*> RefMIs
;
1488 const ExtDesc
&ED0
= Extenders
[Refs
[0]];
1489 MachineBasicBlock
*DomB
= ED0
.UseMI
->getParent();
1490 RefMIs
.insert(ED0
.UseMI
);
1491 Blocks
.insert(DomB
);
1492 for (unsigned i
= 1, e
= Refs
.size(); i
!= e
; ++i
) {
1493 const ExtDesc
&ED
= Extenders
[Refs
[i
]];
1494 MachineBasicBlock
*MBB
= ED
.UseMI
->getParent();
1495 RefMIs
.insert(ED
.UseMI
);
1496 DomB
= MDT
->findNearestCommonDominator(DomB
, MBB
);
1501 // The block DomB should be dominated by the def of each register used
1502 // in the initializer.
1503 Register Rs
= ExtI
.second
.Rs
; // Only one reg allowed now.
1504 const MachineInstr
*DefI
= Rs
.isVReg() ? MRI
->getVRegDef(Rs
.Reg
) : nullptr;
1506 // This should be guaranteed given that the entire expression is used
1507 // at each instruction in Refs. Add an assertion just in case.
1508 assert(!DefI
|| MDT
->dominates(DefI
->getParent(), DomB
));
1511 MachineBasicBlock::iterator It
;
1512 if (Blocks
.count(DomB
)) {
1513 // Try to find the latest possible location for the def.
1514 MachineBasicBlock::iterator End
= DomB
->end();
1515 for (It
= DomB
->begin(); It
!= End
; ++It
)
1516 if (RefMIs
.count(&*It
))
1518 assert(It
!= End
&& "Should have found a ref in DomB");
1520 // DomB does not contain any refs.
1521 It
= DomB
->getFirstTerminator();
1523 Loc
DefLoc(DomB
, It
);
1524 Defs
.emplace_back(DefLoc
, Refs
);
1527 HCE::Register
HCE::insertInitializer(Loc DefL
, const ExtenderInit
&ExtI
) {
1528 llvm::Register DefR
= MRI
->createVirtualRegister(&Hexagon::IntRegsRegClass
);
1529 MachineBasicBlock
&MBB
= *DefL
.Block
;
1530 MachineBasicBlock::iterator At
= DefL
.At
;
1531 DebugLoc dl
= DefL
.Block
->findDebugLoc(DefL
.At
);
1532 const ExtValue
&EV
= ExtI
.first
;
1533 MachineOperand
ExtOp(EV
);
1535 const ExtExpr
&Ex
= ExtI
.second
;
1536 const MachineInstr
*InitI
= nullptr;
1538 if (Ex
.Rs
.isSlot()) {
1539 assert(Ex
.S
== 0 && "Cannot have a shift of a stack slot");
1540 assert(!Ex
.Neg
&& "Cannot subtract a stack slot");
1541 // DefR = PS_fi Rb,##EV
1542 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::PS_fi
), DefR
)
1543 .add(MachineOperand(Ex
.Rs
))
1546 assert((Ex
.Rs
.Reg
== 0 || Ex
.Rs
.isVReg()) && "Expecting virtual register");
1549 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_tfrsi
), DefR
)
1551 } else if (Ex
.S
== 0) {
1553 // DefR = sub(##EV,Rb)
1554 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_subri
), DefR
)
1556 .add(MachineOperand(Ex
.Rs
));
1558 // DefR = add(Rb,##EV)
1559 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(Hexagon::A2_addi
), DefR
)
1560 .add(MachineOperand(Ex
.Rs
))
1564 unsigned NewOpc
= Ex
.Neg
? Hexagon::S4_subi_asl_ri
1565 : Hexagon::S4_addi_asl_ri
;
1566 // DefR = add(##EV,asl(Rb,S))
1567 InitI
= BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
), DefR
)
1569 .add(MachineOperand(Ex
.Rs
))
1576 LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB
.getNumber()
1577 << " for initializer: " << PrintInit(ExtI
, *HRI
) << "\n "
1582 // Replace the extender at index Idx with the register ExtR.
1583 bool HCE::replaceInstrExact(const ExtDesc
&ED
, Register ExtR
) {
1584 MachineInstr
&MI
= *ED
.UseMI
;
1585 MachineBasicBlock
&MBB
= *MI
.getParent();
1586 MachineBasicBlock::iterator At
= MI
.getIterator();
1587 DebugLoc dl
= MI
.getDebugLoc();
1588 unsigned ExtOpc
= MI
.getOpcode();
1590 // With a few exceptions, direct replacement amounts to creating an
1591 // instruction with a corresponding register opcode, with all operands
1592 // the same, except for the register used in place of the extender.
1593 unsigned RegOpc
= getDirectRegReplacement(ExtOpc
);
1595 if (RegOpc
== TargetOpcode::REG_SEQUENCE
) {
1596 if (ExtOpc
== Hexagon::A4_combineri
)
1597 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1598 .add(MI
.getOperand(0))
1599 .add(MI
.getOperand(1))
1600 .addImm(Hexagon::isub_hi
)
1601 .add(MachineOperand(ExtR
))
1602 .addImm(Hexagon::isub_lo
);
1603 else if (ExtOpc
== Hexagon::A4_combineir
)
1604 BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
))
1605 .add(MI
.getOperand(0))
1606 .add(MachineOperand(ExtR
))
1607 .addImm(Hexagon::isub_hi
)
1608 .add(MI
.getOperand(2))
1609 .addImm(Hexagon::isub_lo
);
1611 llvm_unreachable("Unexpected opcode became REG_SEQUENCE");
1615 if (ExtOpc
== Hexagon::C2_cmpgei
|| ExtOpc
== Hexagon::C2_cmpgeui
) {
1616 unsigned NewOpc
= ExtOpc
== Hexagon::C2_cmpgei
? Hexagon::C2_cmplt
1617 : Hexagon::C2_cmpltu
;
1618 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1619 .add(MI
.getOperand(0))
1620 .add(MachineOperand(ExtR
))
1621 .add(MI
.getOperand(1));
1627 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1628 unsigned RegN
= ED
.OpNum
;
1629 // Copy all operands except the one that has the extender.
1630 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1632 MIB
.add(MI
.getOperand(i
));
1634 MIB
.add(MachineOperand(ExtR
));
1636 MIB
.cloneMemRefs(MI
);
1641 if ((MI
.mayLoad() || MI
.mayStore()) && !isStoreImmediate(ExtOpc
)) {
    // For memory instructions, there is an asymmetry in the addressing
    // modes. Addressing modes allowing extenders can be replaced with
    // addressing modes that use registers, but the order of operands
    // (or even their number) may be different.
    //
    //   BaseImmOffset (io)  -> BaseRegOffset (rr)
    //   BaseLongOffset (ur) -> BaseRegOffset (rr)
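    //
    // For illustration (operand layout only, see the builder code below):
    //   Rd = L4_loadri_ur Rs, S, ##V        (ur form)
    // becomes
    //   Rd = L4_loadri_rr ExtR, Rs, S       (rr form, ExtR holds ##V)
    // so the immediate disappears and the register operands are reordered.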
1649 unsigned RegOpc
, Shift
;
1650 unsigned AM
= HII
->getAddrMode(MI
);
1651 if (AM
== HexagonII::BaseImmOffset
) {
1652 RegOpc
= HII
->changeAddrMode_io_rr(ExtOpc
);
1654 } else if (AM
== HexagonII::BaseLongOffset
) {
1655 // Loads: Rd = L4_loadri_ur Rs, S, ##
1656 // Stores: S4_storeri_ur Rs, S, ##, Rt
1657 RegOpc
= HII
->changeAddrMode_ur_rr(ExtOpc
);
1658 Shift
= MI
.getOperand(MI
.mayLoad() ? 2 : 1).getImm();
1660 llvm_unreachable("Unexpected addressing mode");
1663 if (RegOpc
== -1u) {
1664 dbgs() << "\nExtOpc: " << HII
->getName(ExtOpc
) << " has no rr version\n";
1665 llvm_unreachable("No corresponding rr instruction");
1669 unsigned BaseP
, OffP
;
1670 HII
->getBaseAndOffsetPosition(MI
, BaseP
, OffP
);
1672 // Build an rr instruction: (RegOff + RegBase<<0)
1673 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(RegOpc
));
1674 // First, add the def for loads.
1676 MIB
.add(getLoadResultOp(MI
));
1677 // Handle possible predication.
1678 if (HII
->isPredicated(MI
))
1679 MIB
.add(getPredicateOp(MI
));
1680 // Build the address.
1681 MIB
.add(MachineOperand(ExtR
)); // RegOff
1682 MIB
.add(MI
.getOperand(BaseP
)); // RegBase
1683 MIB
.addImm(Shift
); // << Shift
1684 // Add the stored value for stores.
1686 MIB
.add(getStoredValueOp(MI
));
1687 MIB
.cloneMemRefs(MI
);
1693 dbgs() << '\n' << MI
;
1695 llvm_unreachable("Unhandled exact replacement");
1699 // Replace the extender ED with a form corresponding to the initializer ExtI.
1700 bool HCE::replaceInstrExpr(const ExtDesc
&ED
, const ExtenderInit
&ExtI
,
1701 Register ExtR
, int32_t &Diff
) {
1702 MachineInstr
&MI
= *ED
.UseMI
;
1703 MachineBasicBlock
&MBB
= *MI
.getParent();
1704 MachineBasicBlock::iterator At
= MI
.getIterator();
1705 DebugLoc dl
= MI
.getDebugLoc();
1706 unsigned ExtOpc
= MI
.getOpcode();
1708 if (ExtOpc
== Hexagon::A2_tfrsi
) {
    // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces
    // another range. One range is the one that's common to all tfrsi's uses,
    // this one is the range of immediates in A2_addi. When calculating ranges,
    // the addi's 16-bit argument was included, so now we need to make it such
    // that the produced value is in the range for the uses alone.
    // Most of the time, simply adding Diff will make the addi produce exact
    // result, but if Diff is outside of the 16-bit range, some adjustment
    // will be needed.
1717 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1718 assert(IdxOpc
== Hexagon::A2_addi
);
1720 // Clamp Diff to the 16 bit range.
1721 int32_t D
= isInt
<16>(Diff
) ? Diff
: (Diff
> 0 ? 32767 : -32768);
1723 // Split Diff into two values: one that is close to min/max int16,
1724 // and the other being the rest, and such that both have the same
1725 // "alignment" as Diff.
1727 OffsetRange R
= getOffsetRange(MI
.getOperand(0));
1728 uint32_t A
= std::min
<uint32_t>(R
.Align
, 1u << countTrailingZeros(UD
));
1731 BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
))
1732 .add(MI
.getOperand(0))
1733 .add(MachineOperand(ExtR
))
1737 // Make sure the output is within allowable range for uses.
1738 // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
1739 // not DefV - Ext, as the getOffsetRange would calculate.
1740 OffsetRange Uses
= getOffsetRange(MI
.getOperand(0));
1741 if (!Uses
.contains(-Diff
))
1742 dbgs() << "Diff: " << -Diff
<< " out of range " << Uses
1744 assert(Uses
.contains(-Diff
));
1750 const ExtValue
&EV
= ExtI
.first
; (void)EV
;
1751 const ExtExpr
&Ex
= ExtI
.second
; (void)Ex
;
1753 if (ExtOpc
== Hexagon::A2_addi
|| ExtOpc
== Hexagon::A2_subri
) {
1754 // If addi/subri are replaced with the exactly matching initializer,
1755 // they amount to COPY.
1756 // Check that the initializer is an exact match (for simplicity).
1758 bool IsAddi
= ExtOpc
== Hexagon::A2_addi
;
1759 const MachineOperand
&RegOp
= MI
.getOperand(IsAddi
? 1 : 2);
1760 const MachineOperand
&ImmOp
= MI
.getOperand(IsAddi
? 2 : 1);
1761 assert(Ex
.Rs
== RegOp
&& EV
== ImmOp
&& Ex
.Neg
!= IsAddi
&&
1762 "Initializer mismatch");
1764 BuildMI(MBB
, At
, dl
, HII
->get(TargetOpcode::COPY
))
1765 .add(MI
.getOperand(0))
1766 .add(MachineOperand(ExtR
));
1771 if (ExtOpc
== Hexagon::M2_accii
|| ExtOpc
== Hexagon::M2_naccii
||
1772 ExtOpc
== Hexagon::S4_addaddi
|| ExtOpc
== Hexagon::S4_subaddi
) {
1773 // M2_accii: add(Rt,add(Rs,V)) (tied)
1774 // M2_naccii: sub(Rt,add(Rs,V))
1775 // S4_addaddi: add(Rt,add(Rs,V))
1776 // S4_subaddi: add(Rt,sub(V,Rs))
1777 // Check that Rs and V match the initializer expression. The Rs+V is the
1778 // combination that is considered "subexpression" for V, although Rx+V
1779 // would also be valid.
1781 bool IsSub
= ExtOpc
== Hexagon::S4_subaddi
;
1782 Register Rs
= MI
.getOperand(IsSub
? 3 : 2);
1783 ExtValue V
= MI
.getOperand(IsSub
? 2 : 3);
1784 assert(EV
== V
&& Rs
== Ex
.Rs
&& IsSub
== Ex
.Neg
&& "Initializer mismatch");
1786 unsigned NewOpc
= ExtOpc
== Hexagon::M2_naccii
? Hexagon::A2_sub
1788 BuildMI(MBB
, At
, dl
, HII
->get(NewOpc
))
1789 .add(MI
.getOperand(0))
1790 .add(MI
.getOperand(1))
1791 .add(MachineOperand(ExtR
));
1796 if (MI
.mayLoad() || MI
.mayStore()) {
1797 unsigned IdxOpc
= getRegOffOpcode(ExtOpc
);
1798 assert(IdxOpc
&& "Expecting indexed opcode");
1799 MachineInstrBuilder MIB
= BuildMI(MBB
, At
, dl
, HII
->get(IdxOpc
));
1800 // Construct the new indexed instruction.
1801 // First, add the def for loads.
1803 MIB
.add(getLoadResultOp(MI
));
1804 // Handle possible predication.
1805 if (HII
->isPredicated(MI
))
1806 MIB
.add(getPredicateOp(MI
));
1807 // Build the address.
1808 MIB
.add(MachineOperand(ExtR
));
1810 // Add the stored value for stores.
1812 MIB
.add(getStoredValueOp(MI
));
1813 MIB
.cloneMemRefs(MI
);
1819 dbgs() << '\n' << PrintInit(ExtI
, *HRI
) << " " << MI
;
1821 llvm_unreachable("Unhandled expr replacement");
1825 bool HCE::replaceInstr(unsigned Idx
, Register ExtR
, const ExtenderInit
&ExtI
) {
1826 if (ReplaceLimit
.getNumOccurrences()) {
1827 if (ReplaceLimit
<= ReplaceCounter
)
1831 const ExtDesc
&ED
= Extenders
[Idx
];
1832 assert((!ED
.IsDef
|| ED
.Rd
.Reg
!= 0) && "Missing Rd for def");
1833 const ExtValue
&DefV
= ExtI
.first
;
1834 assert(ExtRoot(ExtValue(ED
)) == ExtRoot(DefV
) && "Extender root mismatch");
1835 const ExtExpr
&DefEx
= ExtI
.second
;
1838 int32_t Diff
= EV
.Offset
- DefV
.Offset
;
1839 const MachineInstr
&MI
= *ED
.UseMI
;
1840 LLVM_DEBUG(dbgs() << __func__
<< " Idx:" << Idx
<< " ExtR:"
1841 << PrintRegister(ExtR
, *HRI
) << " Diff:" << Diff
<< '\n');
1843 // These two addressing modes must be converted into indexed forms
1844 // regardless of what the initializer looks like.
1845 bool IsAbs
= false, IsAbsSet
= false;
1846 if (MI
.mayLoad() || MI
.mayStore()) {
1847 unsigned AM
= HII
->getAddrMode(MI
);
1848 IsAbs
= AM
== HexagonII::Absolute
;
1849 IsAbsSet
= AM
== HexagonII::AbsoluteSet
;
1852 // If it's a def, remember all operands that need to be updated.
1853 // If ED is a def, and Diff is not 0, then all uses of the register Rd
1854 // defined by ED must be in the form (Rd, imm), i.e. the immediate offset
1855 // must follow the Rd in the operand list.
1856 std::vector
<std::pair
<MachineInstr
*,unsigned>> RegOps
;
1857 if (ED
.IsDef
&& Diff
!= 0) {
1858 for (MachineOperand
&Op
: MRI
->use_operands(ED
.Rd
.Reg
)) {
1859 MachineInstr
&UI
= *Op
.getParent();
1860 RegOps
.push_back({&UI
, getOperandIndex(UI
, Op
)});
1864 // Replace the instruction.
1865 bool Replaced
= false;
1866 if (Diff
== 0 && DefEx
.trivial() && !IsAbs
&& !IsAbsSet
)
1867 Replaced
= replaceInstrExact(ED
, ExtR
);
1869 Replaced
= replaceInstrExpr(ED
, ExtI
, ExtR
, Diff
);
1871 if (Diff
!= 0 && Replaced
&& ED
.IsDef
) {
1872 // Update offsets of the def's uses.
1873 for (std::pair
<MachineInstr
*,unsigned> P
: RegOps
) {
1874 unsigned J
= P
.second
;
1875 assert(P
.first
->getNumOperands() > J
+1 &&
1876 P
.first
->getOperand(J
+1).isImm());
1877 MachineOperand
&ImmOp
= P
.first
->getOperand(J
+1);
1878 ImmOp
.setImm(ImmOp
.getImm() + Diff
);
1880 // If it was an absolute-set instruction, the "set" part has been removed.
1881 // ExtR will now be the register with the extended value, and since all
1882 // users of Rd have been updated, all that needs to be done is to replace
1885 assert(ED
.Rd
.Sub
== 0 && ExtR
.Sub
== 0);
1886 MRI
->replaceRegWith(ED
.Rd
.Reg
, ExtR
.Reg
);
1893 bool HCE::replaceExtenders(const AssignmentMap
&IMap
) {
1895 bool Changed
= false;
1897 for (const std::pair
<ExtenderInit
,IndexList
> &P
: IMap
) {
1898 const IndexList
&Idxs
= P
.second
;
1899 if (Idxs
.size() < CountThreshold
)
1903 calculatePlacement(P
.first
, Idxs
, Defs
);
1904 for (const std::pair
<Loc
,IndexList
> &Q
: Defs
) {
1905 Register DefR
= insertInitializer(Q
.first
, P
.first
);
1906 NewRegs
.push_back(DefR
.Reg
);
1907 for (unsigned I
: Q
.second
)
1908 Changed
|= replaceInstr(I
, DefR
, P
.first
);
1914 unsigned HCE::getOperandIndex(const MachineInstr
&MI
,
1915 const MachineOperand
&Op
) const {
1916 for (unsigned i
= 0, n
= MI
.getNumOperands(); i
!= n
; ++i
)
1917 if (&MI
.getOperand(i
) == &Op
)
1919 llvm_unreachable("Not an operand of MI");
1922 const MachineOperand
&HCE::getPredicateOp(const MachineInstr
&MI
) const {
1923 assert(HII
->isPredicated(MI
));
1924 for (const MachineOperand
&Op
: MI
.operands()) {
1925 if (!Op
.isReg() || !Op
.isUse() ||
1926 MRI
->getRegClass(Op
.getReg()) != &Hexagon::PredRegsRegClass
)
1928 assert(Op
.getSubReg() == 0 && "Predicate register with a subregister");
1931 llvm_unreachable("Predicate operand not found");
1934 const MachineOperand
&HCE::getLoadResultOp(const MachineInstr
&MI
) const {
1935 assert(MI
.mayLoad());
1936 return MI
.getOperand(0);
1939 const MachineOperand
&HCE::getStoredValueOp(const MachineInstr
&MI
) const {
1940 assert(MI
.mayStore());
1941 return MI
.getOperand(MI
.getNumExplicitOperands()-1);
1944 bool HCE::runOnMachineFunction(MachineFunction
&MF
) {
1945 if (skipFunction(MF
.getFunction()))
1947 if (MF
.getFunction().hasPersonalityFn()) {
1948 LLVM_DEBUG(dbgs() << getPassName() << ": skipping " << MF
.getName()
1949 << " due to exception handling\n");
1952 LLVM_DEBUG(MF
.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
1954 HII
= MF
.getSubtarget
<HexagonSubtarget
>().getInstrInfo();
1955 HRI
= MF
.getSubtarget
<HexagonSubtarget
>().getRegisterInfo();
1956 MDT
= &getAnalysis
<MachineDominatorTree
>();
1957 MRI
= &MF
.getRegInfo();
1961 llvm::sort(Extenders
, [this](const ExtDesc
&A
, const ExtDesc
&B
) {
1962 ExtValue
VA(A
), VB(B
);
1965 const MachineInstr
*MA
= A
.UseMI
;
1966 const MachineInstr
*MB
= B
.UseMI
;
1968 // If it's the same instruction, compare operand numbers.
1969 return A
.OpNum
< B
.OpNum
;
1972 const MachineBasicBlock
*BA
= MA
->getParent();
1973 const MachineBasicBlock
*BB
= MB
->getParent();
1974 assert(BA
->getNumber() != -1 && BB
->getNumber() != -1);
1976 return BA
->getNumber() < BB
->getNumber();
1977 return MDT
->dominates(MA
, MB
);
1980 bool Changed
= false;
1981 LLVM_DEBUG(dbgs() << "Collected " << Extenders
.size() << " extenders\n");
1982 for (unsigned I
= 0, E
= Extenders
.size(); I
!= E
; ) {
1984 const ExtRoot
&T
= Extenders
[B
].getOp();
1985 while (I
!= E
&& ExtRoot(Extenders
[I
].getOp()) == T
)
1989 assignInits(T
, B
, I
, IMap
);
1990 Changed
|= replaceExtenders(IMap
);
1995 MF
.print(dbgs() << "After " << getPassName() << '\n', nullptr);
1997 dbgs() << "No changes\n";
2002 FunctionPass
*llvm::createHexagonConstExtenders() {
2003 return new HexagonConstExtenders();