1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/BitVector.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/CodeGen/MachineInstrBuilder.h"
16 #include "llvm/CodeGen/SelectionDAGISel.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/IntrinsicsHexagon.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/MathExtras.h"
30 #include <unordered_map>
34 #define DEBUG_TYPE "hexagon-isel"
39 // --------------------------------------------------------------------
40 // Implementation of permutation networks.
42 // Implementation of the node routing through butterfly networks:
48 // Forward delta network consists of log(N) steps, where N is the number
49 // of inputs. In each step, an input can stay in place, or it can get
50 // routed to another position[1]. The step after that consists of two
51 // networks, each half in size in terms of the number of nodes. In those
52 // terms, in the given step, an input can go to either the upper or the
53 // lower network in the next step.
55 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
56 // positions as long as there is no conflict.
58 // Here's a delta network for 8 inputs, only the switching routes are
62 // |- 1 ---------------|- 2 -----|- 3 -|
64 // Inp[0] *** *** *** *** Out[0]
68 // Inp[1] *** \ / *** X *** *** Out[1]
72 // Inp[2] *** \ \ / / *** X *** *** Out[2]
76 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
82 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
86 // Inp[5] *** / / \ \ *** X *** *** Out[5]
90 // Inp[6] *** / \ *** X *** *** Out[6]
94 // Inp[7] *** *** *** *** Out[7]
97 // Reverse delta network is same as delta network, with the steps in
98 // the opposite order.
101 // Benes network is a forward delta network immediately followed by
102 // a reverse delta network.
104 enum class ColorKind
{ None
, Red
, Black
};
106 // Graph coloring utility used to partition nodes into two groups:
107 // they will correspond to nodes routed to the upper and lower networks.
110 using MapType
= std::map
<Node
, ColorKind
>;
111 static constexpr Node Ignore
= Node(-1);
113 Coloring(ArrayRef
<Node
> Ord
) : Order(Ord
) {
119 const MapType
&colors() const {
123 ColorKind
other(ColorKind Color
) {
124 if (Color
== ColorKind::None
)
125 return ColorKind::Red
;
126 return Color
== ColorKind::Red
? ColorKind::Black
: ColorKind::Red
;
129 LLVM_DUMP_METHOD
void dump() const;
132 ArrayRef
<Node
> Order
;
134 std::set
<Node
> Needed
;
136 using NodeSet
= std::set
<Node
>;
137 std::map
<Node
,NodeSet
> Edges
;
139 Node
conj(Node Pos
) {
140 Node Num
= Order
.size();
141 return (Pos
< Num
/2) ? Pos
+ Num
/2 : Pos
- Num
/2;
144 ColorKind
getColor(Node N
) {
145 auto F
= Colors
.find(N
);
146 return F
!= Colors
.end() ? F
->second
: ColorKind::None
;
149 std::pair
<bool, ColorKind
> getUniqueColor(const NodeSet
&Nodes
);
156 std::pair
<bool, ColorKind
> Coloring::getUniqueColor(const NodeSet
&Nodes
) {
157 auto Color
= ColorKind::None
;
158 for (Node N
: Nodes
) {
159 ColorKind ColorN
= getColor(N
);
160 if (ColorN
== ColorKind::None
)
162 if (Color
== ColorKind::None
)
164 else if (Color
!= ColorKind::None
&& Color
!= ColorN
)
165 return { false, ColorKind::None
};
167 return { true, Color
};
170 void Coloring::build() {
171 // Add Order[P] and Order[conj(P)] to Edges.
172 for (unsigned P
= 0; P
!= Order
.size(); ++P
) {
176 Node PC
= Order
[conj(P
)];
177 if (PC
!= Ignore
&& PC
!= I
)
181 // Add I and conj(I) to Edges.
182 for (unsigned I
= 0; I
!= Order
.size(); ++I
) {
183 if (!Needed
.count(I
))
186 // This will create an entry in the edge table, even if I is not
187 // connected to any other node. This is necessary, because it still
188 // needs to be colored.
189 NodeSet
&Is
= Edges
[I
];
195 bool Coloring::color() {
196 SetVector
<Node
> FirstQ
;
197 auto Enqueue
= [this,&FirstQ
] (Node N
) {
200 for (unsigned I
= 0; I
!= Q
.size(); ++I
) {
201 NodeSet
&Ns
= Edges
[Q
[I
]];
202 Q
.insert(Ns
.begin(), Ns
.end());
204 FirstQ
.insert(Q
.begin(), Q
.end());
206 for (Node N
: Needed
)
209 for (Node N
: FirstQ
) {
212 NodeSet
&Ns
= Edges
[N
];
213 auto P
= getUniqueColor(Ns
);
216 Colors
[N
] = other(P
.second
);
219 // First, color nodes that don't have any dups.
220 for (auto E
: Edges
) {
222 if (!Needed
.count(conj(N
)) || Colors
.count(N
))
224 auto P
= getUniqueColor(E
.second
);
227 Colors
[N
] = other(P
.second
);
230 // Now, nodes that are still uncolored. Since the graph can be modified
231 // in this step, create a work queue.
232 std::vector
<Node
> WorkQ
;
233 for (auto E
: Edges
) {
235 if (!Colors
.count(N
))
239 for (Node N
: WorkQ
) {
240 NodeSet
&Ns
= Edges
[N
];
241 auto P
= getUniqueColor(Ns
);
243 Colors
[N
] = other(P
.second
);
247 // Coloring failed. Split this node.
249 ColorKind ColorN
= other(ColorKind::None
);
250 ColorKind ColorC
= other(ColorN
);
251 NodeSet
&Cs
= Edges
[C
];
253 for (Node M
: CopyNs
) {
254 ColorKind ColorM
= getColor(M
);
255 if (ColorM
== ColorC
) {
256 // Connect M with C, disconnect M from N.
267 // Explicitly assign "None" to all uncolored nodes.
268 for (unsigned I
= 0; I
!= Order
.size(); ++I
)
269 if (Colors
.count(I
) == 0)
270 Colors
[I
] = ColorKind::None
;
275 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
276 void Coloring::dump() const {
277 dbgs() << "{ Order: {";
278 for (Node P
: Order
) {
285 dbgs() << " Needed: {";
286 for (Node N
: Needed
)
290 dbgs() << " Edges: {\n";
291 for (auto E
: Edges
) {
292 dbgs() << " " << E
.first
<< " -> {";
293 for (auto N
: E
.second
)
299 auto ColorKindToName
= [](ColorKind C
) {
301 case ColorKind::None
:
305 case ColorKind::Black
:
308 llvm_unreachable("all ColorKinds should be handled by the switch above");
311 dbgs() << " Colors: {\n";
312 for (auto C
: Colors
)
313 dbgs() << " " << C
.first
<< " -> " << ColorKindToName(C
.second
) << "\n";
319 // Base class of for reordering networks. They don't strictly need to be
320 // permutations, as outputs with repeated occurrences of an input element
323 using Controls
= std::vector
<uint8_t>;
324 using ElemType
= int;
325 static constexpr ElemType Ignore
= ElemType(-1);
337 PermNetwork(ArrayRef
<ElemType
> Ord
, unsigned Mult
= 1) {
338 Order
.assign(Ord
.data(), Ord
.data()+Ord
.size());
341 unsigned S
= Order
.size();
345 Table
.resize(Order
.size());
346 for (RowType
&Row
: Table
)
347 Row
.resize(Mult
*Log
, None
);
350 void getControls(Controls
&V
, unsigned StartAt
, uint8_t Dir
) const {
351 unsigned Size
= Order
.size();
353 for (unsigned I
= 0; I
!= Size
; ++I
) {
355 for (unsigned L
= 0; L
!= Log
; ++L
) {
356 unsigned C
= ctl(I
, StartAt
+L
) == Switch
;
362 assert(isUInt
<8>(W
));
367 uint8_t ctl(ElemType Pos
, unsigned Step
) const {
368 return Table
[Pos
][Step
];
370 unsigned size() const {
373 unsigned steps() const {
379 std::vector
<ElemType
> Order
;
380 using RowType
= std::vector
<uint8_t>;
381 std::vector
<RowType
> Table
;
384 struct ForwardDeltaNetwork
: public PermNetwork
{
385 ForwardDeltaNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
) {}
387 bool run(Controls
&V
) {
388 if (!route(Order
.data(), Table
.data(), size(), 0))
390 getControls(V
, 0, Forward
);
395 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
398 struct ReverseDeltaNetwork
: public PermNetwork
{
399 ReverseDeltaNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
) {}
401 bool run(Controls
&V
) {
402 if (!route(Order
.data(), Table
.data(), size(), 0))
404 getControls(V
, 0, Reverse
);
409 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
412 struct BenesNetwork
: public PermNetwork
{
413 BenesNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
, 2) {}
415 bool run(Controls
&F
, Controls
&R
) {
416 if (!route(Order
.data(), Table
.data(), size(), 0))
419 getControls(F
, 0, Forward
);
420 getControls(R
, Log
, Reverse
);
425 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
429 bool ForwardDeltaNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
431 bool UseUp
= false, UseDown
= false;
434 // Cannot use coloring here, because coloring is used to determine
435 // the "big" switch, i.e. the one that changes halves, and in a forward
436 // network, a color can be simultaneously routed to both halves in the
437 // step we're working on.
438 for (ElemType J
= 0; J
!= Num
; ++J
) {
440 // I is the position in the input,
441 // J is the position in the output.
446 S
= (J
< Num
/2) ? Pass
: Switch
;
448 S
= (J
< Num
/2) ? Switch
: Pass
;
450 // U is the element in the table that needs to be updated.
451 ElemType U
= (S
== Pass
) ? I
: (I
< Num
/2 ? I
+Num
/2 : I
-Num
/2);
456 if (T
[U
][Step
] != S
&& T
[U
][Step
] != None
)
461 for (ElemType J
= 0; J
!= Num
; ++J
)
462 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
466 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
468 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
474 bool ReverseDeltaNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
476 unsigned Pets
= Log
-1 - Step
;
477 bool UseUp
= false, UseDown
= false;
480 // In this step half-switching occurs, so coloring can be used.
481 Coloring
G({P
,Size
});
482 const Coloring::MapType
&M
= G
.colors();
486 ColorKind ColorUp
= ColorKind::None
;
487 for (ElemType J
= 0; J
!= Num
; ++J
) {
489 // I is the position in the input,
490 // J is the position in the output.
493 ColorKind C
= M
.at(I
);
494 if (C
== ColorKind::None
)
496 // During "Step", inputs cannot switch halves, so if the "up" color
497 // is still unknown, make sure that it is selected in such a way that
498 // "I" will stay in the same half.
499 bool InpUp
= I
< Num
/2;
500 if (ColorUp
== ColorKind::None
)
501 ColorUp
= InpUp
? C
: G
.other(C
);
502 if ((C
== ColorUp
) != InpUp
) {
503 // If I should go to a different half than where is it now, give up.
509 S
= (J
< Num
/2) ? Pass
: Switch
;
512 S
= (J
< Num
/2) ? Switch
: Pass
;
518 // Reorder the working permutation according to the computed switch table
519 // for the last step (i.e. Pets).
520 for (ElemType J
= 0, E
= Size
/ 2; J
!= E
; ++J
) {
521 ElemType PJ
= P
[J
]; // Current values of P[J]
522 ElemType PC
= P
[J
+Size
/2]; // and P[conj(J)]
523 ElemType QJ
= PJ
; // New values of P[J]
524 ElemType QC
= PC
; // and P[conj(J)]
525 if (T
[J
][Pets
] == Switch
)
527 if (T
[J
+Size
/2][Pets
] == Switch
)
533 for (ElemType J
= 0; J
!= Num
; ++J
)
534 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
538 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
540 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
546 bool BenesNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
548 Coloring
G({P
,Size
});
549 const Coloring::MapType
&M
= G
.colors();
554 unsigned Pets
= 2*Log
-1 - Step
;
555 bool UseUp
= false, UseDown
= false;
557 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
558 // result in different controls. Let's pick the one where the first
559 // control will be "Pass".
560 ColorKind ColorUp
= ColorKind::None
;
561 for (ElemType J
= 0; J
!= Num
; ++J
) {
565 ColorKind C
= M
.at(I
);
566 if (C
== ColorKind::None
)
568 if (ColorUp
== ColorKind::None
) {
569 ColorUp
= (I
< Num
/ 2) ? ColorKind::Red
: ColorKind::Black
;
571 unsigned CI
= (I
< Num
/2) ? I
+Num
/2 : I
-Num
/2;
576 T
[CI
][Step
] = Switch
;
577 T
[J
][Pets
] = (J
< Num
/2) ? Pass
: Switch
;
581 T
[CI
][Step
] = Switch
;
584 T
[J
][Pets
] = (J
< Num
/2) ? Switch
: Pass
;
589 // Reorder the working permutation according to the computed switch table
590 // for the last step (i.e. Pets).
591 for (ElemType J
= 0; J
!= Num
/2; ++J
) {
592 ElemType PJ
= P
[J
]; // Current values of P[J]
593 ElemType PC
= P
[J
+Num
/2]; // and P[conj(J)]
594 ElemType QJ
= PJ
; // New values of P[J]
595 ElemType QC
= PC
; // and P[conj(J)]
596 if (T
[J
][Pets
] == Switch
)
598 if (T
[J
+Num
/2][Pets
] == Switch
)
604 for (ElemType J
= 0; J
!= Num
; ++J
)
605 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
609 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
611 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
617 // --------------------------------------------------------------------
618 // Support for building selection results (output instructions that are
619 // parts of the final selection).
623 OpRef(SDValue V
) : OpV(V
) {}
624 bool isValue() const { return OpV
.getNode() != nullptr; }
625 bool isValid() const { return isValue() || !(OpN
& Invalid
); }
626 bool isUndef() const { return OpN
& Undef
; }
627 static OpRef
res(int N
) { return OpRef(Whole
| (N
& Index
)); }
628 static OpRef
fail() { return OpRef(Invalid
); }
630 static OpRef
lo(const OpRef
&R
) {
631 assert(!R
.isValue());
632 return OpRef(R
.OpN
& (Undef
| Index
| LoHalf
));
634 static OpRef
hi(const OpRef
&R
) {
635 assert(!R
.isValue());
636 return OpRef(R
.OpN
& (Undef
| Index
| HiHalf
));
638 static OpRef
undef(MVT Ty
) { return OpRef(Undef
| Ty
.SimpleTy
); }
641 SDValue OpV
= SDValue();
643 // Reference to the operand of the input node:
644 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
646 // If bit 30 is set, it's the high half of the operand.
647 // If bit 29 is set, it's the low half of the operand.
651 Invalid
= 0x10000000,
654 Whole
= LoHalf
| HiHalf
,
656 Index
= 0x0FFFFFFF, // Mask of the index value.
661 void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
664 OpRef(unsigned N
) : OpN(N
) {}
667 struct NodeTemplate
{
668 NodeTemplate() = default;
671 std::vector
<OpRef
> Ops
;
673 LLVM_DUMP_METHOD
void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
677 ResultStack(SDNode
*Inp
)
678 : InpNode(Inp
), InpTy(Inp
->getValueType(0).getSimpleVT()) {}
681 unsigned push(const NodeTemplate
&Res
) {
683 return List
.size()-1;
685 unsigned push(unsigned Opc
, MVT Ty
, std::vector
<OpRef
> &&Ops
) {
692 bool empty() const { return List
.empty(); }
693 unsigned size() const { return List
.size(); }
694 unsigned top() const { return size()-1; }
695 const NodeTemplate
&operator[](unsigned I
) const { return List
[I
]; }
696 unsigned reset(unsigned NewTop
) {
697 List
.resize(NewTop
+1);
701 using BaseType
= std::vector
<NodeTemplate
>;
702 BaseType::iterator
begin() { return List
.begin(); }
703 BaseType::iterator
end() { return List
.end(); }
704 BaseType::const_iterator
begin() const { return List
.begin(); }
705 BaseType::const_iterator
end() const { return List
.end(); }
710 void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
714 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715 void OpRef::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
717 OpV
.getNode()->print(OS
, &G
);
728 if ((OpN
& Whole
) != Whole
) {
729 assert((OpN
& Whole
) == LoHalf
|| (OpN
& Whole
) == HiHalf
);
735 OS
<< '#' << SignExtend32(OpN
& Index
, IndexBits
);
738 void NodeTemplate::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
739 const TargetInstrInfo
&TII
= *G
.getSubtarget().getInstrInfo();
740 OS
<< format("%8s", EVT(Ty
).getEVTString().c_str()) << " "
743 for (const auto &R
: Ops
) {
752 void ResultStack::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
753 OS
<< "Input node:\n";
757 OS
<< "Result templates:\n";
758 for (unsigned I
= 0, E
= List
.size(); I
!= E
; ++I
) {
759 OS
<< '[' << I
<< "] ";
760 List
[I
].print(OS
, G
);
768 ShuffleMask(ArrayRef
<int> M
) : Mask(M
) {
772 MinSrc
= (MinSrc
== -1) ? M
: std::min(MinSrc
, M
);
773 MaxSrc
= (MaxSrc
== -1) ? M
: std::max(MaxSrc
, M
);
778 int MinSrc
= -1, MaxSrc
= -1;
780 ShuffleMask
lo() const {
781 size_t H
= Mask
.size()/2;
782 return ShuffleMask(Mask
.take_front(H
));
784 ShuffleMask
hi() const {
785 size_t H
= Mask
.size()/2;
786 return ShuffleMask(Mask
.take_back(H
));
789 void print(raw_ostream
&OS
) const {
790 OS
<< "MinSrc:" << MinSrc
<< ", MaxSrc:" << MaxSrc
<< " {";
797 LLVM_ATTRIBUTE_UNUSED
798 raw_ostream
&operator<<(raw_ostream
&OS
, const ShuffleMask
&SM
) {
805 using MaskT
= SmallVector
<int, 128>;
806 // Vdd = vshuffvdd(Vu, Vv, Rt)
807 // Vdd = vdealvdd(Vu, Vv, Rt)
808 // Vd = vpack(Vu, Vv, Size, TakeOdd)
809 // Vd = vshuff(Vu, Vv, Size, TakeOdd)
810 // Vd = vdeal(Vu, Vv, Size, TakeOdd)
811 // Vd = vdealb4w(Vu, Vv)
813 ArrayRef
<int> lo(ArrayRef
<int> Vuu
) { return Vuu
.take_front(Vuu
.size() / 2); }
814 ArrayRef
<int> hi(ArrayRef
<int> Vuu
) { return Vuu
.take_back(Vuu
.size() / 2); }
816 MaskT
vshuffvdd(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
, unsigned Rt
) {
819 std::copy(Vv
.begin(), Vv
.end(), Vdd
.begin());
820 std::copy(Vu
.begin(), Vu
.end(), Vdd
.begin() + Len
);
822 auto Vd0
= MutableArrayRef
<int>(Vdd
).take_front(Len
);
823 auto Vd1
= MutableArrayRef
<int>(Vdd
).take_back(Len
);
825 for (int Offset
= 1; Offset
< Len
; Offset
*= 2) {
826 if ((Rt
& Offset
) == 0)
828 for (int i
= 0; i
!= Len
; ++i
) {
829 if ((i
& Offset
) == 0)
830 std::swap(Vd1
[i
], Vd0
[i
+ Offset
]);
836 MaskT
vdealvdd(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
, unsigned Rt
) {
839 std::copy(Vv
.begin(), Vv
.end(), Vdd
.begin());
840 std::copy(Vu
.begin(), Vu
.end(), Vdd
.begin() + Len
);
842 auto Vd0
= MutableArrayRef
<int>(Vdd
).take_front(Len
);
843 auto Vd1
= MutableArrayRef
<int>(Vdd
).take_back(Len
);
845 for (int Offset
= Len
/ 2; Offset
> 0; Offset
/= 2) {
846 if ((Rt
& Offset
) == 0)
848 for (int i
= 0; i
!= Len
; ++i
) {
849 if ((i
& Offset
) == 0)
850 std::swap(Vd1
[i
], Vd0
[i
+ Offset
]);
856 MaskT
vpack(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
, unsigned Size
, bool TakeOdd
) {
859 auto Odd
= static_cast<int>(TakeOdd
);
860 for (int i
= 0, e
= Len
/ (2 * Size
); i
!= e
; ++i
) {
861 for (int b
= 0; b
!= static_cast<int>(Size
); ++b
) {
863 Vd
[i
* Size
+ b
] = Vv
[(2 * i
+ Odd
) * Size
+ b
];
864 Vd
[i
* Size
+ b
+ Len
/ 2] = Vu
[(2 * i
+ Odd
) * Size
+ b
];
871 MaskT
vshuff(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
, unsigned Size
, bool TakeOdd
) {
874 auto Odd
= static_cast<int>(TakeOdd
);
875 for (int i
= 0, e
= Len
/ (2 * Size
); i
!= e
; ++i
) {
876 for (int b
= 0; b
!= static_cast<int>(Size
); ++b
) {
877 Vd
[(2 * i
+ 0) * Size
+ b
] = Vv
[(2 * i
+ Odd
) * Size
+ b
];
878 Vd
[(2 * i
+ 1) * Size
+ b
] = Vu
[(2 * i
+ Odd
) * Size
+ b
];
884 MaskT
vdeal(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
, unsigned Size
, bool TakeOdd
) {
886 MaskT T
= vdealvdd(Vu
, Vv
, Len
- 2 * Size
);
887 return vpack(hi(T
), lo(T
), Size
, TakeOdd
);
890 MaskT
vdealb4w(ArrayRef
<int> Vu
, ArrayRef
<int> Vv
) {
893 for (int i
= 0, e
= Len
/ 4; i
!= e
; ++i
) {
894 Vd
[0 * (Len
/ 4) + i
] = Vv
[4 * i
+ 0];
895 Vd
[1 * (Len
/ 4) + i
] = Vv
[4 * i
+ 2];
896 Vd
[2 * (Len
/ 4) + i
] = Vu
[4 * i
+ 0];
897 Vd
[3 * (Len
/ 4) + i
] = Vu
[4 * i
+ 2];
902 template <typename ShuffFunc
, typename
... OptArgs
>
903 auto mask(ShuffFunc S
, unsigned Length
, OptArgs
... args
) -> MaskT
{
904 MaskT
Vu(Length
), Vv(Length
);
905 std::iota(Vu
.begin(), Vu
.end(), Length
); // High
906 std::iota(Vv
.begin(), Vv
.end(), 0); // Low
907 return S(Vu
, Vv
, args
...);
910 } // namespace shuffles
912 // --------------------------------------------------------------------
913 // The HvxSelector class.
915 static const HexagonTargetLowering
&getHexagonLowering(SelectionDAG
&G
) {
916 return static_cast<const HexagonTargetLowering
&>(G
.getTargetLoweringInfo());
918 static const HexagonSubtarget
&getHexagonSubtarget(SelectionDAG
&G
) {
919 return G
.getSubtarget
<HexagonSubtarget
>();
924 const HexagonTargetLowering
&Lower
;
925 HexagonDAGToDAGISel
&ISel
;
927 const HexagonSubtarget
&HST
;
928 const unsigned HwLen
;
930 HvxSelector(HexagonDAGToDAGISel
&HS
, SelectionDAG
&G
)
931 : Lower(getHexagonLowering(G
)), ISel(HS
), DAG(G
),
932 HST(getHexagonSubtarget(G
)), HwLen(HST
.getVectorLength()) {}
934 MVT
getSingleVT(MVT ElemTy
) const {
935 assert(ElemTy
!= MVT::i1
&& "Use getBoolVT for predicates");
936 unsigned NumElems
= HwLen
/ (ElemTy
.getSizeInBits() / 8);
937 return MVT::getVectorVT(ElemTy
, NumElems
);
940 MVT
getPairVT(MVT ElemTy
) const {
941 assert(ElemTy
!= MVT::i1
); // Suspicious: there are no predicate pairs.
942 unsigned NumElems
= (2 * HwLen
) / (ElemTy
.getSizeInBits() / 8);
943 return MVT::getVectorVT(ElemTy
, NumElems
);
946 MVT
getBoolVT() const {
947 // Return HwLen x i1.
948 return MVT::getVectorVT(MVT::i1
, HwLen
);
951 void selectExtractSubvector(SDNode
*N
);
952 void selectShuffle(SDNode
*N
);
953 void selectRor(SDNode
*N
);
954 void selectVAlign(SDNode
*N
);
956 static SmallVector
<uint32_t, 8> getPerfectCompletions(ShuffleMask SM
,
958 static SmallVector
<uint32_t, 8> completeToPerfect(
959 ArrayRef
<uint32_t> Completions
, unsigned Width
);
960 static std::optional
<int> rotationDistance(ShuffleMask SM
, unsigned WrapAt
);
963 void select(SDNode
*ISelN
);
964 void materialize(const ResultStack
&Results
);
966 SDValue
getConst32(int Val
, const SDLoc
&dl
);
967 SDValue
getVectorConstant(ArrayRef
<uint8_t> Data
, const SDLoc
&dl
);
973 OpRef
concats(OpRef Va
, OpRef Vb
, ResultStack
&Results
);
974 OpRef
funnels(OpRef Va
, OpRef Vb
, int Amount
, ResultStack
&Results
);
976 OpRef
packs(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
,
977 MutableArrayRef
<int> NewMask
, unsigned Options
= None
);
978 OpRef
packp(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
,
979 MutableArrayRef
<int> NewMask
);
980 OpRef
vmuxs(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
981 ResultStack
&Results
);
982 OpRef
vmuxp(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
983 ResultStack
&Results
);
985 OpRef
shuffs1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
986 OpRef
shuffs2(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
987 OpRef
shuffp1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
988 OpRef
shuffp2(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
990 OpRef
butterfly(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
991 OpRef
contracting(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
992 OpRef
expanding(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
993 OpRef
perfect(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
995 bool selectVectorConstants(SDNode
*N
);
996 bool scalarizeShuffle(ArrayRef
<int> Mask
, const SDLoc
&dl
, MVT ResTy
,
997 SDValue Va
, SDValue Vb
, SDNode
*N
);
1001 static void splitMask(ArrayRef
<int> Mask
, MutableArrayRef
<int> MaskL
,
1002 MutableArrayRef
<int> MaskR
) {
1003 unsigned VecLen
= Mask
.size();
1004 assert(MaskL
.size() == VecLen
&& MaskR
.size() == VecLen
);
1005 for (unsigned I
= 0; I
!= VecLen
; ++I
) {
1008 MaskL
[I
] = MaskR
[I
] = -1;
1009 } else if (unsigned(M
) < VecLen
) {
1014 MaskR
[I
] = M
-VecLen
;
1019 static std::pair
<int,unsigned> findStrip(ArrayRef
<int> A
, int Inc
,
1021 assert(A
.size() > 0 && A
.size() >= MaxLen
);
1024 for (unsigned I
= 1; I
!= MaxLen
; ++I
) {
1025 if (A
[I
] - E
!= Inc
)
1029 return { F
, MaxLen
};
1032 static bool isUndef(ArrayRef
<int> Mask
) {
1033 for (int Idx
: Mask
)
1039 static bool isIdentity(ArrayRef
<int> Mask
) {
1040 for (int I
= 0, E
= Mask
.size(); I
!= E
; ++I
) {
1042 if (M
>= 0 && M
!= I
)
1048 static bool isLowHalfOnly(ArrayRef
<int> Mask
) {
1049 int L
= Mask
.size();
1051 // Check if the second half of the mask is all-undef.
1052 return llvm::all_of(Mask
.drop_front(L
/ 2), [](int M
) { return M
< 0; });
1055 static SmallVector
<unsigned, 4> getInputSegmentList(ShuffleMask SM
,
1057 assert(isPowerOf2_32(SegLen
));
1058 SmallVector
<unsigned, 4> SegList
;
1059 if (SM
.MaxSrc
== -1)
1062 unsigned Shift
= Log2_32(SegLen
);
1063 BitVector
Segs(alignTo(SM
.MaxSrc
+ 1, SegLen
) >> Shift
);
1065 for (int M
: SM
.Mask
) {
1067 Segs
.set(M
>> Shift
);
1070 for (unsigned B
: Segs
.set_bits())
1071 SegList
.push_back(B
);
1075 static SmallVector
<unsigned, 4> getOutputSegmentMap(ShuffleMask SM
,
1077 // Calculate the layout of the output segments in terms of the input
1079 // For example [1,3,1,0] means that the output consists of 4 output
1080 // segments, where the first output segment has only elements of the
1081 // input segment at index 1. The next output segment only has elements
1082 // of the input segment 3, etc.
1083 // If an output segment only has undef elements, the value will be ~0u.
1084 // If an output segment has elements from more than one input segment,
1085 // the corresponding value will be ~1u.
1086 unsigned MaskLen
= SM
.Mask
.size();
1087 assert(MaskLen
% SegLen
== 0);
1088 SmallVector
<unsigned, 4> Map(MaskLen
/ SegLen
);
1090 for (int S
= 0, E
= Map
.size(); S
!= E
; ++S
) {
1092 for (int I
= 0; I
!= static_cast<int>(SegLen
); ++I
) {
1093 int M
= SM
.Mask
[S
*SegLen
+ I
];
1096 unsigned G
= M
/ SegLen
; // Input segment of this element.
1099 } else if (Idx
!= G
) {
1110 static void packSegmentMask(ArrayRef
<int> Mask
, ArrayRef
<unsigned> OutSegMap
,
1111 unsigned SegLen
, MutableArrayRef
<int> PackedMask
) {
1112 SmallVector
<unsigned, 4> InvMap
;
1113 for (int I
= OutSegMap
.size() - 1; I
>= 0; --I
) {
1114 unsigned S
= OutSegMap
[I
];
1115 assert(S
!= ~0u && "Unexpected undef");
1116 assert(S
!= ~1u && "Unexpected multi");
1117 if (InvMap
.size() <= S
)
1122 unsigned Shift
= Log2_32(SegLen
);
1123 for (int I
= 0, E
= Mask
.size(); I
!= E
; ++I
) {
1126 int OutIdx
= InvMap
[M
>> Shift
];
1127 M
= (M
& (SegLen
-1)) + SegLen
*OutIdx
;
1133 bool HvxSelector::selectVectorConstants(SDNode
*N
) {
1134 // Constant vectors are generated as loads from constant pools or as
1135 // splats of a constant value. Since they are generated during the
1136 // selection process, the main selection algorithm is not aware of them.
1137 // Select them directly here.
1138 SmallVector
<SDNode
*,4> Nodes
;
1139 SetVector
<SDNode
*> WorkQ
;
1141 // The DAG can change (due to CSE) during selection, so cache all the
1142 // unselected nodes first to avoid traversing a mutating DAG.
1144 for (unsigned i
= 0; i
!= WorkQ
.size(); ++i
) {
1145 SDNode
*W
= WorkQ
[i
];
1146 if (!W
->isMachineOpcode() && W
->getOpcode() == HexagonISD::ISEL
)
1148 for (unsigned j
= 0, f
= W
->getNumOperands(); j
!= f
; ++j
)
1149 WorkQ
.insert(W
->getOperand(j
).getNode());
1152 for (SDNode
*L
: Nodes
)
1155 return !Nodes
.empty();
1158 void HvxSelector::materialize(const ResultStack
&Results
) {
1159 DEBUG_WITH_TYPE("isel", {
1160 dbgs() << "Materializing\n";
1161 Results
.print(dbgs(), DAG
);
1163 if (Results
.empty())
1165 const SDLoc
&dl(Results
.InpNode
);
1166 std::vector
<SDValue
> Output
;
1168 for (unsigned I
= 0, E
= Results
.size(); I
!= E
; ++I
) {
1169 const NodeTemplate
&Node
= Results
[I
];
1170 std::vector
<SDValue
> Ops
;
1171 for (const OpRef
&R
: Node
.Ops
) {
1172 assert(R
.isValid());
1174 Ops
.push_back(R
.OpV
);
1177 if (R
.OpN
& OpRef::Undef
) {
1178 MVT::SimpleValueType SVT
= MVT::SimpleValueType(R
.OpN
& OpRef::Index
);
1179 Ops
.push_back(ISel
.selectUndef(dl
, MVT(SVT
)));
1182 // R is an index of a result.
1183 unsigned Part
= R
.OpN
& OpRef::Whole
;
1184 int Idx
= SignExtend32(R
.OpN
& OpRef::Index
, OpRef::IndexBits
);
1187 assert(Idx
>= 0 && unsigned(Idx
) < Output
.size());
1188 SDValue Op
= Output
[Idx
];
1189 MVT OpTy
= Op
.getValueType().getSimpleVT();
1190 if (Part
!= OpRef::Whole
) {
1191 assert(Part
== OpRef::LoHalf
|| Part
== OpRef::HiHalf
);
1192 MVT HalfTy
= MVT::getVectorVT(OpTy
.getVectorElementType(),
1193 OpTy
.getVectorNumElements()/2);
1194 unsigned Sub
= (Part
== OpRef::LoHalf
) ? Hexagon::vsub_lo
1196 Op
= DAG
.getTargetExtractSubreg(Sub
, dl
, HalfTy
, Op
);
1199 } // for (Node : Results)
1201 assert(Node
.Ty
!= MVT::Other
);
1202 SDNode
*ResN
= (Node
.Opc
== TargetOpcode::COPY
)
1203 ? Ops
.front().getNode()
1204 : DAG
.getMachineNode(Node
.Opc
, dl
, Node
.Ty
, Ops
);
1205 Output
.push_back(SDValue(ResN
, 0));
1208 SDNode
*OutN
= Output
.back().getNode();
1209 SDNode
*InpN
= Results
.InpNode
;
1210 DEBUG_WITH_TYPE("isel", {
1211 dbgs() << "Generated node:\n";
1215 ISel
.ReplaceNode(InpN
, OutN
);
1216 selectVectorConstants(OutN
);
1217 DAG
.RemoveDeadNodes();
1220 OpRef
HvxSelector::concats(OpRef Lo
, OpRef Hi
, ResultStack
&Results
) {
1221 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1222 const SDLoc
&dl(Results
.InpNode
);
1223 Results
.push(TargetOpcode::REG_SEQUENCE
, getPairVT(MVT::i8
), {
1224 getConst32(Hexagon::HvxWRRegClassID
, dl
),
1225 Lo
, getConst32(Hexagon::vsub_lo
, dl
),
1226 Hi
, getConst32(Hexagon::vsub_hi
, dl
),
1228 return OpRef::res(Results
.top());
1231 OpRef
HvxSelector::funnels(OpRef Va
, OpRef Vb
, int Amount
,
1232 ResultStack
&Results
) {
1233 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1234 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1236 auto VecLen
= static_cast<int>(HwLen
);
1240 if (Amount
== VecLen
)
1243 MVT Ty
= getSingleVT(MVT::i8
);
1244 const SDLoc
&dl(Results
.InpNode
);
1248 if (Amount
> VecLen
) {
1253 if (isUInt
<3>(Amount
)) {
1254 SDValue A
= getConst32(Amount
, dl
);
1255 Results
.push(Hexagon::V6_valignbi
, Ty
, {Vb
, Va
, A
});
1256 } else if (isUInt
<3>(VecLen
- Amount
)) {
1257 SDValue A
= getConst32(VecLen
- Amount
, dl
);
1258 Results
.push(Hexagon::V6_vlalignbi
, Ty
, {Vb
, Va
, A
});
1260 SDValue A
= getConst32(Amount
, dl
);
1261 Results
.push(Hexagon::A2_tfrsi
, Ty
, {A
});
1262 Results
.push(Hexagon::V6_valignb
, Ty
, {Vb
, Va
, OpRef::res(-1)});
1264 return OpRef::res(Results
.top());
1267 // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1268 // pack these halves into a single vector, and remap SM into NewMask to use
1269 // the new vector instead.
1270 OpRef
HvxSelector::packs(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1271 ResultStack
&Results
, MutableArrayRef
<int> NewMask
,
1273 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1274 if (!Va
.isValid() || !Vb
.isValid())
1275 return OpRef::fail();
1278 std::copy(SM
.Mask
.begin(), SM
.Mask
.end(), NewMask
.begin());
1282 std::copy(SM
.Mask
.begin(), SM
.Mask
.end(), NewMask
.begin());
1283 ShuffleVectorSDNode::commuteMask(NewMask
);
1287 MVT Ty
= getSingleVT(MVT::i8
);
1288 MVT PairTy
= getPairVT(MVT::i8
);
1289 OpRef Inp
[2] = {Va
, Vb
};
1290 unsigned VecLen
= SM
.Mask
.size();
1292 auto valign
= [this](OpRef Lo
, OpRef Hi
, unsigned Amt
, MVT Ty
,
1293 ResultStack
&Results
) {
1296 const SDLoc
&dl(Results
.InpNode
);
1297 if (isUInt
<3>(Amt
) || isUInt
<3>(HwLen
- Amt
)) {
1298 bool IsRight
= isUInt
<3>(Amt
); // Right align.
1299 SDValue S
= getConst32(IsRight
? Amt
: HwLen
- Amt
, dl
);
1300 unsigned Opc
= IsRight
? Hexagon::V6_valignbi
: Hexagon::V6_vlalignbi
;
1301 Results
.push(Opc
, Ty
, {Hi
, Lo
, S
});
1302 return OpRef::res(Results
.top());
1304 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {getConst32(Amt
, dl
)});
1305 OpRef A
= OpRef::res(Results
.top());
1306 Results
.push(Hexagon::V6_valignb
, Ty
, {Hi
, Lo
, A
});
1307 return OpRef::res(Results
.top());
1310 // Segment is a vector half.
1311 unsigned SegLen
= HwLen
/ 2;
1313 // Check if we can shuffle vector halves around to get the used elements
1314 // into a single vector.
1315 shuffles::MaskT
MaskH(SM
.Mask
);
1316 SmallVector
<unsigned, 4> SegList
= getInputSegmentList(SM
.Mask
, SegLen
);
1317 unsigned SegCount
= SegList
.size();
1318 SmallVector
<unsigned, 4> SegMap
= getOutputSegmentMap(SM
.Mask
, SegLen
);
1320 if (SegList
.empty())
1321 return OpRef::undef(Ty
);
1324 // In the following part of the function, where the segments are rearranged,
1325 // the shuffle mask SM can be of any length that is a multiple of a vector
1326 // (i.e. a multiple of 2*SegLen), and non-zero.
1327 // The output segment map is computed, and it may have any even number of
1328 // entries, but the rearrangement of input segments will be done based only
1329 // on the first two (non-undef) entries in the segment map.
1330 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1331 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1332 // a single vector V = 3:1. The output mask will then be updated to use
1333 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1335 // Picking the segments based on the output map is an optimization. For
1336 // correctness it is only necessary that Seg0 and Seg1 are the two input
1337 // segments that are used in the output.
1339 unsigned Seg0
= ~0u, Seg1
= ~0u;
1340 for (unsigned X
: SegMap
) {
1345 else if (Seg1
!= ~0u)
1347 if (X
== ~1u || X
!= Seg0
)
1351 if (SegCount
== 1) {
1352 unsigned SrcOp
= SegList
[0] / 2;
1353 for (int I
= 0; I
!= static_cast<int>(VecLen
); ++I
) {
1364 if (SegCount
== 2) {
1365 // Seg0 should not be undef here: this would imply a SegList
1366 // with <= 1 elements, which was checked earlier.
1367 assert(Seg0
!= ~0u);
1369 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1370 // segment list instead.
1371 if (Seg0
== ~1u || Seg1
== ~1u) {
1375 } else if (Seg0
== ~1u) {
1376 Seg0
= SegList
[0] != Seg1
? SegList
[0] : SegList
[1];
1378 assert(Seg1
== ~1u);
1379 Seg1
= SegList
[0] != Seg0
? SegList
[0] : SegList
[1];
1382 assert(Seg0
!= ~1u && Seg1
!= ~1u);
1384 assert(Seg0
!= Seg1
&& "Expecting different segments");
1385 const SDLoc
&dl(Results
.InpNode
);
1386 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {getConst32(SegLen
, dl
)});
1387 OpRef HL
= OpRef::res(Results
.top());
1391 if (Seg0
/ 2 == Seg1
/ 2) {
1392 // Same input vector.
1396 Results
.push(Hexagon::V6_vror
, Ty
, {Inp
[Seg0
/ 2], HL
});
1397 Va
= OpRef::res(Results
.top());
1399 packSegmentMask(SM
.Mask
, {Seg0
, Seg1
}, SegLen
, MaskH
);
1400 } else if (Seg0
% 2 == Seg1
% 2) {
1401 // Picking AC, BD, CA, or DB.
1402 // vshuff(CD,AB,HL) -> BD:AC
1403 // vshuff(AB,CD,HL) -> DB:CA
1404 auto Vs
= (Seg0
== 0 || Seg0
== 1) ? std::make_pair(Vb
, Va
) // AC or BD
1405 : std::make_pair(Va
, Vb
); // CA or DB
1406 Results
.push(Hexagon::V6_vshuffvdd
, PairTy
, {Vs
.first
, Vs
.second
, HL
});
1407 OpRef P
= OpRef::res(Results
.top());
1408 Va
= (Seg0
== 0 || Seg0
== 2) ? OpRef::lo(P
) : OpRef::hi(P
);
1409 packSegmentMask(SM
.Mask
, {Seg0
, Seg1
}, SegLen
, MaskH
);
1411 // Picking AD, BC, CB, or DA.
1412 if ((Seg0
== 0 && Seg1
== 3) || (Seg0
== 2 && Seg1
== 1)) {
1413 // AD or BC: this can be done using vmux.
1414 // Q = V6_pred_scalar2 SegLen
1415 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1416 Results
.push(Hexagon::V6_pred_scalar2
, getBoolVT(), {HL
});
1417 OpRef Qt
= OpRef::res(Results
.top());
1418 auto Vs
= (Seg0
== 0) ? std::make_pair(Va
, Vb
) // AD
1419 : std::make_pair(Vb
, Va
); // CB
1420 Results
.push(Hexagon::V6_vmux
, Ty
, {Qt
, Vs
.first
, Vs
.second
});
1421 Va
= OpRef::res(Results
.top());
1422 packSegmentMask(SM
.Mask
, {Seg0
, Seg1
}, SegLen
, MaskH
);
1424 // BC or DA: this could be done via valign by SegLen.
1425 // Do nothing here, because valign (if possible) will be generated
1426 // later on (make sure the Seg0 values are as expected).
1427 assert(Seg0
== 1 || Seg0
== 3);
1432 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1433 // FIXME: maybe remove this?
1434 ShuffleMask
SMH(MaskH
);
1435 assert(SMH
.Mask
.size() == VecLen
);
1436 shuffles::MaskT
MaskA(SMH
.Mask
);
1438 if (SMH
.MaxSrc
- SMH
.MinSrc
>= static_cast<int>(HwLen
)) {
1439 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1440 shuffles::MaskT
Swapped(SMH
.Mask
);
1441 ShuffleVectorSDNode::commuteMask(Swapped
);
1442 ShuffleMask
SW(Swapped
);
1443 if (SW
.MaxSrc
- SW
.MinSrc
< static_cast<int>(HwLen
)) {
1444 MaskA
.assign(SW
.Mask
.begin(), SW
.Mask
.end());
1448 ShuffleMask
SMA(MaskA
);
1449 assert(SMA
.Mask
.size() == VecLen
);
1451 if (SMA
.MaxSrc
- SMA
.MinSrc
< static_cast<int>(HwLen
)) {
1452 int ShiftR
= SMA
.MinSrc
;
1453 if (ShiftR
>= static_cast<int>(HwLen
)) {
1455 Vb
= OpRef::undef(Ty
);
1458 OpRef RetVal
= valign(Va
, Vb
, ShiftR
, Ty
, Results
);
1460 for (int I
= 0; I
!= static_cast<int>(VecLen
); ++I
) {
1461 int M
= SMA
.Mask
[I
];
1469 // By here, packing by segment (half-vector) shuffling, and vector alignment
1470 // failed. Try vmux.
1471 // Note: since this is using the original mask, Va and Vb must not have been
1474 if (Options
& PackMux
) {
1475 // If elements picked from Va and Vb have all different (source) indexes
1476 // (relative to the start of the argument), do a mux, and update the mask.
1477 BitVector
Picked(HwLen
);
1478 SmallVector
<uint8_t,128> MuxBytes(HwLen
);
1480 for (int I
= 0; I
!= static_cast<int>(VecLen
); ++I
) {
1484 if (M
>= static_cast<int>(HwLen
))
1495 return vmuxs(MuxBytes
, Va
, Vb
, Results
);
1497 return OpRef::fail();
1500 // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1501 // pack these vectors into a pair, and remap SM into NewMask to use the
1502 // new pair instead.
1503 OpRef
HvxSelector::packp(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1504 ResultStack
&Results
, MutableArrayRef
<int> NewMask
) {
1505 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1506 SmallVector
<unsigned, 4> SegList
= getInputSegmentList(SM
.Mask
, HwLen
);
1507 if (SegList
.empty())
1508 return OpRef::undef(getPairVT(MVT::i8
));
1510 // If more than two halves are used, bail.
1511 // TODO: be more aggressive here?
1512 unsigned SegCount
= SegList
.size();
1514 return OpRef::fail();
1516 MVT HalfTy
= getSingleVT(MVT::i8
);
1518 OpRef Inp
[2] = { Va
, Vb
};
1519 OpRef Out
[2] = { OpRef::undef(HalfTy
), OpRef::undef(HalfTy
) };
1521 // Really make sure we have at most 2 vectors used in the mask.
1522 assert(SegCount
<= 2);
1524 for (int I
= 0, E
= SegList
.size(); I
!= E
; ++I
) {
1525 unsigned S
= SegList
[I
];
1526 OpRef Op
= Inp
[S
/ 2];
1527 Out
[I
] = (S
& 1) ? OpRef::hi(Op
) : OpRef::lo(Op
);
1530 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1531 // because we're not concerned here about the order of the segments (i.e.
1532 // single vectors) in the output pair. Changing the order of vectors is
1533 // free (as opposed to changing the order of vector halves as in packs),
1534 // and so there is no extra cost added in case the order needs to be
1536 packSegmentMask(SM
.Mask
, SegList
, HwLen
, NewMask
);
1537 return concats(Out
[0], Out
[1], Results
);
1540 OpRef
HvxSelector::vmuxs(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
1541 ResultStack
&Results
) {
1542 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1543 MVT ByteTy
= getSingleVT(MVT::i8
);
1544 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
1545 const SDLoc
&dl(Results
.InpNode
);
1546 SDValue B
= getVectorConstant(Bytes
, dl
);
1547 Results
.push(Hexagon::V6_vd0
, ByteTy
, {});
1548 Results
.push(Hexagon::V6_veqb
, BoolTy
, {OpRef(B
), OpRef::res(-1)});
1549 Results
.push(Hexagon::V6_vmux
, ByteTy
, {OpRef::res(-1), Vb
, Va
});
1550 return OpRef::res(Results
.top());
1553 OpRef
HvxSelector::vmuxp(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
1554 ResultStack
&Results
) {
1555 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1556 size_t S
= Bytes
.size() / 2;
1557 OpRef L
= vmuxs(Bytes
.take_front(S
), OpRef::lo(Va
), OpRef::lo(Vb
), Results
);
1558 OpRef H
= vmuxs(Bytes
.drop_front(S
), OpRef::hi(Va
), OpRef::hi(Vb
), Results
);
1559 return concats(L
, H
, Results
);
1562 OpRef
HvxSelector::shuffs1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1563 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1564 unsigned VecLen
= SM
.Mask
.size();
1565 assert(HwLen
== VecLen
);
1567 assert(all_of(SM
.Mask
, [this](int M
) { return M
== -1 || M
< int(HwLen
); }));
1569 if (isIdentity(SM
.Mask
))
1571 if (isUndef(SM
.Mask
))
1572 return OpRef::undef(getSingleVT(MVT::i8
));
1574 // First, check for rotations.
1575 if (auto Dist
= rotationDistance(SM
, VecLen
)) {
1576 OpRef Rotate
= funnels(Va
, Va
, *Dist
, Results
);
1577 if (Rotate
.isValid())
1580 unsigned HalfLen
= HwLen
/ 2;
1581 assert(isPowerOf2_32(HalfLen
));
1583 // Handle special case where the output is the same half of the input
1584 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1585 std::pair
<int, unsigned> Strip1
= findStrip(SM
.Mask
, 1, HalfLen
);
1586 if ((Strip1
.first
& ~HalfLen
) == 0 && Strip1
.second
== HalfLen
) {
1587 std::pair
<int, unsigned> Strip2
=
1588 findStrip(SM
.Mask
.drop_front(HalfLen
), 1, HalfLen
);
1589 if (Strip1
== Strip2
) {
1590 const SDLoc
&dl(Results
.InpNode
);
1591 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {getConst32(HalfLen
, dl
)});
1592 Results
.push(Hexagon::V6_vshuffvdd
, getPairVT(MVT::i8
),
1593 {Va
, Va
, OpRef::res(Results
.top())});
1594 OpRef S
= OpRef::res(Results
.top());
1595 return (Strip1
.first
== 0) ? OpRef::lo(S
) : OpRef::hi(S
);
1599 OpRef P
= perfect(SM
, Va
, Results
);
1602 return butterfly(SM
, Va
, Results
);
1605 OpRef
HvxSelector::shuffs2(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1606 ResultStack
&Results
) {
1607 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1608 if (isUndef(SM
.Mask
))
1609 return OpRef::undef(getSingleVT(MVT::i8
));
1611 OpRef C
= contracting(SM
, Va
, Vb
, Results
);
1615 int VecLen
= SM
.Mask
.size();
1616 shuffles::MaskT
PackedMask(VecLen
);
1617 OpRef P
= packs(SM
, Va
, Vb
, Results
, PackedMask
);
1619 return shuffs1(ShuffleMask(PackedMask
), P
, Results
);
1621 // TODO: Before we split the mask, try perfect shuffle on concatenated
1624 shuffles::MaskT
MaskL(VecLen
), MaskR(VecLen
);
1625 splitMask(SM
.Mask
, MaskL
, MaskR
);
1627 OpRef L
= shuffs1(ShuffleMask(MaskL
), Va
, Results
);
1628 OpRef R
= shuffs1(ShuffleMask(MaskR
), Vb
, Results
);
1629 if (!L
.isValid() || !R
.isValid())
1630 return OpRef::fail();
1632 SmallVector
<uint8_t, 128> Bytes(VecLen
);
1633 for (int I
= 0; I
!= VecLen
; ++I
) {
1637 return vmuxs(Bytes
, L
, R
, Results
);
1640 OpRef
HvxSelector::shuffp1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1641 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1642 int VecLen
= SM
.Mask
.size();
1644 if (isIdentity(SM
.Mask
))
1646 if (isUndef(SM
.Mask
))
1647 return OpRef::undef(getPairVT(MVT::i8
));
1649 shuffles::MaskT
PackedMask(VecLen
);
1650 OpRef P
= packs(SM
, OpRef::lo(Va
), OpRef::hi(Va
), Results
, PackedMask
);
1652 ShuffleMask
PM(PackedMask
);
1653 OpRef E
= expanding(PM
, P
, Results
);
1657 OpRef L
= shuffs1(PM
.lo(), P
, Results
);
1658 OpRef H
= shuffs1(PM
.hi(), P
, Results
);
1659 if (L
.isValid() && H
.isValid())
1660 return concats(L
, H
, Results
);
1663 if (!isLowHalfOnly(SM
.Mask
)) {
1664 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1665 // is all-undef) may produce a perfect shuffle that generates legitimate
1666 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1667 // then there is a good chance that a shorter (contracting) code may be
1668 // used as well (e.g. V6_vshuffeb, etc).
1669 OpRef R
= perfect(SM
, Va
, Results
);
1672 // TODO commute the mask and try the opposite order of the halves.
1675 OpRef L
= shuffs2(SM
.lo(), OpRef::lo(Va
), OpRef::hi(Va
), Results
);
1676 OpRef H
= shuffs2(SM
.hi(), OpRef::lo(Va
), OpRef::hi(Va
), Results
);
1677 if (L
.isValid() && H
.isValid())
1678 return concats(L
, H
, Results
);
1680 return OpRef::fail();
1683 OpRef
HvxSelector::shuffp2(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1684 ResultStack
&Results
) {
1685 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1686 if (isUndef(SM
.Mask
))
1687 return OpRef::undef(getPairVT(MVT::i8
));
1689 int VecLen
= SM
.Mask
.size();
1690 SmallVector
<int,256> PackedMask(VecLen
);
1691 OpRef P
= packp(SM
, Va
, Vb
, Results
, PackedMask
);
1693 return shuffp1(ShuffleMask(PackedMask
), P
, Results
);
1695 SmallVector
<int,256> MaskL(VecLen
), MaskR(VecLen
);
1696 splitMask(SM
.Mask
, MaskL
, MaskR
);
1698 OpRef L
= shuffp1(ShuffleMask(MaskL
), Va
, Results
);
1699 OpRef R
= shuffp1(ShuffleMask(MaskR
), Vb
, Results
);
1700 if (!L
.isValid() || !R
.isValid())
1701 return OpRef::fail();
1704 SmallVector
<uint8_t,256> Bytes(VecLen
);
1705 for (int I
= 0; I
!= VecLen
; ++I
) {
1709 return vmuxp(Bytes
, L
, R
, Results
);
1713 struct Deleter
: public SelectionDAG::DAGNodeDeletedListener
{
1714 template <typename T
>
1715 Deleter(SelectionDAG
&D
, T
&C
)
1716 : SelectionDAG::DAGNodeDeletedListener(D
, [&C
] (SDNode
*N
, SDNode
*E
) {
1721 template <typename T
>
1722 struct NullifyingVector
: public T
{
1723 DenseMap
<SDNode
*, SDNode
**> Refs
;
1724 NullifyingVector(T
&&V
) : T(V
) {
1725 for (unsigned i
= 0, e
= T::size(); i
!= e
; ++i
) {
1726 SDNode
*&N
= T::operator[](i
);
1730 void erase(SDNode
*N
) {
1731 auto F
= Refs
.find(N
);
1732 if (F
!= Refs
.end())
1733 *F
->second
= nullptr;
1738 void HvxSelector::select(SDNode
*ISelN
) {
1739 // What's important here is to select the right set of nodes. The main
1740 // selection algorithm loops over nodes in a topological order, i.e. users
1741 // are visited before their operands.
1743 // It is an error to have an unselected node with a selected operand, and
1744 // there is an assertion in the main selector code to enforce that.
1746 // Such a situation could occur if we selected a node, which is both a
1747 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1749 assert(ISelN
->getOpcode() == HexagonISD::ISEL
);
1750 SDNode
*N0
= ISelN
->getOperand(0).getNode();
1752 // There could have been nodes created (i.e. inserted into the DAG)
1753 // that are now dead. Remove them, in case they use any of the nodes
1754 // to select (and make them look shared).
1755 DAG
.RemoveDeadNodes();
1757 SetVector
<SDNode
*> SubNodes
;
1759 if (!N0
->isMachineOpcode()) {
1760 // Don't want to select N0 if it's shared with another node, except if
1761 // it's shared with other ISELs.
1762 auto IsISelN
= [](SDNode
*T
) { return T
->getOpcode() == HexagonISD::ISEL
; };
1763 if (llvm::all_of(N0
->uses(), IsISelN
))
1764 SubNodes
.insert(N0
);
1766 if (SubNodes
.empty()) {
1767 ISel
.ReplaceNode(ISelN
, N0
);
1771 // Need to manually select the nodes that are dominated by the ISEL. Other
1772 // nodes are reachable from the rest of the DAG, and so will be selected
1773 // by the DAG selection routine.
1774 SetVector
<SDNode
*> Dom
, NonDom
;
1777 auto IsDomRec
= [&Dom
, &NonDom
] (SDNode
*T
, auto Rec
) -> bool {
1780 if (T
->use_empty() || NonDom
.count(T
))
1782 for (SDNode
*U
: T
->uses()) {
1783 // If T is reachable from a known non-dominated node, then T itself
1784 // is non-dominated.
1794 auto IsDom
= [&IsDomRec
] (SDNode
*T
) { return IsDomRec(T
, IsDomRec
); };
1796 // Add the rest of nodes dominated by ISEL to SubNodes.
1797 for (unsigned I
= 0; I
!= SubNodes
.size(); ++I
) {
1798 for (SDValue Op
: SubNodes
[I
]->ops()) {
1799 SDNode
*O
= Op
.getNode();
1805 // Do a topological sort of nodes from Dom.
1806 SetVector
<SDNode
*> TmpQ
;
1808 std::map
<SDNode
*, unsigned> OpCount
;
1809 for (SDNode
*T
: Dom
) {
1810 unsigned NumDomOps
= llvm::count_if(T
->ops(), [&Dom
](const SDUse
&U
) {
1811 return Dom
.count(U
.getNode());
1814 OpCount
.insert({T
, NumDomOps
});
1819 for (unsigned I
= 0; I
!= TmpQ
.size(); ++I
) {
1820 SDNode
*S
= TmpQ
[I
];
1821 for (SDNode
*U
: S
->uses()) {
1824 auto F
= OpCount
.find(U
);
1825 assert(F
!= OpCount
.end());
1826 if (F
->second
> 0 && !--F
->second
)
1827 TmpQ
.insert(F
->first
);
1831 // Remove the marker.
1832 ISel
.ReplaceNode(ISelN
, N0
);
1834 assert(SubNodes
.size() == TmpQ
.size());
1835 NullifyingVector
<decltype(TmpQ
)::vector_type
> Queue(TmpQ
.takeVector());
1837 Deleter
DUQ(DAG
, Queue
);
1838 for (SDNode
*S
: reverse(Queue
)) {
1841 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S
->dump(&DAG
);});
1846 bool HvxSelector::scalarizeShuffle(ArrayRef
<int> Mask
, const SDLoc
&dl
,
1847 MVT ResTy
, SDValue Va
, SDValue Vb
,
1849 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1850 MVT ElemTy
= ResTy
.getVectorElementType();
1851 assert(ElemTy
== MVT::i8
);
1852 unsigned VecLen
= Mask
.size();
1853 bool HavePairs
= (2*HwLen
== VecLen
);
1854 MVT SingleTy
= getSingleVT(MVT::i8
);
1856 // The prior attempts to handle this shuffle may have left a bunch of
1857 // dead nodes in the DAG (such as constants). These nodes will be added
1858 // at the end of DAG's node list, which at that point had already been
1859 // sorted topologically. In the main selection loop, the node list is
1860 // traversed backwards from the root node, which means that any new
1861 // nodes (from the end of the list) will not be visited.
1862 // Scalarization will replace the shuffle node with the scalarized
1863 // expression, and if that expression reused any if the leftoever (dead)
1864 // nodes, these nodes would not be selected (since the "local" selection
1865 // only visits nodes that are not in AllNodes).
1866 // To avoid this issue, remove all dead nodes from the DAG now.
1867 // DAG.RemoveDeadNodes();
1869 SmallVector
<SDValue
,128> Ops
;
1870 LLVMContext
&Ctx
= *DAG
.getContext();
1871 MVT LegalTy
= Lower
.getTypeToTransformTo(Ctx
, ElemTy
).getSimpleVT();
1872 for (int I
: Mask
) {
1874 Ops
.push_back(ISel
.selectUndef(dl
, LegalTy
));
1887 Vec
= DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, SingleTy
, Vec
);
1889 Vec
= DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, SingleTy
, Vec
);
1893 SDValue Idx
= DAG
.getConstant(M
, dl
, MVT::i32
);
1894 SDValue Ex
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, LegalTy
, {Vec
, Idx
});
1895 SDValue L
= Lower
.LowerOperation(Ex
, DAG
);
1896 assert(L
.getNode());
1901 if (2*HwLen
== VecLen
) {
1902 SDValue B0
= DAG
.getBuildVector(SingleTy
, dl
, {Ops
.data(), HwLen
});
1903 SDValue L0
= Lower
.LowerOperation(B0
, DAG
);
1904 SDValue B1
= DAG
.getBuildVector(SingleTy
, dl
, {Ops
.data()+HwLen
, HwLen
});
1905 SDValue L1
= Lower
.LowerOperation(B1
, DAG
);
1906 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1907 // functions may expect to be called only for illegal operations, so
1908 // make sure that they are not called for legal ones. Develop a better
1909 // mechanism for dealing with this.
1910 LV
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, ResTy
, {L0
, L1
});
1912 SDValue BV
= DAG
.getBuildVector(ResTy
, dl
, Ops
);
1913 LV
= Lower
.LowerOperation(BV
, DAG
);
1916 assert(!N
->use_empty());
1917 SDValue IS
= DAG
.getNode(HexagonISD::ISEL
, dl
, ResTy
, LV
);
1918 ISel
.ReplaceNode(N
, IS
.getNode());
1919 select(IS
.getNode());
1920 DAG
.RemoveDeadNodes();
1924 SmallVector
<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM
,
1926 auto possibilities
= [](ArrayRef
<uint8_t> Bs
, unsigned Width
) -> uint32_t {
1927 unsigned Impossible
= ~(1u << Width
) + 1;
1928 for (unsigned I
= 0, E
= Bs
.size(); I
!= E
; ++I
) {
1932 if (~Impossible
== 0)
1934 for (unsigned Log
= 0; Log
!= Width
; ++Log
) {
1935 if (Impossible
& (1u << Log
))
1937 unsigned Expected
= (I
>> Log
) % 2;
1939 Impossible
|= (1u << Log
);
1945 SmallVector
<uint32_t, 8> Worklist(Width
);
1947 for (unsigned BitIdx
= 0; BitIdx
!= Width
; ++BitIdx
) {
1948 SmallVector
<uint8_t> BitValues(SM
.Mask
.size());
1949 for (int i
= 0, e
= SM
.Mask
.size(); i
!= e
; ++i
) {
1952 BitValues
[i
] = 0xff;
1954 BitValues
[i
] = (M
& (1u << BitIdx
)) != 0;
1956 Worklist
[BitIdx
] = possibilities(BitValues
, Width
);
1959 // If there is a word P in Worklist that matches multiple possibilities,
1960 // then if any other word Q matches any of the possibilities matched by P,
1961 // then Q matches all the possibilities matched by P. In fact, P == Q.
1962 // In other words, for each words P, Q, the sets of possibilities matched
1963 // by P and Q are either equal or disjoint (no partial overlap).
1965 // Illustration: For 4-bit values there are 4 complete sequences:
1966 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1967 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1968 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1969 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1971 // Words containing unknown bits that match two of the complete
1973 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1974 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1975 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1976 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1977 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1978 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1980 // Proof of the claim above:
1981 // Let P be a word that matches s0 and s1. For that to happen, all known
1982 // bits in P must match s0 and s1 exactly.
1983 // Assume there is Q that matches s1. Note that since P and Q came from
1984 // the same shuffle mask, the positions of unknown bits in P and Q match
1985 // exactly, which makes the indices of known bits be exactly the same
1986 // between P and Q. Since P matches s0 and s1, the known bits of P much
1987 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1988 // are exactly the same as in s1, which means that they are exactly the
1989 // same as in P. This implies that P == Q.
1991 // There can be a situation where there are more entries with the same
1992 // bits set than there are set bits (e.g. value 9 occuring more than 2
1993 // times). In such cases it will be impossible to complete this to a
1995 SmallVector
<uint32_t, 8> Sorted(Worklist
);
1996 llvm::sort(Sorted
.begin(), Sorted
.end());
1998 for (unsigned I
= 0, E
= Sorted
.size(); I
!= E
;) {
1999 unsigned P
= Sorted
[I
], Count
= 1;
2000 while (++I
!= E
&& P
== Sorted
[I
])
2002 if ((unsigned)llvm::popcount(P
) < Count
) {
2003 // Reset all occurences of P, if there are more occurrences of P
2004 // than there are bits in P.
2005 for (unsigned &Q
: Worklist
) {
2015 SmallVector
<uint32_t, 8>
2016 HvxSelector::completeToPerfect(ArrayRef
<uint32_t> Completions
, unsigned Width
) {
2017 // Pick a completion if there are multiple possibilities. For now just
2018 // select any valid completion.
2019 SmallVector
<uint32_t, 8> Comps(Completions
);
2021 for (unsigned I
= 0; I
!= Width
; ++I
) {
2022 uint32_t P
= Comps
[I
];
2024 if (isPowerOf2_32(P
))
2026 // T = least significant bit of P.
2027 uint32_t T
= P
^ ((P
- 1) & P
);
2028 // Clear T in all remaining words matching P.
2029 for (unsigned J
= I
+ 1; J
!= Width
; ++J
) {
2037 // Check that we have generated a valid completion.
2039 for (uint32_t C
: Comps
) {
2040 assert(isPowerOf2_32(C
));
2043 assert(OrAll
== (1u << Width
) -1);
2049 std::optional
<int> HvxSelector::rotationDistance(ShuffleMask SM
,
2051 std::optional
<int> Dist
;
2052 for (int I
= 0, E
= SM
.Mask
.size(); I
!= E
; ++I
) {
2057 if ((I
+ *Dist
) % static_cast<int>(WrapAt
) != M
)
2058 return std::nullopt
;
2060 // Integer a%b operator assumes rounding towards zero by /, so it
2061 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2062 // Add WrapAt in an attempt to keep I+Dist non-negative.
2065 Dist
= *Dist
+ WrapAt
;
2071 OpRef
HvxSelector::contracting(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
2072 ResultStack
&Results
) {
2073 DEBUG_WITH_TYPE("isel", { dbgs() << __func__
<< '\n'; });
2074 if (!Va
.isValid() || !Vb
.isValid())
2075 return OpRef::fail();
2077 // Contracting shuffles, i.e. instructions that always discard some bytes
2078 // from the operand vectors.
2084 // V6_vpack{e,o}{b,h}
2086 int VecLen
= SM
.Mask
.size();
2088 // First, check for funnel shifts.
2089 if (auto Dist
= rotationDistance(SM
, 2 * VecLen
)) {
2090 OpRef Funnel
= funnels(Va
, Vb
, *Dist
, Results
);
2091 if (Funnel
.isValid())
2095 MVT SingleTy
= getSingleVT(MVT::i8
);
2096 MVT PairTy
= getPairVT(MVT::i8
);
2098 auto same
= [](ArrayRef
<int> Mask1
, ArrayRef
<int> Mask2
) -> bool {
2099 return Mask1
== Mask2
;
2102 using PackConfig
= std::pair
<unsigned, bool>;
2103 PackConfig Packs
[] = {
2104 {1, false}, // byte, even
2105 {1, true}, // byte, odd
2106 {2, false}, // half, even
2107 {2, true}, // half, odd
2111 unsigned Opcodes
[] = {
2112 Hexagon::V6_vpackeb
,
2113 Hexagon::V6_vpackob
,
2114 Hexagon::V6_vpackeh
,
2115 Hexagon::V6_vpackoh
,
2117 for (int i
= 0, e
= std::size(Opcodes
); i
!= e
; ++i
) {
2118 auto [Size
, Odd
] = Packs
[i
];
2119 if (same(SM
.Mask
, shuffles::mask(shuffles::vpack
, HwLen
, Size
, Odd
))) {
2120 Results
.push(Opcodes
[i
], SingleTy
, {Vb
, Va
});
2121 return OpRef::res(Results
.top());
2127 unsigned Opcodes
[] = {
2128 Hexagon::V6_vshuffeb
,
2129 Hexagon::V6_vshuffob
,
2130 Hexagon::V6_vshufeh
,
2131 Hexagon::V6_vshufoh
,
2133 for (int i
= 0, e
= std::size(Opcodes
); i
!= e
; ++i
) {
2134 auto [Size
, Odd
] = Packs
[i
];
2135 if (same(SM
.Mask
, shuffles::mask(shuffles::vshuff
, HwLen
, Size
, Odd
))) {
2136 Results
.push(Opcodes
[i
], SingleTy
, {Vb
, Va
});
2137 return OpRef::res(Results
.top());
2143 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2144 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2145 // variants of "deal" can be done similarly.
2146 unsigned Opcodes
[] = {
2147 Hexagon::V6_vpackeb
,
2148 Hexagon::V6_vpackob
,
2149 Hexagon::V6_vpackeh
,
2150 Hexagon::V6_vpackoh
,
2152 const SDLoc
&dl(Results
.InpNode
);
2154 for (int i
= 0, e
= std::size(Opcodes
); i
!= e
; ++i
) {
2155 auto [Size
, Odd
] = Packs
[i
];
2156 if (same(SM
.Mask
, shuffles::mask(shuffles::vdeal
, HwLen
, Size
, Odd
))) {
2157 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {getConst32(-2 * Size
, dl
)});
2158 Results
.push(Hexagon::V6_vdealvdd
, PairTy
, {Vb
, Va
, OpRef::res(-1)});
2159 auto vdeal
= OpRef::res(Results
.top());
2160 Results
.push(Opcodes
[i
], SingleTy
,
2161 {OpRef::hi(vdeal
), OpRef::lo(vdeal
)});
2162 return OpRef::res(Results
.top());
2167 if (same(SM
.Mask
, shuffles::mask(shuffles::vdealb4w
, HwLen
))) {
2168 Results
.push(Hexagon::V6_vdealb4w
, SingleTy
, {Vb
, Va
});
2169 return OpRef::res(Results
.top());
2172 return OpRef::fail();
2175 OpRef
HvxSelector::expanding(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
2176 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
2177 // Expanding shuffles (using all elements and inserting into larger vector):
2179 // V6_vunpacku{b,h} [*]
2181 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2183 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2184 // they are not shuffles.
2186 // The argument is a single vector.
2188 int VecLen
= SM
.Mask
.size();
2189 assert(2*HwLen
== unsigned(VecLen
) && "Expecting vector-pair type");
2191 std::pair
<int,unsigned> Strip
= findStrip(SM
.Mask
, 1, VecLen
);
2193 // The patterns for the unpacks, in terms of the starting offsets of the
2194 // consecutive strips (L = length of the strip, N = VecLen):
2196 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2198 if (Strip
.first
!= 0)
2199 return OpRef::fail();
2201 // The vunpackus only handle byte and half-word.
2202 if (Strip
.second
!= 1 && Strip
.second
!= 2)
2203 return OpRef::fail();
2206 int L
= Strip
.second
;
2208 // First, check the non-ignored strips.
2209 for (int I
= 2*L
; I
< N
; I
+= 2*L
) {
2210 auto S
= findStrip(SM
.Mask
.drop_front(I
), 1, N
-I
);
2211 if (S
.second
!= unsigned(L
))
2212 return OpRef::fail();
2214 return OpRef::fail();
2217 for (int I
= L
; I
< N
; I
+= 2*L
) {
2218 auto S
= findStrip(SM
.Mask
.drop_front(I
), 0, N
-I
);
2219 if (S
.first
!= -1 || S
.second
!= unsigned(L
))
2220 return OpRef::fail();
2223 unsigned Opc
= Strip
.second
== 1 ? Hexagon::V6_vunpackub
2224 : Hexagon::V6_vunpackuh
;
2225 Results
.push(Opc
, getPairVT(MVT::i8
), {Va
});
2226 return OpRef::res(Results
.top());
2229 OpRef
HvxSelector::perfect(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
2230 DEBUG_WITH_TYPE("isel", { dbgs() << __func__
<< '\n'; });
2234 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
2235 // V6_vshuffvdd (V6_vshuff)
2236 // V6_dealvdd (V6_vdeal)
2238 int VecLen
= SM
.Mask
.size();
2239 assert(isPowerOf2_32(VecLen
) && Log2_32(VecLen
) <= 8);
2240 unsigned LogLen
= Log2_32(VecLen
);
2241 unsigned HwLog
= Log2_32(HwLen
);
2242 // The result length must be the same as the length of a single vector,
2243 // or a vector pair.
2244 assert(LogLen
== HwLog
|| LogLen
== HwLog
+ 1);
2245 bool HavePairs
= LogLen
== HwLog
+ 1;
2247 SmallVector
<unsigned, 8> Perm(LogLen
);
2249 // Check if this could be a perfect shuffle, or a combination of perfect
2252 // Consider this permutation (using hex digits to make the ASCII diagrams
2254 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2255 // This is a "deal" operation: divide the input into two halves, and
2256 // create the output by picking elements by alternating between these two
2258 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2261 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2262 // a somwehat different mechanism that could be used to perform shuffle/
2263 // deal operations: a 2x2 transpose.
2264 // Consider the halves of inputs again, they can be interpreted as a 2x8
2265 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2266 // together. Now, when considering 2 elements at a time, it will be a 2x4
2267 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2270 // With groups of 4, this will become a single 2x2 matrix, and so on.
2272 // The 2x2 transpose instruction works by transposing each of the 2x2
2273 // matrices (or "sub-matrices"), given a specific group size. For example,
2274 // if the group size is 1 (i.e. each element is its own group), there
2275 // will be four transposes of the four 2x2 matrices that form the 2x8.
2276 // For example, with the inputs as above, the result will be:
2279 // Now, this result can be tranposed again, but with the group size of 2:
2282 // If we then transpose that result, but with the group size of 4, we get:
2285 // If we concatenate these two rows, it will be
2286 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2287 // which is the same as the "deal" [*] above.
2289 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2290 // with changing group size. HVX has two instructions:
2291 // Vdd = V6_vdealvdd Vu, Vv, Rt
2292 // Vdd = V6_shufvdd Vu, Vv, Rt
2293 // that perform exactly that. The register Rt controls which transposes are
2294 // going to happen: a bit at position n (counting from 0) indicates that a
2295 // transpose with a group size of 2^n will take place. If multiple bits are
2296 // set, multiple transposes will happen: vdealvdd will perform them starting
2297 // with the largest group size, vshuffvdd will do them in the reverse order.
2299 // The main observation is that each 2x2 transpose corresponds to swapping
2300 // columns of bits in the binary representation of the values.
2302 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2303 // in a given column. The * denote the columns that will be swapped.
2304 // The transpose with the group size 2^n corresponds to swapping columns
2305 // 3 (the highest log) and log2(n):
2307 // 3 2 1 0 0 2 1 3 0 2 3 1
2309 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2310 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2311 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2312 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2313 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2314 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2315 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2316 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2317 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2318 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2319 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2320 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2321 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2322 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2323 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2324 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2326 // There is one special case that is not a perfect shuffle, but can be
2327 // turned into one easily: when the shuffle operates on a vector pair,
2328 // but the two vectors in the pair are swapped. The code that identifies
2329 // perfect shuffles will reject it, unless the order is reversed.
2330 shuffles::MaskT
MaskStorage(SM
.Mask
);
2331 bool InvertedPair
= false;
2332 if (HavePairs
&& SM
.Mask
[0] >= int(HwLen
)) {
2333 for (int i
= 0, e
= SM
.Mask
.size(); i
!= e
; ++i
) {
2335 MaskStorage
[i
] = M
>= int(HwLen
) ? M
- HwLen
: M
+ HwLen
;
2337 InvertedPair
= true;
2338 SM
= ShuffleMask(MaskStorage
);
2341 auto Comps
= getPerfectCompletions(SM
, LogLen
);
2342 if (llvm::is_contained(Comps
, 0))
2343 return OpRef::fail();
2345 auto Pick
= completeToPerfect(Comps
, LogLen
);
2346 for (unsigned I
= 0; I
!= LogLen
; ++I
)
2347 Perm
[I
] = Log2_32(Pick
[I
]);
2349 // Once we have Perm, represent it as cycles. Denote the maximum log2
2350 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2351 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2352 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2353 // order being from left to right. Any (contiguous) segment where the
2354 // values ai, ai+1...aj are either all increasing or all decreasing,
2355 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2357 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2358 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2359 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2360 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2363 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2364 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2365 // (4 0 1)(4 0)(4 2 3)(4 2).
2366 // It can then be expanded into swaps as
2367 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2368 // and broken up into "increasing" segments as
2369 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2370 // This is equivalent to
2371 // (4 0 1)(4 0 2 3)(4 2),
2372 // which can be implemented as 3 vshufvdd instructions.
2374 using CycleType
= SmallVector
<unsigned, 8>;
2375 std::set
<CycleType
> Cycles
;
2376 std::set
<unsigned> All
;
2378 for (unsigned I
: Perm
)
2381 // If the cycle contains LogLen-1, move it to the front of the cycle.
2382 // Otherwise, return the cycle unchanged.
2383 auto canonicalize
= [LogLen
](const CycleType
&C
) -> CycleType
{
2384 unsigned LogPos
, N
= C
.size();
2385 for (LogPos
= 0; LogPos
!= N
; ++LogPos
)
2386 if (C
[LogPos
] == LogLen
- 1)
2391 CycleType
NewC(C
.begin() + LogPos
, C
.end());
2392 NewC
.append(C
.begin(), C
.begin() + LogPos
);
2396 auto pfs
= [](const std::set
<CycleType
> &Cs
, unsigned Len
) {
2397 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2398 // for bytes zero is included, for halfwords is not.
2401 const CycleType
&C
= *Cs
.begin();
2402 if (C
[0] != Len
- 1)
2404 int D
= Len
- C
.size();
2405 if (D
!= 0 && D
!= 1)
2408 bool IsDeal
= true, IsShuff
= true;
2409 for (unsigned I
= 1; I
!= Len
- D
; ++I
) {
2410 if (C
[I
] != Len
- 1 - I
)
2412 if (C
[I
] != I
- (1 - D
)) // I-1, I
2415 // At most one, IsDeal or IsShuff, can be non-zero.
2416 assert(!(IsDeal
|| IsShuff
) || IsDeal
!= IsShuff
);
2417 static unsigned Deals
[] = {Hexagon::V6_vdealb
, Hexagon::V6_vdealh
};
2418 static unsigned Shufs
[] = {Hexagon::V6_vshuffb
, Hexagon::V6_vshuffh
};
2419 return IsDeal
? Deals
[D
] : (IsShuff
? Shufs
[D
] : 0);
2422 while (!All
.empty()) {
2423 unsigned A
= *All
.begin();
2427 for (unsigned B
= Perm
[A
]; B
!= A
; B
= Perm
[B
]) {
2433 Cycles
.insert(canonicalize(C
));
2436 MVT SingleTy
= getSingleVT(MVT::i8
);
2437 MVT PairTy
= getPairVT(MVT::i8
);
2439 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2440 if (unsigned(VecLen
) == HwLen
) {
2441 if (unsigned SingleOpc
= pfs(Cycles
, LogLen
)) {
2442 Results
.push(SingleOpc
, SingleTy
, {Va
});
2443 return OpRef::res(Results
.top());
2447 // From the cycles, construct the sequence of values that will
2448 // then form the control values for vdealvdd/vshuffvdd, i.e.
2449 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2450 // This essentially strips the M value from the cycles where
2451 // it's present, and performs the insertion of M (then stripping)
2452 // for cycles without M (as described in an earlier comment).
2453 SmallVector
<unsigned, 8> SwapElems
;
2454 // When the input is extended (i.e. single vector becomes a pair),
2455 // this is done by using an "undef" vector as the second input.
2456 // However, then we get
2457 // input 1: GOODBITS
2458 // input 2: ........
2460 // input 1: ....BITS
2461 // input 2: ....GOOD
2462 // Then at the end, this needs to be undone. To accomplish this,
2463 // artificially add "LogLen-1" at both ends of the sequence.
2465 SwapElems
.push_back(LogLen
- 1);
2466 for (const CycleType
&C
: Cycles
) {
2467 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2468 unsigned First
= (C
[0] == LogLen
- 1) ? 1 : 0;
2469 SwapElems
.append(C
.begin() + First
, C
.end());
2471 SwapElems
.push_back(C
[0]);
2474 SwapElems
.push_back(LogLen
- 1);
2476 const SDLoc
&dl(Results
.InpNode
);
2477 OpRef Arg
= HavePairs
? Va
: concats(Va
, OpRef::undef(SingleTy
), Results
);
2479 Arg
= concats(OpRef::hi(Arg
), OpRef::lo(Arg
), Results
);
2481 for (unsigned I
= 0, E
= SwapElems
.size(); I
!= E
;) {
2482 bool IsInc
= I
== E
- 1 || SwapElems
[I
] < SwapElems
[I
+ 1];
2483 unsigned S
= (1u << SwapElems
[I
]);
2485 while (++I
< E
- 1 && IsInc
== (SwapElems
[I
] < SwapElems
[I
+ 1]))
2486 S
|= 1u << SwapElems
[I
];
2487 // The above loop will not add a bit for the final SwapElems[I+1],
2489 S
|= 1u << SwapElems
[I
];
2494 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {getConst32(S
, dl
)});
2495 Res
.Opc
= IsInc
? Hexagon::V6_vshuffvdd
: Hexagon::V6_vdealvdd
;
2497 Res
.Ops
= {OpRef::hi(Arg
), OpRef::lo(Arg
), OpRef::res(-1)};
2499 Arg
= OpRef::res(Results
.top());
2502 return HavePairs
? Arg
: OpRef::lo(Arg
);
2505 OpRef
HvxSelector::butterfly(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
2506 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
2507 // Butterfly shuffles.
2513 // The assumption here is that all elements picked by Mask are in the
2514 // first operand to the vector_shuffle. This assumption is enforced
2517 MVT ResTy
= getSingleVT(MVT::i8
);
2518 PermNetwork::Controls FC
, RC
;
2519 const SDLoc
&dl(Results
.InpNode
);
2520 int VecLen
= SM
.Mask
.size();
2522 for (int M
: SM
.Mask
) {
2523 if (M
!= -1 && M
>= VecLen
)
2524 return OpRef::fail();
2527 // Try the deltas/benes for both single vectors and vector pairs.
2528 ForwardDeltaNetwork
FN(SM
.Mask
);
2530 SDValue Ctl
= getVectorConstant(FC
, dl
);
2531 Results
.push(Hexagon::V6_vdelta
, ResTy
, {Va
, OpRef(Ctl
)});
2532 return OpRef::res(Results
.top());
2535 // Try reverse delta.
2536 ReverseDeltaNetwork
RN(SM
.Mask
);
2538 SDValue Ctl
= getVectorConstant(RC
, dl
);
2539 Results
.push(Hexagon::V6_vrdelta
, ResTy
, {Va
, OpRef(Ctl
)});
2540 return OpRef::res(Results
.top());
2544 BenesNetwork
BN(SM
.Mask
);
2545 if (BN
.run(FC
, RC
)) {
2546 SDValue CtlF
= getVectorConstant(FC
, dl
);
2547 SDValue CtlR
= getVectorConstant(RC
, dl
);
2548 Results
.push(Hexagon::V6_vdelta
, ResTy
, {Va
, OpRef(CtlF
)});
2549 Results
.push(Hexagon::V6_vrdelta
, ResTy
,
2550 {OpRef::res(-1), OpRef(CtlR
)});
2551 return OpRef::res(Results
.top());
2554 return OpRef::fail();
2557 SDValue
HvxSelector::getConst32(int Val
, const SDLoc
&dl
) {
2558 return DAG
.getTargetConstant(Val
, dl
, MVT::i32
);
2561 SDValue
HvxSelector::getVectorConstant(ArrayRef
<uint8_t> Data
,
2563 SmallVector
<SDValue
, 128> Elems
;
2564 for (uint8_t C
: Data
)
2565 Elems
.push_back(DAG
.getConstant(C
, dl
, MVT::i8
));
2566 MVT VecTy
= MVT::getVectorVT(MVT::i8
, Data
.size());
2567 SDValue BV
= DAG
.getBuildVector(VecTy
, dl
, Elems
);
2568 SDValue LV
= Lower
.LowerOperation(BV
, DAG
);
2569 DAG
.RemoveDeadNode(BV
.getNode());
2570 return DAG
.getNode(HexagonISD::ISEL
, dl
, VecTy
, LV
);
2573 void HvxSelector::selectExtractSubvector(SDNode
*N
) {
2574 SDValue Inp
= N
->getOperand(0);
2575 MVT ResTy
= N
->getValueType(0).getSimpleVT();
2576 unsigned Idx
= N
->getConstantOperandVal(1);
2578 [[maybe_unused
]] MVT InpTy
= Inp
.getValueType().getSimpleVT();
2579 [[maybe_unused
]] unsigned ResLen
= ResTy
.getVectorNumElements();
2580 assert(InpTy
.getVectorElementType() == ResTy
.getVectorElementType());
2581 assert(2 * ResLen
== InpTy
.getVectorNumElements());
2582 assert(Idx
== 0 || Idx
== ResLen
);
2584 unsigned SubReg
= Idx
== 0 ? Hexagon::vsub_lo
: Hexagon::vsub_hi
;
2585 SDValue Ext
= DAG
.getTargetExtractSubreg(SubReg
, SDLoc(N
), ResTy
, Inp
);
2587 ISel
.ReplaceNode(N
, Ext
.getNode());
2590 void HvxSelector::selectShuffle(SDNode
*N
) {
2591 DEBUG_WITH_TYPE("isel", {
2592 dbgs() << "Starting " << __func__
<< " on node:\n";
2595 MVT ResTy
= N
->getValueType(0).getSimpleVT();
2596 // Assume that vector shuffles operate on vectors of bytes.
2597 assert(ResTy
.isVector() && ResTy
.getVectorElementType() == MVT::i8
);
2599 auto *SN
= cast
<ShuffleVectorSDNode
>(N
);
2600 std::vector
<int> Mask(SN
->getMask().begin(), SN
->getMask().end());
2601 // This shouldn't really be necessary. Is it?
2602 for (int &Idx
: Mask
)
2603 if (Idx
!= -1 && Idx
< 0)
2606 unsigned VecLen
= Mask
.size();
2607 bool HavePairs
= (2*HwLen
== VecLen
);
2608 assert(ResTy
.getSizeInBits() / 8 == VecLen
);
2610 // Vd = vector_shuffle Va, Vb, Mask
2613 bool UseLeft
= false, UseRight
= false;
2614 for (unsigned I
= 0; I
!= VecLen
; ++I
) {
2617 unsigned Idx
= Mask
[I
];
2618 assert(Idx
< 2*VecLen
);
2625 DEBUG_WITH_TYPE("isel", {
2626 dbgs() << "VecLen=" << VecLen
<< " HwLen=" << HwLen
<< " UseLeft="
2627 << UseLeft
<< " UseRight=" << UseRight
<< " HavePairs="
2628 << HavePairs
<< '\n';
2630 // If the mask is all -1's, generate "undef".
2631 if (!UseLeft
&& !UseRight
) {
2632 ISel
.ReplaceNode(N
, ISel
.selectUndef(SDLoc(SN
), ResTy
).getNode());
2636 SDValue Vec0
= N
->getOperand(0);
2637 SDValue Vec1
= N
->getOperand(1);
2638 assert(Vec0
.getValueType() == ResTy
&& Vec1
.getValueType() == ResTy
);
2640 ResultStack
Results(SN
);
2641 OpRef Va
= OpRef::undef(ResTy
);
2642 OpRef Vb
= OpRef::undef(ResTy
);
2644 if (!Vec0
.isUndef()) {
2645 Results
.push(TargetOpcode::COPY
, ResTy
, {Vec0
});
2646 Va
= OpRef::OpRef::res(Results
.top());
2648 if (!Vec1
.isUndef()) {
2649 Results
.push(TargetOpcode::COPY
, ResTy
, {Vec1
});
2650 Vb
= OpRef::res(Results
.top());
2653 OpRef Res
= !HavePairs
? shuffs2(ShuffleMask(Mask
), Va
, Vb
, Results
)
2654 : shuffp2(ShuffleMask(Mask
), Va
, Vb
, Results
);
2656 bool Done
= Res
.isValid();
2658 // Make sure that Res is on the stack before materializing.
2659 Results
.push(TargetOpcode::COPY
, ResTy
, {Res
});
2660 materialize(Results
);
2662 Done
= scalarizeShuffle(Mask
, SDLoc(N
), ResTy
, Vec0
, Vec1
, N
);
2667 dbgs() << "Unhandled shuffle:\n";
2670 llvm_unreachable("Failed to select vector shuffle");
2674 void HvxSelector::selectRor(SDNode
*N
) {
2675 // If this is a rotation by less than 8, use V6_valignbi.
2676 MVT Ty
= N
->getValueType(0).getSimpleVT();
2678 SDValue VecV
= N
->getOperand(0);
2679 SDValue RotV
= N
->getOperand(1);
2680 SDNode
*NewN
= nullptr;
2682 if (auto *CN
= dyn_cast
<ConstantSDNode
>(RotV
.getNode())) {
2683 unsigned S
= CN
->getZExtValue() % HST
.getVectorLength();
2685 NewN
= VecV
.getNode();
2686 } else if (isUInt
<3>(S
)) {
2687 NewN
= DAG
.getMachineNode(Hexagon::V6_valignbi
, dl
, Ty
,
2688 {VecV
, VecV
, getConst32(S
, dl
)});
2693 NewN
= DAG
.getMachineNode(Hexagon::V6_vror
, dl
, Ty
, {VecV
, RotV
});
2695 ISel
.ReplaceNode(N
, NewN
);
2698 void HvxSelector::selectVAlign(SDNode
*N
) {
2699 SDValue Vv
= N
->getOperand(0);
2700 SDValue Vu
= N
->getOperand(1);
2701 SDValue Rt
= N
->getOperand(2);
2702 SDNode
*NewN
= DAG
.getMachineNode(Hexagon::V6_valignb
, SDLoc(N
),
2703 N
->getValueType(0), {Vv
, Vu
, Rt
});
2704 ISel
.ReplaceNode(N
, NewN
);
2705 DAG
.RemoveDeadNode(N
);
2708 void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2709 auto getNodes
= [this]() -> std::vector
<SDNode
*> {
2710 std::vector
<SDNode
*> T
;
2711 T
.reserve(CurDAG
->allnodes_size());
2712 for (SDNode
&N
: CurDAG
->allnodes())
2717 ppHvxShuffleOfShuffle(getNodes());
2720 template <> struct std::hash
<SDValue
> {
2721 std::size_t operator()(SDValue V
) const {
2722 return std::hash
<const void *>()(V
.getNode()) +
2723 std::hash
<unsigned>()(V
.getResNo());
2727 void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector
<SDNode
*> &&Nodes
) {
2729 // t10: v64i32 = ...
2730 // t46: v128i8 = vector_shuffle<...> t44, t45
2731 // t48: v128i8 = vector_shuffle<...> t44, t45
2732 // t42: v128i8 = vector_shuffle<...> t46, t48
2733 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2734 // t44: v128i8 = bitcast t12
2735 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2736 // t45: v128i8 = bitcast t15
2737 SelectionDAG
&DAG
= *CurDAG
;
2738 unsigned HwLen
= HST
->getVectorLength();
2740 struct SubVectorInfo
{
2741 SubVectorInfo(SDValue S
, unsigned H
) : Src(S
), HalfIdx(H
) {}
2746 using MapType
= std::unordered_map
<SDValue
, unsigned>;
2748 auto getMaskElt
= [&](unsigned Idx
, ShuffleVectorSDNode
*Shuff0
,
2749 ShuffleVectorSDNode
*Shuff1
,
2750 const MapType
&OpMap
) -> int {
2751 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2752 // Idx as a (non-undef) element of the top level shuffle's mask, that
2753 // is, index into concat(Shuff0, Shuff1).
2754 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2755 // same source vector (as described by OpMap), return the index of
2756 // that source vector corresponding to Idx.
2757 ShuffleVectorSDNode
*OpShuff
= Idx
< HwLen
? Shuff0
: Shuff1
;
2761 // Get the mask index that M points at in the corresponding operand.
2762 int MaybeN
= OpShuff
->getMaskElt(Idx
);
2766 auto N
= static_cast<unsigned>(MaybeN
);
2767 unsigned SrcBase
= N
< HwLen
? OpMap
.at(OpShuff
->getOperand(0))
2768 : OpMap
.at(OpShuff
->getOperand(1));
2775 auto fold3
= [&](SDValue TopShuff
, SDValue Inp
, MapType
&&OpMap
) -> SDValue
{
2776 // Fold all 3 shuffles into a single one.
2777 auto *This
= cast
<ShuffleVectorSDNode
>(TopShuff
);
2778 auto *S0
= cast
<ShuffleVectorSDNode
>(TopShuff
.getOperand(0));
2779 auto *S1
= cast
<ShuffleVectorSDNode
>(TopShuff
.getOperand(1));
2780 ArrayRef
<int> TopMask
= This
->getMask();
2781 // This should be guaranteed by type checks in the caller, and the fact
2782 // that all shuffles should have been promoted to operate on MVT::i8.
2783 assert(TopMask
.size() == S0
->getMask().size() &&
2784 TopMask
.size() == S1
->getMask().size());
2785 assert(TopMask
.size() == HwLen
);
2787 SmallVector
<int, 256> FoldedMask(2 * HwLen
);
2788 for (unsigned I
= 0; I
!= HwLen
; ++I
) {
2789 int MaybeM
= TopMask
[I
];
2792 getMaskElt(static_cast<unsigned>(MaybeM
), S0
, S1
, OpMap
);
2797 // The second half of the result will be all-undef.
2798 std::fill(FoldedMask
.begin() + HwLen
, FoldedMask
.end(), -1);
2801 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2802 // (LoHalf FoldedShuffle)
2803 const SDLoc
&dl(TopShuff
);
2804 MVT SingleTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
2805 MVT PairTy
= MVT::getVectorVT(MVT::i8
, 2 * HwLen
);
2806 SDValue FoldedShuff
=
2807 DAG
.getVectorShuffle(PairTy
, dl
, DAG
.getBitcast(PairTy
, Inp
),
2808 DAG
.getUNDEF(PairTy
), FoldedMask
);
2809 return DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, SingleTy
, FoldedShuff
,
2810 DAG
.getConstant(0, dl
, MVT::i32
));
2813 auto getSourceInfo
= [](SDValue V
) -> std::optional
<SubVectorInfo
> {
2814 while (V
.getOpcode() == ISD::BITCAST
)
2815 V
= V
.getOperand(0);
2816 if (V
.getOpcode() != ISD::EXTRACT_SUBVECTOR
)
2817 return std::nullopt
;
2818 return SubVectorInfo(V
.getOperand(0),
2819 !cast
<ConstantSDNode
>(V
.getOperand(1))->isZero());
2822 for (SDNode
*N
: Nodes
) {
2823 if (N
->getOpcode() != ISD::VECTOR_SHUFFLE
)
2825 EVT ResTy
= N
->getValueType(0);
2826 if (ResTy
.getVectorElementType() != MVT::i8
)
2828 if (ResTy
.getVectorNumElements() != HwLen
)
2831 SDValue V0
= N
->getOperand(0);
2832 SDValue V1
= N
->getOperand(1);
2833 if (V0
.getOpcode() != ISD::VECTOR_SHUFFLE
)
2835 if (V1
.getOpcode() != ISD::VECTOR_SHUFFLE
)
2837 if (V0
.getValueType() != ResTy
|| V1
.getValueType() != ResTy
)
2840 // Check if all operands of the two operand shuffles are extract_subvectors
2841 // from the same vector pair.
2842 auto V0A
= getSourceInfo(V0
.getOperand(0));
2843 if (!V0A
.has_value())
2845 auto V0B
= getSourceInfo(V0
.getOperand(1));
2846 if (!V0B
.has_value() || V0B
->Src
!= V0A
->Src
)
2848 auto V1A
= getSourceInfo(V1
.getOperand(0));
2849 if (!V1A
.has_value() || V1A
->Src
!= V0A
->Src
)
2851 auto V1B
= getSourceInfo(V1
.getOperand(1));
2852 if (!V1B
.has_value() || V1B
->Src
!= V0A
->Src
)
2855 // The source must be a pair. This should be guaranteed here,
2856 // but check just in case.
2857 assert(V0A
->Src
.getValueType().getSizeInBits() == 16 * HwLen
);
2860 {V0
.getOperand(0), V0A
->HalfIdx
* HwLen
},
2861 {V0
.getOperand(1), V0B
->HalfIdx
* HwLen
},
2862 {V1
.getOperand(0), V1A
->HalfIdx
* HwLen
},
2863 {V1
.getOperand(1), V1B
->HalfIdx
* HwLen
},
2865 SDValue NewS
= fold3(SDValue(N
, 0), V0A
->Src
, std::move(OpMap
));
2866 ReplaceNode(N
, NewS
.getNode());
2870 void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode
*N
) {
2871 HvxSelector(*this, *CurDAG
).selectExtractSubvector(N
);
2874 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode
*N
) {
2875 HvxSelector(*this, *CurDAG
).selectShuffle(N
);
2878 void HexagonDAGToDAGISel::SelectHvxRor(SDNode
*N
) {
2879 HvxSelector(*this, *CurDAG
).selectRor(N
);
2882 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode
*N
) {
2883 HvxSelector(*this, *CurDAG
).selectVAlign(N
);
2886 void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode
*N
) {
2888 SDValue Chain
= N
->getOperand(0);
2889 SDValue Address
= N
->getOperand(2);
2890 SDValue Predicate
= N
->getOperand(3);
2891 SDValue Base
= N
->getOperand(4);
2892 SDValue Modifier
= N
->getOperand(5);
2893 SDValue Offset
= N
->getOperand(6);
2894 SDValue ImmOperand
= CurDAG
->getTargetConstant(0, dl
, MVT::i32
);
2897 unsigned IntNo
= N
->getConstantOperandVal(1);
2900 llvm_unreachable("Unexpected HVX gather intrinsic.");
2901 case Intrinsic::hexagon_V6_vgathermhq
:
2902 case Intrinsic::hexagon_V6_vgathermhq_128B
:
2903 Opcode
= Hexagon::V6_vgathermhq_pseudo
;
2905 case Intrinsic::hexagon_V6_vgathermwq
:
2906 case Intrinsic::hexagon_V6_vgathermwq_128B
:
2907 Opcode
= Hexagon::V6_vgathermwq_pseudo
;
2909 case Intrinsic::hexagon_V6_vgathermhwq
:
2910 case Intrinsic::hexagon_V6_vgathermhwq_128B
:
2911 Opcode
= Hexagon::V6_vgathermhwq_pseudo
;
2915 SDVTList VTs
= CurDAG
->getVTList(MVT::Other
);
2916 SDValue Ops
[] = { Address
, ImmOperand
,
2917 Predicate
, Base
, Modifier
, Offset
, Chain
};
2918 SDNode
*Result
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
2920 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2921 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
2923 ReplaceNode(N
, Result
);
2926 void HexagonDAGToDAGISel::SelectV65Gather(SDNode
*N
) {
2928 SDValue Chain
= N
->getOperand(0);
2929 SDValue Address
= N
->getOperand(2);
2930 SDValue Base
= N
->getOperand(3);
2931 SDValue Modifier
= N
->getOperand(4);
2932 SDValue Offset
= N
->getOperand(5);
2933 SDValue ImmOperand
= CurDAG
->getTargetConstant(0, dl
, MVT::i32
);
2936 unsigned IntNo
= N
->getConstantOperandVal(1);
2939 llvm_unreachable("Unexpected HVX gather intrinsic.");
2940 case Intrinsic::hexagon_V6_vgathermh
:
2941 case Intrinsic::hexagon_V6_vgathermh_128B
:
2942 Opcode
= Hexagon::V6_vgathermh_pseudo
;
2944 case Intrinsic::hexagon_V6_vgathermw
:
2945 case Intrinsic::hexagon_V6_vgathermw_128B
:
2946 Opcode
= Hexagon::V6_vgathermw_pseudo
;
2948 case Intrinsic::hexagon_V6_vgathermhw
:
2949 case Intrinsic::hexagon_V6_vgathermhw_128B
:
2950 Opcode
= Hexagon::V6_vgathermhw_pseudo
;
2954 SDVTList VTs
= CurDAG
->getVTList(MVT::Other
);
2955 SDValue Ops
[] = { Address
, ImmOperand
, Base
, Modifier
, Offset
, Chain
};
2956 SDNode
*Result
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
2958 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2959 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
2961 ReplaceNode(N
, Result
);
2964 void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode
*N
) {
2965 unsigned IID
= N
->getConstantOperandVal(0);
2968 case Intrinsic::hexagon_V6_vaddcarry
: {
2969 std::array
<SDValue
, 3> Ops
= {
2970 {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3)}};
2971 SDVTList VTs
= CurDAG
->getVTList(MVT::v16i32
, MVT::v64i1
);
2972 Result
= CurDAG
->getMachineNode(Hexagon::V6_vaddcarry
, SDLoc(N
), VTs
, Ops
);
2975 case Intrinsic::hexagon_V6_vaddcarry_128B
: {
2976 std::array
<SDValue
, 3> Ops
= {
2977 {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3)}};
2978 SDVTList VTs
= CurDAG
->getVTList(MVT::v32i32
, MVT::v128i1
);
2979 Result
= CurDAG
->getMachineNode(Hexagon::V6_vaddcarry
, SDLoc(N
), VTs
, Ops
);
2982 case Intrinsic::hexagon_V6_vsubcarry
: {
2983 std::array
<SDValue
, 3> Ops
= {
2984 {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3)}};
2985 SDVTList VTs
= CurDAG
->getVTList(MVT::v16i32
, MVT::v64i1
);
2986 Result
= CurDAG
->getMachineNode(Hexagon::V6_vsubcarry
, SDLoc(N
), VTs
, Ops
);
2989 case Intrinsic::hexagon_V6_vsubcarry_128B
: {
2990 std::array
<SDValue
, 3> Ops
= {
2991 {N
->getOperand(1), N
->getOperand(2), N
->getOperand(3)}};
2992 SDVTList VTs
= CurDAG
->getVTList(MVT::v32i32
, MVT::v128i1
);
2993 Result
= CurDAG
->getMachineNode(Hexagon::V6_vsubcarry
, SDLoc(N
), VTs
, Ops
);
2997 llvm_unreachable("Unexpected HVX dual output intrinsic.");
2999 ReplaceUses(N
, Result
);
3000 ReplaceUses(SDValue(N
, 0), SDValue(Result
, 0));
3001 ReplaceUses(SDValue(N
, 1), SDValue(Result
, 1));
3002 CurDAG
->RemoveDeadNode(N
);