1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/SetVector.h"
14 #include "llvm/CodeGen/MachineInstrBuilder.h"
15 #include "llvm/CodeGen/SelectionDAGISel.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
26 #define DEBUG_TYPE "hexagon-isel"
32 // --------------------------------------------------------------------
33 // Implementation of permutation networks.
35 // Implementation of the node routing through butterfly networks:
41 // Forward delta network consists of log(N) steps, where N is the number
42 // of inputs. In each step, an input can stay in place, or it can get
43 // routed to another position[1]. The step after that consists of two
44 // networks, each half in size in terms of the number of nodes. In those
45 // terms, in the given step, an input can go to either the upper or the
46 // lower network in the next step.
48 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
49 // positions as long as there is no conflict.
51 // Here's a delta network for 8 inputs, only the switching routes are
55 // |- 1 ---------------|- 2 -----|- 3 -|
57 // Inp[0] *** *** *** *** Out[0]
61 // Inp[1] *** \ / *** X *** *** Out[1]
65 // Inp[2] *** \ \ / / *** X *** *** Out[2]
69 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
75 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
79 // Inp[5] *** / / \ \ *** X *** *** Out[5]
83 // Inp[6] *** / \ *** X *** *** Out[6]
87 // Inp[7] *** *** *** *** Out[7]
90 // Reverse delta network is same as delta network, with the steps in
91 // the opposite order.
94 // Benes network is a forward delta network immediately followed by
95 // a reverse delta network.
97 enum class ColorKind
{ None
, Red
, Black
};
99 // Graph coloring utility used to partition nodes into two groups:
100 // they will correspond to nodes routed to the upper and lower networks.
103 using MapType
= std::map
<Node
, ColorKind
>;
104 static constexpr Node Ignore
= Node(-1);
106 Coloring(ArrayRef
<Node
> Ord
) : Order(Ord
) {
112 const MapType
&colors() const {
116 ColorKind
other(ColorKind Color
) {
117 if (Color
== ColorKind::None
)
118 return ColorKind::Red
;
119 return Color
== ColorKind::Red
? ColorKind::Black
: ColorKind::Red
;
122 LLVM_DUMP_METHOD
void dump() const;
125 ArrayRef
<Node
> Order
;
127 std::set
<Node
> Needed
;
129 using NodeSet
= std::set
<Node
>;
130 std::map
<Node
,NodeSet
> Edges
;
132 Node
conj(Node Pos
) {
133 Node Num
= Order
.size();
134 return (Pos
< Num
/2) ? Pos
+ Num
/2 : Pos
- Num
/2;
137 ColorKind
getColor(Node N
) {
138 auto F
= Colors
.find(N
);
139 return F
!= Colors
.end() ? F
->second
: ColorKind::None
;
142 std::pair
<bool, ColorKind
> getUniqueColor(const NodeSet
&Nodes
);
149 std::pair
<bool, ColorKind
> Coloring::getUniqueColor(const NodeSet
&Nodes
) {
150 auto Color
= ColorKind::None
;
151 for (Node N
: Nodes
) {
152 ColorKind ColorN
= getColor(N
);
153 if (ColorN
== ColorKind::None
)
155 if (Color
== ColorKind::None
)
157 else if (Color
!= ColorKind::None
&& Color
!= ColorN
)
158 return { false, ColorKind::None
};
160 return { true, Color
};
163 void Coloring::build() {
164 // Add Order[P] and Order[conj(P)] to Edges.
165 for (unsigned P
= 0; P
!= Order
.size(); ++P
) {
169 Node PC
= Order
[conj(P
)];
170 if (PC
!= Ignore
&& PC
!= I
)
174 // Add I and conj(I) to Edges.
175 for (unsigned I
= 0; I
!= Order
.size(); ++I
) {
176 if (!Needed
.count(I
))
179 // This will create an entry in the edge table, even if I is not
180 // connected to any other node. This is necessary, because it still
181 // needs to be colored.
182 NodeSet
&Is
= Edges
[I
];
188 bool Coloring::color() {
189 SetVector
<Node
> FirstQ
;
190 auto Enqueue
= [this,&FirstQ
] (Node N
) {
193 for (unsigned I
= 0; I
!= Q
.size(); ++I
) {
194 NodeSet
&Ns
= Edges
[Q
[I
]];
195 Q
.insert(Ns
.begin(), Ns
.end());
197 FirstQ
.insert(Q
.begin(), Q
.end());
199 for (Node N
: Needed
)
202 for (Node N
: FirstQ
) {
205 NodeSet
&Ns
= Edges
[N
];
206 auto P
= getUniqueColor(Ns
);
209 Colors
[N
] = other(P
.second
);
212 // First, color nodes that don't have any dups.
213 for (auto E
: Edges
) {
215 if (!Needed
.count(conj(N
)) || Colors
.count(N
))
217 auto P
= getUniqueColor(E
.second
);
220 Colors
[N
] = other(P
.second
);
223 // Now, nodes that are still uncolored. Since the graph can be modified
224 // in this step, create a work queue.
225 std::vector
<Node
> WorkQ
;
226 for (auto E
: Edges
) {
228 if (!Colors
.count(N
))
232 for (unsigned I
= 0; I
< WorkQ
.size(); ++I
) {
234 NodeSet
&Ns
= Edges
[N
];
235 auto P
= getUniqueColor(Ns
);
237 Colors
[N
] = other(P
.second
);
241 // Coloring failed. Split this node.
243 ColorKind ColorN
= other(ColorKind::None
);
244 ColorKind ColorC
= other(ColorN
);
245 NodeSet
&Cs
= Edges
[C
];
247 for (Node M
: CopyNs
) {
248 ColorKind ColorM
= getColor(M
);
249 if (ColorM
== ColorC
) {
250 // Connect M with C, disconnect M from N.
261 // Explicitly assign "None" to all uncolored nodes.
262 for (unsigned I
= 0; I
!= Order
.size(); ++I
)
263 if (Colors
.count(I
) == 0)
264 Colors
[I
] = ColorKind::None
;
269 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
270 void Coloring::dump() const {
271 dbgs() << "{ Order: {";
272 for (unsigned I
= 0; I
!= Order
.size(); ++I
) {
280 dbgs() << " Needed: {";
281 for (Node N
: Needed
)
285 dbgs() << " Edges: {\n";
286 for (auto E
: Edges
) {
287 dbgs() << " " << E
.first
<< " -> {";
288 for (auto N
: E
.second
)
294 auto ColorKindToName
= [](ColorKind C
) {
296 case ColorKind::None
:
300 case ColorKind::Black
:
303 llvm_unreachable("all ColorKinds should be handled by the switch above");
306 dbgs() << " Colors: {\n";
307 for (auto C
: Colors
)
308 dbgs() << " " << C
.first
<< " -> " << ColorKindToName(C
.second
) << "\n";
314 // Base class of for reordering networks. They don't strictly need to be
315 // permutations, as outputs with repeated occurrences of an input element
318 using Controls
= std::vector
<uint8_t>;
319 using ElemType
= int;
320 static constexpr ElemType Ignore
= ElemType(-1);
332 PermNetwork(ArrayRef
<ElemType
> Ord
, unsigned Mult
= 1) {
333 Order
.assign(Ord
.data(), Ord
.data()+Ord
.size());
336 unsigned S
= Order
.size();
340 Table
.resize(Order
.size());
341 for (RowType
&Row
: Table
)
342 Row
.resize(Mult
*Log
, None
);
345 void getControls(Controls
&V
, unsigned StartAt
, uint8_t Dir
) const {
346 unsigned Size
= Order
.size();
348 for (unsigned I
= 0; I
!= Size
; ++I
) {
350 for (unsigned L
= 0; L
!= Log
; ++L
) {
351 unsigned C
= ctl(I
, StartAt
+L
) == Switch
;
357 assert(isUInt
<8>(W
));
362 uint8_t ctl(ElemType Pos
, unsigned Step
) const {
363 return Table
[Pos
][Step
];
365 unsigned size() const {
368 unsigned steps() const {
374 std::vector
<ElemType
> Order
;
375 using RowType
= std::vector
<uint8_t>;
376 std::vector
<RowType
> Table
;
379 struct ForwardDeltaNetwork
: public PermNetwork
{
380 ForwardDeltaNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
) {}
382 bool run(Controls
&V
) {
383 if (!route(Order
.data(), Table
.data(), size(), 0))
385 getControls(V
, 0, Forward
);
390 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
393 struct ReverseDeltaNetwork
: public PermNetwork
{
394 ReverseDeltaNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
) {}
396 bool run(Controls
&V
) {
397 if (!route(Order
.data(), Table
.data(), size(), 0))
399 getControls(V
, 0, Reverse
);
404 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
407 struct BenesNetwork
: public PermNetwork
{
408 BenesNetwork(ArrayRef
<ElemType
> Ord
) : PermNetwork(Ord
, 2) {}
410 bool run(Controls
&F
, Controls
&R
) {
411 if (!route(Order
.data(), Table
.data(), size(), 0))
414 getControls(F
, 0, Forward
);
415 getControls(R
, Log
, Reverse
);
420 bool route(ElemType
*P
, RowType
*T
, unsigned Size
, unsigned Step
);
424 bool ForwardDeltaNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
426 bool UseUp
= false, UseDown
= false;
429 // Cannot use coloring here, because coloring is used to determine
430 // the "big" switch, i.e. the one that changes halves, and in a forward
431 // network, a color can be simultaneously routed to both halves in the
432 // step we're working on.
433 for (ElemType J
= 0; J
!= Num
; ++J
) {
435 // I is the position in the input,
436 // J is the position in the output.
441 S
= (J
< Num
/2) ? Pass
: Switch
;
443 S
= (J
< Num
/2) ? Switch
: Pass
;
445 // U is the element in the table that needs to be updated.
446 ElemType U
= (S
== Pass
) ? I
: (I
< Num
/2 ? I
+Num
/2 : I
-Num
/2);
451 if (T
[U
][Step
] != S
&& T
[U
][Step
] != None
)
456 for (ElemType J
= 0; J
!= Num
; ++J
)
457 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
461 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
463 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
469 bool ReverseDeltaNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
471 unsigned Pets
= Log
-1 - Step
;
472 bool UseUp
= false, UseDown
= false;
475 // In this step half-switching occurs, so coloring can be used.
476 Coloring
G({P
,Size
});
477 const Coloring::MapType
&M
= G
.colors();
481 ColorKind ColorUp
= ColorKind::None
;
482 for (ElemType J
= 0; J
!= Num
; ++J
) {
484 // I is the position in the input,
485 // J is the position in the output.
488 ColorKind C
= M
.at(I
);
489 if (C
== ColorKind::None
)
491 // During "Step", inputs cannot switch halves, so if the "up" color
492 // is still unknown, make sure that it is selected in such a way that
493 // "I" will stay in the same half.
494 bool InpUp
= I
< Num
/2;
495 if (ColorUp
== ColorKind::None
)
496 ColorUp
= InpUp
? C
: G
.other(C
);
497 if ((C
== ColorUp
) != InpUp
) {
498 // If I should go to a different half than where is it now, give up.
504 S
= (J
< Num
/2) ? Pass
: Switch
;
507 S
= (J
< Num
/2) ? Switch
: Pass
;
513 // Reorder the working permutation according to the computed switch table
514 // for the last step (i.e. Pets).
515 for (ElemType J
= 0, E
= Size
/ 2; J
!= E
; ++J
) {
516 ElemType PJ
= P
[J
]; // Current values of P[J]
517 ElemType PC
= P
[J
+Size
/2]; // and P[conj(J)]
518 ElemType QJ
= PJ
; // New values of P[J]
519 ElemType QC
= PC
; // and P[conj(J)]
520 if (T
[J
][Pets
] == Switch
)
522 if (T
[J
+Size
/2][Pets
] == Switch
)
528 for (ElemType J
= 0; J
!= Num
; ++J
)
529 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
533 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
535 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
541 bool BenesNetwork::route(ElemType
*P
, RowType
*T
, unsigned Size
,
543 Coloring
G({P
,Size
});
544 const Coloring::MapType
&M
= G
.colors();
549 unsigned Pets
= 2*Log
-1 - Step
;
550 bool UseUp
= false, UseDown
= false;
552 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
553 // result in different controls. Let's pick the one where the first
554 // control will be "Pass".
555 ColorKind ColorUp
= ColorKind::None
;
556 for (ElemType J
= 0; J
!= Num
; ++J
) {
560 ColorKind C
= M
.at(I
);
561 if (C
== ColorKind::None
)
563 if (ColorUp
== ColorKind::None
) {
564 ColorUp
= (I
< Num
/ 2) ? ColorKind::Red
: ColorKind::Black
;
566 unsigned CI
= (I
< Num
/2) ? I
+Num
/2 : I
-Num
/2;
571 T
[CI
][Step
] = Switch
;
572 T
[J
][Pets
] = (J
< Num
/2) ? Pass
: Switch
;
576 T
[CI
][Step
] = Switch
;
579 T
[J
][Pets
] = (J
< Num
/2) ? Switch
: Pass
;
584 // Reorder the working permutation according to the computed switch table
585 // for the last step (i.e. Pets).
586 for (ElemType J
= 0; J
!= Num
/2; ++J
) {
587 ElemType PJ
= P
[J
]; // Current values of P[J]
588 ElemType PC
= P
[J
+Num
/2]; // and P[conj(J)]
589 ElemType QJ
= PJ
; // New values of P[J]
590 ElemType QC
= PC
; // and P[conj(J)]
591 if (T
[J
][Pets
] == Switch
)
593 if (T
[J
+Num
/2][Pets
] == Switch
)
599 for (ElemType J
= 0; J
!= Num
; ++J
)
600 if (P
[J
] != Ignore
&& P
[J
] >= Num
/2)
604 if (UseUp
&& !route(P
, T
, Size
/2, Step
+1))
606 if (UseDown
&& !route(P
+Size
/2, T
+Size
/2, Size
/2, Step
+1))
612 // --------------------------------------------------------------------
613 // Support for building selection results (output instructions that are
614 // parts of the final selection).
618 OpRef(SDValue V
) : OpV(V
) {}
619 bool isValue() const { return OpV
.getNode() != nullptr; }
620 bool isValid() const { return isValue() || !(OpN
& Invalid
); }
621 static OpRef
res(int N
) { return OpRef(Whole
| (N
& Index
)); }
622 static OpRef
fail() { return OpRef(Invalid
); }
624 static OpRef
lo(const OpRef
&R
) {
625 assert(!R
.isValue());
626 return OpRef(R
.OpN
& (Undef
| Index
| LoHalf
));
628 static OpRef
hi(const OpRef
&R
) {
629 assert(!R
.isValue());
630 return OpRef(R
.OpN
& (Undef
| Index
| HiHalf
));
632 static OpRef
undef(MVT Ty
) { return OpRef(Undef
| Ty
.SimpleTy
); }
635 SDValue OpV
= SDValue();
637 // Reference to the operand of the input node:
638 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
640 // If bit 30 is set, it's the high half of the operand.
641 // If bit 29 is set, it's the low half of the operand.
645 Invalid
= 0x10000000,
648 Whole
= LoHalf
| HiHalf
,
650 Index
= 0x0FFFFFFF, // Mask of the index value.
655 void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
658 OpRef(unsigned N
) : OpN(N
) {}
661 struct NodeTemplate
{
662 NodeTemplate() = default;
665 std::vector
<OpRef
> Ops
;
667 LLVM_DUMP_METHOD
void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
671 ResultStack(SDNode
*Inp
)
672 : InpNode(Inp
), InpTy(Inp
->getValueType(0).getSimpleVT()) {}
675 unsigned push(const NodeTemplate
&Res
) {
677 return List
.size()-1;
679 unsigned push(unsigned Opc
, MVT Ty
, std::vector
<OpRef
> &&Ops
) {
686 bool empty() const { return List
.empty(); }
687 unsigned size() const { return List
.size(); }
688 unsigned top() const { return size()-1; }
689 const NodeTemplate
&operator[](unsigned I
) const { return List
[I
]; }
690 unsigned reset(unsigned NewTop
) {
691 List
.resize(NewTop
+1);
695 using BaseType
= std::vector
<NodeTemplate
>;
696 BaseType::iterator
begin() { return List
.begin(); }
697 BaseType::iterator
end() { return List
.end(); }
698 BaseType::const_iterator
begin() const { return List
.begin(); }
699 BaseType::const_iterator
end() const { return List
.end(); }
704 void print(raw_ostream
&OS
, const SelectionDAG
&G
) const;
708 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
709 void OpRef::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
711 OpV
.getNode()->print(OS
, &G
);
722 if ((OpN
& Whole
) != Whole
) {
723 assert((OpN
& Whole
) == LoHalf
|| (OpN
& Whole
) == HiHalf
);
729 OS
<< '#' << SignExtend32(OpN
& Index
, IndexBits
);
732 void NodeTemplate::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
733 const TargetInstrInfo
&TII
= *G
.getSubtarget().getInstrInfo();
734 OS
<< format("%8s", EVT(Ty
).getEVTString().c_str()) << " "
737 for (const auto &R
: Ops
) {
746 void ResultStack::print(raw_ostream
&OS
, const SelectionDAG
&G
) const {
747 OS
<< "Input node:\n";
751 OS
<< "Result templates:\n";
752 for (unsigned I
= 0, E
= List
.size(); I
!= E
; ++I
) {
753 OS
<< '[' << I
<< "] ";
754 List
[I
].print(OS
, G
);
762 ShuffleMask(ArrayRef
<int> M
) : Mask(M
) {
763 for (unsigned I
= 0, E
= Mask
.size(); I
!= E
; ++I
) {
767 MinSrc
= (MinSrc
== -1) ? M
: std::min(MinSrc
, M
);
768 MaxSrc
= (MaxSrc
== -1) ? M
: std::max(MaxSrc
, M
);
773 int MinSrc
= -1, MaxSrc
= -1;
775 ShuffleMask
lo() const {
776 size_t H
= Mask
.size()/2;
777 return ShuffleMask(Mask
.take_front(H
));
779 ShuffleMask
hi() const {
780 size_t H
= Mask
.size()/2;
781 return ShuffleMask(Mask
.take_back(H
));
784 void print(raw_ostream
&OS
) const {
785 OS
<< "MinSrc:" << MinSrc
<< ", MaxSrc:" << MaxSrc
<< " {";
793 // --------------------------------------------------------------------
794 // The HvxSelector class.
796 static const HexagonTargetLowering
&getHexagonLowering(SelectionDAG
&G
) {
797 return static_cast<const HexagonTargetLowering
&>(G
.getTargetLoweringInfo());
799 static const HexagonSubtarget
&getHexagonSubtarget(SelectionDAG
&G
) {
800 return static_cast<const HexagonSubtarget
&>(G
.getSubtarget());
805 const HexagonTargetLowering
&Lower
;
806 HexagonDAGToDAGISel
&ISel
;
808 const HexagonSubtarget
&HST
;
809 const unsigned HwLen
;
811 HvxSelector(HexagonDAGToDAGISel
&HS
, SelectionDAG
&G
)
812 : Lower(getHexagonLowering(G
)), ISel(HS
), DAG(G
),
813 HST(getHexagonSubtarget(G
)), HwLen(HST
.getVectorLength()) {}
815 MVT
getSingleVT(MVT ElemTy
) const {
816 unsigned NumElems
= HwLen
/ (ElemTy
.getSizeInBits()/8);
817 return MVT::getVectorVT(ElemTy
, NumElems
);
820 MVT
getPairVT(MVT ElemTy
) const {
821 unsigned NumElems
= (2*HwLen
) / (ElemTy
.getSizeInBits()/8);
822 return MVT::getVectorVT(ElemTy
, NumElems
);
825 void selectShuffle(SDNode
*N
);
826 void selectRor(SDNode
*N
);
827 void selectVAlign(SDNode
*N
);
830 void materialize(const ResultStack
&Results
);
832 SDValue
getVectorConstant(ArrayRef
<uint8_t> Data
, const SDLoc
&dl
);
838 OpRef
concat(OpRef Va
, OpRef Vb
, ResultStack
&Results
);
839 OpRef
packs(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
,
840 MutableArrayRef
<int> NewMask
, unsigned Options
= None
);
841 OpRef
packp(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
,
842 MutableArrayRef
<int> NewMask
);
843 OpRef
vmuxs(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
844 ResultStack
&Results
);
845 OpRef
vmuxp(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
846 ResultStack
&Results
);
848 OpRef
shuffs1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
849 OpRef
shuffs2(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
850 OpRef
shuffp1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
851 OpRef
shuffp2(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
853 OpRef
butterfly(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
854 OpRef
contracting(ShuffleMask SM
, OpRef Va
, OpRef Vb
, ResultStack
&Results
);
855 OpRef
expanding(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
856 OpRef
perfect(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
);
858 bool selectVectorConstants(SDNode
*N
);
859 bool scalarizeShuffle(ArrayRef
<int> Mask
, const SDLoc
&dl
, MVT ResTy
,
860 SDValue Va
, SDValue Vb
, SDNode
*N
);
865 static void splitMask(ArrayRef
<int> Mask
, MutableArrayRef
<int> MaskL
,
866 MutableArrayRef
<int> MaskR
) {
867 unsigned VecLen
= Mask
.size();
868 assert(MaskL
.size() == VecLen
&& MaskR
.size() == VecLen
);
869 for (unsigned I
= 0; I
!= VecLen
; ++I
) {
872 MaskL
[I
] = MaskR
[I
] = -1;
873 } else if (unsigned(M
) < VecLen
) {
883 static std::pair
<int,unsigned> findStrip(ArrayRef
<int> A
, int Inc
,
885 assert(A
.size() > 0 && A
.size() >= MaxLen
);
888 for (unsigned I
= 1; I
!= MaxLen
; ++I
) {
893 return { F
, MaxLen
};
896 static bool isUndef(ArrayRef
<int> Mask
) {
903 static bool isIdentity(ArrayRef
<int> Mask
) {
904 for (int I
= 0, E
= Mask
.size(); I
!= E
; ++I
) {
906 if (M
>= 0 && M
!= I
)
912 static bool isPermutation(ArrayRef
<int> Mask
) {
913 // Check by adding all numbers only works if there is no overflow.
914 assert(Mask
.size() < 0x00007FFF && "Sanity failure");
916 for (int Idx
: Mask
) {
922 return 2*Sum
== N
*(N
-1);
925 bool HvxSelector::selectVectorConstants(SDNode
*N
) {
926 // Constant vectors are generated as loads from constant pools or as
927 // splats of a constant value. Since they are generated during the
928 // selection process, the main selection algorithm is not aware of them.
929 // Select them directly here.
930 SmallVector
<SDNode
*,4> Nodes
;
931 SetVector
<SDNode
*> WorkQ
;
933 // The one-use test for VSPLATW's operand may fail due to dead nodes
934 // left over in the DAG.
935 DAG
.RemoveDeadNodes();
937 // The DAG can change (due to CSE) during selection, so cache all the
938 // unselected nodes first to avoid traversing a mutating DAG.
940 auto IsNodeToSelect
= [] (SDNode
*N
) {
941 if (N
->isMachineOpcode())
943 switch (N
->getOpcode()) {
944 case HexagonISD::VZERO
:
945 case HexagonISD::VSPLATW
:
948 SDValue Addr
= cast
<LoadSDNode
>(N
)->getBasePtr();
949 unsigned AddrOpc
= Addr
.getOpcode();
950 if (AddrOpc
== HexagonISD::AT_PCREL
|| AddrOpc
== HexagonISD::CP
)
951 if (Addr
.getOperand(0).getOpcode() == ISD::TargetConstantPool
)
956 // Make sure to select the operand of VSPLATW.
957 bool IsSplatOp
= N
->hasOneUse() &&
958 N
->use_begin()->getOpcode() == HexagonISD::VSPLATW
;
963 for (unsigned i
= 0; i
!= WorkQ
.size(); ++i
) {
964 SDNode
*W
= WorkQ
[i
];
965 if (IsNodeToSelect(W
))
967 for (unsigned j
= 0, f
= W
->getNumOperands(); j
!= f
; ++j
)
968 WorkQ
.insert(W
->getOperand(j
).getNode());
971 for (SDNode
*L
: Nodes
)
974 return !Nodes
.empty();
977 void HvxSelector::materialize(const ResultStack
&Results
) {
978 DEBUG_WITH_TYPE("isel", {
979 dbgs() << "Materializing\n";
980 Results
.print(dbgs(), DAG
);
984 const SDLoc
&dl(Results
.InpNode
);
985 std::vector
<SDValue
> Output
;
987 for (unsigned I
= 0, E
= Results
.size(); I
!= E
; ++I
) {
988 const NodeTemplate
&Node
= Results
[I
];
989 std::vector
<SDValue
> Ops
;
990 for (const OpRef
&R
: Node
.Ops
) {
993 Ops
.push_back(R
.OpV
);
996 if (R
.OpN
& OpRef::Undef
) {
997 MVT::SimpleValueType SVT
= MVT::SimpleValueType(R
.OpN
& OpRef::Index
);
998 Ops
.push_back(ISel
.selectUndef(dl
, MVT(SVT
)));
1001 // R is an index of a result.
1002 unsigned Part
= R
.OpN
& OpRef::Whole
;
1003 int Idx
= SignExtend32(R
.OpN
& OpRef::Index
, OpRef::IndexBits
);
1006 assert(Idx
>= 0 && unsigned(Idx
) < Output
.size());
1007 SDValue Op
= Output
[Idx
];
1008 MVT OpTy
= Op
.getValueType().getSimpleVT();
1009 if (Part
!= OpRef::Whole
) {
1010 assert(Part
== OpRef::LoHalf
|| Part
== OpRef::HiHalf
);
1011 MVT HalfTy
= MVT::getVectorVT(OpTy
.getVectorElementType(),
1012 OpTy
.getVectorNumElements()/2);
1013 unsigned Sub
= (Part
== OpRef::LoHalf
) ? Hexagon::vsub_lo
1015 Op
= DAG
.getTargetExtractSubreg(Sub
, dl
, HalfTy
, Op
);
1018 } // for (Node : Results)
1020 assert(Node
.Ty
!= MVT::Other
);
1021 SDNode
*ResN
= (Node
.Opc
== TargetOpcode::COPY
)
1022 ? Ops
.front().getNode()
1023 : DAG
.getMachineNode(Node
.Opc
, dl
, Node
.Ty
, Ops
);
1024 Output
.push_back(SDValue(ResN
, 0));
1027 SDNode
*OutN
= Output
.back().getNode();
1028 SDNode
*InpN
= Results
.InpNode
;
1029 DEBUG_WITH_TYPE("isel", {
1030 dbgs() << "Generated node:\n";
1034 ISel
.ReplaceNode(InpN
, OutN
);
1035 selectVectorConstants(OutN
);
1036 DAG
.RemoveDeadNodes();
1039 OpRef
HvxSelector::concat(OpRef Lo
, OpRef Hi
, ResultStack
&Results
) {
1040 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1041 const SDLoc
&dl(Results
.InpNode
);
1042 Results
.push(TargetOpcode::REG_SEQUENCE
, getPairVT(MVT::i8
), {
1043 DAG
.getTargetConstant(Hexagon::HvxWRRegClassID
, dl
, MVT::i32
),
1044 Lo
, DAG
.getTargetConstant(Hexagon::vsub_lo
, dl
, MVT::i32
),
1045 Hi
, DAG
.getTargetConstant(Hexagon::vsub_hi
, dl
, MVT::i32
),
1047 return OpRef::res(Results
.top());
1050 // Va, Vb are single vectors, SM can be arbitrarily long.
1051 OpRef
HvxSelector::packs(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1052 ResultStack
&Results
, MutableArrayRef
<int> NewMask
,
1054 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1055 if (!Va
.isValid() || !Vb
.isValid())
1056 return OpRef::fail();
1058 int VecLen
= SM
.Mask
.size();
1059 MVT Ty
= getSingleVT(MVT::i8
);
1061 auto IsExtSubvector
= [] (ShuffleMask M
) {
1062 assert(M
.MinSrc
>= 0 && M
.MaxSrc
>= 0);
1063 for (int I
= 0, E
= M
.Mask
.size(); I
!= E
; ++I
) {
1064 if (M
.Mask
[I
] >= 0 && M
.Mask
[I
]-I
!= M
.MinSrc
)
1070 if (SM
.MaxSrc
- SM
.MinSrc
< int(HwLen
)) {
1071 if (SM
.MinSrc
== 0 || SM
.MinSrc
== int(HwLen
) || !IsExtSubvector(SM
)) {
1072 // If the mask picks elements from only one of the operands, return
1073 // that operand, and update the mask to use index 0 to refer to the
1074 // first element of that operand.
1075 // If the mask extracts a subvector, it will be handled below, so
1077 if (SM
.MaxSrc
< int(HwLen
)) {
1078 memcpy(NewMask
.data(), SM
.Mask
.data(), sizeof(int)*VecLen
);
1081 if (SM
.MinSrc
>= int(HwLen
)) {
1082 for (int I
= 0; I
!= VecLen
; ++I
) {
1091 int MinSrc
= SM
.MinSrc
;
1092 if (SM
.MaxSrc
< int(HwLen
)) {
1094 } else if (SM
.MinSrc
> int(HwLen
)) {
1096 MinSrc
= SM
.MinSrc
- HwLen
;
1098 const SDLoc
&dl(Results
.InpNode
);
1099 if (isUInt
<3>(MinSrc
) || isUInt
<3>(HwLen
-MinSrc
)) {
1100 bool IsRight
= isUInt
<3>(MinSrc
); // Right align.
1101 SDValue S
= DAG
.getTargetConstant(IsRight
? MinSrc
: HwLen
-MinSrc
,
1103 unsigned Opc
= IsRight
? Hexagon::V6_valignbi
1104 : Hexagon::V6_vlalignbi
;
1105 Results
.push(Opc
, Ty
, {Vb
, Va
, S
});
1107 SDValue S
= DAG
.getTargetConstant(MinSrc
, dl
, MVT::i32
);
1108 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
, {S
});
1109 unsigned Top
= Results
.top();
1110 Results
.push(Hexagon::V6_valignb
, Ty
, {Vb
, Va
, OpRef::res(Top
)});
1112 for (int I
= 0; I
!= VecLen
; ++I
) {
1118 return OpRef::res(Results
.top());
1121 if (Options
& PackMux
) {
1122 // If elements picked from Va and Vb have all different (source) indexes
1123 // (relative to the start of the argument), do a mux, and update the mask.
1124 BitVector
Picked(HwLen
);
1125 SmallVector
<uint8_t,128> MuxBytes(HwLen
);
1127 for (int I
= 0; I
!= VecLen
; ++I
) {
1131 if (M
>= int(HwLen
))
1142 return vmuxs(MuxBytes
, Va
, Vb
, Results
);
1145 return OpRef::fail();
1148 OpRef
HvxSelector::packp(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1149 ResultStack
&Results
, MutableArrayRef
<int> NewMask
) {
1150 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1151 unsigned HalfMask
= 0;
1152 unsigned LogHw
= Log2_32(HwLen
);
1153 for (int M
: SM
.Mask
) {
1156 HalfMask
|= (1u << (M
>> LogHw
));
1160 return OpRef::undef(getPairVT(MVT::i8
));
1162 // If more than two halves are used, bail.
1163 // TODO: be more aggressive here?
1164 if (countPopulation(HalfMask
) > 2)
1165 return OpRef::fail();
1167 MVT HalfTy
= getSingleVT(MVT::i8
);
1169 OpRef Inp
[2] = { Va
, Vb
};
1170 OpRef Out
[2] = { OpRef::undef(HalfTy
), OpRef::undef(HalfTy
) };
1172 uint8_t HalfIdx
[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
1174 for (unsigned I
= 0; I
!= 4; ++I
) {
1175 if ((HalfMask
& (1u << I
)) == 0)
1178 OpRef Op
= Inp
[I
/2];
1179 Out
[Idx
] = (I
& 1) ? OpRef::hi(Op
) : OpRef::lo(Op
);
1183 int VecLen
= SM
.Mask
.size();
1184 for (int I
= 0; I
!= VecLen
; ++I
) {
1187 uint8_t Idx
= HalfIdx
[M
>> LogHw
];
1188 assert(Idx
== 0 || Idx
== 1);
1189 M
= (M
& (HwLen
-1)) + HwLen
*Idx
;
1194 return concat(Out
[0], Out
[1], Results
);
1197 OpRef
HvxSelector::vmuxs(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
1198 ResultStack
&Results
) {
1199 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1200 MVT ByteTy
= getSingleVT(MVT::i8
);
1201 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, 8*HwLen
); // XXX
1202 const SDLoc
&dl(Results
.InpNode
);
1203 SDValue B
= getVectorConstant(Bytes
, dl
);
1204 Results
.push(Hexagon::V6_vd0
, ByteTy
, {});
1205 Results
.push(Hexagon::V6_veqb
, BoolTy
, {OpRef(B
), OpRef::res(-1)});
1206 Results
.push(Hexagon::V6_vmux
, ByteTy
, {OpRef::res(-1), Vb
, Va
});
1207 return OpRef::res(Results
.top());
1210 OpRef
HvxSelector::vmuxp(ArrayRef
<uint8_t> Bytes
, OpRef Va
, OpRef Vb
,
1211 ResultStack
&Results
) {
1212 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1213 size_t S
= Bytes
.size() / 2;
1214 OpRef L
= vmuxs(Bytes
.take_front(S
), OpRef::lo(Va
), OpRef::lo(Vb
), Results
);
1215 OpRef H
= vmuxs(Bytes
.drop_front(S
), OpRef::hi(Va
), OpRef::hi(Vb
), Results
);
1216 return concat(L
, H
, Results
);
1219 OpRef
HvxSelector::shuffs1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1220 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1221 unsigned VecLen
= SM
.Mask
.size();
1222 assert(HwLen
== VecLen
);
1224 assert(all_of(SM
.Mask
, [this](int M
) { return M
== -1 || M
< int(HwLen
); }));
1226 if (isIdentity(SM
.Mask
))
1228 if (isUndef(SM
.Mask
))
1229 return OpRef::undef(getSingleVT(MVT::i8
));
1231 OpRef P
= perfect(SM
, Va
, Results
);
1234 return butterfly(SM
, Va
, Results
);
1237 OpRef
HvxSelector::shuffs2(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1238 ResultStack
&Results
) {
1239 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1240 if (isUndef(SM
.Mask
))
1241 return OpRef::undef(getSingleVT(MVT::i8
));
1243 OpRef C
= contracting(SM
, Va
, Vb
, Results
);
1247 int VecLen
= SM
.Mask
.size();
1248 SmallVector
<int,128> NewMask(VecLen
);
1249 OpRef P
= packs(SM
, Va
, Vb
, Results
, NewMask
);
1251 return shuffs1(ShuffleMask(NewMask
), P
, Results
);
1253 SmallVector
<int,128> MaskL(VecLen
), MaskR(VecLen
);
1254 splitMask(SM
.Mask
, MaskL
, MaskR
);
1256 OpRef L
= shuffs1(ShuffleMask(MaskL
), Va
, Results
);
1257 OpRef R
= shuffs1(ShuffleMask(MaskR
), Vb
, Results
);
1258 if (!L
.isValid() || !R
.isValid())
1259 return OpRef::fail();
1261 SmallVector
<uint8_t,128> Bytes(VecLen
);
1262 for (int I
= 0; I
!= VecLen
; ++I
) {
1266 return vmuxs(Bytes
, L
, R
, Results
);
1269 OpRef
HvxSelector::shuffp1(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1270 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1271 int VecLen
= SM
.Mask
.size();
1273 if (isIdentity(SM
.Mask
))
1275 if (isUndef(SM
.Mask
))
1276 return OpRef::undef(getPairVT(MVT::i8
));
1278 SmallVector
<int,128> PackedMask(VecLen
);
1279 OpRef P
= packs(SM
, OpRef::lo(Va
), OpRef::hi(Va
), Results
, PackedMask
);
1281 ShuffleMask
PM(PackedMask
);
1282 OpRef E
= expanding(PM
, P
, Results
);
1286 OpRef L
= shuffs1(PM
.lo(), P
, Results
);
1287 OpRef H
= shuffs1(PM
.hi(), P
, Results
);
1288 if (L
.isValid() && H
.isValid())
1289 return concat(L
, H
, Results
);
1292 OpRef R
= perfect(SM
, Va
, Results
);
1295 // TODO commute the mask and try the opposite order of the halves.
1297 OpRef L
= shuffs2(SM
.lo(), OpRef::lo(Va
), OpRef::hi(Va
), Results
);
1298 OpRef H
= shuffs2(SM
.hi(), OpRef::lo(Va
), OpRef::hi(Va
), Results
);
1299 if (L
.isValid() && H
.isValid())
1300 return concat(L
, H
, Results
);
1302 return OpRef::fail();
1305 OpRef
HvxSelector::shuffp2(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1306 ResultStack
&Results
) {
1307 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1308 if (isUndef(SM
.Mask
))
1309 return OpRef::undef(getPairVT(MVT::i8
));
1311 int VecLen
= SM
.Mask
.size();
1312 SmallVector
<int,256> PackedMask(VecLen
);
1313 OpRef P
= packp(SM
, Va
, Vb
, Results
, PackedMask
);
1315 return shuffp1(ShuffleMask(PackedMask
), P
, Results
);
1317 SmallVector
<int,256> MaskL(VecLen
), MaskR(VecLen
);
1318 splitMask(SM
.Mask
, MaskL
, MaskR
);
1320 OpRef L
= shuffp1(ShuffleMask(MaskL
), Va
, Results
);
1321 OpRef R
= shuffp1(ShuffleMask(MaskR
), Vb
, Results
);
1322 if (!L
.isValid() || !R
.isValid())
1323 return OpRef::fail();
1326 SmallVector
<uint8_t,256> Bytes(VecLen
);
1327 for (int I
= 0; I
!= VecLen
; ++I
) {
1331 return vmuxp(Bytes
, L
, R
, Results
);
1335 struct Deleter
: public SelectionDAG::DAGNodeDeletedListener
{
1336 template <typename T
>
1337 Deleter(SelectionDAG
&D
, T
&C
)
1338 : SelectionDAG::DAGNodeDeletedListener(D
, [&C
] (SDNode
*N
, SDNode
*E
) {
1343 template <typename T
>
1344 struct NullifyingVector
: public T
{
1345 DenseMap
<SDNode
*, SDNode
**> Refs
;
1346 NullifyingVector(T
&&V
) : T(V
) {
1347 for (unsigned i
= 0, e
= T::size(); i
!= e
; ++i
) {
1348 SDNode
*&N
= T::operator[](i
);
1352 void erase(SDNode
*N
) {
1353 auto F
= Refs
.find(N
);
1354 if (F
!= Refs
.end())
1355 *F
->second
= nullptr;
1360 bool HvxSelector::scalarizeShuffle(ArrayRef
<int> Mask
, const SDLoc
&dl
,
1361 MVT ResTy
, SDValue Va
, SDValue Vb
,
1363 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1364 MVT ElemTy
= ResTy
.getVectorElementType();
1365 assert(ElemTy
== MVT::i8
);
1366 unsigned VecLen
= Mask
.size();
1367 bool HavePairs
= (2*HwLen
== VecLen
);
1368 MVT SingleTy
= getSingleVT(MVT::i8
);
1370 // The prior attempts to handle this shuffle may have left a bunch of
1371 // dead nodes in the DAG (such as constants). These nodes will be added
1372 // at the end of DAG's node list, which at that point had already been
1373 // sorted topologically. In the main selection loop, the node list is
1374 // traversed backwards from the root node, which means that any new
1375 // nodes (from the end of the list) will not be visited.
1376 // Scalarization will replace the shuffle node with the scalarized
1377 // expression, and if that expression reused any if the leftoever (dead)
1378 // nodes, these nodes would not be selected (since the "local" selection
1379 // only visits nodes that are not in AllNodes).
1380 // To avoid this issue, remove all dead nodes from the DAG now.
1381 DAG
.RemoveDeadNodes();
1382 DenseSet
<SDNode
*> AllNodes
;
1383 for (SDNode
&S
: DAG
.allnodes())
1384 AllNodes
.insert(&S
);
1386 Deleter
DUA(DAG
, AllNodes
);
1388 SmallVector
<SDValue
,128> Ops
;
1389 LLVMContext
&Ctx
= *DAG
.getContext();
1390 MVT LegalTy
= Lower
.getTypeToTransformTo(Ctx
, ElemTy
).getSimpleVT();
1391 for (int I
: Mask
) {
1393 Ops
.push_back(ISel
.selectUndef(dl
, LegalTy
));
1406 Vec
= DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, SingleTy
, Vec
);
1408 Vec
= DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, SingleTy
, Vec
);
1412 SDValue Idx
= DAG
.getConstant(M
, dl
, MVT::i32
);
1413 SDValue Ex
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, LegalTy
, {Vec
, Idx
});
1414 SDValue L
= Lower
.LowerOperation(Ex
, DAG
);
1415 assert(L
.getNode());
1420 if (2*HwLen
== VecLen
) {
1421 SDValue B0
= DAG
.getBuildVector(SingleTy
, dl
, {Ops
.data(), HwLen
});
1422 SDValue L0
= Lower
.LowerOperation(B0
, DAG
);
1423 SDValue B1
= DAG
.getBuildVector(SingleTy
, dl
, {Ops
.data()+HwLen
, HwLen
});
1424 SDValue L1
= Lower
.LowerOperation(B1
, DAG
);
1425 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1426 // functions may expect to be called only for illegal operations, so
1427 // make sure that they are not called for legal ones. Develop a better
1428 // mechanism for dealing with this.
1429 LV
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, ResTy
, {L0
, L1
});
1431 SDValue BV
= DAG
.getBuildVector(ResTy
, dl
, Ops
);
1432 LV
= Lower
.LowerOperation(BV
, DAG
);
1435 assert(!N
->use_empty());
1436 ISel
.ReplaceNode(N
, LV
.getNode());
1438 if (AllNodes
.count(LV
.getNode())) {
1439 DAG
.RemoveDeadNodes();
1443 // The lowered build-vector node will now need to be selected. It needs
1444 // to be done here because this node and its submodes are not included
1445 // in the main selection loop.
1446 // Implement essentially the same topological ordering algorithm as is
1447 // used in SelectionDAGISel.
1449 SetVector
<SDNode
*> SubNodes
, TmpQ
;
1450 std::map
<SDNode
*,unsigned> NumOps
;
1452 SubNodes
.insert(LV
.getNode());
1453 for (unsigned I
= 0; I
!= SubNodes
.size(); ++I
) {
1455 SDNode
*S
= SubNodes
[I
];
1456 for (SDValue Op
: S
->ops()) {
1457 if (AllNodes
.count(Op
.getNode()))
1459 SubNodes
.insert(Op
.getNode());
1462 NumOps
.insert({S
, OpN
});
1467 for (unsigned I
= 0; I
!= TmpQ
.size(); ++I
) {
1468 SDNode
*S
= TmpQ
[I
];
1469 for (SDNode
*U
: S
->uses()) {
1470 if (!SubNodes
.count(U
))
1472 auto F
= NumOps
.find(U
);
1473 assert(F
!= NumOps
.end());
1474 assert(F
->second
> 0);
1476 TmpQ
.insert(F
->first
);
1479 assert(SubNodes
.size() == TmpQ
.size());
1480 NullifyingVector
<decltype(TmpQ
)::vector_type
> Queue(TmpQ
.takeVector());
1482 Deleter
DUQ(DAG
, Queue
);
1483 for (SDNode
*S
: reverse(Queue
))
1487 DAG
.RemoveDeadNodes();
1491 OpRef
HvxSelector::contracting(ShuffleMask SM
, OpRef Va
, OpRef Vb
,
1492 ResultStack
&Results
) {
1493 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1494 if (!Va
.isValid() || !Vb
.isValid())
1495 return OpRef::fail();
1497 // Contracting shuffles, i.e. instructions that always discard some bytes
1498 // from the operand vectors.
1502 // V6_vpack{e,o}{b,h}
1504 int VecLen
= SM
.Mask
.size();
1505 std::pair
<int,unsigned> Strip
= findStrip(SM
.Mask
, 1, VecLen
);
1506 MVT ResTy
= getSingleVT(MVT::i8
);
1508 // The following shuffles only work for bytes and halfwords. This requires
1509 // the strip length to be 1 or 2.
1510 if (Strip
.second
!= 1 && Strip
.second
!= 2)
1511 return OpRef::fail();
1513 // The patterns for the shuffles, in terms of the starting offsets of the
1514 // consecutive strips (L = length of the strip, N = VecLen):
1516 // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
1517 // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
1519 // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
1520 // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
1522 // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
1524 // The value of the element in the mask following the strip will decide
1525 // what kind of a shuffle this can be.
1526 int NextInMask
= SM
.Mask
[Strip
.second
];
1528 // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
1529 // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
1531 if (NextInMask
< VecLen
) {
1532 // vpack{e,o} or vdealb4w
1533 if (Strip
.first
== 0 && Strip
.second
== 1 && NextInMask
== 4) {
1535 // Check if this is vdealb4w (L=1).
1536 for (int I
= 0; I
!= N
/4; ++I
)
1537 if (SM
.Mask
[I
] != 4*I
)
1538 return OpRef::fail();
1539 for (int I
= 0; I
!= N
/4; ++I
)
1540 if (SM
.Mask
[I
+N
/4] != 2 + 4*I
)
1541 return OpRef::fail();
1542 for (int I
= 0; I
!= N
/4; ++I
)
1543 if (SM
.Mask
[I
+N
/2] != N
+ 4*I
)
1544 return OpRef::fail();
1545 for (int I
= 0; I
!= N
/4; ++I
)
1546 if (SM
.Mask
[I
+3*N
/4] != N
+2 + 4*I
)
1547 return OpRef::fail();
1548 // Matched mask for vdealb4w.
1549 Results
.push(Hexagon::V6_vdealb4w
, ResTy
, {Vb
, Va
});
1550 return OpRef::res(Results
.top());
1553 // Check if this is vpack{e,o}.
1555 int L
= Strip
.second
;
1556 // Check if the first strip starts at 0 or at L.
1557 if (Strip
.first
!= 0 && Strip
.first
!= L
)
1558 return OpRef::fail();
1559 // Examine the rest of the mask.
1560 for (int I
= L
; I
< N
; I
+= L
) {
1561 auto S
= findStrip(SM
.Mask
.drop_front(I
), 1, N
-I
);
1562 // Check whether the mask element at the beginning of each strip
1563 // increases by 2L each time.
1564 if (S
.first
- Strip
.first
!= 2*I
)
1565 return OpRef::fail();
1566 // Check whether each strip is of the same length.
1567 if (S
.second
!= unsigned(L
))
1568 return OpRef::fail();
1571 // Strip.first == 0 => vpacke
1572 // Strip.first == L => vpacko
1573 assert(Strip
.first
== 0 || Strip
.first
== L
);
1574 using namespace Hexagon
;
1576 Res
.Opc
= Strip
.second
== 1 // Number of bytes.
1577 ? (Strip
.first
== 0 ? V6_vpackeb
: V6_vpackob
)
1578 : (Strip
.first
== 0 ? V6_vpackeh
: V6_vpackoh
);
1580 Res
.Ops
= { Vb
, Va
};
1582 return OpRef::res(Results
.top());
1585 // Check if this is vshuff{e,o}.
1587 int L
= Strip
.second
;
1588 std::pair
<int,unsigned> PrevS
= Strip
;
1590 for (int I
= L
; I
< N
; I
+= L
) {
1591 auto S
= findStrip(SM
.Mask
.drop_front(I
), 1, N
-I
);
1592 if (S
.second
!= PrevS
.second
)
1593 return OpRef::fail();
1594 int Diff
= Flip
? PrevS
.first
- S
.first
+ 2*L
1595 : S
.first
- PrevS
.first
;
1597 return OpRef::fail();
1601 // Strip.first == 0 => vshuffe
1602 // Strip.first == L => vshuffo
1603 assert(Strip
.first
== 0 || Strip
.first
== L
);
1604 using namespace Hexagon
;
1606 Res
.Opc
= Strip
.second
== 1 // Number of bytes.
1607 ? (Strip
.first
== 0 ? V6_vshuffeb
: V6_vshuffob
)
1608 : (Strip
.first
== 0 ? V6_vshufeh
: V6_vshufoh
);
1610 Res
.Ops
= { Vb
, Va
};
1612 return OpRef::res(Results
.top());
1615 OpRef
HvxSelector::expanding(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1616 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1617 // Expanding shuffles (using all elements and inserting into larger vector):
1619 // V6_vunpacku{b,h} [*]
1621 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
1623 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
1624 // they are not shuffles.
1626 // The argument is a single vector.
1628 int VecLen
= SM
.Mask
.size();
1629 assert(2*HwLen
== unsigned(VecLen
) && "Expecting vector-pair type");
1631 std::pair
<int,unsigned> Strip
= findStrip(SM
.Mask
, 1, VecLen
);
1633 // The patterns for the unpacks, in terms of the starting offsets of the
1634 // consecutive strips (L = length of the strip, N = VecLen):
1636 // vunpacku: 0, -1, L, -1, 2L, -1 ...
1638 if (Strip
.first
!= 0)
1639 return OpRef::fail();
1641 // The vunpackus only handle byte and half-word.
1642 if (Strip
.second
!= 1 && Strip
.second
!= 2)
1643 return OpRef::fail();
1646 int L
= Strip
.second
;
1648 // First, check the non-ignored strips.
1649 for (int I
= 2*L
; I
< 2*N
; I
+= 2*L
) {
1650 auto S
= findStrip(SM
.Mask
.drop_front(I
), 1, N
-I
);
1651 if (S
.second
!= unsigned(L
))
1652 return OpRef::fail();
1654 return OpRef::fail();
1657 for (int I
= L
; I
< 2*N
; I
+= 2*L
) {
1658 auto S
= findStrip(SM
.Mask
.drop_front(I
), 0, N
-I
);
1659 if (S
.first
!= -1 || S
.second
!= unsigned(L
))
1660 return OpRef::fail();
1663 unsigned Opc
= Strip
.second
== 1 ? Hexagon::V6_vunpackub
1664 : Hexagon::V6_vunpackuh
;
1665 Results
.push(Opc
, getPairVT(MVT::i8
), {Va
});
1666 return OpRef::res(Results
.top());
1669 OpRef
HvxSelector::perfect(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1670 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1674 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
1675 // V6_vshuffvdd (V6_vshuff)
1676 // V6_dealvdd (V6_vdeal)
1678 int VecLen
= SM
.Mask
.size();
1679 assert(isPowerOf2_32(VecLen
) && Log2_32(VecLen
) <= 8);
1680 unsigned LogLen
= Log2_32(VecLen
);
1681 unsigned HwLog
= Log2_32(HwLen
);
1682 // The result length must be the same as the length of a single vector,
1683 // or a vector pair.
1684 assert(LogLen
== HwLog
|| LogLen
== HwLog
+1);
1685 bool Extend
= (LogLen
== HwLog
);
1687 if (!isPermutation(SM
.Mask
))
1688 return OpRef::fail();
1690 SmallVector
<unsigned,8> Perm(LogLen
);
1692 // Check if this could be a perfect shuffle, or a combination of perfect
1695 // Consider this permutation (using hex digits to make the ASCII diagrams
1697 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
1698 // This is a "deal" operation: divide the input into two halves, and
1699 // create the output by picking elements by alternating between these two
1701 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
1704 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
1705 // a somwehat different mechanism that could be used to perform shuffle/
1706 // deal operations: a 2x2 transpose.
1707 // Consider the halves of inputs again, they can be interpreted as a 2x8
1708 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
1709 // together. Now, when considering 2 elements at a time, it will be a 2x4
1710 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
1713 // With groups of 4, this will become a single 2x2 matrix, and so on.
1715 // The 2x2 transpose instruction works by transposing each of the 2x2
1716 // matrices (or "sub-matrices"), given a specific group size. For example,
1717 // if the group size is 1 (i.e. each element is its own group), there
1718 // will be four transposes of the four 2x2 matrices that form the 2x8.
1719 // For example, with the inputs as above, the result will be:
1722 // Now, this result can be tranposed again, but with the group size of 2:
1725 // If we then transpose that result, but with the group size of 4, we get:
1728 // If we concatenate these two rows, it will be
1729 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
1730 // which is the same as the "deal" [*] above.
1732 // In general, a "deal" of individual elements is a series of 2x2 transposes,
1733 // with changing group size. HVX has two instructions:
1734 // Vdd = V6_vdealvdd Vu, Vv, Rt
1735 // Vdd = V6_shufvdd Vu, Vv, Rt
1736 // that perform exactly that. The register Rt controls which transposes are
1737 // going to happen: a bit at position n (counting from 0) indicates that a
1738 // transpose with a group size of 2^n will take place. If multiple bits are
1739 // set, multiple transposes will happen: vdealvdd will perform them starting
1740 // with the largest group size, vshuffvdd will do them in the reverse order.
1742 // The main observation is that each 2x2 transpose corresponds to swapping
1743 // columns of bits in the binary representation of the values.
1745 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
1746 // in a given column. The * denote the columns that will be swapped.
1747 // The transpose with the group size 2^n corresponds to swapping columns
1748 // 3 (the highest log) and log2(n):
1750 // 3 2 1 0 0 2 1 3 0 2 3 1
1752 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1753 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
1754 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
1755 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
1756 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
1757 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
1758 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
1759 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
1760 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
1761 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
1762 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
1763 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
1764 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
1765 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
1766 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
1767 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
1769 auto XorPow2
= [] (ArrayRef
<int> Mask
, unsigned Num
) {
1770 unsigned X
= Mask
[0] ^ Mask
[Num
/2];
1771 // Check that the first half has the X's bits clear.
1772 if ((Mask
[0] & X
) != 0)
1774 for (unsigned I
= 1; I
!= Num
/2; ++I
) {
1775 if (unsigned(Mask
[I
] ^ Mask
[I
+Num
/2]) != X
)
1777 if ((Mask
[I
] & X
) != 0)
1783 // Create a vector of log2's for each column: Perm[i] corresponds to
1784 // the i-th bit (lsb is 0).
1786 for (unsigned I
= VecLen
; I
>= 2; I
>>= 1) {
1787 // Examine the initial segment of Mask of size I.
1788 unsigned X
= XorPow2(SM
.Mask
, I
);
1789 if (!isPowerOf2_32(X
))
1790 return OpRef::fail();
1791 // Check the other segments of Mask.
1792 for (int J
= I
; J
< VecLen
; J
+= I
) {
1793 if (XorPow2(SM
.Mask
.slice(J
, I
), I
) != X
)
1794 return OpRef::fail();
1796 Perm
[Log2_32(X
)] = Log2_32(I
)-1;
1799 // Once we have Perm, represent it as cycles. Denote the maximum log2
1800 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
1801 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
1802 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
1803 // order being from left to right. Any (contiguous) segment where the
1804 // values ai, ai+1...aj are either all increasing or all decreasing,
1805 // can be implemented via a single vshuffvdd/vdealvdd respectively.
1807 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
1808 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
1809 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
1810 // can be used to generate a sequence of vshuffvdd/vdealvdd.
1813 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
1814 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
1815 // (4 0 1)(4 0)(4 2 3)(4 2).
1816 // It can then be expanded into swaps as
1817 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
1818 // and broken up into "increasing" segments as
1819 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
1820 // This is equivalent to
1821 // (4 0 1)(4 0 2 3)(4 2),
1822 // which can be implemented as 3 vshufvdd instructions.
1824 using CycleType
= SmallVector
<unsigned,8>;
1825 std::set
<CycleType
> Cycles
;
1826 std::set
<unsigned> All
;
1828 for (unsigned I
: Perm
)
1831 // If the cycle contains LogLen-1, move it to the front of the cycle.
1832 // Otherwise, return the cycle unchanged.
1833 auto canonicalize
= [LogLen
](const CycleType
&C
) -> CycleType
{
1834 unsigned LogPos
, N
= C
.size();
1835 for (LogPos
= 0; LogPos
!= N
; ++LogPos
)
1836 if (C
[LogPos
] == LogLen
-1)
1841 CycleType
NewC(C
.begin()+LogPos
, C
.end());
1842 NewC
.append(C
.begin(), C
.begin()+LogPos
);
1846 auto pfs
= [](const std::set
<CycleType
> &Cs
, unsigned Len
) {
1847 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
1848 // for bytes zero is included, for halfwords is not.
1851 const CycleType
&C
= *Cs
.begin();
1854 int D
= Len
- C
.size();
1855 if (D
!= 0 && D
!= 1)
1858 bool IsDeal
= true, IsShuff
= true;
1859 for (unsigned I
= 1; I
!= Len
-D
; ++I
) {
1860 if (C
[I
] != Len
-1-I
)
1862 if (C
[I
] != I
-(1-D
)) // I-1, I
1865 // At most one, IsDeal or IsShuff, can be non-zero.
1866 assert(!(IsDeal
|| IsShuff
) || IsDeal
!= IsShuff
);
1867 static unsigned Deals
[] = { Hexagon::V6_vdealb
, Hexagon::V6_vdealh
};
1868 static unsigned Shufs
[] = { Hexagon::V6_vshuffb
, Hexagon::V6_vshuffh
};
1869 return IsDeal
? Deals
[D
] : (IsShuff
? Shufs
[D
] : 0);
1872 while (!All
.empty()) {
1873 unsigned A
= *All
.begin();
1877 for (unsigned B
= Perm
[A
]; B
!= A
; B
= Perm
[B
]) {
1883 Cycles
.insert(canonicalize(C
));
1886 MVT SingleTy
= getSingleVT(MVT::i8
);
1887 MVT PairTy
= getPairVT(MVT::i8
);
1889 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
1890 if (unsigned(VecLen
) == HwLen
) {
1891 if (unsigned SingleOpc
= pfs(Cycles
, LogLen
)) {
1892 Results
.push(SingleOpc
, SingleTy
, {Va
});
1893 return OpRef::res(Results
.top());
1897 SmallVector
<unsigned,8> SwapElems
;
1898 if (HwLen
== unsigned(VecLen
))
1899 SwapElems
.push_back(LogLen
-1);
1901 for (const CycleType
&C
: Cycles
) {
1902 unsigned First
= (C
[0] == LogLen
-1) ? 1 : 0;
1903 SwapElems
.append(C
.begin()+First
, C
.end());
1905 SwapElems
.push_back(C
[0]);
1908 const SDLoc
&dl(Results
.InpNode
);
1909 OpRef Arg
= !Extend
? Va
1910 : concat(Va
, OpRef::undef(SingleTy
), Results
);
1912 for (unsigned I
= 0, E
= SwapElems
.size(); I
!= E
; ) {
1913 bool IsInc
= I
== E
-1 || SwapElems
[I
] < SwapElems
[I
+1];
1914 unsigned S
= (1u << SwapElems
[I
]);
1916 while (++I
< E
-1 && IsInc
== (SwapElems
[I
] < SwapElems
[I
+1]))
1917 S
|= 1u << SwapElems
[I
];
1918 // The above loop will not add a bit for the final SwapElems[I+1],
1920 S
|= 1u << SwapElems
[I
];
1925 Results
.push(Hexagon::A2_tfrsi
, MVT::i32
,
1926 { DAG
.getTargetConstant(S
, dl
, MVT::i32
) });
1927 Res
.Opc
= IsInc
? Hexagon::V6_vshuffvdd
: Hexagon::V6_vdealvdd
;
1929 Res
.Ops
= { OpRef::hi(Arg
), OpRef::lo(Arg
), OpRef::res(-1) };
1931 Arg
= OpRef::res(Results
.top());
1934 return !Extend
? Arg
: OpRef::lo(Arg
);
1937 OpRef
HvxSelector::butterfly(ShuffleMask SM
, OpRef Va
, ResultStack
&Results
) {
1938 DEBUG_WITH_TYPE("isel", {dbgs() << __func__
<< '\n';});
1939 // Butterfly shuffles.
1945 // The assumption here is that all elements picked by Mask are in the
1946 // first operand to the vector_shuffle. This assumption is enforced
1949 MVT ResTy
= getSingleVT(MVT::i8
);
1950 PermNetwork::Controls FC
, RC
;
1951 const SDLoc
&dl(Results
.InpNode
);
1952 int VecLen
= SM
.Mask
.size();
1954 for (int M
: SM
.Mask
) {
1955 if (M
!= -1 && M
>= VecLen
)
1956 return OpRef::fail();
1959 // Try the deltas/benes for both single vectors and vector pairs.
1960 ForwardDeltaNetwork
FN(SM
.Mask
);
1962 SDValue Ctl
= getVectorConstant(FC
, dl
);
1963 Results
.push(Hexagon::V6_vdelta
, ResTy
, {Va
, OpRef(Ctl
)});
1964 return OpRef::res(Results
.top());
1967 // Try reverse delta.
1968 ReverseDeltaNetwork
RN(SM
.Mask
);
1970 SDValue Ctl
= getVectorConstant(RC
, dl
);
1971 Results
.push(Hexagon::V6_vrdelta
, ResTy
, {Va
, OpRef(Ctl
)});
1972 return OpRef::res(Results
.top());
1976 BenesNetwork
BN(SM
.Mask
);
1977 if (BN
.run(FC
, RC
)) {
1978 SDValue CtlF
= getVectorConstant(FC
, dl
);
1979 SDValue CtlR
= getVectorConstant(RC
, dl
);
1980 Results
.push(Hexagon::V6_vdelta
, ResTy
, {Va
, OpRef(CtlF
)});
1981 Results
.push(Hexagon::V6_vrdelta
, ResTy
,
1982 {OpRef::res(-1), OpRef(CtlR
)});
1983 return OpRef::res(Results
.top());
1986 return OpRef::fail();
1989 SDValue
HvxSelector::getVectorConstant(ArrayRef
<uint8_t> Data
,
1991 SmallVector
<SDValue
, 128> Elems
;
1992 for (uint8_t C
: Data
)
1993 Elems
.push_back(DAG
.getConstant(C
, dl
, MVT::i8
));
1994 MVT VecTy
= MVT::getVectorVT(MVT::i8
, Data
.size());
1995 SDValue BV
= DAG
.getBuildVector(VecTy
, dl
, Elems
);
1996 SDValue LV
= Lower
.LowerOperation(BV
, DAG
);
1997 DAG
.RemoveDeadNode(BV
.getNode());
2001 void HvxSelector::selectShuffle(SDNode
*N
) {
2002 DEBUG_WITH_TYPE("isel", {
2003 dbgs() << "Starting " << __func__
<< " on node:\n";
2006 MVT ResTy
= N
->getValueType(0).getSimpleVT();
2007 // Assume that vector shuffles operate on vectors of bytes.
2008 assert(ResTy
.isVector() && ResTy
.getVectorElementType() == MVT::i8
);
2010 auto *SN
= cast
<ShuffleVectorSDNode
>(N
);
2011 std::vector
<int> Mask(SN
->getMask().begin(), SN
->getMask().end());
2012 // This shouldn't really be necessary. Is it?
2013 for (int &Idx
: Mask
)
2014 if (Idx
!= -1 && Idx
< 0)
2017 unsigned VecLen
= Mask
.size();
2018 bool HavePairs
= (2*HwLen
== VecLen
);
2019 assert(ResTy
.getSizeInBits() / 8 == VecLen
);
2021 // Vd = vector_shuffle Va, Vb, Mask
2024 bool UseLeft
= false, UseRight
= false;
2025 for (unsigned I
= 0; I
!= VecLen
; ++I
) {
2028 unsigned Idx
= Mask
[I
];
2029 assert(Idx
< 2*VecLen
);
2036 DEBUG_WITH_TYPE("isel", {
2037 dbgs() << "VecLen=" << VecLen
<< " HwLen=" << HwLen
<< " UseLeft="
2038 << UseLeft
<< " UseRight=" << UseRight
<< " HavePairs="
2039 << HavePairs
<< '\n';
2041 // If the mask is all -1's, generate "undef".
2042 if (!UseLeft
&& !UseRight
) {
2043 ISel
.ReplaceNode(N
, ISel
.selectUndef(SDLoc(SN
), ResTy
).getNode());
2047 SDValue Vec0
= N
->getOperand(0);
2048 SDValue Vec1
= N
->getOperand(1);
2049 ResultStack
Results(SN
);
2050 Results
.push(TargetOpcode::COPY
, ResTy
, {Vec0
});
2051 Results
.push(TargetOpcode::COPY
, ResTy
, {Vec1
});
2052 OpRef Va
= OpRef::res(Results
.top()-1);
2053 OpRef Vb
= OpRef::res(Results
.top());
2055 OpRef Res
= !HavePairs
? shuffs2(ShuffleMask(Mask
), Va
, Vb
, Results
)
2056 : shuffp2(ShuffleMask(Mask
), Va
, Vb
, Results
);
2058 bool Done
= Res
.isValid();
2060 // Make sure that Res is on the stack before materializing.
2061 Results
.push(TargetOpcode::COPY
, ResTy
, {Res
});
2062 materialize(Results
);
2064 Done
= scalarizeShuffle(Mask
, SDLoc(N
), ResTy
, Vec0
, Vec1
, N
);
2069 dbgs() << "Unhandled shuffle:\n";
2072 llvm_unreachable("Failed to select vector shuffle");
2076 void HvxSelector::selectRor(SDNode
*N
) {
2077 // If this is a rotation by less than 8, use V6_valignbi.
2078 MVT Ty
= N
->getValueType(0).getSimpleVT();
2080 SDValue VecV
= N
->getOperand(0);
2081 SDValue RotV
= N
->getOperand(1);
2082 SDNode
*NewN
= nullptr;
2084 if (auto *CN
= dyn_cast
<ConstantSDNode
>(RotV
.getNode())) {
2085 unsigned S
= CN
->getZExtValue() % HST
.getVectorLength();
2087 NewN
= VecV
.getNode();
2088 } else if (isUInt
<3>(S
)) {
2089 SDValue C
= DAG
.getTargetConstant(S
, dl
, MVT::i32
);
2090 NewN
= DAG
.getMachineNode(Hexagon::V6_valignbi
, dl
, Ty
,
2096 NewN
= DAG
.getMachineNode(Hexagon::V6_vror
, dl
, Ty
, {VecV
, RotV
});
2098 ISel
.ReplaceNode(N
, NewN
);
2101 void HvxSelector::selectVAlign(SDNode
*N
) {
2102 SDValue Vv
= N
->getOperand(0);
2103 SDValue Vu
= N
->getOperand(1);
2104 SDValue Rt
= N
->getOperand(2);
2105 SDNode
*NewN
= DAG
.getMachineNode(Hexagon::V6_valignb
, SDLoc(N
),
2106 N
->getValueType(0), {Vv
, Vu
, Rt
});
2107 ISel
.ReplaceNode(N
, NewN
);
2108 DAG
.RemoveDeadNode(N
);
2111 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode
*N
) {
2112 HvxSelector(*this, *CurDAG
).selectShuffle(N
);
2115 void HexagonDAGToDAGISel::SelectHvxRor(SDNode
*N
) {
2116 HvxSelector(*this, *CurDAG
).selectRor(N
);
2119 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode
*N
) {
2120 HvxSelector(*this, *CurDAG
).selectVAlign(N
);
2123 void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode
*N
) {
2125 SDValue Chain
= N
->getOperand(0);
2126 SDValue Address
= N
->getOperand(2);
2127 SDValue Predicate
= N
->getOperand(3);
2128 SDValue Base
= N
->getOperand(4);
2129 SDValue Modifier
= N
->getOperand(5);
2130 SDValue Offset
= N
->getOperand(6);
2133 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
2136 llvm_unreachable("Unexpected HVX gather intrinsic.");
2137 case Intrinsic::hexagon_V6_vgathermhq
:
2138 case Intrinsic::hexagon_V6_vgathermhq_128B
:
2139 Opcode
= Hexagon::V6_vgathermhq_pseudo
;
2141 case Intrinsic::hexagon_V6_vgathermwq
:
2142 case Intrinsic::hexagon_V6_vgathermwq_128B
:
2143 Opcode
= Hexagon::V6_vgathermwq_pseudo
;
2145 case Intrinsic::hexagon_V6_vgathermhwq
:
2146 case Intrinsic::hexagon_V6_vgathermhwq_128B
:
2147 Opcode
= Hexagon::V6_vgathermhwq_pseudo
;
2151 SDVTList VTs
= CurDAG
->getVTList(MVT::Other
);
2152 SDValue Ops
[] = { Address
, Predicate
, Base
, Modifier
, Offset
, Chain
};
2153 SDNode
*Result
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
2155 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2156 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
2158 ReplaceNode(N
, Result
);
2161 void HexagonDAGToDAGISel::SelectV65Gather(SDNode
*N
) {
2163 SDValue Chain
= N
->getOperand(0);
2164 SDValue Address
= N
->getOperand(2);
2165 SDValue Base
= N
->getOperand(3);
2166 SDValue Modifier
= N
->getOperand(4);
2167 SDValue Offset
= N
->getOperand(5);
2170 unsigned IntNo
= cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
2173 llvm_unreachable("Unexpected HVX gather intrinsic.");
2174 case Intrinsic::hexagon_V6_vgathermh
:
2175 case Intrinsic::hexagon_V6_vgathermh_128B
:
2176 Opcode
= Hexagon::V6_vgathermh_pseudo
;
2178 case Intrinsic::hexagon_V6_vgathermw
:
2179 case Intrinsic::hexagon_V6_vgathermw_128B
:
2180 Opcode
= Hexagon::V6_vgathermw_pseudo
;
2182 case Intrinsic::hexagon_V6_vgathermhw
:
2183 case Intrinsic::hexagon_V6_vgathermhw_128B
:
2184 Opcode
= Hexagon::V6_vgathermhw_pseudo
;
2188 SDVTList VTs
= CurDAG
->getVTList(MVT::Other
);
2189 SDValue Ops
[] = { Address
, Base
, Modifier
, Offset
, Chain
};
2190 SDNode
*Result
= CurDAG
->getMachineNode(Opcode
, dl
, VTs
, Ops
);
2192 MachineMemOperand
*MemOp
= cast
<MemIntrinsicSDNode
>(N
)->getMemOperand();
2193 CurDAG
->setNodeMemRefs(cast
<MachineSDNode
>(Result
), {MemOp
});
2195 ReplaceNode(N
, Result
);
2198 void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode
*N
) {
2199 unsigned IID
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
2202 case Intrinsic::hexagon_V6_vaddcarry
: {
2203 SmallVector
<SDValue
, 3> Ops
= { N
->getOperand(1), N
->getOperand(2),
2205 SDVTList VTs
= CurDAG
->getVTList(MVT::v16i32
, MVT::v512i1
);
2206 Result
= CurDAG
->getMachineNode(Hexagon::V6_vaddcarry
, SDLoc(N
), VTs
, Ops
);
2209 case Intrinsic::hexagon_V6_vaddcarry_128B
: {
2210 SmallVector
<SDValue
, 3> Ops
= { N
->getOperand(1), N
->getOperand(2),
2212 SDVTList VTs
= CurDAG
->getVTList(MVT::v32i32
, MVT::v1024i1
);
2213 Result
= CurDAG
->getMachineNode(Hexagon::V6_vaddcarry
, SDLoc(N
), VTs
, Ops
);
2216 case Intrinsic::hexagon_V6_vsubcarry
: {
2217 SmallVector
<SDValue
, 3> Ops
= { N
->getOperand(1), N
->getOperand(2),
2219 SDVTList VTs
= CurDAG
->getVTList(MVT::v16i32
, MVT::v512i1
);
2220 Result
= CurDAG
->getMachineNode(Hexagon::V6_vsubcarry
, SDLoc(N
), VTs
, Ops
);
2223 case Intrinsic::hexagon_V6_vsubcarry_128B
: {
2224 SmallVector
<SDValue
, 3> Ops
= { N
->getOperand(1), N
->getOperand(2),
2226 SDVTList VTs
= CurDAG
->getVTList(MVT::v32i32
, MVT::v1024i1
);
2227 Result
= CurDAG
->getMachineNode(Hexagon::V6_vsubcarry
, SDLoc(N
), VTs
, Ops
);
2231 llvm_unreachable("Unexpected HVX dual output intrinsic.");
2233 ReplaceUses(N
, Result
);
2234 ReplaceUses(SDValue(N
, 0), SDValue(Result
, 0));
2235 ReplaceUses(SDValue(N
, 1), SDValue(Result
, 1));
2236 CurDAG
->RemoveDeadNode(N
);