1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/Support/CommandLine.h"
16 static const MVT LegalV64
[] = { MVT::v64i8
, MVT::v32i16
, MVT::v16i32
};
17 static const MVT LegalW64
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
18 static const MVT LegalV128
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
19 static const MVT LegalW128
[] = { MVT::v256i8
, MVT::v128i16
, MVT::v64i32
};
// Configure HVX lowering for the current subtarget:
//  1. register the HVX vector, vector-pair and predicate value types with
//     their register classes (64-byte or 128-byte mode),
//  2. record how each operation on those types is handled (Legal, Custom,
//     Promote, or Expand for condition codes),
//  3. request target DAG combines for VSELECT.
// NOTE(review): this extract carries the original file's line numbers inline
// and that numbering has gaps (e.g. 22, 54-55, 83, 87, 104, 110, 113, 162,
// 165, 198). The return type, closing braces and possibly some guarding
// conditions are therefore not visible here -- confirm against the upstream
// file before relying on the exact control flow.
23 HexagonTargetLowering::initializeHVXLowering() {
24 if (Subtarget
.useHVX64BOps()) {
25 addRegisterClass(MVT::v64i8
, &Hexagon::HvxVRRegClass
);
26 addRegisterClass(MVT::v32i16
, &Hexagon::HvxVRRegClass
);
27 addRegisterClass(MVT::v16i32
, &Hexagon::HvxVRRegClass
);
28 addRegisterClass(MVT::v128i8
, &Hexagon::HvxWRRegClass
);
29 addRegisterClass(MVT::v64i16
, &Hexagon::HvxWRRegClass
);
30 addRegisterClass(MVT::v32i32
, &Hexagon::HvxWRRegClass
);
31 // These "short" boolean vector types should be legal because
32 // they will appear as results of vector compares. If they were
33 // not legal, type legalization would try to make them legal
34 // and that would require using operations that do not use or
35 // produce such types. That, in turn, would imply using custom
36 // nodes, which would be unoptimizable by the DAG combiner.
37 // The idea is to rely on target-independent operations as much
39 addRegisterClass(MVT::v16i1
, &Hexagon::HvxQRRegClass
);
40 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
41 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
42 addRegisterClass(MVT::v512i1
, &Hexagon::HvxQRRegClass
);
43 } else if (Subtarget
.useHVX128BOps()) {
44 addRegisterClass(MVT::v128i8
, &Hexagon::HvxVRRegClass
);
45 addRegisterClass(MVT::v64i16
, &Hexagon::HvxVRRegClass
);
46 addRegisterClass(MVT::v32i32
, &Hexagon::HvxVRRegClass
);
47 addRegisterClass(MVT::v256i8
, &Hexagon::HvxWRRegClass
);
48 addRegisterClass(MVT::v128i16
, &Hexagon::HvxWRRegClass
);
49 addRegisterClass(MVT::v64i32
, &Hexagon::HvxWRRegClass
);
50 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
51 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
52 addRegisterClass(MVT::v128i1
, &Hexagon::HvxQRRegClass
);
53 addRegisterClass(MVT::v1024i1
, &Hexagon::HvxQRRegClass
);
56 // Set up operation actions.
// Select the type tables and the byte-element single/pair types that match
// the active vector length.
58 bool Use64b
= Subtarget
.useHVX64BOps();
59 ArrayRef
<MVT
> LegalV
= Use64b
? LegalV64
: LegalV128
;
60 ArrayRef
<MVT
> LegalW
= Use64b
? LegalW64
: LegalW128
;
61 MVT ByteV
= Use64b
? MVT::v64i8
: MVT::v128i8
;
62 MVT ByteW
= Use64b
? MVT::v128i8
: MVT::v256i8
;
// Helper: mark Opc on FromTy as Promote and record ToTy as the type it is
// promoted to.
64 auto setPromoteTo
= [this] (unsigned Opc
, MVT FromTy
, MVT ToTy
) {
65 setOperationAction(Opc
, FromTy
, Promote
);
66 AddPromotedToType(Opc
, FromTy
, ToTy
);
// Shuffles are Legal only on the byte-element types.
69 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteV
, Legal
);
70 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteW
, Legal
);
// Actions for the types in LegalV.
72 for (MVT T
: LegalV
) {
73 setIndexedLoadAction(ISD::POST_INC
, T
, Legal
);
74 setIndexedStoreAction(ISD::POST_INC
, T
, Legal
);
76 setOperationAction(ISD::AND
, T
, Legal
);
77 setOperationAction(ISD::OR
, T
, Legal
);
78 setOperationAction(ISD::XOR
, T
, Legal
);
79 setOperationAction(ISD::ADD
, T
, Legal
);
80 setOperationAction(ISD::SUB
, T
, Legal
);
81 setOperationAction(ISD::CTPOP
, T
, Legal
);
82 setOperationAction(ISD::CTLZ
, T
, Legal
);
// NOTE(review): embedded numbering skips 83 and 87 around the next three
// calls; a guarding condition may be missing from this extract.
84 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
85 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
86 setOperationAction(ISD::BSWAP
, T
, Legal
);
89 setOperationAction(ISD::CTTZ
, T
, Custom
);
90 setOperationAction(ISD::LOAD
, T
, Custom
);
91 setOperationAction(ISD::MUL
, T
, Custom
);
92 setOperationAction(ISD::MULHS
, T
, Custom
);
93 setOperationAction(ISD::MULHU
, T
, Custom
);
94 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
95 // Make concat-vectors custom to handle concats of more than 2 vectors.
96 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
97 setOperationAction(ISD::INSERT_SUBVECTOR
, T
, Custom
);
98 setOperationAction(ISD::INSERT_VECTOR_ELT
, T
, Custom
);
99 setOperationAction(ISD::EXTRACT_SUBVECTOR
, T
, Custom
);
100 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, T
, Custom
);
101 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
102 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
103 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
105 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
106 // HVX only has shifts of words and halfwords.
107 setOperationAction(ISD::SRA
, T
, Custom
);
108 setOperationAction(ISD::SHL
, T
, Custom
);
109 setOperationAction(ISD::SRL
, T
, Custom
);
111 // Promote all shuffles to operate on vectors of bytes.
112 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteV
);
// These comparison condition codes are marked Expand for T.
115 setCondCodeAction(ISD::SETNE
, T
, Expand
);
116 setCondCodeAction(ISD::SETLE
, T
, Expand
);
117 setCondCodeAction(ISD::SETGE
, T
, Expand
);
118 setCondCodeAction(ISD::SETLT
, T
, Expand
);
119 setCondCodeAction(ISD::SETULE
, T
, Expand
);
120 setCondCodeAction(ISD::SETUGE
, T
, Expand
);
121 setCondCodeAction(ISD::SETULT
, T
, Expand
);
// Actions for the types in LegalW.
124 for (MVT T
: LegalW
) {
125 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
126 // independent) handling of it would convert it to a load, which is
127 // not always the optimal choice.
128 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
129 // Make concat-vectors custom to handle concats of more than 2 vectors.
130 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
132 // Custom-lower these operations for pairs. Expand them into a concat
133 // of the corresponding operations on individual vectors.
134 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
135 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
136 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
137 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Custom
);
138 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
139 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
140 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
142 setOperationAction(ISD::LOAD
, T
, Custom
);
143 setOperationAction(ISD::STORE
, T
, Custom
);
144 setOperationAction(ISD::CTLZ
, T
, Custom
);
145 setOperationAction(ISD::CTTZ
, T
, Custom
);
146 setOperationAction(ISD::CTPOP
, T
, Custom
);
148 setOperationAction(ISD::ADD
, T
, Legal
);
149 setOperationAction(ISD::SUB
, T
, Legal
);
150 setOperationAction(ISD::MUL
, T
, Custom
);
151 setOperationAction(ISD::MULHS
, T
, Custom
);
152 setOperationAction(ISD::MULHU
, T
, Custom
);
153 setOperationAction(ISD::AND
, T
, Custom
);
154 setOperationAction(ISD::OR
, T
, Custom
);
155 setOperationAction(ISD::XOR
, T
, Custom
);
156 setOperationAction(ISD::SETCC
, T
, Custom
);
157 setOperationAction(ISD::VSELECT
, T
, Custom
);
159 setOperationAction(ISD::SRA
, T
, Custom
);
160 setOperationAction(ISD::SHL
, T
, Custom
);
161 setOperationAction(ISD::SRL
, T
, Custom
);
163 // Promote all shuffles to operate on vectors of bytes.
164 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteW
);
170 for (MVT T
: LegalW
) {
171 // Boolean types for vector pairs will overlap with the boolean
172 // types for single vectors, e.g.
173 // v64i8 -> v64i1 (single)
174 // v64i16 -> v64i1 (pair)
175 // Set these actions first, and allow the single actions to overwrite
177 MVT BoolW
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
178 setOperationAction(ISD::SETCC
, BoolW
, Custom
);
179 setOperationAction(ISD::AND
, BoolW
, Custom
);
180 setOperationAction(ISD::OR
, BoolW
, Custom
);
181 setOperationAction(ISD::XOR
, BoolW
, Custom
);
// Boolean types with the element counts of the LegalV types. AND/OR/XOR are
// set to Legal here, after the loop above set them to Custom for the
// element counts of the LegalW types.
184 for (MVT T
: LegalV
) {
185 MVT BoolV
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
186 setOperationAction(ISD::BUILD_VECTOR
, BoolV
, Custom
);
187 setOperationAction(ISD::CONCAT_VECTORS
, BoolV
, Custom
);
188 setOperationAction(ISD::INSERT_SUBVECTOR
, BoolV
, Custom
);
189 setOperationAction(ISD::INSERT_VECTOR_ELT
, BoolV
, Custom
);
190 setOperationAction(ISD::EXTRACT_SUBVECTOR
, BoolV
, Custom
);
191 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, BoolV
, Custom
);
192 setOperationAction(ISD::AND
, BoolV
, Legal
);
193 setOperationAction(ISD::OR
, BoolV
, Legal
);
194 setOperationAction(ISD::XOR
, BoolV
, Legal
);
197 setTargetDAGCombine(ISD::VSELECT
);
// Build an ISD::INTRINSIC_WO_CHAIN node of type ResTy. Its operand list is
// the intrinsic id IntId (as an i32 constant) followed by every value in Ops,
// in order.
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract (embedded numbering jumps from 197 to 201 and from 207 to
// 211).
201 HexagonTargetLowering::getInt(unsigned IntId
, MVT ResTy
, ArrayRef
<SDValue
> Ops
,
202 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
203 SmallVector
<SDValue
,4> IntOps
;
// The intrinsic id is the first operand.
204 IntOps
.push_back(DAG
.getConstant(IntId
, dl
, MVT::i32
));
205 for (const SDValue
&Op
: Ops
)
206 IntOps
.push_back(Op
);
207 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, ResTy
, IntOps
);
// Return the vector type that has the element type shared by both members
// of Tys and as many elements as Tys.first and Tys.second combined.
// Asserts that the two element types are equal.
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
211 HexagonTargetLowering::typeJoin(const TypePair
&Tys
) const {
212 assert(Tys
.first
.getVectorElementType() == Tys
.second
.getVectorElementType());
214 MVT ElemTy
= Tys
.first
.getVectorElementType();
215 return MVT::getVectorVT(ElemTy
, Tys
.first
.getVectorNumElements() +
216 Tys
.second
.getVectorNumElements());
219 HexagonTargetLowering::TypePair
220 HexagonTargetLowering::typeSplit(MVT VecTy
) const {
221 assert(VecTy
.isVector());
222 unsigned NumElem
= VecTy
.getVectorNumElements();
223 assert((NumElem
% 2) == 0 && "Expecting even-sized vector type");
224 MVT HalfTy
= MVT::getVectorVT(VecTy
.getVectorElementType(), NumElem
/2);
225 return { HalfTy
, HalfTy
};
// Return a vector type with the same number of elements as VecTy whose
// element type is the integer type Factor times as wide as VecTy's element.
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
229 HexagonTargetLowering::typeExtElem(MVT VecTy
, unsigned Factor
) const {
230 MVT ElemTy
= VecTy
.getVectorElementType();
231 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() * Factor
);
232 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Return a vector type with the same number of elements as VecTy whose
// element type is the integer type with VecTy's element width divided by
// Factor (integer division).
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
236 HexagonTargetLowering::typeTruncElem(MVT VecTy
, unsigned Factor
) const {
237 MVT ElemTy
= VecTy
.getVectorElementType();
238 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() / Factor
);
239 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Bitcast Vec to the type tyVector(<type of Vec>, ElemTy), i.e. reinterpret
// it with element type ElemTy.
// NOTE(review): the statement controlled by the element-type check below
// (original line 246) is missing from this extract; presumably Vec is
// returned unchanged when it already has element type ElemTy -- confirm.
// The return-type line and closing brace are not visible either.
243 HexagonTargetLowering::opCastElem(SDValue Vec
, MVT ElemTy
,
244 SelectionDAG
&DAG
) const {
245 if (ty(Vec
).getVectorElementType() == ElemTy
)
247 MVT CastTy
= tyVector(Vec
.getValueType().getSimpleVT(), ElemTy
);
248 return DAG
.getBitcast(CastTy
, Vec
);
// Concatenate the two vectors in Ops into one CONCAT_VECTORS node whose
// type is typeJoin of the operand types.
// NOTE(review): the operands are passed as (Ops.second, Ops.first) --
// confirm that this ordering is intended by the callers.
// The return-type line and closing brace are not visible in this extract.
252 HexagonTargetLowering::opJoin(const VectorPair
&Ops
, const SDLoc
&dl
,
253 SelectionDAG
&DAG
) const {
254 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, typeJoin(ty(Ops
)),
255 Ops
.second
, Ops
.first
);
258 HexagonTargetLowering::VectorPair
259 HexagonTargetLowering::opSplit(SDValue Vec
, const SDLoc
&dl
,
260 SelectionDAG
&DAG
) const {
261 TypePair Tys
= typeSplit(ty(Vec
));
262 if (Vec
.getOpcode() == HexagonISD::QCAT
)
263 return VectorPair(Vec
.getOperand(0), Vec
.getOperand(1));
264 return DAG
.SplitVector(Vec
, dl
, Tys
.first
, Tys
.second
);
// True when Ty is an HVX vector type (per Subtarget.isHVXVectorType) whose
// size in bits equals 8 * Subtarget.getVectorLength().
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
268 HexagonTargetLowering::isHvxSingleTy(MVT Ty
) const {
269 return Subtarget
.isHVXVectorType(Ty
) &&
270 Ty
.getSizeInBits() == 8 * Subtarget
.getVectorLength();
// True when Ty is an HVX vector type (per Subtarget.isHVXVectorType) whose
// size in bits equals 16 * Subtarget.getVectorLength().
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
274 HexagonTargetLowering::isHvxPairTy(MVT Ty
) const {
275 return Subtarget
.isHVXVectorType(Ty
) &&
276 Ty
.getSizeInBits() == 16 * Subtarget
.getVectorLength();
// Convert an element index into a byte index: the index (bitcast to i32 if
// it has another type) is shifted left by log2 of the element size in bytes.
// NOTE(review): embedded numbering skips 286-288 after ElemWidth is
// computed, so some statements (e.g. range checks or a fast path) may be
// missing from this extract -- confirm against the upstream file.
280 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx
, MVT ElemTy
,
281 SelectionDAG
&DAG
) const {
282 if (ElemIdx
.getValueType().getSimpleVT() != MVT::i32
)
283 ElemIdx
= DAG
.getBitcast(MVT::i32
, ElemIdx
);
285 unsigned ElemWidth
= ElemTy
.getSizeInBits();
// L is log2 of the element size in bytes.
289 unsigned L
= Log2_32(ElemWidth
/8);
290 const SDLoc
&dl(ElemIdx
);
291 return DAG
.getNode(ISD::SHL
, dl
, MVT::i32
,
292 {ElemIdx
, DAG
.getConstant(L
, dl
, MVT::i32
)});
// Compute the position of an element inside its 32-bit word: Idx (bitcast
// to i32 if needed) is ANDed with (32/ElemWidth - 1). ElemWidth must be
// between 8 and 32 bits.
// NOTE(review): embedded numbering skips 300-302 and everything after 307,
// so the return statement (presumably returning SubIdx) and possibly a
// 32-bit fast path are not visible in this extract -- confirm.
296 HexagonTargetLowering::getIndexInWord32(SDValue Idx
, MVT ElemTy
,
297 SelectionDAG
&DAG
) const {
298 unsigned ElemWidth
= ElemTy
.getSizeInBits();
299 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
303 if (ty(Idx
) != MVT::i32
)
304 Idx
= DAG
.getBitcast(MVT::i32
, Idx
);
305 const SDLoc
&dl(Idx
);
// The mask selects the low bits of the index that count elements in a word.
306 SDValue Mask
= DAG
.getConstant(32/ElemWidth
- 1, dl
, MVT::i32
);
307 SDValue SubIdx
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, {Idx
, Mask
});
// Express a shuffle of Op0/Op1 with mask Mask as a shuffle of byte vectors.
// If the element type is already i8, the shuffle is emitted directly.
// Otherwise each mask entry is expanded into ElemSize byte indices (or
// ElemSize entries of -1), both operands are bitcast to i8 elements via
// opCastElem, and the shuffle is built on the byte type.
// NOTE(review): the declaration of OpTy (line 315), the loop over Mask and
// the condition that chooses between the -1 fill and the NewM+I fill (lines
// 327-328, 331) are missing from this extract -- confirm against upstream.
312 HexagonTargetLowering::getByteShuffle(const SDLoc
&dl
, SDValue Op0
,
313 SDValue Op1
, ArrayRef
<int> Mask
,
314 SelectionDAG
&DAG
) const {
316 assert(OpTy
== ty(Op1
));
318 MVT ElemTy
= OpTy
.getVectorElementType();
319 if (ElemTy
== MVT::i8
)
320 return DAG
.getVectorShuffle(OpTy
, dl
, Op0
, Op1
, Mask
);
321 assert(ElemTy
.getSizeInBits() >= 8);
323 MVT ResTy
= tyVector(OpTy
, MVT::i8
);
// ElemSize is the element size in bytes.
324 unsigned ElemSize
= ElemTy
.getSizeInBits() / 8;
326 SmallVector
<int,128> ByteMask
;
329 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
330 ByteMask
.push_back(-1);
332 int NewM
= M
*ElemSize
;
333 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
334 ByteMask
.push_back(NewM
+I
);
337 assert(ResTy
.getVectorNumElements() == ByteMask
.size());
338 return DAG
.getVectorShuffle(ResTy
, dl
, opCastElem(Op0
, MVT::i8
, DAG
),
339 opCastElem(Op1
, MVT::i8
, DAG
), ByteMask
);
// Build a single HVX vector of type VecTy from the scalar Values.
// Steps visible in this extract:
//  - elements narrower than i32 are packed into i32 words with
//    buildVector32; i32 elements are used as words directly,
//  - the words are scanned for an all-undef / splat pattern, with results
//    DAG.getUNDEF, getZero (for a zero constant splat) or a VSPLATW node,
//  - a vector of constants is loaded from the constant pool, aligned to the
//    hardware vector length,
//  - a vector built only from extracts of one source vector is produced with
//    a shuffle of that source vector,
//  - otherwise two half vectors are filled with VINSERTW0/VROR nodes and
//    combined with an OR.
// NOTE(review): many lines are missing from this extract (the embedded
// numbering has gaps, e.g. 363-364, 370, 373-380, 394, 413-436, 448-453,
// 461-465, 469, 481, 486-487, 491+). The conditions guarding several of the
// returns and the final return are not visible; treat the summary above as
// an outline and confirm the control flow against the upstream file.
343 HexagonTargetLowering::buildHvxVectorReg(ArrayRef
<SDValue
> Values
,
344 const SDLoc
&dl
, MVT VecTy
,
345 SelectionDAG
&DAG
) const {
346 unsigned VecLen
= Values
.size();
347 MachineFunction
&MF
= DAG
.getMachineFunction();
348 MVT ElemTy
= VecTy
.getVectorElementType();
349 unsigned ElemWidth
= ElemTy
.getSizeInBits();
350 unsigned HwLen
= Subtarget
.getVectorLength();
352 unsigned ElemSize
= ElemWidth
/ 8;
// The values must fill exactly one hardware vector.
353 assert(ElemSize
*VecLen
== HwLen
);
354 SmallVector
<SDValue
,32> Words
;
356 if (VecTy
.getVectorElementType() != MVT::i32
) {
357 assert((ElemSize
== 1 || ElemSize
== 2) && "Invalid element size");
358 unsigned OpsPerWord
= (ElemSize
== 1) ? 4 : 2;
359 MVT PartVT
= MVT::getVectorVT(VecTy
.getVectorElementType(), OpsPerWord
);
360 for (unsigned i
= 0; i
!= VecLen
; i
+= OpsPerWord
) {
361 SDValue W
= buildVector32(Values
.slice(i
, OpsPerWord
), dl
, PartVT
, DAG
);
362 Words
.push_back(DAG
.getBitcast(MVT::i32
, W
));
365 Words
.assign(Values
.begin(), Values
.end());
368 unsigned NumWords
= Words
.size();
369 bool IsSplat
= true, IsUndef
= true;
371 for (unsigned i
= 0; i
!= NumWords
&& IsSplat
; ++i
) {
372 if (isUndef(Words
[i
]))
375 if (!SplatV
.getNode())
377 else if (SplatV
!= Words
[i
])
381 return DAG
.getUNDEF(VecTy
);
383 assert(SplatV
.getNode());
384 auto *IdxN
= dyn_cast
<ConstantSDNode
>(SplatV
.getNode());
385 if (IdxN
&& IdxN
->isNullValue())
386 return getZero(dl
, VecTy
, DAG
);
387 return DAG
.getNode(HexagonISD::VSPLATW
, dl
, VecTy
, SplatV
);
390 // Delay recognizing constant vectors until here, so that we can generate
392 SmallVector
<ConstantInt
*, 128> Consts(VecLen
);
393 bool AllConst
= getBuildVectorConstInts(Values
, VecTy
, DAG
, Consts
);
395 ArrayRef
<Constant
*> Tmp((Constant
**)Consts
.begin(),
396 (Constant
**)Consts
.end());
397 Constant
*CV
= ConstantVector::get(Tmp
);
398 unsigned Align
= HwLen
;
399 SDValue CP
= LowerConstantPool(DAG
.getConstantPool(CV
, VecTy
, Align
), DAG
);
400 return DAG
.getLoad(VecTy
, dl
, DAG
.getEntryNode(), CP
,
401 MachinePointerInfo::getConstantPool(MF
), Align
);
404 // A special case is a situation where the vector is built entirely from
405 // elements extracted from another vector. This could be done via a shuffle
406 // more efficiently, but typically, the size of the source vector will not
407 // match the size of the vector being built (which precludes the use of a
408 // shuffle directly).
409 // This only handles a single source vector, and the vector being built
410 // should be of a sub-vector type of the source vector type.
411 auto IsBuildFromExtracts
= [this,&Values
] (SDValue
&SrcVec
,
412 SmallVectorImpl
<int> &SrcIdx
) {
414 for (SDValue V
: Values
) {
416 SrcIdx
.push_back(-1);
419 if (V
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
421 // All extracts should come from the same vector.
422 SDValue T
= V
.getOperand(0);
423 if (Vec
.getNode() != nullptr && T
.getNode() != Vec
.getNode())
426 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(V
.getOperand(1));
429 int I
= C
->getSExtValue();
430 assert(I
>= 0 && "Negative element index");
437 SmallVector
<int,128> ExtIdx
;
439 if (IsBuildFromExtracts(ExtVec
, ExtIdx
)) {
440 MVT ExtTy
= ty(ExtVec
);
441 unsigned ExtLen
= ExtTy
.getVectorNumElements();
442 if (ExtLen
== VecLen
|| ExtLen
== 2*VecLen
) {
443 // Construct a new shuffle mask that will produce a vector with the same
444 // number of elements as the input vector, and such that the vector we
445 // want will be the initial subvector of it.
446 SmallVector
<int,128> Mask
;
447 BitVector
Used(ExtLen
);
449 for (int M
: ExtIdx
) {
454 // Fill the rest of the mask with the unused elements of ExtVec in hopes
455 // that it will result in a permutation of ExtVec's elements. It's still
456 // fine if it doesn't (e.g. if undefs are present, or elements are
457 // repeated), but permutations can always be done efficiently via vdelta
459 for (unsigned I
= 0; I
!= ExtLen
; ++I
) {
460 if (Mask
.size() == ExtLen
)
466 SDValue S
= DAG
.getVectorShuffle(ExtTy
, dl
, ExtVec
,
467 DAG
.getUNDEF(ExtTy
), Mask
);
468 if (ExtLen
== VecLen
)
470 return DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, VecTy
, S
);
474 // Construct two halves in parallel, then or them together.
475 assert(4*Words
.size() == Subtarget
.getVectorLength());
476 SDValue HalfV0
= getInstr(Hexagon::V6_vd0
, dl
, VecTy
, {}, DAG
);
477 SDValue HalfV1
= getInstr(Hexagon::V6_vd0
, dl
, VecTy
, {}, DAG
);
478 SDValue S
= DAG
.getConstant(4, dl
, MVT::i32
);
479 for (unsigned i
= 0; i
!= NumWords
/2; ++i
) {
480 SDValue N
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
482 SDValue M
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
483 {HalfV1
, Words
[i
+NumWords
/2]});
484 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {N
, S
});
485 HalfV1
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {M
, S
});
488 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
,
489 {HalfV0
, DAG
.getConstant(HwLen
/2, dl
, MVT::i32
)});
490 SDValue DstV
= DAG
.getNode(ISD::OR
, dl
, VecTy
, {HalfV0
, HalfV1
});
// Produce a byte vector (HwLen x i8) derived from the predicate PredV, using
// BitBytes bytes per predicate element.
//  - If PredV has an HVX predicate type: it is converted to bytes with Q2V
//    and shuffled with a mask computed from Scale and BlockLen; the visible
//    code also contains a path that ANDs the result with a byte mask built
//    from V6_pred_scalar2(BlockLen).
//  - Otherwise PredV must be v2i1, v4i1 or v8i1: it is converted to an i64
//    with P2D (or UNDEF), split into 32-bit words, processed in a loop while
//    Bytes < BitBytes, and the words are inserted with VROR/VINSERTW0 into a
//    zero vector (ZeroFill) or an undef vector.
// NOTE(review): several lines are missing from this extract (embedded
// numbering gaps, e.g. 515, 520, 522-523, 537-538, 553-556, 561-562,
// 566-570, 578+), including the declaration of IdxW, the conditions inside
// the while loop, and the final return -- confirm against upstream.
495 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV
, const SDLoc
&dl
,
496 unsigned BitBytes
, bool ZeroFill
, SelectionDAG
&DAG
) const {
497 MVT PredTy
= ty(PredV
);
498 unsigned HwLen
= Subtarget
.getVectorLength();
499 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
501 if (Subtarget
.isHVXVectorType(PredTy
, true)) {
502 // Move the vector predicate SubV to a vector register, and scale it
503 // down to match the representation (bytes per type element) that VecV
504 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
505 // in general) element and put them at the front of the resulting
506 // vector. This subvector will then be inserted into the Q2V of VecV.
507 // To avoid having an operation that generates an illegal type (short
508 // vector), generate a full size vector.
510 SDValue T
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, PredV
);
511 SmallVector
<int,128> Mask(HwLen
);
512 // Scale = BitBytes(PredV) / Given BitBytes.
513 unsigned Scale
= HwLen
/ (PredTy
.getVectorNumElements() * BitBytes
);
514 unsigned BlockLen
= PredTy
.getVectorNumElements() * BitBytes
;
516 for (unsigned i
= 0; i
!= HwLen
; ++i
) {
517 unsigned Num
= i
% Scale
;
518 unsigned Off
= i
/ Scale
;
519 Mask
[BlockLen
*Num
+ Off
] = i
;
521 SDValue S
= DAG
.getVectorShuffle(ByteTy
, dl
, T
, DAG
.getUNDEF(ByteTy
), Mask
);
524 // Fill the bytes beyond BlockLen with 0s.
525 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
526 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
527 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
528 SDValue M
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, Q
);
529 return DAG
.getNode(ISD::AND
, dl
, ByteTy
, S
, M
);
532 // Make sure that this is a valid scalar predicate.
533 assert(PredTy
== MVT::v2i1
|| PredTy
== MVT::v4i1
|| PredTy
== MVT::v8i1
);
// Bytes is the number of bytes per predicate element in the 8-byte form.
535 unsigned Bytes
= 8 / PredTy
.getVectorNumElements();
536 SmallVector
<SDValue
,4> Words
[2];
// Helpers extracting the low / high 32-bit subregister of a value.
539 auto Lo32
= [&DAG
, &dl
] (SDValue P
) {
540 return DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, P
);
542 auto Hi32
= [&DAG
, &dl
] (SDValue P
) {
543 return DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, P
);
546 SDValue W0
= isUndef(PredV
)
547 ? DAG
.getUNDEF(MVT::i64
)
548 : DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, PredV
);
549 Words
[IdxW
].push_back(Hi32(W0
));
550 Words
[IdxW
].push_back(Lo32(W0
));
552 while (Bytes
< BitBytes
) {
557 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
558 SDValue T
= expandPredicate(W
, dl
, DAG
);
559 Words
[IdxW
].push_back(Hi32(T
));
560 Words
[IdxW
].push_back(Lo32(T
));
563 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
564 Words
[IdxW
].push_back(W
);
565 Words
[IdxW
].push_back(W
);
571 assert(Bytes
== BitBytes
);
573 SDValue Vec
= ZeroFill
? getZero(dl
, ByteTy
, DAG
) : DAG
.getUNDEF(ByteTy
);
574 SDValue S4
= DAG
.getConstant(HwLen
-4, dl
, MVT::i32
);
575 for (const SDValue
&W
: Words
[IdxW
]) {
576 Vec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Vec
, S4
);
577 Vec
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, ByteTy
, Vec
, W
);
// Build an HVX vector predicate of type VecTy from the i1 Values. A byte
// vector is assembled (one or more bytes per predicate element), built with
// buildHvxVectorReg and converted to a predicate with a V2Q node. The
// visible code also returns QTRUE / QFALSE nodes directly.
//  - VecLen <= HwLen: every value is zero-extended/truncated to i8 (UNDEF
//    stays UNDEF) and repeated BitBytes = HwLen / VecLen times.
//  - otherwise (VecLen == 8*HwLen per the assert): values are handled in
//    groups of 8; one byte is produced per group from its first non-undef
//    value, and the rest of the group is asserted to match.
// NOTE(review): several lines are missing from this extract (embedded
// numbering gaps, e.g. 598-599, 603-605, 613-615, 620-621, 626, 630-635,
// 640-641, 643-646, 648, 650). In particular the statements that update
// AllT/AllF and the conditions guarding the QTRUE/QFALSE returns are not
// visible -- presumably they test AllT and AllF; confirm against upstream.
584 HexagonTargetLowering::buildHvxVectorPred(ArrayRef
<SDValue
> Values
,
585 const SDLoc
&dl
, MVT VecTy
,
586 SelectionDAG
&DAG
) const {
587 // Construct a vector V of bytes, such that a comparison V >u 0 would
588 // produce the required vector predicate.
589 unsigned VecLen
= Values
.size();
590 unsigned HwLen
= Subtarget
.getVectorLength();
591 assert(VecLen
<= HwLen
|| VecLen
== 8*HwLen
);
592 SmallVector
<SDValue
,128> Bytes
;
593 bool AllT
= true, AllF
= true;
// Helpers classifying a value as a non-zero / zero constant.
595 auto IsTrue
= [] (SDValue V
) {
596 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
597 return !N
->isNullValue();
600 auto IsFalse
= [] (SDValue V
) {
601 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
602 return N
->isNullValue();
606 if (VecLen
<= HwLen
) {
607 // In the hardware, each bit of a vector predicate corresponds to a byte
608 // of a vector register. Calculate how many bytes does a bit of VecTy
610 assert(HwLen
% VecLen
== 0);
611 unsigned BitBytes
= HwLen
/ VecLen
;
612 for (SDValue V
: Values
) {
616 SDValue Ext
= !V
.isUndef() ? DAG
.getZExtOrTrunc(V
, dl
, MVT::i8
)
617 : DAG
.getUNDEF(MVT::i8
);
618 for (unsigned B
= 0; B
!= BitBytes
; ++B
)
619 Bytes
.push_back(Ext
);
622 // There are as many i1 values, as there are bits in a vector register.
623 // Divide the values into groups of 8 and check that each group consists
624 // of the same value (ignoring undefs).
625 for (unsigned I
= 0; I
!= VecLen
; I
+= 8) {
627 // Find the first non-undef value in this group.
628 for (; B
!= 8; ++B
) {
629 if (!Values
[I
+B
].isUndef())
632 SDValue F
= Values
[I
+B
];
636 SDValue Ext
= (B
< 8) ? DAG
.getZExtOrTrunc(F
, dl
, MVT::i8
)
637 : DAG
.getUNDEF(MVT::i8
);
638 Bytes
.push_back(Ext
);
639 // Verify that the rest of values in the group are the same as the
642 assert(Values
[I
+B
].isUndef() || Values
[I
+B
] == F
);
647 return DAG
.getNode(HexagonISD::QTRUE
, dl
, VecTy
);
649 return DAG
.getNode(HexagonISD::QFALSE
, dl
, VecTy
);
// General case: build the byte vector and convert it to a predicate.
651 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
652 SDValue ByteVec
= buildHvxVectorReg(Bytes
, dl
, ByteTy
, DAG
);
653 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
// Extract the element at index IdxV from the HVX vector VecV. The element
// index is converted to a byte index, a 32-bit word is read with a
// VEXTRACTW node, and for elements narrower than 32 bits the element is
// then extracted from that word with extractVector using the in-word index
// from getIndexInWord32. Element widths must be between 8 and 32 bits.
// NOTE(review): the operand list of the VEXTRACTW node (line 667) and the
// statement executed when ElemTy is i32 (line 669, presumably returning
// ExWord) are missing from this extract -- confirm against upstream.
657 HexagonTargetLowering::extractHvxElementReg(SDValue VecV
, SDValue IdxV
,
658 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
659 MVT ElemTy
= ty(VecV
).getVectorElementType();
661 unsigned ElemWidth
= ElemTy
.getSizeInBits();
662 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
665 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
666 SDValue ExWord
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
668 if (ElemTy
== MVT::i32
)
671 // Have an extracted word, need to extract the smaller element out of it.
672 // 1. Extract the bits of (the original) IdxV that correspond to the index
673 // of the desired element in the 32-bit word.
674 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
675 // 2. Extract the element from the word.
676 SDValue ExVec
= DAG
.getBitcast(tyVector(ty(ExWord
), ElemTy
), ExWord
);
677 return extractVector(ExVec
, SubIdx
, dl
, ElemTy
, MVT::i32
, DAG
);
// Extract element IdxV of the HVX predicate VecV as an i1 (ResTy must be
// MVT::i1). The predicate is converted to a byte vector with Q2V, the index
// is multiplied by Scale = HwLen / <number of predicate elements>, the byte
// at that index is read with extractHvxElementReg as an i32, and the result
// is the C2_cmpgtui comparison of that value against 0.
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
681 HexagonTargetLowering::extractHvxElementPred(SDValue VecV
, SDValue IdxV
,
682 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
683 // Implement other return types if necessary.
684 assert(ResTy
== MVT::i1
);
686 unsigned HwLen
= Subtarget
.getVectorLength();
687 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
688 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
// Scale is the number of bytes of ByteVec per predicate element.
690 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
691 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
692 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
694 SDValue ExtB
= extractHvxElementReg(ByteVec
, IdxV
, dl
, MVT::i32
, DAG
);
695 SDValue Zero
= DAG
.getTargetConstant(0, dl
, MVT::i32
);
696 return getInstr(Hexagon::C2_cmpgtui
, dl
, MVT::i1
, {ExtB
, Zero
}, DAG
);
// Insert the scalar ValV as element IdxV of the HVX vector VecV. Element
// widths must be between 8 and 32 bits.
// The InsertWord helper places a 32-bit word at a byte index: the index is
// masked with -4, the vector goes through a VROR by that amount, the word
// is written with VINSERTW0, and a second VROR by (HwLen - masked index) is
// applied.
// For i32 elements InsertWord is used directly. For narrower elements the
// containing word is extracted, the element is inserted into it with
// insertVector, and the updated word is written back with InsertWord.
// NOTE(review): the last parameter line of the InsertWord lambda (709), its
// return statement (719-720) and the tail of the extractHvxElementReg call
// (731) are missing from this extract -- confirm against upstream.
700 HexagonTargetLowering::insertHvxElementReg(SDValue VecV
, SDValue IdxV
,
701 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
702 MVT ElemTy
= ty(VecV
).getVectorElementType();
704 unsigned ElemWidth
= ElemTy
.getSizeInBits();
705 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
708 auto InsertWord
= [&DAG
,&dl
,this] (SDValue VecV
, SDValue ValV
,
710 MVT VecTy
= ty(VecV
);
711 unsigned HwLen
= Subtarget
.getVectorLength();
// Clear the two low bits of the byte index (AND with -4).
712 SDValue MaskV
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
713 {ByteIdxV
, DAG
.getConstant(-4, dl
, MVT::i32
)});
714 SDValue RotV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {VecV
, MaskV
});
715 SDValue InsV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
, {RotV
, ValV
});
716 SDValue SubV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
717 {DAG
.getConstant(HwLen
, dl
, MVT::i32
), MaskV
});
718 SDValue TorV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {InsV
, SubV
});
722 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
723 if (ElemTy
== MVT::i32
)
724 return InsertWord(VecV
, ValV
, ByteIdx
);
726 // If this is not inserting a 32-bit word, convert it into such a thing.
727 // 1. Extract the existing word from the target vector.
728 SDValue WordIdx
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
729 {ByteIdx
, DAG
.getConstant(2, dl
, MVT::i32
)});
730 SDValue Ext
= extractHvxElementReg(opCastElem(VecV
, MVT::i32
, DAG
), WordIdx
,
733 // 2. Treating the extracted word as a 32-bit vector, insert the given
735 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
736 MVT SubVecTy
= tyVector(ty(Ext
), ElemTy
);
737 SDValue Ins
= insertVector(DAG
.getBitcast(SubVecTy
, Ext
),
738 ValV
, SubIdx
, dl
, ElemTy
, DAG
);
740 // 3. Insert the 32-bit word back into the original vector.
741 return InsertWord(VecV
, Ins
, ByteIdx
);
// Insert the value ValV as element IdxV of the HVX predicate VecV. The
// predicate is converted to a byte vector with Q2V, the index is multiplied
// by Scale = HwLen / <number of predicate elements>, ValV is sign-extended
// to i32 and inserted with insertHvxElementReg, and the result is converted
// back to the predicate type of VecV with V2Q.
// NOTE(review): the return-type line and the closing brace are not visible
// in this extract.
745 HexagonTargetLowering::insertHvxElementPred(SDValue VecV
, SDValue IdxV
,
746 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
747 unsigned HwLen
= Subtarget
.getVectorLength();
748 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
749 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
// Scale is the number of bytes of ByteVec per predicate element.
751 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
752 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
753 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
754 ValV
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i32
, ValV
);
756 SDValue InsV
= insertHvxElementReg(ByteVec
, IdxV
, ValV
, dl
, DAG
);
757 return DAG
.getNode(HexagonISD::V2Q
, dl
, ty(VecV
), InsV
);
// Extract a subvector of type ResTy starting at the constant element index
// IdxV from the HVX vector (or vector pair) VecV.
// For a pair, the single vector holding the subvector is selected first
// (vsub_hi when Idx * ElemWidth >= 8*HwLen, with Idx rebased; otherwise
// vsub_lo). The result must then be 32 or 64 bits wide: the vector is viewed
// as i32 words and one word, or two consecutive words joined by COMBINE, is
// extracted with extractHvxElementReg and bitcast to ResTy.
// NOTE(review): several lines are missing from this extract (embedded
// numbering gaps, e.g. 771, 773, 777, 779, 782-785), including the
// declaration of SubIdx, the else branch structure and whatever follows the
// subregister extract -- confirm against upstream.
761 HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV
, SDValue IdxV
,
762 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
763 MVT VecTy
= ty(VecV
);
764 unsigned HwLen
= Subtarget
.getVectorLength();
// cast<> requires IdxV to be a constant node.
765 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
766 MVT ElemTy
= VecTy
.getVectorElementType();
767 unsigned ElemWidth
= ElemTy
.getSizeInBits();
769 // If the source vector is a vector pair, get the single vector containing
770 // the subvector of interest. The subvector will never overlap two single
772 if (isHvxPairTy(VecTy
)) {
774 if (Idx
* ElemWidth
>= 8*HwLen
) {
775 SubIdx
= Hexagon::vsub_hi
;
776 Idx
-= VecTy
.getVectorNumElements() / 2;
778 SubIdx
= Hexagon::vsub_lo
;
780 VecTy
= typeSplit(VecTy
).first
;
781 VecV
= DAG
.getTargetExtractSubreg(SubIdx
, dl
, VecTy
, VecV
);
786 // The only meaningful subvectors of a single HVX vector are those that
787 // fit in a scalar register.
788 assert(ResTy
.getSizeInBits() == 32 || ResTy
.getSizeInBits() == 64);
790 MVT WordTy
= tyVector(VecTy
, MVT::i32
);
791 SDValue WordVec
= DAG
.getBitcast(WordTy
, VecV
);
// Index of the first 32-bit word of the subvector.
792 unsigned WordIdx
= (Idx
*ElemWidth
) / 32;
794 SDValue W0Idx
= DAG
.getConstant(WordIdx
, dl
, MVT::i32
);
795 SDValue W0
= extractHvxElementReg(WordVec
, W0Idx
, dl
, MVT::i32
, DAG
);
796 if (ResTy
.getSizeInBits() == 32)
797 return DAG
.getBitcast(ResTy
, W0
);
// 64-bit result: fetch the next word too and combine (W1 first, W0 second).
799 SDValue W1Idx
= DAG
.getConstant(WordIdx
+1, dl
, MVT::i32
);
800 SDValue W1
= extractHvxElementReg(WordVec
, W1Idx
, dl
, MVT::i32
, DAG
);
801 SDValue WW
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
, {W1
, W0
});
802 return DAG
.getBitcast(ResTy
, WW
);
// Extract a sub-predicate of type ResTy starting at the constant element
// index IdxV from the HVX predicate VecV. The predicate is first converted
// to a byte vector with Q2V.
//  - If ResTy is itself an HVX predicate type: the bytes starting at Offset
//    are each replicated Rep times by a shuffle, and the result is converted
//    back with V2Q.
//  - Otherwise (scalar predicate result): a shuffle gathers the relevant
//    bytes into the low 8 bytes of the vector, the two low words are read
//    with VEXTRACTW and joined into a v8i8 with COMBINE, and the result is
//    the A4_vcmpbgtui comparison of that value against 0.
// NOTE(review): some lines are missing from this extract (embedded numbering
// gaps, e.g. 814, 820, 831, 834-835, 845, 854-856, 860), including closing
// braces; the return-type line is not visible either.
806 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV
, SDValue IdxV
,
807 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
808 MVT VecTy
= ty(VecV
);
809 unsigned HwLen
= Subtarget
.getVectorLength();
810 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
811 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
812 // IdxV is required to be a constant.
813 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
815 unsigned ResLen
= ResTy
.getVectorNumElements();
// BitBytes: bytes of ByteVec per element of VecTy; Offset: first byte of
// the requested subvector.
816 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
817 unsigned Offset
= Idx
* BitBytes
;
818 SDValue Undef
= DAG
.getUNDEF(ByteTy
);
819 SmallVector
<int,128> Mask
;
821 if (Subtarget
.isHVXVectorType(ResTy
, true)) {
822 // Converting between two vector predicates. Since the result is shorter
823 // than the source, it will correspond to a vector predicate with the
824 // relevant bits replicated. The replication count is the ratio of the
825 // source and target vector lengths.
826 unsigned Rep
= VecTy
.getVectorNumElements() / ResLen
;
827 assert(isPowerOf2_32(Rep
) && HwLen
% Rep
== 0);
828 for (unsigned i
= 0; i
!= HwLen
/Rep
; ++i
) {
829 for (unsigned j
= 0; j
!= Rep
; ++j
)
830 Mask
.push_back(i
+ Offset
);
832 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
833 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, ShuffV
);
836 // Converting between a vector predicate and a scalar predicate. In the
837 // vector predicate, a group of BitBytes bits will correspond to a single
838 // i1 element of the source vector type. Those bits will all have the same
839 // value. The same will be true for ByteVec, where each byte corresponds
840 // to a bit in the vector predicate.
841 // The algorithm is to traverse the ByteVec, going over the i1 values from
842 // the source vector, and generate the corresponding representation in an
843 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
844 // elements so that the interesting 8 bytes will be in the low end of the
846 unsigned Rep
= 8 / ResLen
;
847 // Make sure the output fill the entire vector register, so repeat the
848 // 8-byte groups as many times as necessary.
849 for (unsigned r
= 0; r
!= HwLen
/ResLen
; ++r
) {
850 // This will generate the indexes of the 8 interesting bytes.
851 for (unsigned i
= 0; i
!= ResLen
; ++i
) {
852 for (unsigned j
= 0; j
!= Rep
; ++j
)
853 Mask
.push_back(Offset
+ i
*BitBytes
);
857 SDValue Zero
= getZero(dl
, MVT::i32
, DAG
);
858 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
859 // Combine the two low words from ShuffV into a v8i8, and byte-compare
861 SDValue W0
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
, {ShuffV
, Zero
});
862 SDValue W1
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
863 {ShuffV
, DAG
.getConstant(4, dl
, MVT::i32
)});
864 SDValue Vec64
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::v8i8
, {W1
, W0
});
865 return getInstr(Hexagon::A4_vcmpbgtui
, dl
, ResTy
,
866 {Vec64
, DAG
.getTargetConstant(0, dl
, MVT::i32
)}, DAG
);
870 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV
, SDValue SubV
,
871 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
872 MVT VecTy
= ty(VecV
);
873 MVT SubTy
= ty(SubV
);
874 unsigned HwLen
= Subtarget
.getVectorLength();
875 MVT ElemTy
= VecTy
.getVectorElementType();
876 unsigned ElemWidth
= ElemTy
.getSizeInBits();
878 bool IsPair
= isHvxPairTy(VecTy
);
879 MVT SingleTy
= MVT::getVectorVT(ElemTy
, (8*HwLen
)/ElemWidth
);
880 // The two single vectors that VecV consists of, if it's a pair.
882 SDValue SingleV
= VecV
;
886 V0
= DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, SingleTy
, VecV
);
887 V1
= DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, SingleTy
, VecV
);
889 SDValue HalfV
= DAG
.getConstant(SingleTy
.getVectorNumElements(),
891 PickHi
= DAG
.getSetCC(dl
, MVT::i1
, IdxV
, HalfV
, ISD::SETUGT
);
892 if (isHvxSingleTy(SubTy
)) {
893 if (const auto *CN
= dyn_cast
<const ConstantSDNode
>(IdxV
.getNode())) {
894 unsigned Idx
= CN
->getZExtValue();
895 assert(Idx
== 0 || Idx
== VecTy
.getVectorNumElements()/2);
896 unsigned SubIdx
= (Idx
== 0) ? Hexagon::vsub_lo
: Hexagon::vsub_hi
;
897 return DAG
.getTargetInsertSubreg(SubIdx
, dl
, VecTy
, VecV
, SubV
);
899 // If IdxV is not a constant, generate the two variants: with the
900 // SubV as the high and as the low subregister, and select the right
901 // pair based on the IdxV.
902 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SubV
, V1
});
903 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SubV
});
904 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
906 // The subvector being inserted must be entirely contained in one of
907 // the vectors V0 or V1. Set SingleV to the correct one, and update
908 // IdxV to be the index relative to the beginning of that vector.
909 SDValue S
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, IdxV
, HalfV
);
910 IdxV
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, PickHi
, S
, IdxV
);
911 SingleV
= DAG
.getNode(ISD::SELECT
, dl
, SingleTy
, PickHi
, V1
, V0
);
914 // The only meaningful subvectors of a single HVX vector are those that
915 // fit in a scalar register.
916 assert(SubTy
.getSizeInBits() == 32 || SubTy
.getSizeInBits() == 64);
917 // Convert IdxV to be index in bytes.
918 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
919 if (!IdxN
|| !IdxN
->isNullValue()) {
920 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
921 DAG
.getConstant(ElemWidth
/8, dl
, MVT::i32
));
922 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, IdxV
);
924 // When inserting a single word, the rotation back to the original position
925 // would be by HwLen-Idx, but if two words are inserted, it will need to be
927 unsigned RolBase
= HwLen
;
928 if (VecTy
.getSizeInBits() == 32) {
929 SDValue V
= DAG
.getBitcast(MVT::i32
, SubV
);
930 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, V
);
932 SDValue V
= DAG
.getBitcast(MVT::i64
, SubV
);
933 SDValue R0
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, V
);
934 SDValue R1
= DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, V
);
935 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R0
);
936 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
,
937 DAG
.getConstant(4, dl
, MVT::i32
));
938 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R1
);
941 // If the vector wasn't ror'ed, don't ror it back.
942 if (RolBase
!= 4 || !IdxN
|| !IdxN
->isNullValue()) {
943 SDValue RolV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
944 DAG
.getConstant(RolBase
, dl
, MVT::i32
), IdxV
);
945 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, RolV
);
949 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SingleV
, V1
});
950 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SingleV
});
951 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
957 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV
, SDValue SubV
,
958 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
959 MVT VecTy
= ty(VecV
);
960 MVT SubTy
= ty(SubV
);
961 assert(Subtarget
.isHVXVectorType(VecTy
, true));
962 // VecV is an HVX vector predicate. SubV may be either an HVX vector
963 // predicate as well, or it can be a scalar predicate.
965 unsigned VecLen
= VecTy
.getVectorNumElements();
966 unsigned HwLen
= Subtarget
.getVectorLength();
967 assert(HwLen
% VecLen
== 0 && "Unexpected vector type");
969 unsigned Scale
= VecLen
/ SubTy
.getVectorNumElements();
970 unsigned BitBytes
= HwLen
/ VecLen
;
971 unsigned BlockLen
= HwLen
/ Scale
;
973 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
974 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
975 SDValue ByteSub
= createHvxPrefixPred(SubV
, dl
, BitBytes
, false, DAG
);
978 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
979 if (!IdxN
|| !IdxN
->isNullValue()) {
980 ByteIdx
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
981 DAG
.getConstant(BitBytes
, dl
, MVT::i32
));
982 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteIdx
);
985 // ByteVec is the target vector VecV rotated in such a way that the
986 // subvector should be inserted at index 0. Generate a predicate mask
987 // and use vmux to do the insertion.
988 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
989 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
990 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
991 ByteVec
= getInstr(Hexagon::V6_vmux
, dl
, ByteTy
, {Q
, ByteSub
, ByteVec
}, DAG
);
992 // Rotate ByteVec back, and convert to a vector predicate.
993 if (!IdxN
|| !IdxN
->isNullValue()) {
994 SDValue HwLenV
= DAG
.getConstant(HwLen
, dl
, MVT::i32
);
995 SDValue ByteXdi
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, HwLenV
, ByteIdx
);
996 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteXdi
);
998 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
1002 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV
, const SDLoc
&dl
,
1003 MVT ResTy
, bool ZeroExt
, SelectionDAG
&DAG
) const {
1004 // Sign- and any-extending of a vector predicate to a vector register is
1005 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1006 // a vector of 1s (where the 1s are of type matching the vector type).
1007 assert(Subtarget
.isHVXVectorType(ResTy
));
1009 return DAG
.getNode(HexagonISD::Q2V
, dl
, ResTy
, VecV
);
1011 assert(ty(VecV
).getVectorNumElements() == ResTy
.getVectorNumElements());
1012 SDValue True
= DAG
.getNode(HexagonISD::VSPLAT
, dl
, ResTy
,
1013 DAG
.getConstant(1, dl
, MVT::i32
));
1014 SDValue False
= getZero(dl
, ResTy
, DAG
);
1015 return DAG
.getSelect(dl
, ResTy
, VecV
, True
, False
);
1019 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op
, SelectionDAG
&DAG
)
1021 const SDLoc
&dl(Op
);
1024 unsigned Size
= Op
.getNumOperands();
1025 SmallVector
<SDValue
,128> Ops
;
1026 for (unsigned i
= 0; i
!= Size
; ++i
)
1027 Ops
.push_back(Op
.getOperand(i
));
1029 if (VecTy
.getVectorElementType() == MVT::i1
)
1030 return buildHvxVectorPred(Ops
, dl
, VecTy
, DAG
);
1032 if (VecTy
.getSizeInBits() == 16*Subtarget
.getVectorLength()) {
1033 ArrayRef
<SDValue
> A(Ops
);
1034 MVT SingleTy
= typeSplit(VecTy
).first
;
1035 SDValue V0
= buildHvxVectorReg(A
.take_front(Size
/2), dl
, SingleTy
, DAG
);
1036 SDValue V1
= buildHvxVectorReg(A
.drop_front(Size
/2), dl
, SingleTy
, DAG
);
1037 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, V0
, V1
);
1040 return buildHvxVectorReg(Ops
, dl
, VecTy
, DAG
);
1044 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op
, SelectionDAG
&DAG
)
1046 // Vector concatenation of two integer (non-bool) vectors does not need
1047 // special lowering. Custom-lower concats of bool vectors and expand
1048 // concats of more than 2 vectors.
1050 const SDLoc
&dl(Op
);
1051 unsigned NumOp
= Op
.getNumOperands();
1052 if (VecTy
.getVectorElementType() != MVT::i1
) {
1055 // Expand the other cases into a build-vector.
1056 SmallVector
<SDValue
,8> Elems
;
1057 for (SDValue V
: Op
.getNode()->ops())
1058 DAG
.ExtractVectorElements(V
, Elems
);
1059 // A vector of i16 will be broken up into a build_vector of i16's.
1060 // This is a problem, since at the time of operation legalization,
1061 // all operations are expected to be type-legalized, and i16 is not
1062 // a legal type. If any of the extracted elements is not of a valid
1063 // type, sign-extend it to a valid one.
1064 for (unsigned i
= 0, e
= Elems
.size(); i
!= e
; ++i
) {
1065 SDValue V
= Elems
[i
];
1067 if (!isTypeLegal(Ty
)) {
1068 EVT NTy
= getTypeToTransformTo(*DAG
.getContext(), Ty
);
1069 if (V
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
) {
1070 Elems
[i
] = DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, NTy
,
1071 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, NTy
,
1072 V
.getOperand(0), V
.getOperand(1)),
1073 DAG
.getValueType(Ty
));
1076 // A few less complicated cases.
1077 if (V
.getOpcode() == ISD::Constant
)
1078 Elems
[i
] = DAG
.getSExtOrTrunc(V
, dl
, NTy
);
1079 else if (V
.isUndef())
1080 Elems
[i
] = DAG
.getUNDEF(NTy
);
1082 llvm_unreachable("Unexpected vector element");
1085 return DAG
.getBuildVector(VecTy
, dl
, Elems
);
1088 assert(VecTy
.getVectorElementType() == MVT::i1
);
1089 unsigned HwLen
= Subtarget
.getVectorLength();
1090 assert(isPowerOf2_32(NumOp
) && HwLen
% NumOp
== 0);
1092 SDValue Op0
= Op
.getOperand(0);
1094 // If the operands are HVX types (i.e. not scalar predicates), then
1095 // defer the concatenation, and create QCAT instead.
1096 if (Subtarget
.isHVXVectorType(ty(Op0
), true)) {
1098 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, Op0
, Op
.getOperand(1));
1100 ArrayRef
<SDUse
> U(Op
.getNode()->ops());
1101 SmallVector
<SDValue
,4> SV(U
.begin(), U
.end());
1102 ArrayRef
<SDValue
> Ops(SV
);
1104 MVT HalfTy
= typeSplit(VecTy
).first
;
1105 SDValue V0
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1106 Ops
.take_front(NumOp
/2));
1107 SDValue V1
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1108 Ops
.take_back(NumOp
/2));
1109 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, V0
, V1
);
1112 // Count how many bytes (in a vector register) each bit in VecTy
1114 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
1116 SmallVector
<SDValue
,8> Prefixes
;
1117 for (SDValue V
: Op
.getNode()->op_values()) {
1118 SDValue P
= createHvxPrefixPred(V
, dl
, BitBytes
, true, DAG
);
1119 Prefixes
.push_back(P
);
1122 unsigned InpLen
= ty(Op
.getOperand(0)).getVectorNumElements();
1123 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1124 SDValue S
= DAG
.getConstant(InpLen
*BitBytes
, dl
, MVT::i32
);
1125 SDValue Res
= getZero(dl
, ByteTy
, DAG
);
1126 for (unsigned i
= 0, e
= Prefixes
.size(); i
!= e
; ++i
) {
1127 Res
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Res
, S
);
1128 Res
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, Res
, Prefixes
[e
-i
-1]);
1130 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, Res
);
1134 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op
, SelectionDAG
&DAG
)
1136 // Change the type of the extracted element to i32.
1137 SDValue VecV
= Op
.getOperand(0);
1138 MVT ElemTy
= ty(VecV
).getVectorElementType();
1139 const SDLoc
&dl(Op
);
1140 SDValue IdxV
= Op
.getOperand(1);
1141 if (ElemTy
== MVT::i1
)
1142 return extractHvxElementPred(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1144 return extractHvxElementReg(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1148 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op
, SelectionDAG
&DAG
)
1150 const SDLoc
&dl(Op
);
1151 SDValue VecV
= Op
.getOperand(0);
1152 SDValue ValV
= Op
.getOperand(1);
1153 SDValue IdxV
= Op
.getOperand(2);
1154 MVT ElemTy
= ty(VecV
).getVectorElementType();
1155 if (ElemTy
== MVT::i1
)
1156 return insertHvxElementPred(VecV
, IdxV
, ValV
, dl
, DAG
);
1158 return insertHvxElementReg(VecV
, IdxV
, ValV
, dl
, DAG
);
1162 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op
, SelectionDAG
&DAG
)
1164 SDValue SrcV
= Op
.getOperand(0);
1165 MVT SrcTy
= ty(SrcV
);
1167 SDValue IdxV
= Op
.getOperand(1);
1168 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
1169 assert(Idx
% DstTy
.getVectorNumElements() == 0);
1171 const SDLoc
&dl(Op
);
1173 MVT ElemTy
= SrcTy
.getVectorElementType();
1174 if (ElemTy
== MVT::i1
)
1175 return extractHvxSubvectorPred(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1177 return extractHvxSubvectorReg(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1181 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op
, SelectionDAG
&DAG
)
1183 // Idx does not need to be a constant.
1184 SDValue VecV
= Op
.getOperand(0);
1185 SDValue ValV
= Op
.getOperand(1);
1186 SDValue IdxV
= Op
.getOperand(2);
1188 const SDLoc
&dl(Op
);
1189 MVT VecTy
= ty(VecV
);
1190 MVT ElemTy
= VecTy
.getVectorElementType();
1191 if (ElemTy
== MVT::i1
)
1192 return insertHvxSubvectorPred(VecV
, ValV
, IdxV
, dl
, DAG
);
1194 return insertHvxSubvectorReg(VecV
, ValV
, IdxV
, dl
, DAG
);
1198 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op
, SelectionDAG
&DAG
) const {
1199 // Lower any-extends of boolean vectors to sign-extends, since they
1200 // translate directly to Q2V. Zero-extending could also be done equally
1201 // fast, but Q2V is used/recognized in more places.
1202 // For all other vectors, use zero-extend.
1204 SDValue InpV
= Op
.getOperand(0);
1205 MVT ElemTy
= ty(InpV
).getVectorElementType();
1206 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1207 return LowerHvxSignExt(Op
, DAG
);
1208 return DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(Op
), ResTy
, InpV
);
1212 HexagonTargetLowering::LowerHvxSignExt(SDValue Op
, SelectionDAG
&DAG
) const {
1214 SDValue InpV
= Op
.getOperand(0);
1215 MVT ElemTy
= ty(InpV
).getVectorElementType();
1216 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1217 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), false, DAG
);
1222 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op
, SelectionDAG
&DAG
) const {
1224 SDValue InpV
= Op
.getOperand(0);
1225 MVT ElemTy
= ty(InpV
).getVectorElementType();
1226 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1227 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), true, DAG
);
1232 HexagonTargetLowering::LowerHvxCttz(SDValue Op
, SelectionDAG
&DAG
) const {
1233 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1234 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1235 const SDLoc
&dl(Op
);
1237 SDValue InpV
= Op
.getOperand(0);
1238 assert(ResTy
== ty(InpV
));
1240 // Calculate the vectors of 1 and bitwidth(x).
1241 MVT ElemTy
= ty(InpV
).getVectorElementType();
1242 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1243 // Using uint64_t because a shift by 32 can happen.
1244 uint64_t Splat1
= 0, SplatW
= 0;
1245 assert(isPowerOf2_32(ElemWidth
) && ElemWidth
<= 32);
1246 for (unsigned i
= 0; i
!= 32/ElemWidth
; ++i
) {
1247 Splat1
= (Splat1
<< ElemWidth
) | 1;
1248 SplatW
= (SplatW
<< ElemWidth
) | ElemWidth
;
1250 SDValue Vec1
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1251 DAG
.getConstant(uint32_t(Splat1
), dl
, MVT::i32
));
1252 SDValue VecW
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1253 DAG
.getConstant(uint32_t(SplatW
), dl
, MVT::i32
));
1254 SDValue VecN1
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1255 DAG
.getConstant(-1, dl
, MVT::i32
));
1256 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1257 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1258 // it separately in custom combine or selection).
1259 SDValue A
= DAG
.getNode(ISD::AND
, dl
, ResTy
,
1260 {DAG
.getNode(ISD::XOR
, dl
, ResTy
, {InpV
, VecN1
}),
1261 DAG
.getNode(ISD::SUB
, dl
, ResTy
, {InpV
, Vec1
})});
1262 return DAG
.getNode(ISD::SUB
, dl
, ResTy
,
1263 {VecW
, DAG
.getNode(ISD::CTLZ
, dl
, ResTy
, A
)});
1267 HexagonTargetLowering::LowerHvxMul(SDValue Op
, SelectionDAG
&DAG
) const {
1269 assert(ResTy
.isVector() && isHvxSingleTy(ResTy
));
1270 const SDLoc
&dl(Op
);
1271 SmallVector
<int,256> ShuffMask
;
1273 MVT ElemTy
= ResTy
.getVectorElementType();
1274 unsigned VecLen
= ResTy
.getVectorNumElements();
1275 SDValue Vs
= Op
.getOperand(0);
1276 SDValue Vt
= Op
.getOperand(1);
1278 switch (ElemTy
.SimpleTy
) {
1280 // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1281 // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1282 // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1283 MVT ExtTy
= typeExtElem(ResTy
, 2);
1284 unsigned MpyOpc
= ElemTy
== MVT::i8
? Hexagon::V6_vmpybv
1285 : Hexagon::V6_vmpyhv
;
1286 SDValue M
= getInstr(MpyOpc
, dl
, ExtTy
, {Vs
, Vt
}, DAG
);
1288 // Discard high halves of the resulting values, collect the low halves.
1289 for (unsigned I
= 0; I
< VecLen
; I
+= 2) {
1290 ShuffMask
.push_back(I
); // Pick even element.
1291 ShuffMask
.push_back(I
+VecLen
); // Pick odd element.
1293 VectorPair P
= opSplit(opCastElem(M
, ElemTy
, DAG
), dl
, DAG
);
1294 SDValue BS
= getByteShuffle(dl
, P
.first
, P
.second
, ShuffMask
, DAG
);
1295 return DAG
.getBitcast(ResTy
, BS
);
1298 // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
1299 // (There is also V6_vmpyhv, which behaves in an analogous way to
1301 return getInstr(Hexagon::V6_vmpyih
, dl
, ResTy
, {Vs
, Vt
}, DAG
);
1303 // Use the following sequence for signed word multiply:
1304 // T0 = V6_vmpyiowh Vs, Vt
1305 // T1 = V6_vaslw T0, 16
1306 // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
1307 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
1308 SDValue T0
= getInstr(Hexagon::V6_vmpyiowh
, dl
, ResTy
, {Vs
, Vt
}, DAG
);
1309 SDValue T1
= getInstr(Hexagon::V6_vaslw
, dl
, ResTy
, {T0
, S16
}, DAG
);
1310 SDValue T2
= getInstr(Hexagon::V6_vmpyiewuh_acc
, dl
, ResTy
,
1321 HexagonTargetLowering::LowerHvxMulh(SDValue Op
, SelectionDAG
&DAG
) const {
1323 assert(ResTy
.isVector());
1324 const SDLoc
&dl(Op
);
1325 SmallVector
<int,256> ShuffMask
;
1327 MVT ElemTy
= ResTy
.getVectorElementType();
1328 unsigned VecLen
= ResTy
.getVectorNumElements();
1329 SDValue Vs
= Op
.getOperand(0);
1330 SDValue Vt
= Op
.getOperand(1);
1331 bool IsSigned
= Op
.getOpcode() == ISD::MULHS
;
1333 if (ElemTy
== MVT::i8
|| ElemTy
== MVT::i16
) {
1334 // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1335 // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1336 // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1337 // For i16, use V6_vmpyhv, which behaves in an analogous way to
1338 // V6_vmpybv: results Lo and Hi are products of even/odd elements
1340 MVT ExtTy
= typeExtElem(ResTy
, 2);
1341 unsigned MpyOpc
= ElemTy
== MVT::i8
1342 ? (IsSigned
? Hexagon::V6_vmpybv
: Hexagon::V6_vmpyubv
)
1343 : (IsSigned
? Hexagon::V6_vmpyhv
: Hexagon::V6_vmpyuhv
);
1344 SDValue M
= getInstr(MpyOpc
, dl
, ExtTy
, {Vs
, Vt
}, DAG
);
1346 // Discard low halves of the resulting values, collect the high halves.
1347 for (unsigned I
= 0; I
< VecLen
; I
+= 2) {
1348 ShuffMask
.push_back(I
+1); // Pick even element.
1349 ShuffMask
.push_back(I
+VecLen
+1); // Pick odd element.
1351 VectorPair P
= opSplit(opCastElem(M
, ElemTy
, DAG
), dl
, DAG
);
1352 SDValue BS
= getByteShuffle(dl
, P
.first
, P
.second
, ShuffMask
, DAG
);
1353 return DAG
.getBitcast(ResTy
, BS
);
1356 assert(ElemTy
== MVT::i32
);
1357 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
1361 // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1362 // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
1363 // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1364 // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
1365 // + Lo(Vs) *us Vt] >> 32
1366 // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
1367 // anything, so it cannot produce any carry over to higher bits),
1368 // so everything in [] can be shifted by 16 without loss of precision.
1369 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
1370 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
1371 // Denote Hi(Vs) = Vs':
1372 // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
1373 // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
1374 SDValue T0
= getInstr(Hexagon::V6_vmpyewuh
, dl
, ResTy
, {Vt
, Vs
}, DAG
);
1376 SDValue S0
= getInstr(Hexagon::V6_vasrw
, dl
, ResTy
, {Vs
, S16
}, DAG
);
1377 SDValue T1
= getInstr(Hexagon::V6_vmpyiewuh_acc
, dl
, ResTy
,
1380 SDValue S2
= getInstr(Hexagon::V6_vasrw
, dl
, ResTy
, {T1
, S16
}, DAG
);
1382 SDValue T2
= getInstr(Hexagon::V6_vmpyiowh
, dl
, ResTy
, {S0
, Vt
}, DAG
);
1384 SDValue T3
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {S2
, T2
});
1388 // Unsigned mulhw. (Would expansion using signed mulhw be better?)
1390 auto LoVec
= [&DAG
,ResTy
,dl
] (SDValue Pair
) {
1391 return DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, ResTy
, Pair
);
1393 auto HiVec
= [&DAG
,ResTy
,dl
] (SDValue Pair
) {
1394 return DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, ResTy
, Pair
);
1397 MVT PairTy
= typeJoin({ResTy
, ResTy
});
1398 SDValue P
= getInstr(Hexagon::V6_lvsplatw
, dl
, ResTy
,
1399 {DAG
.getConstant(0x02020202, dl
, MVT::i32
)}, DAG
);
1400 // Multiply-unsigned halfwords:
1401 // LoVec = Vs.uh[2i] * Vt.uh[2i],
1402 // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
1403 SDValue T0
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {Vs
, Vt
}, DAG
);
1404 // The low halves in the LoVec of the pair can be discarded. They are
1405 // not added to anything (in the full-precision product), so they cannot
1406 // produce a carry into the higher bits.
1407 SDValue T1
= getInstr(Hexagon::V6_vlsrw
, dl
, ResTy
, {LoVec(T0
), S16
}, DAG
);
1408 // Swap low and high halves in Vt, and do the halfword multiplication
1409 // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
1410 SDValue D0
= getInstr(Hexagon::V6_vdelta
, dl
, ResTy
, {Vt
, P
}, DAG
);
1411 SDValue T2
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {Vs
, D0
}, DAG
);
1412 // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
1413 // These products are words, but cannot be added directly because the
1414 // sums could overflow. Add these products, by halfwords, where each sum
1415 // of a pair of halfwords gives a word.
1416 SDValue T3
= getInstr(Hexagon::V6_vadduhw
, dl
, PairTy
,
1417 {LoVec(T2
), HiVec(T2
)}, DAG
);
1418 // Add the high halfwords from the products of the low halfwords.
1419 SDValue T4
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {T1
, LoVec(T3
)});
1420 SDValue T5
= getInstr(Hexagon::V6_vlsrw
, dl
, ResTy
, {T4
, S16
}, DAG
);
1421 SDValue T6
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {HiVec(T0
), HiVec(T3
)});
1422 SDValue T7
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {T5
, T6
});
1427 HexagonTargetLowering::LowerHvxExtend(SDValue Op
, SelectionDAG
&DAG
) const {
1428 // Sign- and zero-extends are legal.
1429 assert(Op
.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
);
1430 return DAG
.getNode(ISD::ZERO_EXTEND_VECTOR_INREG
, SDLoc(Op
), ty(Op
),
1435 HexagonTargetLowering::LowerHvxShift(SDValue Op
, SelectionDAG
&DAG
) const {
1436 if (SDValue S
= getVectorShiftByInt(Op
, DAG
))
1442 HexagonTargetLowering::SplitHvxPairOp(SDValue Op
, SelectionDAG
&DAG
) const {
1443 assert(!Op
.isMachineOpcode());
1444 SmallVector
<SDValue
,2> OpsL
, OpsH
;
1445 const SDLoc
&dl(Op
);
1447 auto SplitVTNode
= [&DAG
,this] (const VTSDNode
*N
) {
1448 MVT Ty
= typeSplit(N
->getVT().getSimpleVT()).first
;
1449 SDValue TV
= DAG
.getValueType(Ty
);
1450 return std::make_pair(TV
, TV
);
1453 for (SDValue A
: Op
.getNode()->ops()) {
1454 VectorPair P
= Subtarget
.isHVXVectorType(ty(A
), true)
1455 ? opSplit(A
, dl
, DAG
)
1456 : std::make_pair(A
, A
);
1457 // Special case for type operand.
1458 if (Op
.getOpcode() == ISD::SIGN_EXTEND_INREG
) {
1459 if (const auto *N
= dyn_cast
<const VTSDNode
>(A
.getNode()))
1462 OpsL
.push_back(P
.first
);
1463 OpsH
.push_back(P
.second
);
1467 MVT HalfTy
= typeSplit(ResTy
).first
;
1468 SDValue L
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsL
);
1469 SDValue H
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsH
);
1470 SDValue S
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, ResTy
, L
, H
);
1475 HexagonTargetLowering::SplitHvxMemOp(SDValue Op
, SelectionDAG
&DAG
) const {
1476 LSBaseSDNode
*BN
= cast
<LSBaseSDNode
>(Op
.getNode());
1477 assert(BN
->isUnindexed());
1478 MVT MemTy
= BN
->getMemoryVT().getSimpleVT();
1479 if (!isHvxPairTy(MemTy
))
1482 const SDLoc
&dl(Op
);
1483 unsigned HwLen
= Subtarget
.getVectorLength();
1484 MVT SingleTy
= typeSplit(MemTy
).first
;
1485 SDValue Chain
= BN
->getChain();
1486 SDValue Base0
= BN
->getBasePtr();
1487 SDValue Base1
= DAG
.getMemBasePlusOffset(Base0
, HwLen
, dl
);
1489 MachineMemOperand
*MOp0
= nullptr, *MOp1
= nullptr;
1490 if (MachineMemOperand
*MMO
= BN
->getMemOperand()) {
1491 MachineFunction
&MF
= DAG
.getMachineFunction();
1492 MOp0
= MF
.getMachineMemOperand(MMO
, 0, HwLen
);
1493 MOp1
= MF
.getMachineMemOperand(MMO
, HwLen
, HwLen
);
1496 unsigned MemOpc
= BN
->getOpcode();
1499 if (MemOpc
== ISD::LOAD
) {
1500 SDValue Load0
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base0
, MOp0
);
1501 SDValue Load1
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base1
, MOp1
);
1502 NewOp
= DAG
.getMergeValues(
1503 { DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, MemTy
, Load0
, Load1
),
1504 DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1505 Load0
.getValue(1), Load1
.getValue(1)) }, dl
);
1507 assert(MemOpc
== ISD::STORE
);
1508 VectorPair Vals
= opSplit(cast
<StoreSDNode
>(Op
)->getValue(), dl
, DAG
);
1509 SDValue Store0
= DAG
.getStore(Chain
, dl
, Vals
.first
, Base0
, MOp0
);
1510 SDValue Store1
= DAG
.getStore(Chain
, dl
, Vals
.second
, Base1
, MOp1
);
1511 NewOp
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Store0
, Store1
);
1518 HexagonTargetLowering::LowerHvxOperation(SDValue Op
, SelectionDAG
&DAG
) const {
1519 unsigned Opc
= Op
.getOpcode();
1520 bool IsPairOp
= isHvxPairTy(ty(Op
)) ||
1521 llvm::any_of(Op
.getNode()->ops(), [this] (SDValue V
) {
1522 return isHvxPairTy(ty(V
));
1531 return SplitHvxMemOp(Op
, DAG
);
1546 case ISD::SIGN_EXTEND
:
1547 case ISD::ZERO_EXTEND
:
1548 case ISD::SIGN_EXTEND_INREG
:
1549 return SplitHvxPairOp(Op
, DAG
);
1556 case ISD::BUILD_VECTOR
: return LowerHvxBuildVector(Op
, DAG
);
1557 case ISD::CONCAT_VECTORS
: return LowerHvxConcatVectors(Op
, DAG
);
1558 case ISD::INSERT_SUBVECTOR
: return LowerHvxInsertSubvector(Op
, DAG
);
1559 case ISD::INSERT_VECTOR_ELT
: return LowerHvxInsertElement(Op
, DAG
);
1560 case ISD::EXTRACT_SUBVECTOR
: return LowerHvxExtractSubvector(Op
, DAG
);
1561 case ISD::EXTRACT_VECTOR_ELT
: return LowerHvxExtractElement(Op
, DAG
);
1563 case ISD::ANY_EXTEND
: return LowerHvxAnyExt(Op
, DAG
);
1564 case ISD::SIGN_EXTEND
: return LowerHvxSignExt(Op
, DAG
);
1565 case ISD::ZERO_EXTEND
: return LowerHvxZeroExt(Op
, DAG
);
1566 case ISD::CTTZ
: return LowerHvxCttz(Op
, DAG
);
1569 case ISD::SRL
: return LowerHvxShift(Op
, DAG
);
1570 case ISD::MUL
: return LowerHvxMul(Op
, DAG
);
1572 case ISD::MULHU
: return LowerHvxMulh(Op
, DAG
);
1573 case ISD::ANY_EXTEND_VECTOR_INREG
: return LowerHvxExtend(Op
, DAG
);
1575 case ISD::INTRINSIC_VOID
: return Op
;
1576 // Unaligned loads will be handled by the default lowering.
1577 case ISD::LOAD
: return SDValue();
1582 llvm_unreachable("Unhandled HVX operation");
1586 HexagonTargetLowering::PerformHvxDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
)
1591 unsigned Opc
= Op
.getOpcode();
1592 if (Opc
== ISD::VSELECT
) {
1593 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
1594 SDValue Cond
= Op
.getOperand(0);
1595 if (Cond
->getOpcode() == ISD::XOR
) {
1596 SDValue C0
= Cond
.getOperand(0), C1
= Cond
.getOperand(1);
1597 if (C1
->getOpcode() == HexagonISD::QTRUE
) {
1598 SDValue VSel
= DCI
.DAG
.getNode(ISD::VSELECT
, dl
, ty(Op
), C0
,
1599 Op
.getOperand(2), Op
.getOperand(1));
1608 HexagonTargetLowering::isHvxOperation(SDValue Op
) const {
1609 // If the type of the result, or any operand type are HVX vector types,
1610 // this is an HVX operation.
1611 return Subtarget
.isHVXVectorType(ty(Op
), true) ||
1612 llvm::any_of(Op
.getNode()->ops(),
1613 [this] (SDValue V
) {
1614 return Subtarget
.isHVXVectorType(ty(V
), true);