1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/Support/CommandLine.h"
// Tables of the integer vector types handled by the HVX lowering below.
// The "V" tables hold the types registered with HvxVRRegClass (single
// vectors), the "W" tables the types registered with HvxWRRegClass (vector
// pairs). The 64/128 suffix selects the table used when the subtarget
// reports useHVX64BOps() or not, respectively.

// Single-vector types, 64-byte mode.
16 static const MVT LegalV64
[] = { MVT::v64i8
, MVT::v32i16
, MVT::v16i32
};
// Vector-pair types, 64-byte mode.
17 static const MVT LegalW64
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
// Single-vector types, 128-byte mode.
18 static const MVT LegalV128
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
// Vector-pair types, 128-byte mode.
19 static const MVT LegalW128
[] = { MVT::v256i8
, MVT::v128i16
, MVT::v64i32
};
// Registers the HVX value types with their register classes and records,
// per type, how each ISD operation is handled (Legal, Custom, Promote) and
// which condition codes are expanded.
//
// The set of types depends on the HVX mode reported by the subtarget:
// useHVX64BOps() selects the 64-byte tables, useHVX128BOps() the 128-byte
// ones. Integer vectors go to HvxVRRegClass (single) or HvxWRRegClass
// (pair); i1 vectors go to HvxQRRegClass.
23 HexagonTargetLowering::initializeHVXLowering() {
24 if (Subtarget
.useHVX64BOps()) {
// 64-byte mode: single vectors, then vector pairs.
25 addRegisterClass(MVT::v64i8
, &Hexagon::HvxVRRegClass
);
26 addRegisterClass(MVT::v32i16
, &Hexagon::HvxVRRegClass
);
27 addRegisterClass(MVT::v16i32
, &Hexagon::HvxVRRegClass
);
28 addRegisterClass(MVT::v128i8
, &Hexagon::HvxWRRegClass
);
29 addRegisterClass(MVT::v64i16
, &Hexagon::HvxWRRegClass
);
30 addRegisterClass(MVT::v32i32
, &Hexagon::HvxWRRegClass
);
31 // These "short" boolean vector types should be legal because
32 // they will appear as results of vector compares. If they were
33 // not legal, type legalization would try to make them legal
34 // and that would require using operations that do not use or
35 // produce such types. That, in turn, would imply using custom
36 // nodes, which would be unoptimizable by the DAG combiner.
37 // The idea is to rely on target-independent operations as much as possible.
39 addRegisterClass(MVT::v16i1
, &Hexagon::HvxQRRegClass
);
40 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
41 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
42 addRegisterClass(MVT::v512i1
, &Hexagon::HvxQRRegClass
);
43 } else if (Subtarget
.useHVX128BOps()) {
// 128-byte mode: the same layout with every type twice as long.
44 addRegisterClass(MVT::v128i8
, &Hexagon::HvxVRRegClass
);
45 addRegisterClass(MVT::v64i16
, &Hexagon::HvxVRRegClass
);
46 addRegisterClass(MVT::v32i32
, &Hexagon::HvxVRRegClass
);
47 addRegisterClass(MVT::v256i8
, &Hexagon::HvxWRRegClass
);
48 addRegisterClass(MVT::v128i16
, &Hexagon::HvxWRRegClass
);
49 addRegisterClass(MVT::v64i32
, &Hexagon::HvxWRRegClass
);
50 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
51 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
52 addRegisterClass(MVT::v128i1
, &Hexagon::HvxQRRegClass
);
53 addRegisterClass(MVT::v1024i1
, &Hexagon::HvxQRRegClass
);
56 // Set up operation actions.
// Pick the type tables and the byte-vector types for the active mode.
58 bool Use64b
= Subtarget
.useHVX64BOps();
59 ArrayRef
<MVT
> LegalV
= Use64b
? LegalV64
: LegalV128
;
60 ArrayRef
<MVT
> LegalW
= Use64b
? LegalW64
: LegalW128
;
61 MVT ByteV
= Use64b
? MVT::v64i8
: MVT::v128i8
;
62 MVT ByteW
= Use64b
? MVT::v128i8
: MVT::v256i8
;
// Helper: mark Opc on FromTy as Promote and record ToTy as the type it is
// promoted to.
64 auto setPromoteTo
= [this] (unsigned Opc
, MVT FromTy
, MVT ToTy
) {
65 setOperationAction(Opc
, FromTy
, Promote
);
66 AddPromotedToType(Opc
, FromTy
, ToTy
);
// Shuffles are Legal on the byte-vector types. NOTE(review): the loops
// below also call setPromoteTo(VECTOR_SHUFFLE, T, ByteV/ByteW) for every T,
// which includes the byte types themselves; whether a guard for that case
// exists is not visible here.
69 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteV
, Legal
);
70 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteW
, Legal
);
// Actions for the single-vector types.
72 for (MVT T
: LegalV
) {
73 setIndexedLoadAction(ISD::POST_INC
, T
, Legal
);
74 setIndexedStoreAction(ISD::POST_INC
, T
, Legal
);
76 setOperationAction(ISD::AND
, T
, Legal
);
77 setOperationAction(ISD::OR
, T
, Legal
);
78 setOperationAction(ISD::XOR
, T
, Legal
);
79 setOperationAction(ISD::ADD
, T
, Legal
);
80 setOperationAction(ISD::SUB
, T
, Legal
);
81 setOperationAction(ISD::CTPOP
, T
, Legal
);
82 setOperationAction(ISD::CTLZ
, T
, Legal
);
84 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
85 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
86 setOperationAction(ISD::BSWAP
, T
, Legal
);
// The operations from here on are custom-lowered for single vectors.
89 setOperationAction(ISD::CTTZ
, T
, Custom
);
90 setOperationAction(ISD::LOAD
, T
, Custom
);
91 setOperationAction(ISD::MUL
, T
, Custom
);
92 setOperationAction(ISD::MULHS
, T
, Custom
);
93 setOperationAction(ISD::MULHU
, T
, Custom
);
94 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
95 // Make concat-vectors custom to handle concats of more than 2 vectors.
96 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
97 setOperationAction(ISD::INSERT_SUBVECTOR
, T
, Custom
);
98 setOperationAction(ISD::INSERT_VECTOR_ELT
, T
, Custom
);
99 setOperationAction(ISD::EXTRACT_SUBVECTOR
, T
, Custom
);
100 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, T
, Custom
);
101 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
102 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
103 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
105 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
106 // HVX only has shifts of words and halfwords.
107 setOperationAction(ISD::SRA
, T
, Custom
);
108 setOperationAction(ISD::SHL
, T
, Custom
);
109 setOperationAction(ISD::SRL
, T
, Custom
);
111 // Promote all shuffles to operate on vectors of bytes.
112 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteV
);
// These comparison condition codes are marked Expand for single vectors.
115 setCondCodeAction(ISD::SETNE
, T
, Expand
);
116 setCondCodeAction(ISD::SETLE
, T
, Expand
);
117 setCondCodeAction(ISD::SETGE
, T
, Expand
);
118 setCondCodeAction(ISD::SETLT
, T
, Expand
);
119 setCondCodeAction(ISD::SETULE
, T
, Expand
);
120 setCondCodeAction(ISD::SETUGE
, T
, Expand
);
121 setCondCodeAction(ISD::SETULT
, T
, Expand
);
// Actions for the vector-pair types.
124 for (MVT T
: LegalW
) {
125 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
126 // independent) handling of it would convert it to a load, which is
127 // not always the optimal choice.
128 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
129 // Make concat-vectors custom to handle concats of more than 2 vectors.
130 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
132 // Custom-lower these operations for pairs. Expand them into a concat
133 // of the corresponding operations on individual vectors.
134 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
135 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
136 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
137 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Custom
);
138 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
139 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
140 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
142 setOperationAction(ISD::LOAD
, T
, Custom
);
143 setOperationAction(ISD::STORE
, T
, Custom
);
144 setOperationAction(ISD::CTLZ
, T
, Custom
);
145 setOperationAction(ISD::CTTZ
, T
, Custom
);
146 setOperationAction(ISD::CTPOP
, T
, Custom
);
// Unlike the other arithmetic/logic operations, ADD and SUB are Legal on
// pairs.
148 setOperationAction(ISD::ADD
, T
, Legal
);
149 setOperationAction(ISD::SUB
, T
, Legal
);
150 setOperationAction(ISD::MUL
, T
, Custom
);
151 setOperationAction(ISD::MULHS
, T
, Custom
);
152 setOperationAction(ISD::MULHU
, T
, Custom
);
153 setOperationAction(ISD::AND
, T
, Custom
);
154 setOperationAction(ISD::OR
, T
, Custom
);
155 setOperationAction(ISD::XOR
, T
, Custom
);
156 setOperationAction(ISD::SETCC
, T
, Custom
);
157 setOperationAction(ISD::VSELECT
, T
, Custom
);
159 setOperationAction(ISD::SRA
, T
, Custom
);
160 setOperationAction(ISD::SHL
, T
, Custom
);
161 setOperationAction(ISD::SRL
, T
, Custom
);
163 // Promote all shuffles to operate on vectors of bytes.
164 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteW
);
// Actions for the boolean vectors that have as many elements as a pair type.
170 for (MVT T
: LegalW
) {
171 // Boolean types for vector pairs will overlap with the boolean
172 // types for single vectors, e.g.
173 // v64i8 -> v64i1 (single)
174 // v64i16 -> v64i1 (pair)
175 // Set these actions first, and allow the single actions to overwrite
// them where the two sets of boolean types coincide.
177 MVT BoolW
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
178 setOperationAction(ISD::SETCC
, BoolW
, Custom
);
179 setOperationAction(ISD::AND
, BoolW
, Custom
);
180 setOperationAction(ISD::OR
, BoolW
, Custom
);
181 setOperationAction(ISD::XOR
, BoolW
, Custom
);
// Actions for the boolean vectors that have as many elements as a single
// vector type. AND/OR/XOR set here replace the Custom setting from the
// loop above for any type both loops touch.
184 for (MVT T
: LegalV
) {
185 MVT BoolV
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
186 setOperationAction(ISD::BUILD_VECTOR
, BoolV
, Custom
);
187 setOperationAction(ISD::CONCAT_VECTORS
, BoolV
, Custom
);
188 setOperationAction(ISD::INSERT_SUBVECTOR
, BoolV
, Custom
);
189 setOperationAction(ISD::INSERT_VECTOR_ELT
, BoolV
, Custom
);
190 setOperationAction(ISD::EXTRACT_SUBVECTOR
, BoolV
, Custom
);
191 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, BoolV
, Custom
);
192 setOperationAction(ISD::AND
, BoolV
, Legal
);
193 setOperationAction(ISD::OR
, BoolV
, Legal
);
194 setOperationAction(ISD::XOR
, BoolV
, Legal
);
// Builds an ISD::INTRINSIC_WO_CHAIN node with result type ResTy.
// The operand list is the intrinsic id IntId (as an i32 constant) followed
// by the values in Ops, in their original order.
199 HexagonTargetLowering::getInt(unsigned IntId
, MVT ResTy
, ArrayRef
<SDValue
> Ops
,
200 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
201 SmallVector
<SDValue
,4> IntOps
;
// Operand 0 is the intrinsic id.
202 IntOps
.push_back(DAG
.getConstant(IntId
, dl
, MVT::i32
));
203 for (const SDValue
&Op
: Ops
)
204 IntOps
.push_back(Op
);
205 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, ResTy
, IntOps
);
// Returns the vector type obtained by joining the two types in Tys: the
// element type is that of Tys.first and the element count is the sum of the
// two element counts. Asserts that both types have the same element type.
209 HexagonTargetLowering::typeJoin(const TypePair
&Tys
) const {
210 assert(Tys
.first
.getVectorElementType() == Tys
.second
.getVectorElementType());
212 MVT ElemTy
= Tys
.first
.getVectorElementType();
213 return MVT::getVectorVT(ElemTy
, Tys
.first
.getVectorNumElements() +
214 Tys
.second
.getVectorNumElements());
// Returns the pair of types that results from splitting VecTy in half: both
// members are the same type, with VecTy's element type and half of its
// element count. Asserts that VecTy is a vector with an even element count.
217 HexagonTargetLowering::TypePair
218 HexagonTargetLowering::typeSplit(MVT VecTy
) const {
219 assert(VecTy
.isVector());
220 unsigned NumElem
= VecTy
.getVectorNumElements();
221 assert((NumElem
% 2) == 0 && "Expecting even-sized vector type");
222 MVT HalfTy
= MVT::getVectorVT(VecTy
.getVectorElementType(), NumElem
/2);
223 return { HalfTy
, HalfTy
};
// Returns a vector type with the same element count as VecTy whose elements
// are integers Factor times as wide as VecTy's elements.
227 HexagonTargetLowering::typeExtElem(MVT VecTy
, unsigned Factor
) const {
228 MVT ElemTy
= VecTy
.getVectorElementType();
229 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() * Factor
);
230 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Returns a vector type with the same element count as VecTy whose elements
// are integers of width (element width of VecTy) / Factor. The division is
// an unsigned integer division; no check is made here that it is exact.
234 HexagonTargetLowering::typeTruncElem(MVT VecTy
, unsigned Factor
) const {
235 MVT ElemTy
= VecTy
.getVectorElementType();
236 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() / Factor
);
237 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Reinterprets Vec as a vector with element type ElemTy by bitcasting it to
// the type computed by tyVector(type of Vec, ElemTy).
241 HexagonTargetLowering::opCastElem(SDValue Vec
, MVT ElemTy
,
242 SelectionDAG
&DAG
) const {
// NOTE(review): the statement guarded by this check is not visible here;
// presumably Vec is returned unchanged when it already has the requested
// element type -- confirm.
243 if (ty(Vec
).getVectorElementType() == ElemTy
)
245 MVT CastTy
= tyVector(Vec
.getValueType().getSimpleVT(), ElemTy
);
246 return DAG
.getBitcast(CastTy
, Vec
);
// Joins the two vectors in Ops with a single ISD::CONCAT_VECTORS node whose
// result type is typeJoin(ty(Ops)).
// Note the operand order: Ops.second is passed as the first operand of the
// concat and Ops.first as the second.
250 HexagonTargetLowering::opJoin(const VectorPair
&Ops
, const SDLoc
&dl
,
251 SelectionDAG
&DAG
) const {
252 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, typeJoin(ty(Ops
)),
253 Ops
.second
, Ops
.first
);
// Splits Vec into two halves and returns them as a pair.
// A HexagonISD::QCAT node is taken apart directly (operand 0, operand 1);
// any other value is split with DAG.SplitVector using the half types from
// typeSplit.
256 HexagonTargetLowering::VectorPair
257 HexagonTargetLowering::opSplit(SDValue Vec
, const SDLoc
&dl
,
258 SelectionDAG
&DAG
) const {
259 TypePair Tys
= typeSplit(ty(Vec
));
// Shortcut: a QCAT already carries its two halves as operands.
260 if (Vec
.getOpcode() == HexagonISD::QCAT
)
261 return VectorPair(Vec
.getOperand(0), Vec
.getOperand(1));
262 return DAG
.SplitVector(Vec
, dl
, Tys
.first
, Tys
.second
);
// Returns true if Ty is an HVX vector type (per Subtarget.isHVXVectorType)
// whose size in bits equals 8 * Subtarget.getVectorLength().
// NOTE(review): getVectorLength() is used elsewhere in this file as a count
// of i8 elements, so this presumably means "exactly one vector register".
266 HexagonTargetLowering::isHvxSingleTy(MVT Ty
) const {
267 return Subtarget
.isHVXVectorType(Ty
) &&
268 Ty
.getSizeInBits() == 8 * Subtarget
.getVectorLength();
// Returns true if Ty is an HVX vector type (per Subtarget.isHVXVectorType)
// whose size in bits equals 16 * Subtarget.getVectorLength(), i.e. twice the
// size accepted by isHvxSingleTy.
272 HexagonTargetLowering::isHvxPairTy(MVT Ty
) const {
273 return Subtarget
.isHVXVectorType(Ty
) &&
274 Ty
.getSizeInBits() == 16 * Subtarget
.getVectorLength();
// Converts an element index into a byte index for elements of type ElemTy:
// the index is shifted left by log2(element size in bytes).
// NOTE(review): some statements between the width computation and the shift
// are not visible here (possibly a shortcut for byte-sized elements).
278 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx
, MVT ElemTy
,
279 SelectionDAG
&DAG
) const {
// The arithmetic below is done in i32; other index types are bitcast.
280 if (ElemIdx
.getValueType().getSimpleVT() != MVT::i32
)
281 ElemIdx
= DAG
.getBitcast(MVT::i32
, ElemIdx
);
283 unsigned ElemWidth
= ElemTy
.getSizeInBits();
// L = log2 of the element size in bytes, used as the shift amount.
287 unsigned L
= Log2_32(ElemWidth
/8);
288 const SDLoc
&dl(ElemIdx
);
289 return DAG
.getNode(ISD::SHL
, dl
, MVT::i32
,
290 {ElemIdx
, DAG
.getConstant(L
, dl
, MVT::i32
)});
// Computes the position of element Idx inside its 32-bit word: the visible
// code masks Idx with (32/ElemWidth - 1), the number of elements per word
// minus one. Asserts that the element width is between 8 and 32 bits.
// NOTE(review): the return statement and some earlier statements are not
// visible here; presumably SubIdx is the result -- confirm.
294 HexagonTargetLowering::getIndexInWord32(SDValue Idx
, MVT ElemTy
,
295 SelectionDAG
&DAG
) const {
296 unsigned ElemWidth
= ElemTy
.getSizeInBits();
297 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
// Work on an i32 index.
301 if (ty(Idx
) != MVT::i32
)
302 Idx
= DAG
.getBitcast(MVT::i32
, Idx
);
303 const SDLoc
&dl(Idx
);
// 32/ElemWidth elements fit in a word; the mask keeps the index within it.
304 SDValue Mask
= DAG
.getConstant(32/ElemWidth
- 1, dl
, MVT::i32
);
305 SDValue SubIdx
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, {Idx
, Mask
});
// Builds the shuffle of Op0 and Op1 described by Mask as a shuffle of byte
// vectors. For i8 operands the shuffle is emitted as-is. For wider elements
// each mask entry is expanded into ElemSize byte entries and both operands
// are bitcast to i8 vectors (opCastElem); the result then has the byte
// vector type ResTy.
// NOTE(review): the declaration of OpTy and the loop over Mask are not
// visible here; the comments on the two inner loops below are hedged.
310 HexagonTargetLowering::getByteShuffle(const SDLoc
&dl
, SDValue Op0
,
311 SDValue Op1
, ArrayRef
<int> Mask
,
312 SelectionDAG
&DAG
) const {
// Both operands must have the same type.
314 assert(OpTy
== ty(Op1
));
316 MVT ElemTy
= OpTy
.getVectorElementType();
// Already a byte shuffle: nothing to convert.
317 if (ElemTy
== MVT::i8
)
318 return DAG
.getVectorShuffle(OpTy
, dl
, Op0
, Op1
, Mask
);
319 assert(ElemTy
.getSizeInBits() >= 8);
321 MVT ResTy
= tyVector(OpTy
, MVT::i8
);
// Number of bytes per original element.
322 unsigned ElemSize
= ElemTy
.getSizeInBits() / 8;
324 SmallVector
<int,128> ByteMask
;
// Presumably the case of an undefined (negative) mask entry: emit ElemSize
// undefined byte entries -- the guarding condition is not visible.
327 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
328 ByteMask
.push_back(-1);
// Otherwise entry M selects bytes M*ElemSize .. M*ElemSize+ElemSize-1.
330 int NewM
= M
*ElemSize
;
331 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
332 ByteMask
.push_back(NewM
+I
);
// The expanded mask must cover every byte of the result.
335 assert(ResTy
.getVectorNumElements() == ByteMask
.size());
336 return DAG
.getVectorShuffle(ResTy
, dl
, opCastElem(Op0
, MVT::i8
, DAG
),
337 opCastElem(Op1
, MVT::i8
, DAG
), ByteMask
);
// Builds a single HVX vector of type VecTy from the scalar Values.
// Strategies visible in this definition, in order:
//   1. pack the elements into 32-bit words (Words);
//   2. if all non-undef words are equal, emit undef, zero, or a VSPLATW;
//   3. if the values are constants, load the vector from the constant pool;
//   4. if all values are extracts from one source vector of a suitable
//      length, emit a shuffle of that vector;
//   5. otherwise insert the words one by one into two half-filled vectors
//      and OR the halves together.
// NOTE(review): several guarding conditions, declarations and returns of
// this definition are not visible here; comments about them are hedged.
341 HexagonTargetLowering::buildHvxVectorReg(ArrayRef
<SDValue
> Values
,
342 const SDLoc
&dl
, MVT VecTy
,
343 SelectionDAG
&DAG
) const {
344 unsigned VecLen
= Values
.size();
345 MachineFunction
&MF
= DAG
.getMachineFunction();
346 MVT ElemTy
= VecTy
.getVectorElementType();
347 unsigned ElemWidth
= ElemTy
.getSizeInBits();
348 unsigned HwLen
= Subtarget
.getVectorLength();
// The values must fill exactly HwLen bytes.
350 unsigned ElemSize
= ElemWidth
/ 8;
351 assert(ElemSize
*VecLen
== HwLen
);
352 SmallVector
<SDValue
,32> Words
;
// Sub-word elements: group 4 bytes or 2 halfwords at a time with
// buildVector32 and bitcast each group to an i32 word.
354 if (VecTy
.getVectorElementType() != MVT::i32
) {
355 assert((ElemSize
== 1 || ElemSize
== 2) && "Invalid element size");
356 unsigned OpsPerWord
= (ElemSize
== 1) ? 4 : 2;
357 MVT PartVT
= MVT::getVectorVT(VecTy
.getVectorElementType(), OpsPerWord
);
358 for (unsigned i
= 0; i
!= VecLen
; i
+= OpsPerWord
) {
359 SDValue W
= buildVector32(Values
.slice(i
, OpsPerWord
), dl
, PartVT
, DAG
);
360 Words
.push_back(DAG
.getBitcast(MVT::i32
, W
));
// Presumably the i32 case: the values already are the words -- confirm.
363 Words
.assign(Values
.begin(), Values
.end());
// Splat detection over the words; the loop stops once IsSplat is cleared.
366 unsigned NumWords
= Words
.size();
367 bool IsSplat
= true, IsUndef
= true;
369 for (unsigned i
= 0; i
!= NumWords
&& IsSplat
; ++i
) {
370 if (isUndef(Words
[i
]))
373 if (!SplatV
.getNode())
375 else if (SplatV
!= Words
[i
])
// Results for the splat case (the enclosing conditions are not visible):
// undef, all-zero, or a VSPLATW of the common word.
379 return DAG
.getUNDEF(VecTy
);
381 assert(SplatV
.getNode());
382 auto *IdxN
= dyn_cast
<ConstantSDNode
>(SplatV
.getNode());
383 if (IdxN
&& IdxN
->isNullValue())
384 return getZero(dl
, VecTy
, DAG
);
385 return DAG
.getNode(HexagonISD::VSPLATW
, dl
, VecTy
, SplatV
);
388 // Delay recognizing constant vectors until here, so that we can generate
// a vsplat (above) when the constant vector is a splat.
390 SmallVector
<ConstantInt
*, 128> Consts(VecLen
);
391 bool AllConst
= getBuildVectorConstInts(Values
, VecTy
, DAG
, Consts
);
// Build a ConstantVector from the collected constants and load it from the
// constant pool, using HwLen as the alignment. Presumably guarded by
// AllConst -- the condition is not visible.
393 ArrayRef
<Constant
*> Tmp((Constant
**)Consts
.begin(),
394 (Constant
**)Consts
.end());
395 Constant
*CV
= ConstantVector::get(Tmp
);
396 unsigned Align
= HwLen
;
397 SDValue CP
= LowerConstantPool(DAG
.getConstantPool(CV
, VecTy
, Align
), DAG
);
398 return DAG
.getLoad(VecTy
, dl
, DAG
.getEntryNode(), CP
,
399 MachinePointerInfo::getConstantPool(MF
), Align
);
402 // A special case is a situation where the vector is built entirely from
403 // elements extracted from another vector. This could be done via a shuffle
404 // more efficiently, but typically, the size of the source vector will not
405 // match the size of the vector being built (which precludes the use of a
406 // shuffle directly).
407 // This only handles a single source vector, and the vector being built
408 // should be of a sub-vector type of the source vector type.
// The lambda reports the common source vector through SrcVec and the
// extracted element indices through SrcIdx (-1 entries are pushed for some
// values; presumably the undef ones -- the condition is not visible).
409 auto IsBuildFromExtracts
= [this,&Values
] (SDValue
&SrcVec
,
410 SmallVectorImpl
<int> &SrcIdx
) {
412 for (SDValue V
: Values
) {
414 SrcIdx
.push_back(-1);
417 if (V
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
419 // All extracts should come from the same vector.
420 SDValue T
= V
.getOperand(0);
421 if (Vec
.getNode() != nullptr && T
.getNode() != Vec
.getNode())
// The extract index is looked up as a constant node.
424 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(V
.getOperand(1));
427 int I
= C
->getSExtValue();
428 assert(I
>= 0 && "Negative element index");
435 SmallVector
<int,128> ExtIdx
;
437 if (IsBuildFromExtracts(ExtVec
, ExtIdx
)) {
438 MVT ExtTy
= ty(ExtVec
);
439 unsigned ExtLen
= ExtTy
.getVectorNumElements();
// Only sources with the same or twice the element count are handled.
440 if (ExtLen
== VecLen
|| ExtLen
== 2*VecLen
) {
441 // Construct a new shuffle mask that will produce a vector with the same
442 // number of elements as the input vector, and such that the vector we
443 // want will be the initial subvector of it.
444 SmallVector
<int,128> Mask
;
445 BitVector
Used(ExtLen
);
447 for (int M
: ExtIdx
) {
452 // Fill the rest of the mask with the unused elements of ExtVec in hopes
453 // that it will result in a permutation of ExtVec's elements. It's still
454 // fine if it doesn't (e.g. if undefs are present, or elements are
455 // repeated), but permutations can always be done efficiently via vdelta
457 for (unsigned I
= 0; I
!= ExtLen
; ++I
) {
458 if (Mask
.size() == ExtLen
)
464 SDValue S
= DAG
.getVectorShuffle(ExtTy
, dl
, ExtVec
,
465 DAG
.getUNDEF(ExtTy
), Mask
);
// Same length: presumably the shuffle itself is the result (statement not
// visible). Double length: the result is the vsub_lo subregister.
466 if (ExtLen
== VecLen
)
468 return DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, VecTy
, S
);
472 // Construct two halves in parallel, then or them together.
473 assert(4*Words
.size() == Subtarget
.getVectorLength());
// Both halves start from the result of V6_vd0; S is the per-step rotate
// amount (4).
474 SDValue HalfV0
= getInstr(Hexagon::V6_vd0
, dl
, VecTy
, {}, DAG
);
475 SDValue HalfV1
= getInstr(Hexagon::V6_vd0
, dl
, VecTy
, {}, DAG
);
476 SDValue S
= DAG
.getConstant(4, dl
, MVT::i32
);
// Each step inserts one word with VINSERTW0 and rotates by S. HalfV1
// receives the words from the second half of Words; presumably HalfV0
// receives Words[i] -- its operand list is not visible.
477 for (unsigned i
= 0; i
!= NumWords
/2; ++i
) {
478 SDValue N
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
480 SDValue M
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
481 {HalfV1
, Words
[i
+NumWords
/2]});
482 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {N
, S
});
483 HalfV1
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {M
, S
});
// Rotate HalfV0 by HwLen/2 before combining it with HalfV1.
486 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
,
487 {HalfV0
, DAG
.getConstant(HwLen
/2, dl
, MVT::i32
)});
488 SDValue DstV
= DAG
.getNode(ISD::OR
, dl
, VecTy
, {HalfV0
, HalfV1
});
// Converts the predicate PredV into a byte vector (HwLen x i8) in which each
// predicate element is represented using BitBytes bytes.
//   - PredV is an HVX predicate type: convert with Q2V, then shuffle the
//     bytes; a zero-filling variant masks the result so that only the first
//     BlockLen bytes are kept.
//   - PredV is a scalar predicate (v2i1, v4i1 or v8i1): convert it to 64
//     bits with P2D, expand the words until each element takes BitBytes
//     bytes, then insert the words into the vector.
// ZeroFill selects a zero vector instead of undef as the starting value in
// the scalar case.
// NOTE(review): several conditions, declarations (e.g. IdxW) and returns
// are not visible here; comments about them are hedged.
493 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV
, const SDLoc
&dl
,
494 unsigned BitBytes
, bool ZeroFill
, SelectionDAG
&DAG
) const {
495 MVT PredTy
= ty(PredV
);
496 unsigned HwLen
= Subtarget
.getVectorLength();
497 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
499 if (Subtarget
.isHVXVectorType(PredTy
, true)) {
500 // Move the vector predicate SubV to a vector register, and scale it
501 // down to match the representation (bytes per type element) that VecV
502 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
503 // in general) element and put them at the front of the resulting
504 // vector. This subvector will then be inserted into the Q2V of VecV.
505 // To avoid having an operation that generates an illegal type (short
506 // vector), generate a full size vector.
508 SDValue T
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, PredV
);
509 SmallVector
<int,128> Mask(HwLen
);
510 // Scale = BitBytes(PredV) / Given BitBytes.
511 unsigned Scale
= HwLen
/ (PredTy
.getVectorNumElements() * BitBytes
);
512 unsigned BlockLen
= PredTy
.getVectorNumElements() * BitBytes
;
// Source byte i goes to position BlockLen*(i % Scale) + i/Scale, so the
// bytes with i % Scale == 0 end up in the first BlockLen positions.
514 for (unsigned i
= 0; i
!= HwLen
; ++i
) {
515 unsigned Num
= i
% Scale
;
516 unsigned Off
= i
/ Scale
;
517 Mask
[BlockLen
*Num
+ Off
] = i
;
519 SDValue S
= DAG
.getVectorShuffle(ByteTy
, dl
, T
, DAG
.getUNDEF(ByteTy
), Mask
);
522 // Fill the bytes beyond BlockLen with 0s.
// Done by AND-ing with the byte form (Q2V) of the predicate produced by
// V6_pred_scalar2 for BlockLen.
523 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
524 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
525 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
526 SDValue M
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, Q
);
527 return DAG
.getNode(ISD::AND
, dl
, ByteTy
, S
, M
);
530 // Make sure that this is a valid scalar predicate.
531 assert(PredTy
== MVT::v2i1
|| PredTy
== MVT::v4i1
|| PredTy
== MVT::v8i1
);
// Bytes = bytes per predicate element in the 64-bit form. Words is a pair
// of lists, indexed with IdxW and IdxW ^ 1 below.
533 unsigned Bytes
= 8 / PredTy
.getVectorNumElements();
534 SmallVector
<SDValue
,4> Words
[2];
// Helpers extracting the isub_lo / isub_hi 32-bit subregister of P.
537 auto Lo32
= [&DAG
, &dl
] (SDValue P
) {
538 return DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, P
);
540 auto Hi32
= [&DAG
, &dl
] (SDValue P
) {
541 return DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, P
);
// 64-bit form of the predicate (undef stays undef); the high word is pushed
// before the low word.
544 SDValue W0
= isUndef(PredV
)
545 ? DAG
.getUNDEF(MVT::i64
)
546 : DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, PredV
);
547 Words
[IdxW
].push_back(Hi32(W0
));
548 Words
[IdxW
].push_back(Lo32(W0
));
// Widen the representation until each element takes BitBytes bytes. Two
// visible variants turn every word of the other list into two words: via
// expandPredicate (high, low) or by duplicating the word. The condition
// choosing between them and the update of Bytes/IdxW are not visible.
550 while (Bytes
< BitBytes
) {
555 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
556 SDValue T
= expandPredicate(W
, dl
, DAG
);
557 Words
[IdxW
].push_back(Hi32(T
));
558 Words
[IdxW
].push_back(Lo32(T
));
561 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
562 Words
[IdxW
].push_back(W
);
563 Words
[IdxW
].push_back(W
);
569 assert(Bytes
== BitBytes
);
// Starting from zero or undef, rotate by HwLen-4 and insert each word with
// VINSERTW0.
571 SDValue Vec
= ZeroFill
? getZero(dl
, ByteTy
, DAG
) : DAG
.getUNDEF(ByteTy
);
572 SDValue S4
= DAG
.getConstant(HwLen
-4, dl
, MVT::i32
);
573 for (const SDValue
&W
: Words
[IdxW
]) {
574 Vec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Vec
, S4
);
575 Vec
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, ByteTy
, Vec
, W
);
// Builds an HVX vector predicate of type VecTy from the i1 Values.
// The values are first turned into a vector of HwLen bytes (built with
// buildHvxVectorReg), which is then converted to the predicate with V2Q.
// All-true and all-false inputs are emitted as QTRUE / QFALSE nodes.
// Asserts that there are at most HwLen values, or exactly 8*HwLen.
// NOTE(review): the statements maintaining AllT/AllF, parts of the IsTrue /
// IsFalse lambdas and some branch conditions are not visible here.
582 HexagonTargetLowering::buildHvxVectorPred(ArrayRef
<SDValue
> Values
,
583 const SDLoc
&dl
, MVT VecTy
,
584 SelectionDAG
&DAG
) const {
585 // Construct a vector V of bytes, such that a comparison V >u 0 would
586 // produce the required vector predicate.
587 unsigned VecLen
= Values
.size();
588 unsigned HwLen
= Subtarget
.getVectorLength();
589 assert(VecLen
<= HwLen
|| VecLen
== 8*HwLen
);
590 SmallVector
<SDValue
,128> Bytes
;
591 bool AllT
= true, AllF
= true;
// For a constant node V: IsTrue yields "not zero", IsFalse yields "zero".
// The result for non-constant nodes is not visible here.
593 auto IsTrue
= [] (SDValue V
) {
594 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
595 return !N
->isNullValue();
598 auto IsFalse
= [] (SDValue V
) {
599 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
600 return N
->isNullValue();
604 if (VecLen
<= HwLen
) {
605 // In the hardware, each bit of a vector predicate corresponds to a byte
606 // of a vector register. Calculate how many bytes a bit of VecTy
// corresponds to.
608 assert(HwLen
% VecLen
== 0);
609 unsigned BitBytes
= HwLen
/ VecLen
;
// Each value (zero-extended or truncated to i8, or undef) is repeated
// BitBytes times.
610 for (SDValue V
: Values
) {
614 SDValue Ext
= !V
.isUndef() ? DAG
.getZExtOrTrunc(V
, dl
, MVT::i8
)
615 : DAG
.getUNDEF(MVT::i8
);
616 for (unsigned B
= 0; B
!= BitBytes
; ++B
)
617 Bytes
.push_back(Ext
);
620 // There are as many i1 values, as there are bits in a vector register.
621 // Divide the values into groups of 8 and check that each group consists
622 // of the same value (ignoring undefs).
623 for (unsigned I
= 0; I
!= VecLen
; I
+= 8) {
625 // Find the first non-undef value in this group.
626 for (; B
!= 8; ++B
) {
627 if (!Values
[I
+B
].isUndef())
630 SDValue F
= Values
[I
+B
];
// One byte per group: the first non-undef value, or undef if B reached 8.
634 SDValue Ext
= (B
< 8) ? DAG
.getZExtOrTrunc(F
, dl
, MVT::i8
)
635 : DAG
.getUNDEF(MVT::i8
);
636 Bytes
.push_back(Ext
);
637 // Verify that the rest of values in the group are the same as the
// first non-undef value F.
640 assert(Values
[I
+B
].isUndef() || Values
[I
+B
] == F
);
// Presumably returned when AllT, respectively AllF, is still set -- the
// conditions are not visible.
645 return DAG
.getNode(HexagonISD::QTRUE
, dl
, VecTy
);
647 return DAG
.getNode(HexagonISD::QFALSE
, dl
, VecTy
);
// General case: build the byte vector and convert it to a predicate.
649 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
650 SDValue ByteVec
= buildHvxVectorReg(Bytes
, dl
, ByteTy
, DAG
);
651 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
// Extracts the element at index IdxV from the HVX vector VecV.
// The 32-bit word containing the element is fetched with VEXTRACTW using
// the byte index of the element; for elements narrower than 32 bits the
// element is then extracted from that word with extractVector.
// Asserts that the element width is between 8 and 32 bits.
// NOTE(review): the operand list of the VEXTRACTW node and the statement
// guarded by the i32 check are not visible here.
655 HexagonTargetLowering::extractHvxElementReg(SDValue VecV
, SDValue IdxV
,
656 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
657 MVT ElemTy
= ty(VecV
).getVectorElementType();
659 unsigned ElemWidth
= ElemTy
.getSizeInBits();
660 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
663 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
664 SDValue ExWord
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
// Presumably the extracted word is the result for i32 elements -- confirm.
666 if (ElemTy
== MVT::i32
)
669 // Have an extracted word, need to extract the smaller element out of it.
670 // 1. Extract the bits of (the original) IdxV that correspond to the index
671 // of the desired element in the 32-bit word.
672 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
673 // 2. Extract the element from the word.
674 SDValue ExVec
= DAG
.getBitcast(tyVector(ty(ExWord
), ElemTy
), ExWord
);
675 return extractVector(ExVec
, SubIdx
, dl
, ElemTy
, MVT::i32
, DAG
);
// Extracts element IdxV of the HVX vector predicate VecV as an i1.
// The predicate is converted to a byte vector with Q2V, the index is scaled
// by HwLen / (number of predicate elements), the byte at that position is
// extracted as an i32, and the result is the unsigned comparison
// "byte > 0" (C2_cmpgtui). Only ResTy == MVT::i1 is supported.
679 HexagonTargetLowering::extractHvxElementPred(SDValue VecV
, SDValue IdxV
,
680 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
681 // Implement other return types if necessary.
682 assert(ResTy
== MVT::i1
);
684 unsigned HwLen
= Subtarget
.getVectorLength();
685 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
686 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
// Scale = number of bytes of ByteVec per predicate element.
688 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
689 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
690 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
692 SDValue ExtB
= extractHvxElementReg(ByteVec
, IdxV
, dl
, MVT::i32
, DAG
);
693 SDValue Zero
= DAG
.getTargetConstant(0, dl
, MVT::i32
);
694 return getInstr(Hexagon::C2_cmpgtui
, dl
, MVT::i1
, {ExtB
, Zero
}, DAG
);
// Inserts the scalar ValV at element index IdxV of the HVX vector VecV.
// 32-bit elements are inserted directly as a word. For narrower elements
// the containing word is extracted, the element is inserted into that word
// with insertVector, and the updated word is written back.
// Asserts that the element width is between 8 and 32 bits.
// NOTE(review): part of the InsertWord lambda signature, its return, and
// the tail of the word-extraction call are not visible here.
698 HexagonTargetLowering::insertHvxElementReg(SDValue VecV
, SDValue IdxV
,
699 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
700 MVT ElemTy
= ty(VecV
).getVectorElementType();
702 unsigned ElemWidth
= ElemTy
.getSizeInBits();
703 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
// InsertWord: align the byte index down to a multiple of 4 (AND with -4),
// rotate the vector by that amount, insert the word with VINSERTW0, then
// rotate by HwLen minus the same amount.
706 auto InsertWord
= [&DAG
,&dl
,this] (SDValue VecV
, SDValue ValV
,
708 MVT VecTy
= ty(VecV
);
709 unsigned HwLen
= Subtarget
.getVectorLength();
710 SDValue MaskV
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
711 {ByteIdxV
, DAG
.getConstant(-4, dl
, MVT::i32
)});
712 SDValue RotV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {VecV
, MaskV
});
713 SDValue InsV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
, {RotV
, ValV
});
714 SDValue SubV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
715 {DAG
.getConstant(HwLen
, dl
, MVT::i32
), MaskV
});
716 SDValue TorV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {InsV
, SubV
});
720 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
// A 32-bit element is a whole word: insert it directly.
721 if (ElemTy
== MVT::i32
)
722 return InsertWord(VecV
, ValV
, ByteIdx
);
724 // If this is not inserting a 32-bit word, convert it into such a thing.
725 // 1. Extract the existing word from the target vector.
// The word index is the byte index shifted right by 2.
726 SDValue WordIdx
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
727 {ByteIdx
, DAG
.getConstant(2, dl
, MVT::i32
)});
728 SDValue Ext
= extractHvxElementReg(opCastElem(VecV
, MVT::i32
, DAG
), WordIdx
,
731 // 2. Treating the extracted word as a 32-bit vector, insert the given
// value into it at the element's position within the word.
733 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
734 MVT SubVecTy
= tyVector(ty(Ext
), ElemTy
);
735 SDValue Ins
= insertVector(DAG
.getBitcast(SubVecTy
, Ext
),
736 ValV
, SubIdx
, dl
, ElemTy
, DAG
);
738 // 3. Insert the 32-bit word back into the original vector.
739 return InsertWord(VecV
, Ins
, ByteIdx
);
// Inserts the value ValV at element index IdxV of the HVX vector predicate
// VecV. The predicate is converted to a byte vector with Q2V, the index is
// scaled by HwLen / (number of predicate elements), ValV is sign-extended to
// i32 and inserted with insertHvxElementReg, and the byte vector is
// converted back to the predicate type with V2Q.
743 HexagonTargetLowering::insertHvxElementPred(SDValue VecV
, SDValue IdxV
,
744 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
745 unsigned HwLen
= Subtarget
.getVectorLength();
746 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
747 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
// Scale = number of bytes of ByteVec per predicate element.
749 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
750 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
751 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
752 ValV
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i32
, ValV
);
754 SDValue InsV
= insertHvxElementReg(ByteVec
, IdxV
, ValV
, dl
, DAG
);
755 return DAG
.getNode(HexagonISD::V2Q
, dl
, ty(VecV
), InsV
);
// Extracts the subvector of type ResTy that starts at the constant element
// index IdxV of the HVX vector (or vector pair) VecV.
// For a pair, the single vector holding the subvector is selected first and
// the index is rebased onto it. The subvector is then read as one 32-bit
// word, or as two consecutive words joined with COMBINE, and bitcast to
// ResTy. Asserts that ResTy is 32 or 64 bits wide.
// NOTE(review): the declaration of SubIdx and some statements between the
// pair handling and the size assert are not visible here.
759 HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV
, SDValue IdxV
,
760 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
761 MVT VecTy
= ty(VecV
);
762 unsigned HwLen
= Subtarget
.getVectorLength();
// IdxV must be a constant node (cast<> is used, not dyn_cast<>).
763 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
764 MVT ElemTy
= VecTy
.getVectorElementType();
765 unsigned ElemWidth
= ElemTy
.getSizeInBits();
767 // If the source vector is a vector pair, get the single vector containing
768 // the subvector of interest. The subvector will never overlap two single
// vectors.
770 if (isHvxPairTy(VecTy
)) {
// A start offset of at least 8*HwLen bits selects vsub_hi; the index is
// then reduced by half of the pair's element count.
772 if (Idx
* ElemWidth
>= 8*HwLen
) {
773 SubIdx
= Hexagon::vsub_hi
;
774 Idx
-= VecTy
.getVectorNumElements() / 2;
776 SubIdx
= Hexagon::vsub_lo
;
778 VecTy
= typeSplit(VecTy
).first
;
779 VecV
= DAG
.getTargetExtractSubreg(SubIdx
, dl
, VecTy
, VecV
);
784 // The only meaningful subvectors of a single HVX vector are those that
785 // fit in a scalar register.
786 assert(ResTy
.getSizeInBits() == 32 || ResTy
.getSizeInBits() == 64);
// View the vector as words and locate the first word of the subvector.
788 MVT WordTy
= tyVector(VecTy
, MVT::i32
);
789 SDValue WordVec
= DAG
.getBitcast(WordTy
, VecV
);
790 unsigned WordIdx
= (Idx
*ElemWidth
) / 32;
792 SDValue W0Idx
= DAG
.getConstant(WordIdx
, dl
, MVT::i32
);
793 SDValue W0
= extractHvxElementReg(WordVec
, W0Idx
, dl
, MVT::i32
, DAG
);
794 if (ResTy
.getSizeInBits() == 32)
795 return DAG
.getBitcast(ResTy
, W0
);
// 64-bit result: fetch the next word and combine (W1 is passed first).
797 SDValue W1Idx
= DAG
.getConstant(WordIdx
+1, dl
, MVT::i32
);
798 SDValue W1
= extractHvxElementReg(WordVec
, W1Idx
, dl
, MVT::i32
, DAG
);
799 SDValue WW
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
, {W1
, W0
});
800 return DAG
.getBitcast(ResTy
, WW
);
// Extracts the sub-predicate of type ResTy that starts at the constant
// element index IdxV of the HVX vector predicate VecV.
// The predicate is converted to a byte vector with Q2V and its bytes are
// shuffled. If ResTy is itself an HVX predicate type, the shuffled bytes
// are converted back with V2Q. Otherwise (scalar predicate result) the two
// low words of the shuffled vector are combined into a v8i8 and compared
// against 0 with A4_vcmpbgtui.
804 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV
, SDValue IdxV
,
805 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
806 MVT VecTy
= ty(VecV
);
807 unsigned HwLen
= Subtarget
.getVectorLength();
808 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
809 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
810 // IdxV is required to be a constant.
811 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
// BitBytes = bytes of ByteVec per source predicate element; Offset = byte
// position of the first extracted element.
813 unsigned ResLen
= ResTy
.getVectorNumElements();
814 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
815 unsigned Offset
= Idx
* BitBytes
;
816 SDValue Undef
= DAG
.getUNDEF(ByteTy
);
817 SmallVector
<int,128> Mask
;
819 if (Subtarget
.isHVXVectorType(ResTy
, true)) {
820 // Converting between two vector predicates. Since the result is shorter
821 // than the source, it will correspond to a vector predicate with the
822 // relevant bits replicated. The replication count is the ratio of the
823 // source and target vector lengths.
824 unsigned Rep
= VecTy
.getVectorNumElements() / ResLen
;
825 assert(isPowerOf2_32(Rep
) && HwLen
% Rep
== 0);
// Each of the HwLen/Rep bytes starting at Offset is repeated Rep times.
826 for (unsigned i
= 0; i
!= HwLen
/Rep
; ++i
) {
827 for (unsigned j
= 0; j
!= Rep
; ++j
)
828 Mask
.push_back(i
+ Offset
);
830 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
831 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, ShuffV
);
834 // Converting between a vector predicate and a scalar predicate. In the
835 // vector predicate, a group of BitBytes bits will correspond to a single
836 // i1 element of the source vector type. Those bits will all have the same
837 // value. The same will be true for ByteVec, where each byte corresponds
838 // to a bit in the vector predicate.
839 // The algorithm is to traverse the ByteVec, going over the i1 values from
840 // the source vector, and generate the corresponding representation in an
841 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
842 // elements so that the interesting 8 bytes will be in the low end of the
// vector.
// Rep = bytes of the 8-byte group used per result element.
844 unsigned Rep
= 8 / ResLen
;
845 // Make sure the output fill the entire vector register, so repeat the
846 // 8-byte groups as many times as necessary.
// NOTE(review): the loops below push (HwLen/ResLen) * ResLen * Rep entries,
// which equals HwLen only when ResLen == 8; for ResLen 4 or 2 the mask would
// be longer than the byte vector -- confirm the intended bound of r.
847 for (unsigned r
= 0; r
!= HwLen
/ResLen
; ++r
) {
848 // This will generate the indexes of the 8 interesting bytes.
849 for (unsigned i
= 0; i
!= ResLen
; ++i
) {
850 for (unsigned j
= 0; j
!= Rep
; ++j
)
851 Mask
.push_back(Offset
+ i
*BitBytes
);
855 SDValue Zero
= getZero(dl
, MVT::i32
, DAG
);
856 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
857 // Combine the two low words from ShuffV into a v8i8, and byte-compare
// them against 0.
859 SDValue W0
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
, {ShuffV
, Zero
});
860 SDValue W1
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
861 {ShuffV
, DAG
.getConstant(4, dl
, MVT::i32
)});
862 SDValue Vec64
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::v8i8
, {W1
, W0
});
863 return getInstr(Hexagon::A4_vcmpbgtui
, dl
, ResTy
,
864 {Vec64
, DAG
.getTargetConstant(0, dl
, MVT::i32
)}, DAG
);
// Insert the subvector SubV into the HVX register vector VecV at element
// index IdxV and return the resulting DAG value.
//
// If VecV is a vector pair (isHvxPairTy), its halves are read through the
// vsub_lo / vsub_hi subregisters. A single-vector SubV with a constant index
// becomes a direct subregister insert; with a non-constant index both
// candidate pairs are built and ISD::SELECT picks one using PickHi.
// Otherwise SubV must be 32 or 64 bits wide (asserted below): the chosen
// single vector is rotated with VROR so that the insertion point is at
// word 0, the word(s) are written with VINSERTW0, and the vector is rotated
// back.
//
// NOTE(review): the embedded original line numbers skip values inside this
// function (e.g. 879, 881-883, 929, 937, 946, 951), so some statements
// (declarations of V0/V1/PickHi, branch headers, the final return) are not
// present in this copy. Confirm the exact control flow against the upstream
// file.
868 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV
, SDValue SubV
,
869 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
870 MVT VecTy
= ty(VecV
);
871 MVT SubTy
= ty(SubV
);
872 unsigned HwLen
= Subtarget
.getVectorLength();
873 MVT ElemTy
= VecTy
.getVectorElementType();
874 unsigned ElemWidth
= ElemTy
.getSizeInBits();
876 bool IsPair
= isHvxPairTy(VecTy
);
// SingleTy: same element type as VecV, with (8*HwLen)/ElemWidth elements.
877 MVT SingleTy
= MVT::getVectorVT(ElemTy
, (8*HwLen
)/ElemWidth
);
878 // The two single vectors that VecV consists of, if it's a pair.
880 SDValue SingleV
= VecV
;
884 V0
= DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, SingleTy
, VecV
);
885 V1
= DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, SingleTy
, VecV
);
// HalfV is the number of elements in one single vector; PickHi is the
// unsigned comparison "IdxV > HalfV".
// NOTE(review): with SETUGT an index exactly equal to HalfV does not set
// PickHi -- confirm whether SETUGE was intended.
887 SDValue HalfV
= DAG
.getConstant(SingleTy
.getVectorNumElements(),
889 PickHi
= DAG
.getSetCC(dl
, MVT::i1
, IdxV
, HalfV
, ISD::SETUGT
);
890 if (isHvxSingleTy(SubTy
)) {
891 if (const auto *CN
= dyn_cast
<const ConstantSDNode
>(IdxV
.getNode())) {
892 unsigned Idx
= CN
->getZExtValue();
893 assert(Idx
== 0 || Idx
== VecTy
.getVectorNumElements()/2);
894 unsigned SubIdx
= (Idx
== 0) ? Hexagon::vsub_lo
: Hexagon::vsub_hi
;
895 return DAG
.getTargetInsertSubreg(SubIdx
, dl
, VecTy
, VecV
, SubV
);
897 // If IdxV is not a constant, generate the two variants: with the
898 // SubV as the high and as the low subregister, and select the right
899 // pair based on the IdxV.
900 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SubV
, V1
});
901 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SubV
});
902 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
904 // The subvector being inserted must be entirely contained in one of
905 // the vectors V0 or V1. Set SingleV to the correct one, and update
906 // IdxV to be the index relative to the beginning of that vector.
907 SDValue S
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, IdxV
, HalfV
);
908 IdxV
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, PickHi
, S
, IdxV
);
909 SingleV
= DAG
.getNode(ISD::SELECT
, dl
, SingleTy
, PickHi
, V1
, V0
);
912 // The only meaningful subvectors of a single HVX vector are those that
913 // fit in a scalar register.
914 assert(SubTy
.getSizeInBits() == 32 || SubTy
.getSizeInBits() == 64);
915 // Convert IdxV to be index in bytes.
// The multiply and rotation are skipped only when IdxV is a constant zero.
916 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
917 if (!IdxN
|| !IdxN
->isNullValue()) {
918 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
919 DAG
.getConstant(ElemWidth
/8, dl
, MVT::i32
));
920 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, IdxV
);
922 // When inserting a single word, the rotation back to the original position
923 // would be by HwLen-Idx, but if two words are inserted, it will need to be
925 unsigned RolBase
= HwLen
;
// NOTE(review): this tests VecTy, but the assert above constrains SubTy to
// 32 or 64 bits -- presumably SubTy was intended here; confirm. Also, the
// VINSERTW0 in this branch is given only V, while the VINSERTW0 nodes
// further down take (SingleV, word) -- confirm the operand list.
926 if (VecTy
.getSizeInBits() == 32) {
927 SDValue V
= DAG
.getBitcast(MVT::i32
, SubV
);
928 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, V
);
// 64-bit case: split SubV into two 32-bit words (isub_lo / isub_hi), insert
// the low word, rotate by 4, then insert the high word.
930 SDValue V
= DAG
.getBitcast(MVT::i64
, SubV
);
931 SDValue R0
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, V
);
932 SDValue R1
= DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, V
);
933 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R0
);
934 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
,
935 DAG
.getConstant(4, dl
, MVT::i32
));
936 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R1
);
939 // If the vector wasn't ror'ed, don't ror it back.
// NOTE(review): RolBase is initialised from HwLen above, so the comparison
// against the literal 4 looks suspicious -- confirm the intended value.
940 if (RolBase
!= 4 || !IdxN
|| !IdxN
->isNullValue()) {
941 SDValue RolV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
942 DAG
.getConstant(RolBase
, dl
, MVT::i32
), IdxV
);
943 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, RolV
);
// Rebuild both candidate pairs around the updated single vector and let
// PickHi choose between them.
947 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SingleV
, V1
});
948 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SingleV
});
949 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
// Insert the predicate SubV into the HVX vector predicate VecV at index
// IdxV and return the updated predicate (type VecTy).
//
// VecV is converted to a byte vector with Q2V and SubV is expanded with
// createHvxPrefixPred. Unless IdxV is a constant zero, the byte vector is
// rotated (VROR) by IdxV*BitBytes so that the insertion point is at byte 0.
// A V6_pred_scalar2 mask built from BlockLen and a V6_vmux then merge
// ByteSub into it, the vector is rotated back by HwLen-ByteIdx, and V2Q
// converts the bytes back to a predicate.
//
// NOTE(review): original lines 962, 966, 970, 974-975, 981-982 are absent
// from this copy; in particular the declaration of ByteIdx is not visible.
955 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV
, SDValue SubV
,
956 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
957 MVT VecTy
= ty(VecV
);
958 MVT SubTy
= ty(SubV
);
959 assert(Subtarget
.isHVXVectorType(VecTy
, true));
960 // VecV is an HVX vector predicate. SubV may be either an HVX vector
961 // predicate as well, or it can be a scalar predicate.
963 unsigned VecLen
= VecTy
.getVectorNumElements();
964 unsigned HwLen
= Subtarget
.getVectorLength();
965 assert(HwLen
% VecLen
== 0 && "Unexpected vector type");
// Scale:    how many SubV-sized pieces make up VecV.
// BitBytes: HwLen / VecLen (used below as the per-element byte stride).
// BlockLen: HwLen / Scale (used below as the V6_pred_scalar2 operand).
967 unsigned Scale
= VecLen
/ SubTy
.getVectorNumElements();
968 unsigned BitBytes
= HwLen
/ VecLen
;
969 unsigned BlockLen
= HwLen
/ Scale
;
971 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
972 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
973 SDValue ByteSub
= createHvxPrefixPred(SubV
, dl
, BitBytes
, false, DAG
);
// Rotation is skipped only when IdxV is a constant zero.
976 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
977 if (!IdxN
|| !IdxN
->isNullValue()) {
978 ByteIdx
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
979 DAG
.getConstant(BitBytes
, dl
, MVT::i32
));
980 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteIdx
);
983 // ByteVec is the target vector VecV rotated in such a way that the
984 // subvector should be inserted at index 0. Generate a predicate mask
985 // and use vmux to do the insertion.
986 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
987 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
988 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
989 ByteVec
= getInstr(Hexagon::V6_vmux
, dl
, ByteTy
, {Q
, ByteSub
, ByteVec
}, DAG
);
990 // Rotate ByteVec back, and convert to a vector predicate.
991 if (!IdxN
|| !IdxN
->isNullValue()) {
992 SDValue HwLenV
= DAG
.getConstant(HwLen
, dl
, MVT::i32
);
993 SDValue ByteXdi
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, HwLenV
, ByteIdx
);
994 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteXdi
);
996 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
// Extend the vector predicate VecV to the HVX vector type ResTy (asserted
// to be an HVX vector type).
//
// One path returns a plain Q2V node. The other asserts that VecV and ResTy
// have the same element count and returns a select on VecV between a VSPLAT
// of the constant 1 and a zero vector.
//
// NOTE(review): original line 1006, which guards the Q2V return, is absent
// from this copy; per the comment below it presumably tests ZeroExt (Q2V
// for sign/any-extension, the select for zero-extension) -- confirm.
1000 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV
, const SDLoc
&dl
,
1001 MVT ResTy
, bool ZeroExt
, SelectionDAG
&DAG
) const {
1002 // Sign- and any-extending of a vector predicate to a vector register is
1003 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1004 // a vector of 1s (where the 1s are of type matching the vector type).
1005 assert(Subtarget
.isHVXVectorType(ResTy
));
1007 return DAG
.getNode(HexagonISD::Q2V
, dl
, ResTy
, VecV
);
1009 assert(ty(VecV
).getVectorNumElements() == ResTy
.getVectorNumElements());
1010 SDValue True
= DAG
.getNode(HexagonISD::VSPLAT
, dl
, ResTy
,
1011 DAG
.getConstant(1, dl
, MVT::i32
));
1012 SDValue False
= getZero(dl
, ResTy
, DAG
);
1013 return DAG
.getSelect(dl
, ResTy
, VecV
, True
, False
);
// Lower a BUILD_VECTOR node Op whose result is an HVX type.
//
// All operands of Op are copied into Ops, then one of three paths is taken:
//  - element type i1: build a vector predicate with buildHvxVectorPred;
//  - VecTy is 16*Subtarget.getVectorLength() bits wide: build the first and
//    second half of the operands separately with buildHvxVectorReg (using
//    the first type returned by typeSplit) and join them with
//    CONCAT_VECTORS;
//  - otherwise: build the whole vector with buildHvxVectorReg.
//
// NOTE(review): original lines 1018 and 1020-1021 are absent from this copy;
// the declaration of VecTy is not visible (presumably the type of Op).
1017 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op
, SelectionDAG
&DAG
)
1019 const SDLoc
&dl(Op
);
1022 unsigned Size
= Op
.getNumOperands();
1023 SmallVector
<SDValue
,128> Ops
;
1024 for (unsigned i
= 0; i
!= Size
; ++i
)
1025 Ops
.push_back(Op
.getOperand(i
));
1027 if (VecTy
.getVectorElementType() == MVT::i1
)
1028 return buildHvxVectorPred(Ops
, dl
, VecTy
, DAG
);
1030 if (VecTy
.getSizeInBits() == 16*Subtarget
.getVectorLength()) {
1031 ArrayRef
<SDValue
> A(Ops
);
1032 MVT SingleTy
= typeSplit(VecTy
).first
;
1033 SDValue V0
= buildHvxVectorReg(A
.take_front(Size
/2), dl
, SingleTy
, DAG
);
1034 SDValue V1
= buildHvxVectorReg(A
.drop_front(Size
/2), dl
, SingleTy
, DAG
);
1035 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, V0
, V1
);
1038 return buildHvxVectorReg(Ops
, dl
, VecTy
, DAG
);
// Lower a CONCAT_VECTORS node Op whose result is an HVX type.
//
// Non-boolean result: the operands are expanded element by element
// (ExtractVectorElements) into a build-vector; elements whose type is not
// legal are replaced by a value of the type returned by
// getTypeToTransformTo.
// Boolean (i1) result: if the operands are themselves HVX predicate types,
// the concatenation is expressed with QCAT, first concatenating each half of
// the operand list when needed. Otherwise each operand is expanded to a byte
// vector by createHvxPrefixPred and the results are combined by repeated
// VROR + OR, finishing with V2Q.
//
// NOTE(review): original lines 1043, 1047, 1051-1052, 1064, 1072-1073, 1079,
// 1095 and others are absent from this copy (declarations of VecTy and Ty,
// and some branch/early-return statements are not visible).
1042 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op
, SelectionDAG
&DAG
)
1044 // Vector concatenation of two integer (non-bool) vectors does not need
1045 // special lowering. Custom-lower concats of bool vectors and expand
1046 // concats of more than 2 vectors.
1048 const SDLoc
&dl(Op
);
1049 unsigned NumOp
= Op
.getNumOperands();
1050 if (VecTy
.getVectorElementType() != MVT::i1
) {
1053 // Expand the other cases into a build-vector.
1054 SmallVector
<SDValue
,8> Elems
;
1055 for (SDValue V
: Op
.getNode()->ops())
1056 DAG
.ExtractVectorElements(V
, Elems
);
1057 // A vector of i16 will be broken up into a build_vector of i16's.
1058 // This is a problem, since at the time of operation legalization,
1059 // all operations are expected to be type-legalized, and i16 is not
1060 // a legal type. If any of the extracted elements is not of a valid
1061 // type, sign-extend it to a valid one.
1062 for (unsigned i
= 0, e
= Elems
.size(); i
!= e
; ++i
) {
1063 SDValue V
= Elems
[i
];
1065 if (!isTypeLegal(Ty
)) {
1066 EVT NTy
= getTypeToTransformTo(*DAG
.getContext(), Ty
);
// Redo the element extract at the wider type NTy, then sign-extend in
// register from the original type Ty.
1067 if (V
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
) {
1068 Elems
[i
] = DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, NTy
,
1069 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, NTy
,
1070 V
.getOperand(0), V
.getOperand(1)),
1071 DAG
.getValueType(Ty
));
1074 // A few less complicated cases.
1075 if (V
.getOpcode() == ISD::Constant
)
1076 Elems
[i
] = DAG
.getSExtOrTrunc(V
, dl
, NTy
);
1077 else if (V
.isUndef())
1078 Elems
[i
] = DAG
.getUNDEF(NTy
);
1080 llvm_unreachable("Unexpected vector element");
1083 return DAG
.getBuildVector(VecTy
, dl
, Elems
);
// From here on the result is a boolean vector.
1086 assert(VecTy
.getVectorElementType() == MVT::i1
);
1087 unsigned HwLen
= Subtarget
.getVectorLength();
1088 assert(isPowerOf2_32(NumOp
) && HwLen
% NumOp
== 0);
1090 SDValue Op0
= Op
.getOperand(0);
1092 // If the operands are HVX types (i.e. not scalar predicates), then
1093 // defer the concatenation, and create QCAT instead.
1094 if (Subtarget
.isHVXVectorType(ty(Op0
), true)) {
1096 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, Op0
, Op
.getOperand(1));
// Copy the operand uses into SDValues so the list can be sliced into a
// front half and a back half.
1098 ArrayRef
<SDUse
> U(Op
.getNode()->ops());
1099 SmallVector
<SDValue
,4> SV(U
.begin(), U
.end());
1100 ArrayRef
<SDValue
> Ops(SV
);
1102 MVT HalfTy
= typeSplit(VecTy
).first
;
1103 SDValue V0
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1104 Ops
.take_front(NumOp
/2));
1105 SDValue V1
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1106 Ops
.take_back(NumOp
/2));
1107 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, V0
, V1
);
1110 // Count how many bytes (in a vector register) each bit in VecTy
1112 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
1114 SmallVector
<SDValue
,8> Prefixes
;
1115 for (SDValue V
: Op
.getNode()->op_values()) {
1116 SDValue P
= createHvxPrefixPred(V
, dl
, BitBytes
, true, DAG
);
1117 Prefixes
.push_back(P
);
// Accumulate from the last operand to the first: rotate the running result
// by InpLen*BitBytes, then OR in the next prefix.
1120 unsigned InpLen
= ty(Op
.getOperand(0)).getVectorNumElements();
1121 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1122 SDValue S
= DAG
.getConstant(InpLen
*BitBytes
, dl
, MVT::i32
);
1123 SDValue Res
= getZero(dl
, ByteTy
, DAG
);
1124 for (unsigned i
= 0, e
= Prefixes
.size(); i
!= e
; ++i
) {
1125 Res
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Res
, S
);
1126 Res
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, Res
, Prefixes
[e
-i
-1]);
1128 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, Res
);
// Lower EXTRACT_VECTOR_ELT on an HVX vector: operand 0 is the vector,
// operand 1 the index. Vectors with i1 elements go to
// extractHvxElementPred, all others to extractHvxElementReg; both receive
// the result type ty(Op).
// NOTE(review): original lines 1133 and 1141 are absent from this copy.
1132 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op
, SelectionDAG
&DAG
)
1134 // Change the type of the extracted element to i32.
1135 SDValue VecV
= Op
.getOperand(0);
1136 MVT ElemTy
= ty(VecV
).getVectorElementType();
1137 const SDLoc
&dl(Op
);
1138 SDValue IdxV
= Op
.getOperand(1);
1139 if (ElemTy
== MVT::i1
)
1140 return extractHvxElementPred(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1142 return extractHvxElementReg(VecV
, IdxV
, dl
, ty(Op
), DAG
);
// Lower INSERT_VECTOR_ELT on an HVX vector: operand 0 is the vector,
// operand 1 the value to insert, operand 2 the index. Vectors with i1
// elements go to insertHvxElementPred, all others to insertHvxElementReg.
// Note the helpers take the index before the value.
// NOTE(review): original lines 1147 and 1155 are absent from this copy.
1146 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op
, SelectionDAG
&DAG
)
1148 const SDLoc
&dl(Op
);
1149 SDValue VecV
= Op
.getOperand(0);
1150 SDValue ValV
= Op
.getOperand(1);
1151 SDValue IdxV
= Op
.getOperand(2);
1152 MVT ElemTy
= ty(VecV
).getVectorElementType();
1153 if (ElemTy
== MVT::i1
)
1154 return insertHvxElementPred(VecV
, IdxV
, ValV
, dl
, DAG
);
1156 return insertHvxElementReg(VecV
, IdxV
, ValV
, dl
, DAG
);
// Lower EXTRACT_SUBVECTOR on an HVX vector: operand 0 is the source vector,
// operand 1 the start index. The index must be a ConstantSDNode (cast<>
// asserts otherwise) and a multiple of the destination element count
// (asserted). Sources with i1 elements go to extractHvxSubvectorPred, all
// others to extractHvxSubvectorReg.
// NOTE(review): original lines 1161, 1164, 1168, 1170 and 1174 are absent
// from this copy; the declaration of DstTy is not visible (presumably the
// type of Op).
1160 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op
, SelectionDAG
&DAG
)
1162 SDValue SrcV
= Op
.getOperand(0);
1163 MVT SrcTy
= ty(SrcV
);
1165 SDValue IdxV
= Op
.getOperand(1);
// Idx is used only by the assert below.
1166 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
1167 assert(Idx
% DstTy
.getVectorNumElements() == 0);
1169 const SDLoc
&dl(Op
);
1171 MVT ElemTy
= SrcTy
.getVectorElementType();
1172 if (ElemTy
== MVT::i1
)
1173 return extractHvxSubvectorPred(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1175 return extractHvxSubvectorReg(SrcV
, IdxV
, dl
, DstTy
, DAG
);
// Lower INSERT_SUBVECTOR on an HVX vector: operand 0 is the destination
// vector, operand 1 the subvector, operand 2 the index (which, per the
// comment below, need not be a constant). Destinations with i1 elements go
// to insertHvxSubvectorPred, all others to insertHvxSubvectorReg.
// NOTE(review): original lines 1180, 1185 and 1191 are absent from this
// copy.
1179 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op
, SelectionDAG
&DAG
)
1181 // Idx does not need to be a constant.
1182 SDValue VecV
= Op
.getOperand(0);
1183 SDValue ValV
= Op
.getOperand(1);
1184 SDValue IdxV
= Op
.getOperand(2);
1186 const SDLoc
&dl(Op
);
1187 MVT VecTy
= ty(VecV
);
1188 MVT ElemTy
= VecTy
.getVectorElementType();
1189 if (ElemTy
== MVT::i1
)
1190 return insertHvxSubvectorPred(VecV
, ValV
, IdxV
, dl
, DAG
);
1192 return insertHvxSubvectorReg(VecV
, ValV
, IdxV
, dl
, DAG
);
// Lower ANY_EXTEND for HVX. When the input has i1 elements and ResTy is an
// HVX vector type, the node is lowered via LowerHvxSignExt; otherwise it is
// rewritten as a ZERO_EXTEND of the same input to ResTy.
// NOTE(review): original line 1201 is absent from this copy; the declaration
// of ResTy is not visible (presumably the type of Op).
1196 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op
, SelectionDAG
&DAG
) const {
1197 // Lower any-extends of boolean vectors to sign-extends, since they
1198 // translate directly to Q2V. Zero-extending could also be done equally
1199 // fast, but Q2V is used/recognized in more places.
1200 // For all other vectors, use zero-extend.
1202 SDValue InpV
= Op
.getOperand(0);
1203 MVT ElemTy
= ty(InpV
).getVectorElementType();
1204 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1205 return LowerHvxSignExt(Op
, DAG
);
1206 return DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(Op
), ResTy
, InpV
);
// Lower SIGN_EXTEND for HVX. When the input has i1 elements and ResTy is an
// HVX vector type, the predicate is extended with extendHvxVectorPred with
// ZeroExt = false.
// NOTE(review): original lines 1211 and 1216-1218 are absent from this copy;
// the declaration of ResTy and the fall-through return for all other inputs
// are not visible.
1210 HexagonTargetLowering::LowerHvxSignExt(SDValue Op
, SelectionDAG
&DAG
) const {
1212 SDValue InpV
= Op
.getOperand(0);
1213 MVT ElemTy
= ty(InpV
).getVectorElementType();
1214 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1215 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), false, DAG
);
// Lower ZERO_EXTEND for HVX. When the input has i1 elements and ResTy is an
// HVX vector type, the predicate is extended with extendHvxVectorPred with
// ZeroExt = true.
// NOTE(review): original lines 1221 and 1226-1228 are absent from this copy;
// the declaration of ResTy and the fall-through return for all other inputs
// are not visible.
1220 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op
, SelectionDAG
&DAG
) const {
1222 SDValue InpV
= Op
.getOperand(0);
1223 MVT ElemTy
= ty(InpV
).getVectorElementType();
1224 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1225 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), true, DAG
);
// Lower vector CTTZ for HVX in terms of CTLZ, using the identity quoted
// below. Three word splats (VSPLATW) are built: Vec1 (each element = 1),
// VecW (each element = the element bit width) and VecN1 (all ones). The
// result is VecW - ctlz((InpV ^ VecN1) & (InpV - Vec1)).
// The element width must be a power of two no larger than 32 (asserted),
// and the result type must equal the input type (asserted).
// NOTE(review): original lines 1234, 1237 and 1247 are absent from this
// copy; the declaration of ResTy is not visible.
1230 HexagonTargetLowering::LowerHvxCttz(SDValue Op
, SelectionDAG
&DAG
) const {
1231 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1232 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1233 const SDLoc
&dl(Op
);
1235 SDValue InpV
= Op
.getOperand(0);
1236 assert(ResTy
== ty(InpV
));
1238 // Calculate the vectors of 1 and bitwidth(x).
1239 MVT ElemTy
= ty(InpV
).getVectorElementType();
1240 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1241 // Using uint64_t because a shift by 32 can happen.
1242 uint64_t Splat1
= 0, SplatW
= 0;
1243 assert(isPowerOf2_32(ElemWidth
) && ElemWidth
<= 32);
// Pack 32/ElemWidth copies of the per-element constant into one word;
// the values are truncated to uint32_t when the splat nodes are created.
1244 for (unsigned i
= 0; i
!= 32/ElemWidth
; ++i
) {
1245 Splat1
= (Splat1
<< ElemWidth
) | 1;
1246 SplatW
= (SplatW
<< ElemWidth
) | ElemWidth
;
1248 SDValue Vec1
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1249 DAG
.getConstant(uint32_t(Splat1
), dl
, MVT::i32
));
1250 SDValue VecW
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1251 DAG
.getConstant(uint32_t(SplatW
), dl
, MVT::i32
));
1252 SDValue VecN1
= DAG
.getNode(HexagonISD::VSPLATW
, dl
, ResTy
,
1253 DAG
.getConstant(-1, dl
, MVT::i32
));
1254 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1255 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1256 // it separately in custom combine or selection).
1257 SDValue A
= DAG
.getNode(ISD::AND
, dl
, ResTy
,
1258 {DAG
.getNode(ISD::XOR
, dl
, ResTy
, {InpV
, VecN1
}),
1259 DAG
.getNode(ISD::SUB
, dl
, ResTy
, {InpV
, Vec1
})});
1260 return DAG
.getNode(ISD::SUB
, dl
, ResTy
,
1261 {VecW
, DAG
.getNode(ISD::CTLZ
, dl
, ResTy
, A
)});
// Lower vector MUL for a single HVX vector result (asserted), switching on
// the element type. Three sequences are visible:
//  - a widening multiply (V6_vmpybv for i8, otherwise V6_vmpyhv) whose
//    low halves are gathered with a byte shuffle and bitcast to ResTy;
//  - V6_vmpyih, described below as the i16 multiply;
//  - the three-instruction signed word multiply
//    V6_vmpyiowh / V6_vaslw / V6_vmpyiewuh_acc.
// NOTE(review): original lines 1266, 1270, 1275, 1277, 1285, 1290,
// 1294-1295, 1298, 1300 and 1309-1317 are absent from this copy: the
// declaration of ResTy, the case labels, the operand list of the last
// getInstr and the returns after it are not visible. Which element type
// reaches which sequence must be confirmed against the upstream file.
1265 HexagonTargetLowering::LowerHvxMul(SDValue Op
, SelectionDAG
&DAG
) const {
1267 assert(ResTy
.isVector() && isHvxSingleTy(ResTy
));
1268 const SDLoc
&dl(Op
);
1269 SmallVector
<int,256> ShuffMask
;
1271 MVT ElemTy
= ResTy
.getVectorElementType();
1272 unsigned VecLen
= ResTy
.getVectorNumElements();
1273 SDValue Vs
= Op
.getOperand(0);
1274 SDValue Vt
= Op
.getOperand(1);
1276 switch (ElemTy
.SimpleTy
) {
1278 // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1279 // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1280 // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1281 MVT ExtTy
= typeExtElem(ResTy
, 2);
1282 unsigned MpyOpc
= ElemTy
== MVT::i8
? Hexagon::V6_vmpybv
1283 : Hexagon::V6_vmpyhv
;
1284 SDValue M
= getInstr(MpyOpc
, dl
, ExtTy
, {Vs
, Vt
}, DAG
);
1286 // Discard high halves of the resulting values, collect the low halves.
1287 for (unsigned I
= 0; I
< VecLen
; I
+= 2) {
1288 ShuffMask
.push_back(I
); // Pick even element.
1289 ShuffMask
.push_back(I
+VecLen
); // Pick odd element.
// Reinterpret the wide product with the original element type, split it
// into two vectors and shuffle the selected elements together.
1291 VectorPair P
= opSplit(opCastElem(M
, ElemTy
, DAG
), dl
, DAG
);
1292 SDValue BS
= getByteShuffle(dl
, P
.first
, P
.second
, ShuffMask
, DAG
);
1293 return DAG
.getBitcast(ResTy
, BS
);
1296 // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
1297 // (There is also V6_vmpyhv, which behaves in an analogous way to
1299 return getInstr(Hexagon::V6_vmpyih
, dl
, ResTy
, {Vs
, Vt
}, DAG
);
1301 // Use the following sequence for signed word multiply:
1302 // T0 = V6_vmpyiowh Vs, Vt
1303 // T1 = V6_vaslw T0, 16
1304 // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
1305 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
1306 SDValue T0
= getInstr(Hexagon::V6_vmpyiowh
, dl
, ResTy
, {Vs
, Vt
}, DAG
);
1307 SDValue T1
= getInstr(Hexagon::V6_vaslw
, dl
, ResTy
, {T0
, S16
}, DAG
);
1308 SDValue T2
= getInstr(Hexagon::V6_vmpyiewuh_acc
, dl
, ResTy
,
// Lower vector MULHS / MULHU (the high half of the product) for HVX.
// IsSigned is true when the opcode is ISD::MULHS.
//
// i8 / i16 elements: multiply with a widening instruction chosen by element
// type and signedness (V6_vmpybv / V6_vmpyubv / V6_vmpyhv / V6_vmpyuhv),
// then gather the high half of every product with a byte shuffle.
// i32 elements (asserted): two sequences are visible. The first, derived in
// the comment block below, combines V6_vmpyewuh, V6_vasrw, V6_vmpyiewuh_acc
// and V6_vmpyiowh with an ADD. The second, introduced as "Unsigned mulhw",
// is built from V6_vmpyuhv, V6_vdelta, V6_vadduhw, V6_vlsrw and ADDs.
//
// NOTE(review): original lines 1320, 1337, 1356-1358, 1373, 1376-1377,
// 1383-1385, 1421-1423 and others are absent from this copy: the
// declaration of ResTy, the condition selecting between the two i32
// sequences (presumably IsSigned), the operand list of the
// V6_vmpyiewuh_acc call and the return statements are not visible.
1319 HexagonTargetLowering::LowerHvxMulh(SDValue Op
, SelectionDAG
&DAG
) const {
1321 assert(ResTy
.isVector());
1322 const SDLoc
&dl(Op
);
1323 SmallVector
<int,256> ShuffMask
;
1325 MVT ElemTy
= ResTy
.getVectorElementType();
1326 unsigned VecLen
= ResTy
.getVectorNumElements();
1327 SDValue Vs
= Op
.getOperand(0);
1328 SDValue Vt
= Op
.getOperand(1);
1329 bool IsSigned
= Op
.getOpcode() == ISD::MULHS
;
1331 if (ElemTy
== MVT::i8
|| ElemTy
== MVT::i16
) {
1332 // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
1333 // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
1334 // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
1335 // For i16, use V6_vmpyhv, which behaves in an analogous way to
1336 // V6_vmpybv: results Lo and Hi are products of even/odd elements
1338 MVT ExtTy
= typeExtElem(ResTy
, 2);
1339 unsigned MpyOpc
= ElemTy
== MVT::i8
1340 ? (IsSigned
? Hexagon::V6_vmpybv
: Hexagon::V6_vmpyubv
)
1341 : (IsSigned
? Hexagon::V6_vmpyhv
: Hexagon::V6_vmpyuhv
);
1342 SDValue M
= getInstr(MpyOpc
, dl
, ExtTy
, {Vs
, Vt
}, DAG
);
1344 // Discard low halves of the resulting values, collect the high halves.
1345 for (unsigned I
= 0; I
< VecLen
; I
+= 2) {
1346 ShuffMask
.push_back(I
+1); // Pick even element.
1347 ShuffMask
.push_back(I
+VecLen
+1); // Pick odd element.
1349 VectorPair P
= opSplit(opCastElem(M
, ElemTy
, DAG
), dl
, DAG
);
1350 SDValue BS
= getByteShuffle(dl
, P
.first
, P
.second
, ShuffMask
, DAG
);
1351 return DAG
.getBitcast(ResTy
, BS
);
// Only 32-bit elements remain past this point.
1354 assert(ElemTy
== MVT::i32
);
1355 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
1359 // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1360 // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
1361 // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
1362 // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
1363 // + Lo(Vs) *us Vt] >> 32
1364 // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
1365 // anything, so it cannot produce any carry over to higher bits),
1366 // so everything in [] can be shifted by 16 without loss of precision.
1367 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
1368 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
1369 // Denote Hi(Vs) = Vs':
1370 // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
1371 // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
1372 SDValue T0
= getInstr(Hexagon::V6_vmpyewuh
, dl
, ResTy
, {Vt
, Vs
}, DAG
);
// S0 = Vs arithmetically shifted right by 16 (the Vs' of the derivation).
1374 SDValue S0
= getInstr(Hexagon::V6_vasrw
, dl
, ResTy
, {Vs
, S16
}, DAG
);
1375 SDValue T1
= getInstr(Hexagon::V6_vmpyiewuh_acc
, dl
, ResTy
,
1378 SDValue S2
= getInstr(Hexagon::V6_vasrw
, dl
, ResTy
, {T1
, S16
}, DAG
);
1380 SDValue T2
= getInstr(Hexagon::V6_vmpyiowh
, dl
, ResTy
, {S0
, Vt
}, DAG
);
1382 SDValue T3
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {S2
, T2
});
1386 // Unsigned mulhw. (Would expansion using signed mulhw be better?)
// LoVec / HiVec extract the vsub_lo / vsub_hi half of a vector pair.
1388 auto LoVec
= [&DAG
,ResTy
,dl
] (SDValue Pair
) {
1389 return DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, ResTy
, Pair
);
1391 auto HiVec
= [&DAG
,ResTy
,dl
] (SDValue Pair
) {
1392 return DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, ResTy
, Pair
);
// P is a word splat of 0x02020202, used as the control operand of the
// V6_vdelta below.
1395 MVT PairTy
= typeJoin({ResTy
, ResTy
});
1396 SDValue P
= getInstr(Hexagon::V6_lvsplatw
, dl
, ResTy
,
1397 {DAG
.getConstant(0x02020202, dl
, MVT::i32
)}, DAG
);
1398 // Multiply-unsigned halfwords:
1399 // LoVec = Vs.uh[2i] * Vt.uh[2i],
1400 // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
1401 SDValue T0
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {Vs
, Vt
}, DAG
);
1402 // The low halves in the LoVec of the pair can be discarded. They are
1403 // not added to anything (in the full-precision product), so they cannot
1404 // produce a carry into the higher bits.
1405 SDValue T1
= getInstr(Hexagon::V6_vlsrw
, dl
, ResTy
, {LoVec(T0
), S16
}, DAG
);
1406 // Swap low and high halves in Vt, and do the halfword multiplication
1407 // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
1408 SDValue D0
= getInstr(Hexagon::V6_vdelta
, dl
, ResTy
, {Vt
, P
}, DAG
);
1409 SDValue T2
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {Vs
, D0
}, DAG
);
1410 // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
1411 // These products are words, but cannot be added directly because the
1412 // sums could overflow. Add these products, by halfwords, where each sum
1413 // of a pair of halfwords gives a word.
1414 SDValue T3
= getInstr(Hexagon::V6_vadduhw
, dl
, PairTy
,
1415 {LoVec(T2
), HiVec(T2
)}, DAG
);
1416 // Add the high halfwords from the products of the low halfwords.
1417 SDValue T4
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {T1
, LoVec(T3
)});
1418 SDValue T5
= getInstr(Hexagon::V6_vlsrw
, dl
, ResTy
, {T4
, S16
}, DAG
);
1419 SDValue T6
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {HiVec(T0
), HiVec(T3
)});
1420 SDValue T7
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {T5
, T6
});
// Lower ANY_EXTEND_VECTOR_INREG (asserted to be the only opcode reaching
// here) by re-emitting it as ZERO_EXTEND_VECTOR_INREG with the same result
// type.
// NOTE(review): original line 1429, which holds the remaining argument(s) of
// the getNode call, is absent from this copy.
1425 HexagonTargetLowering::LowerHvxExtend(SDValue Op
, SelectionDAG
&DAG
) const {
1426 // Sign- and zero-extends are legal.
1427 assert(Op
.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
);
1428 return DAG
.getNode(ISD::ZERO_EXTEND_VECTOR_INREG
, SDLoc(Op
), ty(Op
),
// Lower a vector shift for HVX. The visible code tries getVectorShiftByInt
// and binds a non-null result to S.
// NOTE(review): original lines 1435-1438 are absent from this copy, so what
// is returned on success and on failure is not visible here.
1433 HexagonTargetLowering::LowerHvxShift(SDValue Op
, SelectionDAG
&DAG
) const {
1434 if (SDValue S
= getVectorShiftByInt(Op
, DAG
))
// Split an operation on HVX vector pairs into two operations on single
// vectors. Op must not be a machine opcode (asserted).
//
// Every operand that is an HVX vector type is split with opSplit; any other
// operand is used unchanged for both halves. The two operand lists (OpsL,
// OpsH) are used to create two nodes with the original opcode and the half
// result type, which are then joined with CONCAT_VECTORS.
// For SIGN_EXTEND_INREG the VTSDNode operand is detected separately; the
// SplitVTNode helper yields the split value type for both halves.
//
// NOTE(review): original lines 1444, 1449-1450, 1458-1459, 1462-1464 and
// 1469-1471 are absent from this copy: the declaration of ResTy, the
// statement executed when a VTSDNode operand is found (presumably assigning
// SplitVTNode(N) to P) and the final return are not visible.
1440 HexagonTargetLowering::SplitHvxPairOp(SDValue Op
, SelectionDAG
&DAG
) const {
1441 assert(!Op
.isMachineOpcode());
1442 SmallVector
<SDValue
,2> OpsL
, OpsH
;
1443 const SDLoc
&dl(Op
);
1445 auto SplitVTNode
= [&DAG
,this] (const VTSDNode
*N
) {
1446 MVT Ty
= typeSplit(N
->getVT().getSimpleVT()).first
;
1447 SDValue TV
= DAG
.getValueType(Ty
);
1448 return std::make_pair(TV
, TV
);
1451 for (SDValue A
: Op
.getNode()->ops()) {
1452 VectorPair P
= Subtarget
.isHVXVectorType(ty(A
), true)
1453 ? opSplit(A
, dl
, DAG
)
1454 : std::make_pair(A
, A
);
1455 // Special case for type operand.
1456 if (Op
.getOpcode() == ISD::SIGN_EXTEND_INREG
) {
1457 if (const auto *N
= dyn_cast
<const VTSDNode
>(A
.getNode()))
1460 OpsL
.push_back(P
.first
);
1461 OpsH
.push_back(P
.second
);
// Emit the operation once per half, then concatenate the two results.
1465 MVT HalfTy
= typeSplit(ResTy
).first
;
1466 SDValue L
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsL
);
1467 SDValue H
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsH
);
1468 SDValue S
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, ResTy
, L
, H
);
// Split an unindexed (asserted) load or store of an HVX vector pair into two
// single-vector accesses: one at the original base pointer and one at
// base + HwLen. If the node has a memory operand, it is split into two
// operands of size HwLen at offsets 0 and HwLen.
//
// LOAD:  the two loads are joined with CONCAT_VECTORS, and their output
//        chains (value 1 of each load) with a TokenFactor; both are
//        returned through getMergeValues.
// STORE: the stored value is split with opSplit and the two stores are
//        joined with a TokenFactor.
//
// NOTE(review): original lines 1478-1479, 1492-1493, 1495-1496, 1504 and
// 1510-1514 are absent from this copy: what is returned when MemTy is not a
// pair type, the declaration of NewOp and the final return are not visible.
1473 HexagonTargetLowering::SplitHvxMemOp(SDValue Op
, SelectionDAG
&DAG
) const {
1474 LSBaseSDNode
*BN
= cast
<LSBaseSDNode
>(Op
.getNode());
1475 assert(BN
->isUnindexed());
1476 MVT MemTy
= BN
->getMemoryVT().getSimpleVT();
1477 if (!isHvxPairTy(MemTy
))
1480 const SDLoc
&dl(Op
);
1481 unsigned HwLen
= Subtarget
.getVectorLength();
1482 MVT SingleTy
= typeSplit(MemTy
).first
;
1483 SDValue Chain
= BN
->getChain();
1484 SDValue Base0
= BN
->getBasePtr();
1485 SDValue Base1
= DAG
.getMemBasePlusOffset(Base0
, HwLen
, dl
);
// The memory operands stay null when the node has none.
1487 MachineMemOperand
*MOp0
= nullptr, *MOp1
= nullptr;
1488 if (MachineMemOperand
*MMO
= BN
->getMemOperand()) {
1489 MachineFunction
&MF
= DAG
.getMachineFunction();
1490 MOp0
= MF
.getMachineMemOperand(MMO
, 0, HwLen
);
1491 MOp1
= MF
.getMachineMemOperand(MMO
, HwLen
, HwLen
);
1494 unsigned MemOpc
= BN
->getOpcode();
1497 if (MemOpc
== ISD::LOAD
) {
// Both loads use the incoming chain.
1498 SDValue Load0
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base0
, MOp0
);
1499 SDValue Load1
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base1
, MOp1
);
1500 NewOp
= DAG
.getMergeValues(
1501 { DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, MemTy
, Load0
, Load1
),
1502 DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1503 Load0
.getValue(1), Load1
.getValue(1)) }, dl
);
1505 assert(MemOpc
== ISD::STORE
);
1506 VectorPair Vals
= opSplit(cast
<StoreSDNode
>(Op
)->getValue(), dl
, DAG
);
1507 SDValue Store0
= DAG
.getStore(Chain
, dl
, Vals
.first
, Base0
, MOp0
);
1508 SDValue Store1
= DAG
.getStore(Chain
, dl
, Vals
.second
, Base1
, MOp1
);
1509 NewOp
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Store0
, Store1
);
// Top-level dispatcher for custom lowering of HVX operations.
//
// IsPairOp is true when the result type or any operand type is an HVX
// vector pair. The visible code shows two groups of returns: one that
// forwards to SplitHvxMemOp / SplitHvxPairOp, and one that maps each opcode
// to its LowerHvx* routine. INTRINSIC_VOID is returned unchanged, and LOAD
// returns an empty SDValue so that the default lowering handles it. Anything
// not handled ends in llvm_unreachable.
//
// NOTE(review): original lines 1521-1528, 1530-1543, 1546-1551, 1558,
// 1563-1564, 1567, 1570 and 1574-1577 are absent from this copy: the switch
// headers, most case labels (including the ones that precede the returns
// shown here) and the condition that uses IsPairOp are not visible. Confirm
// the full opcode lists against the upstream file.
1516 HexagonTargetLowering::LowerHvxOperation(SDValue Op
, SelectionDAG
&DAG
) const {
1517 unsigned Opc
= Op
.getOpcode();
1518 bool IsPairOp
= isHvxPairTy(ty(Op
)) ||
1519 llvm::any_of(Op
.getNode()->ops(), [this] (SDValue V
) {
1520 return isHvxPairTy(ty(V
));
1529 return SplitHvxMemOp(Op
, DAG
);
1544 case ISD::SIGN_EXTEND_INREG
:
1545 return SplitHvxPairOp(Op
, DAG
);
1552 case ISD::BUILD_VECTOR
: return LowerHvxBuildVector(Op
, DAG
);
1553 case ISD::CONCAT_VECTORS
: return LowerHvxConcatVectors(Op
, DAG
);
1554 case ISD::INSERT_SUBVECTOR
: return LowerHvxInsertSubvector(Op
, DAG
);
1555 case ISD::INSERT_VECTOR_ELT
: return LowerHvxInsertElement(Op
, DAG
);
1556 case ISD::EXTRACT_SUBVECTOR
: return LowerHvxExtractSubvector(Op
, DAG
);
1557 case ISD::EXTRACT_VECTOR_ELT
: return LowerHvxExtractElement(Op
, DAG
);
1559 case ISD::ANY_EXTEND
: return LowerHvxAnyExt(Op
, DAG
);
1560 case ISD::SIGN_EXTEND
: return LowerHvxSignExt(Op
, DAG
);
1561 case ISD::ZERO_EXTEND
: return LowerHvxZeroExt(Op
, DAG
);
1562 case ISD::CTTZ
: return LowerHvxCttz(Op
, DAG
);
1565 case ISD::SRL
: return LowerHvxShift(Op
, DAG
);
1566 case ISD::MUL
: return LowerHvxMul(Op
, DAG
);
1568 case ISD::MULHU
: return LowerHvxMulh(Op
, DAG
);
1569 case ISD::ANY_EXTEND_VECTOR_INREG
: return LowerHvxExtend(Op
, DAG
);
1571 case ISD::INTRINSIC_VOID
: return Op
;
1572 // Unaligned loads will be handled by the default lowering.
1573 case ISD::LOAD
: return SDValue();
1578 llvm_unreachable("Unhandled HVX operation");
1582 HexagonTargetLowering::isHvxOperation(SDValue Op
) const {
1583 // If the type of the result, or any operand type are HVX vector types,
1584 // this is an HVX operation.
1585 return Subtarget
.isHVXVectorType(ty(Op
), true) ||
1586 llvm::any_of(Op
.getNode()->ops(),
1587 [this] (SDValue V
) {
1588 return Subtarget
.isHVXVectorType(ty(V
), true);