//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
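// Note: when this flag is given explicitly, getPreferredHvxVectorAction()
// below widens any short vector of at least HvxWidenThreshold bytes,
// regardless of the default half-vector-length rule.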
static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16,  MVT::v16i32 };
static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    addRegisterClass(MVT::v64i8,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    addRegisterClass(MVT::v128i8,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32,  &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8,  &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32,  &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1,   &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1,   &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1,  &Hexagon::HvxQRRegClass);
  }
  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ?  MVT::v64i8 : MVT::v128i8;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    setOperationAction(Opc, FromTy, Promote);
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.
  setOperationAction(ISD::BITCAST, MVT::i16,    Custom);
  setOperationAction(ISD::BITCAST, MVT::i32,    Custom);
  setOperationAction(ISD::BITCAST, MVT::i64,    Custom);
  setOperationAction(ISD::BITCAST, MVT::v16i1,  Custom);
  setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
  setOperationAction(ISD::BITCAST, MVT::i128,   Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
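  // Operation actions for types that fit in a single HVX vector register.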
  for (MVT T : LegalV) {
    setIndexedLoadAction(ISD::POST_INC,  T, Legal);
    setIndexedStoreAction(ISD::POST_INC, T, Legal);

    setOperationAction(ISD::AND,          T, Legal);
    setOperationAction(ISD::OR,           T, Legal);
    setOperationAction(ISD::XOR,          T, Legal);
    setOperationAction(ISD::ADD,          T, Legal);
    setOperationAction(ISD::SUB,          T, Legal);
    setOperationAction(ISD::MUL,          T, Legal);
    setOperationAction(ISD::CTPOP,        T, Legal);
    setOperationAction(ISD::CTLZ,         T, Legal);
    setOperationAction(ISD::SELECT,       T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR, T, Legal);

    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::BSWAP,                    T, Legal);

    setOperationAction(ISD::SMIN, T, Legal);
    setOperationAction(ISD::SMAX, T, Legal);
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Legal);
      setOperationAction(ISD::UMAX, T, Legal);
    }

    setOperationAction(ISD::CTTZ,               T, Custom);
    setOperationAction(ISD::LOAD,               T, Custom);
    setOperationAction(ISD::MLOAD,              T, Custom);
    setOperationAction(ISD::MSTORE,             T, Custom);
    setOperationAction(ISD::MULHS,              T, Custom);
    setOperationAction(ISD::MULHU,              T, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS,     T, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,   T, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  T, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,  T, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
    setOperationAction(ISD::ANY_EXTEND,         T, Custom);
    setOperationAction(ISD::SIGN_EXTEND,        T, Custom);
    setOperationAction(ISD::ZERO_EXTEND,        T, Custom);

    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
    // HVX only has shifts of words and halfwords.
    setOperationAction(ISD::SRA, T, Custom);
    setOperationAction(ISD::SHL, T, Custom);
    setOperationAction(ISD::SRL, T, Custom);

    // Promote all shuffles to operate on vectors of bytes.
    setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);

    setCondCodeAction(ISD::SETNE,  T, Expand);
    setCondCodeAction(ISD::SETLE,  T, Expand);
    setCondCodeAction(ISD::SETGE,  T, Expand);
    setCondCodeAction(ISD::SETLT,  T, Expand);
    setCondCodeAction(ISD::SETULE, T, Expand);
    setCondCodeAction(ISD::SETUGE, T, Expand);
    setCondCodeAction(ISD::SETULT, T, Expand);
  }
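  // Operation actions for vector-pair types (two HVX registers); most of
  // these are custom-lowered by splitting into single-vector operations.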
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    setOperationAction(ISD::BUILD_VECTOR, T, Custom);
    // Make concat-vectors custom to handle concats of more than 2 vectors.
    setOperationAction(ISD::CONCAT_VECTORS, T, Custom);

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.
    setOperationAction(ISD::ANY_EXTEND,               T, Custom);
    setOperationAction(ISD::SIGN_EXTEND,              T, Custom);
    setOperationAction(ISD::ZERO_EXTEND,              T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG,        T, Custom);
    setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,  T, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
    setOperationAction(ISD::SPLAT_VECTOR,             T, Custom);

    setOperationAction(ISD::LOAD,   T, Custom);
    setOperationAction(ISD::STORE,  T, Custom);
    setOperationAction(ISD::MLOAD,  T, Custom);
    setOperationAction(ISD::MSTORE, T, Custom);
    setOperationAction(ISD::CTLZ,   T, Custom);
    setOperationAction(ISD::CTTZ,   T, Custom);
    setOperationAction(ISD::CTPOP,  T, Custom);

    setOperationAction(ISD::ADD,     T, Legal);
    setOperationAction(ISD::SUB,     T, Legal);
    setOperationAction(ISD::MUL,     T, Custom);
    setOperationAction(ISD::MULHS,   T, Custom);
    setOperationAction(ISD::MULHU,   T, Custom);
    setOperationAction(ISD::AND,     T, Custom);
    setOperationAction(ISD::OR,      T, Custom);
    setOperationAction(ISD::XOR,     T, Custom);
    setOperationAction(ISD::SETCC,   T, Custom);
    setOperationAction(ISD::VSELECT, T, Custom);

    setOperationAction(ISD::SRA, T, Custom);
    setOperationAction(ISD::SHL, T, Custom);
    setOperationAction(ISD::SRL, T, Custom);

    // Promote all shuffles to operate on vectors of bytes.
    setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);

    setOperationAction(ISD::SMIN, T, Custom);
    setOperationAction(ISD::SMAX, T, Custom);
    if (T.getScalarType() != MVT::i32) {
      setOperationAction(ISD::UMIN, T, Custom);
      setOperationAction(ISD::UMAX, T, Custom);
    }
  }
  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::SETCC, BoolW, Custom);
    setOperationAction(ISD::AND,   BoolW, Custom);
    setOperationAction(ISD::OR,    BoolW, Custom);
    setOperationAction(ISD::XOR,   BoolW, Custom);
    // Masked load/store takes a mask that may need splitting.
    setOperationAction(ISD::MLOAD,  BoolW, Custom);
    setOperationAction(ISD::MSTORE, BoolW, Custom);
  }
  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    setOperationAction(ISD::BUILD_VECTOR,       BoolV, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     BoolV, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,   BoolV, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  BoolV, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,  BoolV, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
    setOperationAction(ISD::SELECT,             BoolV, Custom);
    setOperationAction(ISD::AND,                BoolV, Legal);
    setOperationAction(ISD::OR,                 BoolV, Legal);
    setOperationAction(ISD::XOR,                BoolV, Legal);
  }

  if (Use64b) {
    for (MVT T : {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  } else {
    for (MVT T : {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
  }
  // Handle store widening for short vectors.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      if (Action == TargetLoweringBase::TypeWidenVector) {
        setOperationAction(ISD::LOAD,        VecTy, Custom);
        setOperationAction(ISD::STORE,       VecTy, Custom);
        setOperationAction(ISD::SETCC,       VecTy, Custom);
        setOperationAction(ISD::TRUNCATE,    VecTy, Custom);
        setOperationAction(ISD::ANY_EXTEND,  VecTy, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);

        MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
        if (!isTypeLegal(BoolTy))
          setOperationAction(ISD::SETCC, BoolTy, Custom);
      }
    }
  }

  setTargetDAGCombine(ISD::SPLAT_VECTOR);
  setTargetDAGCombine(ISD::VSELECT);
}
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)
    return TargetLoweringBase::TypeSplitVector;

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A == TargetLoweringBase::TypeWidenVector)
        return A;
    }
  }
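  // (For example, v32i1 on a 64-byte HVX is widened because v32i8 would be
  // widened by the size check below.)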
  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
      return TargetLoweringBase::TypeWidenVector;
    unsigned HwWidth = 8*HwLen;
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
      return TargetLoweringBase::TypeWidenVector;
  }

  // Defer to default.
  return ~0u;
}
SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<SDValue,4> IntOps;
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}
MVT
HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
  assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());

  MVT ElemTy = Tys.first.getVectorElementType();
  return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
                                  Tys.second.getVectorNumElements());
}

HexagonTargetLowering::TypePair
HexagonTargetLowering::typeSplit(MVT VecTy) const {
  assert(VecTy.isVector());
  unsigned NumElem = VecTy.getVectorNumElements();
  assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
  MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
  return { HalfTy, HalfTy };
}
MVT
HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

MVT
HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}
SDValue
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}
SDValue
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.second, Ops.first);
}

HexagonTargetLowering::VectorPair
HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                               SelectionDAG &DAG) const {
  TypePair Tys = typeSplit(ty(Vec));
  if (Vec.getOpcode() == HexagonISD::QCAT)
    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
}
bool
HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
}

bool
HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}

bool
HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty, true) &&
         Ty.getVectorElementType() == MVT::i1;
}
bool HexagonTargetLowering::allowsHvxMemoryAccess(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  // Bool vectors are excluded by default, but make it explicit to
  // emphasize that bool vectors cannot be loaded or stored.
  // Also, disallow double vector stores (to prevent unnecessary
  // store widening in DAG combiner).
  if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
    return false;
  if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
    return false;
  if (Fast)
    *Fast = true;
  return true;
}

bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  if (!Subtarget.isHVXVectorType(VecTy))
    return false;
  // XXX Should this be false? vmemu are a bit slower than vmem.
  if (Fast)
    *Fast = true;
  return true;
}
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  unsigned ElemWidth = ElemTy.getSizeInBits();
  if (ElemWidth == 8)
    return ElemIdx;

  unsigned L = Log2_32(ElemWidth/8);
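  // Scale the element index by the element size in bytes: the byte index
  // is ElemIdx << L, e.g. << 2 for i32 elements.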
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}

SDValue
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return convertToByteIndex(SubIdx, ElemTy, DAG);
}
SDValue
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  SmallVector<int,128> ByteMask;
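  // Expand each mask entry into ElemSize byte positions: e.g. for i16
  // elements, entry M becomes bytes 2*M and 2*M+1, and an undef entry (-1)
  // becomes ElemSize copies of -1.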
  for (int M : Mask) {
    if (M < 0) {
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  if (VecTy.getVectorElementType() != MVT::i32) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    Words.assign(Values.begin(), Values.end());
  }

  unsigned NumWords = Words.size();
  bool IsSplat = true, IsUndef = true;
  SDValue SplatV;
  for (unsigned i = 0; i != NumWords && IsSplat; ++i) {
    if (isUndef(Words[i]))
      continue;
    IsUndef = false;
    if (!SplatV.getNode())
      SplatV = Words[i];
    else if (SplatV != Words[i])
      IsSplat = false;
  }
  if (IsUndef)
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
    if (IdxN && IdxN->isNullValue())
      return getZero(dl, VecTy, DAG);
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a splat or a zero vector (handled above) before falling back to a
  // constant pool load.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
                       MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      if (ExtLen == VecLen)
        return S;
      return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S);
    }
  }

  // Construct two halves in parallel, then or them together.
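  // Each VINSERTW0 below writes one 32-bit word into lane 0 of an
  // accumulator, and the following VROR by 4 bytes moves it out of lane 0
  // so the next word can be inserted. HalfV0 collects the low NumWords/2
  // words and HalfV1 the high ones; the final rotate by HwLen/2 plus OR
  // combines the two halves.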
  assert(4*Words.size() == Subtarget.getVectorLength());
  SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
  SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
  SDValue S = DAG.getConstant(4, dl, MVT::i32);
  for (unsigned i = 0; i != NumWords/2; ++i) {
    SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                            {HalfV0, Words[i]});
    SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                            {HalfV1, Words[i+NumWords/2]});
    HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S});
    HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S});
  }

  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy,
                       {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1});
  return DstV;
}
641 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV
, const SDLoc
&dl
,
642 unsigned BitBytes
, bool ZeroFill
, SelectionDAG
&DAG
) const {
643 MVT PredTy
= ty(PredV
);
644 unsigned HwLen
= Subtarget
.getVectorLength();
645 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
647 if (Subtarget
.isHVXVectorType(PredTy
, true)) {
648 // Move the vector predicate SubV to a vector register, and scale it
649 // down to match the representation (bytes per type element) that VecV
650 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
651 // in general) element and put them at the front of the resulting
652 // vector. This subvector will then be inserted into the Q2V of VecV.
653 // To avoid having an operation that generates an illegal type (short
654 // vector), generate a full size vector.
656 SDValue T
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, PredV
);
657 SmallVector
<int,128> Mask(HwLen
);
658 // Scale = BitBytes(PredV) / Given BitBytes.
659 unsigned Scale
= HwLen
/ (PredTy
.getVectorNumElements() * BitBytes
);
660 unsigned BlockLen
= PredTy
.getVectorNumElements() * BitBytes
;
662 for (unsigned i
= 0; i
!= HwLen
; ++i
) {
663 unsigned Num
= i
% Scale
;
664 unsigned Off
= i
/ Scale
;
665 Mask
[BlockLen
*Num
+ Off
] = i
;
667 SDValue S
= DAG
.getVectorShuffle(ByteTy
, dl
, T
, DAG
.getUNDEF(ByteTy
), Mask
);
670 // Fill the bytes beyond BlockLen with 0s.
671 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
672 // when BlockLen < HwLen.
673 assert(BlockLen
< HwLen
&& "vsetq(v1) prerequisite");
674 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
675 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
676 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
677 SDValue M
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, Q
);
678 return DAG
.getNode(ISD::AND
, dl
, ByteTy
, S
, M
);
681 // Make sure that this is a valid scalar predicate.
682 assert(PredTy
== MVT::v2i1
|| PredTy
== MVT::v4i1
|| PredTy
== MVT::v8i1
);
684 unsigned Bytes
= 8 / PredTy
.getVectorNumElements();
685 SmallVector
<SDValue
,4> Words
[2];
688 auto Lo32
= [&DAG
, &dl
] (SDValue P
) {
689 return DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, P
);
691 auto Hi32
= [&DAG
, &dl
] (SDValue P
) {
692 return DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, P
);
695 SDValue W0
= isUndef(PredV
)
696 ? DAG
.getUNDEF(MVT::i64
)
697 : DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, PredV
);
698 Words
[IdxW
].push_back(Hi32(W0
));
699 Words
[IdxW
].push_back(Lo32(W0
));
701 while (Bytes
< BitBytes
) {
706 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
707 SDValue T
= expandPredicate(W
, dl
, DAG
);
708 Words
[IdxW
].push_back(Hi32(T
));
709 Words
[IdxW
].push_back(Lo32(T
));
712 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
713 Words
[IdxW
].push_back(W
);
714 Words
[IdxW
].push_back(W
);
720 assert(Bytes
== BitBytes
);
722 SDValue Vec
= ZeroFill
? getZero(dl
, ByteTy
, DAG
) : DAG
.getUNDEF(ByteTy
);
723 SDValue S4
= DAG
.getConstant(HwLen
-4, dl
, MVT::i32
);
724 for (const SDValue
&W
: Words
[IdxW
]) {
725 Vec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Vec
, S4
);
726 Vec
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, ByteTy
, Vec
, W
);
733 HexagonTargetLowering::buildHvxVectorPred(ArrayRef
<SDValue
> Values
,
734 const SDLoc
&dl
, MVT VecTy
,
735 SelectionDAG
&DAG
) const {
736 // Construct a vector V of bytes, such that a comparison V >u 0 would
737 // produce the required vector predicate.
738 unsigned VecLen
= Values
.size();
739 unsigned HwLen
= Subtarget
.getVectorLength();
740 assert(VecLen
<= HwLen
|| VecLen
== 8*HwLen
);
741 SmallVector
<SDValue
,128> Bytes
;
742 bool AllT
= true, AllF
= true;
744 auto IsTrue
= [] (SDValue V
) {
745 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
746 return !N
->isNullValue();
749 auto IsFalse
= [] (SDValue V
) {
750 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
751 return N
->isNullValue();
755 if (VecLen
<= HwLen
) {
756 // In the hardware, each bit of a vector predicate corresponds to a byte
757 // of a vector register. Calculate how many bytes does a bit of VecTy
759 assert(HwLen
% VecLen
== 0);
760 unsigned BitBytes
= HwLen
/ VecLen
;
761 for (SDValue V
: Values
) {
765 SDValue Ext
= !V
.isUndef() ? DAG
.getZExtOrTrunc(V
, dl
, MVT::i8
)
766 : DAG
.getUNDEF(MVT::i8
);
767 for (unsigned B
= 0; B
!= BitBytes
; ++B
)
768 Bytes
.push_back(Ext
);
771 // There are as many i1 values, as there are bits in a vector register.
772 // Divide the values into groups of 8 and check that each group consists
773 // of the same value (ignoring undefs).
774 for (unsigned I
= 0; I
!= VecLen
; I
+= 8) {
776 // Find the first non-undef value in this group.
777 for (; B
!= 8; ++B
) {
778 if (!Values
[I
+B
].isUndef())
781 SDValue F
= Values
[I
+B
];
785 SDValue Ext
= (B
< 8) ? DAG
.getZExtOrTrunc(F
, dl
, MVT::i8
)
786 : DAG
.getUNDEF(MVT::i8
);
787 Bytes
.push_back(Ext
);
788 // Verify that the rest of values in the group are the same as the
791 assert(Values
[I
+B
].isUndef() || Values
[I
+B
] == F
);
796 return DAG
.getNode(HexagonISD::QTRUE
, dl
, VecTy
);
798 return DAG
.getNode(HexagonISD::QFALSE
, dl
, VecTy
);
800 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
801 SDValue ByteVec
= buildHvxVectorReg(Bytes
, dl
, ByteTy
, DAG
);
802 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
806 HexagonTargetLowering::extractHvxElementReg(SDValue VecV
, SDValue IdxV
,
807 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
808 MVT ElemTy
= ty(VecV
).getVectorElementType();
810 unsigned ElemWidth
= ElemTy
.getSizeInBits();
811 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
814 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
815 SDValue ExWord
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
817 if (ElemTy
== MVT::i32
)
820 // Have an extracted word, need to extract the smaller element out of it.
821 // 1. Extract the bits of (the original) IdxV that correspond to the index
822 // of the desired element in the 32-bit word.
823 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
824 // 2. Extract the element from the word.
825 SDValue ExVec
= DAG
.getBitcast(tyVector(ty(ExWord
), ElemTy
), ExWord
);
826 return extractVector(ExVec
, SubIdx
, dl
, ElemTy
, MVT::i32
, DAG
);
830 HexagonTargetLowering::extractHvxElementPred(SDValue VecV
, SDValue IdxV
,
831 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
832 // Implement other return types if necessary.
833 assert(ResTy
== MVT::i1
);
835 unsigned HwLen
= Subtarget
.getVectorLength();
836 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
837 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
839 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
840 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
841 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
843 SDValue ExtB
= extractHvxElementReg(ByteVec
, IdxV
, dl
, MVT::i32
, DAG
);
844 SDValue Zero
= DAG
.getTargetConstant(0, dl
, MVT::i32
);
845 return getInstr(Hexagon::C2_cmpgtui
, dl
, MVT::i1
, {ExtB
, Zero
}, DAG
);
849 HexagonTargetLowering::insertHvxElementReg(SDValue VecV
, SDValue IdxV
,
850 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
851 MVT ElemTy
= ty(VecV
).getVectorElementType();
853 unsigned ElemWidth
= ElemTy
.getSizeInBits();
854 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
857 auto InsertWord
= [&DAG
,&dl
,this] (SDValue VecV
, SDValue ValV
,
859 MVT VecTy
= ty(VecV
);
860 unsigned HwLen
= Subtarget
.getVectorLength();
861 SDValue MaskV
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
862 {ByteIdxV
, DAG
.getConstant(-4, dl
, MVT::i32
)});
863 SDValue RotV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {VecV
, MaskV
});
864 SDValue InsV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
, {RotV
, ValV
});
865 SDValue SubV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
866 {DAG
.getConstant(HwLen
, dl
, MVT::i32
), MaskV
});
867 SDValue TorV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {InsV
, SubV
});
871 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
872 if (ElemTy
== MVT::i32
)
873 return InsertWord(VecV
, ValV
, ByteIdx
);
875 // If this is not inserting a 32-bit word, convert it into such a thing.
876 // 1. Extract the existing word from the target vector.
877 SDValue WordIdx
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
878 {ByteIdx
, DAG
.getConstant(2, dl
, MVT::i32
)});
879 SDValue Ext
= extractHvxElementReg(opCastElem(VecV
, MVT::i32
, DAG
), WordIdx
,
882 // 2. Treating the extracted word as a 32-bit vector, insert the given
884 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
885 MVT SubVecTy
= tyVector(ty(Ext
), ElemTy
);
886 SDValue Ins
= insertVector(DAG
.getBitcast(SubVecTy
, Ext
),
887 ValV
, SubIdx
, dl
, ElemTy
, DAG
);
889 // 3. Insert the 32-bit word back into the original vector.
890 return InsertWord(VecV
, Ins
, ByteIdx
);
894 HexagonTargetLowering::insertHvxElementPred(SDValue VecV
, SDValue IdxV
,
895 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
896 unsigned HwLen
= Subtarget
.getVectorLength();
897 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
898 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
900 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
901 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
902 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
903 ValV
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i32
, ValV
);
905 SDValue InsV
= insertHvxElementReg(ByteVec
, IdxV
, ValV
, dl
, DAG
);
906 return DAG
.getNode(HexagonISD::V2Q
, dl
, ty(VecV
), InsV
);
910 HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV
, SDValue IdxV
,
911 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
912 MVT VecTy
= ty(VecV
);
913 unsigned HwLen
= Subtarget
.getVectorLength();
914 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
915 MVT ElemTy
= VecTy
.getVectorElementType();
916 unsigned ElemWidth
= ElemTy
.getSizeInBits();
918 // If the source vector is a vector pair, get the single vector containing
919 // the subvector of interest. The subvector will never overlap two single
921 if (isHvxPairTy(VecTy
)) {
923 if (Idx
* ElemWidth
>= 8*HwLen
) {
924 SubIdx
= Hexagon::vsub_hi
;
925 Idx
-= VecTy
.getVectorNumElements() / 2;
927 SubIdx
= Hexagon::vsub_lo
;
929 VecTy
= typeSplit(VecTy
).first
;
930 VecV
= DAG
.getTargetExtractSubreg(SubIdx
, dl
, VecTy
, VecV
);
935 // The only meaningful subvectors of a single HVX vector are those that
936 // fit in a scalar register.
937 assert(ResTy
.getSizeInBits() == 32 || ResTy
.getSizeInBits() == 64);
939 MVT WordTy
= tyVector(VecTy
, MVT::i32
);
940 SDValue WordVec
= DAG
.getBitcast(WordTy
, VecV
);
941 unsigned WordIdx
= (Idx
*ElemWidth
) / 32;
943 SDValue W0Idx
= DAG
.getConstant(WordIdx
, dl
, MVT::i32
);
944 SDValue W0
= extractHvxElementReg(WordVec
, W0Idx
, dl
, MVT::i32
, DAG
);
945 if (ResTy
.getSizeInBits() == 32)
946 return DAG
.getBitcast(ResTy
, W0
);
948 SDValue W1Idx
= DAG
.getConstant(WordIdx
+1, dl
, MVT::i32
);
949 SDValue W1
= extractHvxElementReg(WordVec
, W1Idx
, dl
, MVT::i32
, DAG
);
950 SDValue WW
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::i64
, {W1
, W0
});
951 return DAG
.getBitcast(ResTy
, WW
);
955 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV
, SDValue IdxV
,
956 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
957 MVT VecTy
= ty(VecV
);
958 unsigned HwLen
= Subtarget
.getVectorLength();
959 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
960 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
961 // IdxV is required to be a constant.
962 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
964 unsigned ResLen
= ResTy
.getVectorNumElements();
965 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
966 unsigned Offset
= Idx
* BitBytes
;
967 SDValue Undef
= DAG
.getUNDEF(ByteTy
);
968 SmallVector
<int,128> Mask
;
970 if (Subtarget
.isHVXVectorType(ResTy
, true)) {
971 // Converting between two vector predicates. Since the result is shorter
972 // than the source, it will correspond to a vector predicate with the
973 // relevant bits replicated. The replication count is the ratio of the
974 // source and target vector lengths.
975 unsigned Rep
= VecTy
.getVectorNumElements() / ResLen
;
976 assert(isPowerOf2_32(Rep
) && HwLen
% Rep
== 0);
977 for (unsigned i
= 0; i
!= HwLen
/Rep
; ++i
) {
978 for (unsigned j
= 0; j
!= Rep
; ++j
)
979 Mask
.push_back(i
+ Offset
);
981 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
982 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, ShuffV
);
985 // Converting between a vector predicate and a scalar predicate. In the
986 // vector predicate, a group of BitBytes bits will correspond to a single
987 // i1 element of the source vector type. Those bits will all have the same
988 // value. The same will be true for ByteVec, where each byte corresponds
989 // to a bit in the vector predicate.
990 // The algorithm is to traverse the ByteVec, going over the i1 values from
991 // the source vector, and generate the corresponding representation in an
992 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
993 // elements so that the interesting 8 bytes will be in the low end of the
995 unsigned Rep
= 8 / ResLen
;
996 // Make sure the output fill the entire vector register, so repeat the
997 // 8-byte groups as many times as necessary.
998 for (unsigned r
= 0; r
!= HwLen
/ResLen
; ++r
) {
999 // This will generate the indexes of the 8 interesting bytes.
1000 for (unsigned i
= 0; i
!= ResLen
; ++i
) {
1001 for (unsigned j
= 0; j
!= Rep
; ++j
)
1002 Mask
.push_back(Offset
+ i
*BitBytes
);
1006 SDValue Zero
= getZero(dl
, MVT::i32
, DAG
);
1007 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
1008 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1010 SDValue W0
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
, {ShuffV
, Zero
});
1011 SDValue W1
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
1012 {ShuffV
, DAG
.getConstant(4, dl
, MVT::i32
)});
1013 SDValue Vec64
= DAG
.getNode(HexagonISD::COMBINE
, dl
, MVT::v8i8
, {W1
, W0
});
1014 return getInstr(Hexagon::A4_vcmpbgtui
, dl
, ResTy
,
1015 {Vec64
, DAG
.getTargetConstant(0, dl
, MVT::i32
)}, DAG
);
1019 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV
, SDValue SubV
,
1020 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1021 MVT VecTy
= ty(VecV
);
1022 MVT SubTy
= ty(SubV
);
1023 unsigned HwLen
= Subtarget
.getVectorLength();
1024 MVT ElemTy
= VecTy
.getVectorElementType();
1025 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1027 bool IsPair
= isHvxPairTy(VecTy
);
1028 MVT SingleTy
= MVT::getVectorVT(ElemTy
, (8*HwLen
)/ElemWidth
);
1029 // The two single vectors that VecV consists of, if it's a pair.
1031 SDValue SingleV
= VecV
;
1035 V0
= DAG
.getTargetExtractSubreg(Hexagon::vsub_lo
, dl
, SingleTy
, VecV
);
1036 V1
= DAG
.getTargetExtractSubreg(Hexagon::vsub_hi
, dl
, SingleTy
, VecV
);
1038 SDValue HalfV
= DAG
.getConstant(SingleTy
.getVectorNumElements(),
1040 PickHi
= DAG
.getSetCC(dl
, MVT::i1
, IdxV
, HalfV
, ISD::SETUGT
);
1041 if (isHvxSingleTy(SubTy
)) {
1042 if (const auto *CN
= dyn_cast
<const ConstantSDNode
>(IdxV
.getNode())) {
1043 unsigned Idx
= CN
->getZExtValue();
1044 assert(Idx
== 0 || Idx
== VecTy
.getVectorNumElements()/2);
1045 unsigned SubIdx
= (Idx
== 0) ? Hexagon::vsub_lo
: Hexagon::vsub_hi
;
1046 return DAG
.getTargetInsertSubreg(SubIdx
, dl
, VecTy
, VecV
, SubV
);
1048 // If IdxV is not a constant, generate the two variants: with the
1049 // SubV as the high and as the low subregister, and select the right
1050 // pair based on the IdxV.
1051 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SubV
, V1
});
1052 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SubV
});
1053 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
1055 // The subvector being inserted must be entirely contained in one of
1056 // the vectors V0 or V1. Set SingleV to the correct one, and update
1057 // IdxV to be the index relative to the beginning of that vector.
1058 SDValue S
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, IdxV
, HalfV
);
1059 IdxV
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, PickHi
, S
, IdxV
);
1060 SingleV
= DAG
.getNode(ISD::SELECT
, dl
, SingleTy
, PickHi
, V1
, V0
);
1063 // The only meaningful subvectors of a single HVX vector are those that
1064 // fit in a scalar register.
1065 assert(SubTy
.getSizeInBits() == 32 || SubTy
.getSizeInBits() == 64);
1066 // Convert IdxV to be index in bytes.
1067 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
1068 if (!IdxN
|| !IdxN
->isNullValue()) {
1069 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
1070 DAG
.getConstant(ElemWidth
/8, dl
, MVT::i32
));
1071 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, IdxV
);
1073 // When inserting a single word, the rotation back to the original position
1074 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1075 // by (HwLen-4)-Idx.
1076 unsigned RolBase
= HwLen
;
1077 if (VecTy
.getSizeInBits() == 32) {
1078 SDValue V
= DAG
.getBitcast(MVT::i32
, SubV
);
1079 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, V
);
1081 SDValue V
= DAG
.getBitcast(MVT::i64
, SubV
);
1082 SDValue R0
= DAG
.getTargetExtractSubreg(Hexagon::isub_lo
, dl
, MVT::i32
, V
);
1083 SDValue R1
= DAG
.getTargetExtractSubreg(Hexagon::isub_hi
, dl
, MVT::i32
, V
);
1084 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R0
);
1085 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
,
1086 DAG
.getConstant(4, dl
, MVT::i32
));
1087 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R1
);
1090 // If the vector wasn't ror'ed, don't ror it back.
1091 if (RolBase
!= 4 || !IdxN
|| !IdxN
->isNullValue()) {
1092 SDValue RolV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
1093 DAG
.getConstant(RolBase
, dl
, MVT::i32
), IdxV
);
1094 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, RolV
);
1098 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SingleV
, V1
});
1099 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SingleV
});
1100 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
1106 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV
, SDValue SubV
,
1107 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1108 MVT VecTy
= ty(VecV
);
1109 MVT SubTy
= ty(SubV
);
1110 assert(Subtarget
.isHVXVectorType(VecTy
, true));
1111 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1112 // predicate as well, or it can be a scalar predicate.
1114 unsigned VecLen
= VecTy
.getVectorNumElements();
1115 unsigned HwLen
= Subtarget
.getVectorLength();
1116 assert(HwLen
% VecLen
== 0 && "Unexpected vector type");
1118 unsigned Scale
= VecLen
/ SubTy
.getVectorNumElements();
1119 unsigned BitBytes
= HwLen
/ VecLen
;
1120 unsigned BlockLen
= HwLen
/ Scale
;
1122 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1123 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
1124 SDValue ByteSub
= createHvxPrefixPred(SubV
, dl
, BitBytes
, false, DAG
);
1127 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
1128 if (!IdxN
|| !IdxN
->isNullValue()) {
1129 ByteIdx
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
1130 DAG
.getConstant(BitBytes
, dl
, MVT::i32
));
1131 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteIdx
);
1134 // ByteVec is the target vector VecV rotated in such a way that the
1135 // subvector should be inserted at index 0. Generate a predicate mask
1136 // and use vmux to do the insertion.
1137 assert(BlockLen
< HwLen
&& "vsetq(v1) prerequisite");
1138 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
1139 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
1140 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
1141 ByteVec
= getInstr(Hexagon::V6_vmux
, dl
, ByteTy
, {Q
, ByteSub
, ByteVec
}, DAG
);
1142 // Rotate ByteVec back, and convert to a vector predicate.
1143 if (!IdxN
|| !IdxN
->isNullValue()) {
1144 SDValue HwLenV
= DAG
.getConstant(HwLen
, dl
, MVT::i32
);
1145 SDValue ByteXdi
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, HwLenV
, ByteIdx
);
1146 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteXdi
);
1148 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
1152 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV
, const SDLoc
&dl
,
1153 MVT ResTy
, bool ZeroExt
, SelectionDAG
&DAG
) const {
1154 // Sign- and any-extending of a vector predicate to a vector register is
1155 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1156 // a vector of 1s (where the 1s are of type matching the vector type).
1157 assert(Subtarget
.isHVXVectorType(ResTy
));
1159 return DAG
.getNode(HexagonISD::Q2V
, dl
, ResTy
, VecV
);
1161 assert(ty(VecV
).getVectorNumElements() == ResTy
.getVectorNumElements());
1162 SDValue True
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1163 DAG
.getConstant(1, dl
, MVT::i32
));
1164 SDValue False
= getZero(dl
, ResTy
, DAG
);
1165 return DAG
.getSelect(dl
, ResTy
, VecV
, True
, False
);
1169 HexagonTargetLowering::compressHvxPred(SDValue VecQ
, const SDLoc
&dl
,
1170 MVT ResTy
, SelectionDAG
&DAG
) const {
1171 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1172 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1173 // vector register. The remaining bits of the vector register are
1176 MachineFunction
&MF
= DAG
.getMachineFunction();
1177 unsigned HwLen
= Subtarget
.getVectorLength();
1178 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1179 MVT PredTy
= ty(VecQ
);
1180 unsigned PredLen
= PredTy
.getVectorNumElements();
1181 assert(HwLen
% PredLen
== 0);
1182 MVT VecTy
= MVT::getVectorVT(MVT::getIntegerVT(8*HwLen
/PredLen
), PredLen
);
1184 Type
*Int8Ty
= Type::getInt8Ty(*DAG
.getContext());
1185 SmallVector
<Constant
*, 128> Tmp
;
1186 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1187 // These are bytes with the LSB rotated left with respect to their index.
1188 for (unsigned i
= 0; i
!= HwLen
/8; ++i
) {
1189 for (unsigned j
= 0; j
!= 8; ++j
)
1190 Tmp
.push_back(ConstantInt::get(Int8Ty
, 1ull << j
));
1192 Constant
*CV
= ConstantVector::get(Tmp
);
1193 Align
Alignment(HwLen
);
1195 LowerConstantPool(DAG
.getConstantPool(CV
, ByteTy
, Alignment
), DAG
);
1197 DAG
.getLoad(ByteTy
, dl
, DAG
.getEntryNode(), CP
,
1198 MachinePointerInfo::getConstantPool(MF
), Alignment
);
1200 // Select the bytes that correspond to true bits in the vector predicate.
1201 SDValue Sel
= DAG
.getSelect(dl
, VecTy
, VecQ
, DAG
.getBitcast(VecTy
, Bytes
),
1202 getZero(dl
, VecTy
, DAG
));
1203 // Calculate the OR of all bytes in each group of 8. That will compress
1204 // all the individual bits into a single byte.
1205 // First, OR groups of 4, via vrmpy with 0x01010101.
1207 DAG
.getSplatBuildVector(MVT::v4i8
, dl
, DAG
.getConstant(1, dl
, MVT::i32
));
1208 SDValue Vrmpy
= getInstr(Hexagon::V6_vrmpyub
, dl
, ByteTy
, {Sel
, All1
}, DAG
);
1209 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1210 SDValue Rot
= getInstr(Hexagon::V6_valignbi
, dl
, ByteTy
,
1211 {Vrmpy
, Vrmpy
, DAG
.getTargetConstant(4, dl
, MVT::i32
)}, DAG
);
1212 SDValue Vor
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, {Vrmpy
, Rot
});
1214 // Pick every 8th byte and coalesce them at the beginning of the output.
1215 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1217 SmallVector
<int,128> Mask
;
1218 for (unsigned i
= 0; i
!= HwLen
; ++i
)
1219 Mask
.push_back((8*i
) % HwLen
+ i
/(HwLen
/8));
1221 DAG
.getVectorShuffle(ByteTy
, dl
, Vor
, DAG
.getUNDEF(ByteTy
), Mask
);
1222 return DAG
.getBitcast(ResTy
, Collect
);
1226 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op
, SelectionDAG
&DAG
)
1228 const SDLoc
&dl(Op
);
1231 unsigned Size
= Op
.getNumOperands();
1232 SmallVector
<SDValue
,128> Ops
;
1233 for (unsigned i
= 0; i
!= Size
; ++i
)
1234 Ops
.push_back(Op
.getOperand(i
));
1236 if (VecTy
.getVectorElementType() == MVT::i1
)
1237 return buildHvxVectorPred(Ops
, dl
, VecTy
, DAG
);
1239 if (VecTy
.getSizeInBits() == 16*Subtarget
.getVectorLength()) {
1240 ArrayRef
<SDValue
> A(Ops
);
1241 MVT SingleTy
= typeSplit(VecTy
).first
;
1242 SDValue V0
= buildHvxVectorReg(A
.take_front(Size
/2), dl
, SingleTy
, DAG
);
1243 SDValue V1
= buildHvxVectorReg(A
.drop_front(Size
/2), dl
, SingleTy
, DAG
);
1244 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, V0
, V1
);
1247 return buildHvxVectorReg(Ops
, dl
, VecTy
, DAG
);
1251 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op
, SelectionDAG
&DAG
)
1253 // Vector concatenation of two integer (non-bool) vectors does not need
1254 // special lowering. Custom-lower concats of bool vectors and expand
1255 // concats of more than 2 vectors.
1257 const SDLoc
&dl(Op
);
1258 unsigned NumOp
= Op
.getNumOperands();
1259 if (VecTy
.getVectorElementType() != MVT::i1
) {
1262 // Expand the other cases into a build-vector.
1263 SmallVector
<SDValue
,8> Elems
;
1264 for (SDValue V
: Op
.getNode()->ops())
1265 DAG
.ExtractVectorElements(V
, Elems
);
1266 // A vector of i16 will be broken up into a build_vector of i16's.
1267 // This is a problem, since at the time of operation legalization,
1268 // all operations are expected to be type-legalized, and i16 is not
1269 // a legal type. If any of the extracted elements is not of a valid
1270 // type, sign-extend it to a valid one.
1271 for (unsigned i
= 0, e
= Elems
.size(); i
!= e
; ++i
) {
1272 SDValue V
= Elems
[i
];
1274 if (!isTypeLegal(Ty
)) {
1275 EVT NTy
= getTypeToTransformTo(*DAG
.getContext(), Ty
);
1276 if (V
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
) {
1277 Elems
[i
] = DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, NTy
,
1278 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, NTy
,
1279 V
.getOperand(0), V
.getOperand(1)),
1280 DAG
.getValueType(Ty
));
1283 // A few less complicated cases.
1284 switch (V
.getOpcode()) {
1286 Elems
[i
] = DAG
.getSExtOrTrunc(V
, dl
, NTy
);
1289 Elems
[i
] = DAG
.getUNDEF(NTy
);
1292 Elems
[i
] = V
.getOperand(0);
1295 llvm_unreachable("Unexpected vector element");
1299 return DAG
.getBuildVector(VecTy
, dl
, Elems
);
1302 assert(VecTy
.getVectorElementType() == MVT::i1
);
1303 unsigned HwLen
= Subtarget
.getVectorLength();
1304 assert(isPowerOf2_32(NumOp
) && HwLen
% NumOp
== 0);
1306 SDValue Op0
= Op
.getOperand(0);
1308 // If the operands are HVX types (i.e. not scalar predicates), then
1309 // defer the concatenation, and create QCAT instead.
1310 if (Subtarget
.isHVXVectorType(ty(Op0
), true)) {
1312 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, Op0
, Op
.getOperand(1));
1314 ArrayRef
<SDUse
> U(Op
.getNode()->ops());
1315 SmallVector
<SDValue
,4> SV(U
.begin(), U
.end());
1316 ArrayRef
<SDValue
> Ops(SV
);
1318 MVT HalfTy
= typeSplit(VecTy
).first
;
1319 SDValue V0
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1320 Ops
.take_front(NumOp
/2));
1321 SDValue V1
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1322 Ops
.take_back(NumOp
/2));
1323 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, V0
, V1
);
1326 // Count how many bytes (in a vector register) each bit in VecTy
1328 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
1330 SmallVector
<SDValue
,8> Prefixes
;
1331 for (SDValue V
: Op
.getNode()->op_values()) {
1332 SDValue P
= createHvxPrefixPred(V
, dl
, BitBytes
, true, DAG
);
1333 Prefixes
.push_back(P
);
1336 unsigned InpLen
= ty(Op
.getOperand(0)).getVectorNumElements();
1337 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1338 SDValue S
= DAG
.getConstant(InpLen
*BitBytes
, dl
, MVT::i32
);
1339 SDValue Res
= getZero(dl
, ByteTy
, DAG
);
1340 for (unsigned i
= 0, e
= Prefixes
.size(); i
!= e
; ++i
) {
1341 Res
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Res
, S
);
1342 Res
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, Res
, Prefixes
[e
-i
-1]);
1344 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, Res
);
1348 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op
, SelectionDAG
&DAG
)
1350 // Change the type of the extracted element to i32.
1351 SDValue VecV
= Op
.getOperand(0);
1352 MVT ElemTy
= ty(VecV
).getVectorElementType();
1353 const SDLoc
&dl(Op
);
1354 SDValue IdxV
= Op
.getOperand(1);
1355 if (ElemTy
== MVT::i1
)
1356 return extractHvxElementPred(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1358 return extractHvxElementReg(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1362 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op
, SelectionDAG
&DAG
)
1364 const SDLoc
&dl(Op
);
1365 SDValue VecV
= Op
.getOperand(0);
1366 SDValue ValV
= Op
.getOperand(1);
1367 SDValue IdxV
= Op
.getOperand(2);
1368 MVT ElemTy
= ty(VecV
).getVectorElementType();
1369 if (ElemTy
== MVT::i1
)
1370 return insertHvxElementPred(VecV
, IdxV
, ValV
, dl
, DAG
);
1372 return insertHvxElementReg(VecV
, IdxV
, ValV
, dl
, DAG
);
1376 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op
, SelectionDAG
&DAG
)
1378 SDValue SrcV
= Op
.getOperand(0);
1379 MVT SrcTy
= ty(SrcV
);
1381 SDValue IdxV
= Op
.getOperand(1);
1382 unsigned Idx
= cast
<ConstantSDNode
>(IdxV
.getNode())->getZExtValue();
1383 assert(Idx
% DstTy
.getVectorNumElements() == 0);
1385 const SDLoc
&dl(Op
);
1387 MVT ElemTy
= SrcTy
.getVectorElementType();
1388 if (ElemTy
== MVT::i1
)
1389 return extractHvxSubvectorPred(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1391 return extractHvxSubvectorReg(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1395 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op
, SelectionDAG
&DAG
)
1397 // Idx does not need to be a constant.
1398 SDValue VecV
= Op
.getOperand(0);
1399 SDValue ValV
= Op
.getOperand(1);
1400 SDValue IdxV
= Op
.getOperand(2);
1402 const SDLoc
&dl(Op
);
1403 MVT VecTy
= ty(VecV
);
1404 MVT ElemTy
= VecTy
.getVectorElementType();
1405 if (ElemTy
== MVT::i1
)
1406 return insertHvxSubvectorPred(VecV
, ValV
, IdxV
, dl
, DAG
);
1408 return insertHvxSubvectorReg(VecV
, ValV
, IdxV
, dl
, DAG
);
1412 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op
, SelectionDAG
&DAG
) const {
1413 // Lower any-extends of boolean vectors to sign-extends, since they
1414 // translate directly to Q2V. Zero-extending could also be done equally
1415 // fast, but Q2V is used/recognized in more places.
1416 // For all other vectors, use zero-extend.
1418 SDValue InpV
= Op
.getOperand(0);
1419 MVT ElemTy
= ty(InpV
).getVectorElementType();
1420 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1421 return LowerHvxSignExt(Op
, DAG
);
1422 return DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(Op
), ResTy
, InpV
);
1426 HexagonTargetLowering::LowerHvxSignExt(SDValue Op
, SelectionDAG
&DAG
) const {
1428 SDValue InpV
= Op
.getOperand(0);
1429 MVT ElemTy
= ty(InpV
).getVectorElementType();
1430 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1431 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), false, DAG
);
1436 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op
, SelectionDAG
&DAG
) const {
1438 SDValue InpV
= Op
.getOperand(0);
1439 MVT ElemTy
= ty(InpV
).getVectorElementType();
1440 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1441 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), true, DAG
);
1446 HexagonTargetLowering::LowerHvxCttz(SDValue Op
, SelectionDAG
&DAG
) const {
1447 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1448 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1449 const SDLoc
&dl(Op
);
1451 SDValue InpV
= Op
.getOperand(0);
1452 assert(ResTy
== ty(InpV
));
1454 // Calculate the vectors of 1 and bitwidth(x).
1455 MVT ElemTy
= ty(InpV
).getVectorElementType();
1456 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1458 SDValue Vec1
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1459 DAG
.getConstant(1, dl
, MVT::i32
));
1460 SDValue VecW
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1461 DAG
.getConstant(ElemWidth
, dl
, MVT::i32
));
1462 SDValue VecN1
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1463 DAG
.getConstant(-1, dl
, MVT::i32
));
1465 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1466 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1467 // it separately in custom combine or selection).
1468 SDValue A
= DAG
.getNode(ISD::AND
, dl
, ResTy
,
1469 {DAG
.getNode(ISD::XOR
, dl
, ResTy
, {InpV
, VecN1
}),
1470 DAG
.getNode(ISD::SUB
, dl
, ResTy
, {InpV
, Vec1
})});
1471 return DAG
.getNode(ISD::SUB
, dl
, ResTy
,
1472 {VecW
, DAG
.getNode(ISD::CTLZ
, dl
, ResTy
, A
)});
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector());
  const SDLoc &dl(Op);
  SmallVector<int,256> ShuffMask;

  MVT ElemTy = ResTy.getVectorElementType();
  unsigned VecLen = ResTy.getVectorNumElements();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);
  bool IsSigned = Op.getOpcode() == ISD::MULHS;

  if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
    // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
    // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
    // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
    // For i16, use V6_vmpyhv, which behaves in an analogous way to
    // V6_vmpybv: results Lo and Hi are products of even/odd elements
    // respectively.
    MVT ExtTy = typeExtElem(ResTy, 2);
    unsigned MpyOpc = ElemTy == MVT::i8
        ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
        : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
    SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);

    // Discard low halves of the resulting values, collect the high halves.
    for (unsigned I = 0; I < VecLen; I += 2) {
      ShuffMask.push_back(I+1);         // Pick even element.
      ShuffMask.push_back(I+VecLen+1);  // Pick odd element.
    }
    VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
    SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
    return DAG.getBitcast(ResTy, BS);
  }

  assert(ElemTy == MVT::i32);
  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
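  // Two expansions of the signed high-multiply for 32-bit elements follow:
  // MulHS_V62 uses the 64-bit multiply-accumulate instructions available on
  // HVX v62 and later, while MulHS_V60 builds the result from halfword
  // products (see the derivation in its comment).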
  auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) {
    // mulhs(Vs,Vt) =
    //   = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
    //   = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
    //      + Lo(Vs) *us Vt] >> 32
    // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
    // anything, so it cannot produce any carry over to higher bits),
    // so everything in [] can be shifted by 16 without loss of precision.
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
    //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16
    // Denote Hi(Vs) = Vs':
    //   = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
    //   = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
    // Get Vs' = Vs >> 16 (arithmetic):
    SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                          {T0, S0, Vt}, DAG);
    // Shift the accumulated sum right by 16:
    SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
    // Get Vs' *s Hi(Vt):
    SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
    // Add the two terms:
    SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
    return T3;
  };

  auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) {
    MVT PairTy = typeJoin({ResTy, ResTy});
    SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                          {T0, Vs, Vt}, DAG);
    return opSplit(T1, dl, DAG).second;
  };

  if (IsSigned) {
    if (Subtarget.useHVXV62Ops())
      return MulHS_V62(Vs, Vt);
    return MulHS_V60(Vs, Vt);
  }

  // Unsigned mulhw. (Would expansion using signed mulhw be better?)
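  // In terms of unsigned 16-bit halves a = a1:a0 and b = b1:b0,
  //   a*b = (a1*b1 << 32) + ((a1*b0 + a0*b1) << 16) + a0*b0
  // and therefore
  //   mulhu(a,b) = a1*b1 + ((a1*b0 + a0*b1 + (a0*b0 >> 16)) >> 16),
  // which is what the sequence below computes with halfword multiplies.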
  auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
  };
  auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
    return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
  };

  MVT PairTy = typeJoin({ResTy, ResTy});
  SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
                       {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  // Multiply-unsigned halfwords:
  //   LoVec = Vs.uh[2i] * Vt.uh[2i],
  //   HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
  SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
  // The low halves in the LoVec of the pair can be discarded. They are
  // not added to anything (in the full-precision product), so they cannot
  // produce a carry into the higher bits.
  SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
  // Swap low and high halves in Vt, and do the halfword multiplication
  // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
  SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
  SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
  // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
  // These products are words, but cannot be added directly because the
  // sums could overflow. Add these products, by halfwords, where each sum
  // of a pair of halfwords gives a word.
  SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {LoVec(T2), HiVec(T2)}, DAG);
  // Add the high halfwords from the products of the low halfwords.
  SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
  SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
  SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
  SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
  return T7;
}
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  SDValue ValQ = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT VecTy = ty(ValQ);
  const SDLoc &dl(Op);
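  // For bitcasts of boolean vectors to scalar integers, compressHvxPred
  // packs one bit per vector element into a vector of i32 words; the words
  // are then extracted and assembled into the scalar result below.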
  if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
                                        dl, MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(W0, dl, ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
      Words.push_back(W);
    }
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = DAG.getNode(
          HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
      Combines.push_back(C);
    }

    if (BitWidth == 64)
      return Combines[0];

    return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
  }

  return Op;
}
SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  // Sign- and zero-extends are legal.
  assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
  return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
                     Op.getOperand(0));
}
SDValue
HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
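  // Only selects with boolean-vector results need custom handling: the
  // vector-predicate operands being selected are expanded to equivalent
  // integer vectors with Q2V, the select is done in the integer domain,
  // and the result is converted back to a predicate with V2Q.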
  MVT ResTy = ty(Op);
  if (ResTy.getVectorElementType() != MVT::i1)
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned VecLen = ResTy.getVectorNumElements();
  assert(HwLen % VecLen == 0);
  unsigned ElemSize = HwLen / VecLen;

  MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
  SDValue S =
      DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
                  DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
}
SDValue
HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
  if (SDValue S = getVectorShiftByInt(Op, DAG))
    return S;
  return Op;
}
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  MVT ResTy = ty(Op);

  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  bool Use64b = Subtarget.useHVX64BOps();
  unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
                                : Intrinsic::hexagon_V6_pred_typecast_128B;
  if (IntNo == IntPredCast) {
    SDValue Vs = Op.getOperand(1);
    MVT OpTy = ty(Vs);
    if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
      if (ResTy == OpTy)
        return Vs;
      return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
    }
  }

  return Op;
}
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
    if (isUndef(Thru))
      return Load;
    SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
    return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
  }

  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
                             {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
    return Store;
  }
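  // The store is not vector-aligned: emit two predicated stores covering
  // the two aligned vectors that the access spans. Both the value and the
  // mask are rotated into place with vlalign, so the rotated mask keeps
  // each store from touching bytes outside the original range.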
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, ty(V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
    SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
    return std::make_pair(LoV, HiV);
  };

  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
                      DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
  SDValue StoreLo =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(StoreOpc, dl, MVT::Other,
               {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
  DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
}
SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue,2> OpsL, OpsH;
  const SDLoc &dl(Op);

  auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
    MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(TV, TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
                       ? opSplit(A, dl, DAG)
                       : std::make_pair(A, A);
    // Special case for type operand.
    if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
      if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
        P = SplitVTNode(N);
    }
    OpsL.push_back(P.first);
    OpsH.push_back(P.second);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(ResTy).first;
  SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
  SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
  SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
  return S;
}
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
  }

  unsigned MemOpc = MemN->getOpcode();

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
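  // Widen a short vector load to a whole-vector HVX load: read HwLen bytes
  // under a mask produced by V6_pred_scalar2 (vsetq), which enables only
  // the first ResLen byte lanes, then reinterpret the loaded bytes in the
  // original element type. The lanes beyond ResLen are undef.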
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);

  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);

  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
                                   DAG.getUNDEF(LoadTy), LoadTy, MemOp,
                                   ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues({Value, Chain}, dl);
}
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
  MVT ValueTy = ty(Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG);
    Len = ty(Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
  return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
                            MemOp, ISD::UNINDEXED, false, false);
}
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
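  // Perform the comparison on operands padded (with undef lanes) to a wide,
  // HVX-sized type, e.g. v8i16 -> v32i16 on a 64-byte target, and then
  // narrow the result back by extracting a subvector starting at lane 0.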
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  MVT ElemTy = ty(Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
  if (!Subtarget.isHVXVectorType(WideOpTy, true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
  SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
                              {WideOp0, WideOp1, Op.getOperand(2)});

  EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}
SDValue
HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->  ScalarVec        Illegal   HVX
  // Illegal      widen(insert)    widen     -
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    return HwWidth > Width ? HwWidth / Width : 1;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
                                                       : HexagonISD::VUNPACKU;
  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
  return WideRes;
}
SDValue
HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  // .-res, op->  ScalarVec   Illegal          HVX
  // Scalar       ok          extract(widen)   -
  // Illegal      -           widen            widen
  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    assert(HwWidth % Width == 0);
    return HwWidth / Width;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  if (Subtarget.isHVXVectorType(OpTy))
    return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);

  assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");

  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
                                WideOp);
  // If the original result wasn't legal and was supposed to be widened,
  // we're done.
  if (shouldWidenToHvx(ResTy, DAG))
    return WideRes;

  // The original result type wasn't meant to be widened to HVX, so
  // leave it as it is. Standard legalization should be able to deal
  // with it (since now it's a result of a target-independent ISD
  // node).
  assert(ResTy.isVector());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
                     {WideRes, getZero(dl, MVT::i32, DAG)});
}
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  bool IsPairOp = isHvxPairTy(ty(Op)) ||
                  llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
                    return isHvxPairTy(ty(V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        return SplitHvxPairOp(Op, DAG);
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
  }

  llvm_unreachable("Unhandled HVX operation");
}
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
        if (SDValue T = WidenHvxExtend(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::TRUNCATE:
      if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) {
        if (SDValue T = WidenHvxTruncate(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::STORE:
      if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
        SDValue Store = WidenHvxStore(Op, DAG);
        Results.push_back(Store);
      }
      break;
    case ISD::MLOAD:
      if (isHvxPairTy(ty(Op))) {
        SDValue S = SplitHvxMemOp(Op, DAG);
        assert(S->getOpcode() == ISD::MERGE_VALUES);
        Results.push_back(S.getOperand(0));
        Results.push_back(S.getOperand(1));
      }
      break;
    case ISD::MSTORE:
      if (isHvxPairTy(ty(Op->getOperand(1)))) {    // Stored value
        SDValue S = SplitHvxMemOp(Op, DAG);
        Results.push_back(S);
      }
      break;
    default:
      break;
  }
}
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);

  switch (Opc) {
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      if (shouldWidenToHvx(ty(Op), DAG)) {
        if (SDValue T = WidenHvxExtend(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::SETCC:
      if (shouldWidenToHvx(ty(Op), DAG)) {
        if (SDValue T = WidenHvxSetCC(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::TRUNCATE:
      if (shouldWidenToHvx(ty(Op), DAG)) {
        if (SDValue T = WidenHvxTruncate(Op, DAG))
          Results.push_back(T);
      }
      break;
    case ISD::LOAD:
      if (shouldWidenToHvx(ty(Op), DAG)) {
        SDValue Load = WidenHvxLoad(Op, DAG);
        assert(Load->getOpcode() == ISD::MERGE_VALUES);
        Results.push_back(Load.getOperand(0));
        Results.push_back(Load.getOperand(1));
      }
      break;
    case ISD::BITCAST:
      if (isHvxBoolTy(ty(N->getOperand(0)))) {
        SDValue C = LowerHvxBitcast(Op, DAG);
        Results.push_back(C);
      }
      break;
    default:
      break;
  }
}
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
      const {
  const SDLoc &dl(N);
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op(N, 0);
  unsigned Opc = Op.getOpcode();
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());

  switch (Opc) {
    case ISD::VSELECT: {
      // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
      SDValue Cond = Ops[0];
      if (Cond->getOpcode() == ISD::XOR) {
        SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
        if (C1->getOpcode() == HexagonISD::QTRUE)
          return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
      }
      break;
    }
    case HexagonISD::V2Q:
      if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
        if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
          return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
                                  : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
      }
      break;
    case HexagonISD::Q2V:
      if (Ops[0].getOpcode() == HexagonISD::QTRUE)
        return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
                           DAG.getConstant(-1, dl, MVT::i32));
      if (Ops[0].getOpcode() == HexagonISD::QFALSE)
        return getZero(dl, ty(Op), DAG);
      break;
    case HexagonISD::VINSERTW0:
      if (isUndef(Ops[1]))
        return Ops[0];
      break;
    case HexagonISD::VROR: {
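      // (vror (vror x, r1), r0) -> (vror x, r0+r1)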
      if (Ops[0].getOpcode() == HexagonISD::VROR) {
        SDValue Vec = Ops[0].getOperand(0);
        SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
        SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
        return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
      }
      break;
    }
    default:
      break;
  }

  return SDValue();
}
bool
HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
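  // True if Ty's preferred legalization is widening and the type it widens
  // to is an HVX vector type.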
  auto Action = getPreferredHvxVectorAction(Ty);
  if (Action == TargetLoweringBase::TypeWidenVector) {
    EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty);
    assert(WideTy.isSimple());
    return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true);
  }
  return false;
}
bool
HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
  if (!Subtarget.useHVXOps())
    return false;
  // If the type of any result, or any operand type are HVX vector types,
  // this is an HVX operation.
  auto IsHvxTy = [this](EVT Ty) {
    return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
  };
  auto IsHvxOp = [this](SDValue Op) {
    return Op.getValueType().isSimple() &&
           Subtarget.isHVXVectorType(ty(Op), true);
  };
  if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
    return true;

  // Check if this could be an HVX operation after type widening.
  auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
    if (!Op.getValueType().isSimple())
      return false;
    MVT ValTy = ty(Op);
    return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
  };

  for (int i = 0, e = N->getNumValues(); i != e; ++i) {
    if (IsWidenedToHvx(SDValue(N, i)))
      return true;
  }
  return llvm::any_of(N->ops(), IsWidenedToHvx);
}