1 //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "HexagonISelLowering.h"
10 #include "HexagonRegisterInfo.h"
11 #include "HexagonSubtarget.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/Analysis/MemoryLocation.h"
15 #include "llvm/CodeGen/MachineBasicBlock.h"
16 #include "llvm/CodeGen/MachineFunction.h"
17 #include "llvm/CodeGen/MachineInstr.h"
18 #include "llvm/CodeGen/MachineOperand.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/IR/IntrinsicsHexagon.h"
22 #include "llvm/Support/CommandLine.h"
30 static cl::opt
<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
31 cl::Hidden
, cl::init(16),
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
34 static const MVT LegalV64
[] = { MVT::v64i8
, MVT::v32i16
, MVT::v16i32
};
35 static const MVT LegalW64
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
36 static const MVT LegalV128
[] = { MVT::v128i8
, MVT::v64i16
, MVT::v32i32
};
37 static const MVT LegalW128
[] = { MVT::v256i8
, MVT::v128i16
, MVT::v64i32
};
// Return the IEEE layout (exponent-bits, exponent-bias, fraction-bits) of
// the scalar element type of Ty. The three tuples below match the IEEE-754
// encodings of, presumably, f16 (5,15,10), f32 (8,127,23) and f64
// (11,1023,52); any other element type aborts via llvm_unreachable.
// NOTE(review): the `case MVT::...:` labels of this switch (and the
// closing braces) appear to be missing from this extraction -- confirm
// against the original file.
39 static std::tuple
<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty
) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy
= Ty
.getScalarType();
42 switch (ElemTy
.SimpleTy
) {
44 return std::make_tuple(5, 15, 10);
46 return std::make_tuple(8, 127, 23);
48 return std::make_tuple(11, 1023, 52);
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy
).getEVTString()).c_str());
56 HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget
.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8
, &Hexagon::HvxVRRegClass
);
59 addRegisterClass(MVT::v32i16
, &Hexagon::HvxVRRegClass
);
60 addRegisterClass(MVT::v16i32
, &Hexagon::HvxVRRegClass
);
61 addRegisterClass(MVT::v128i8
, &Hexagon::HvxWRRegClass
);
62 addRegisterClass(MVT::v64i16
, &Hexagon::HvxWRRegClass
);
63 addRegisterClass(MVT::v32i32
, &Hexagon::HvxWRRegClass
);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
72 addRegisterClass(MVT::v16i1
, &Hexagon::HvxQRRegClass
);
73 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
74 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
75 } else if (Subtarget
.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8
, &Hexagon::HvxVRRegClass
);
77 addRegisterClass(MVT::v64i16
, &Hexagon::HvxVRRegClass
);
78 addRegisterClass(MVT::v32i32
, &Hexagon::HvxVRRegClass
);
79 addRegisterClass(MVT::v256i8
, &Hexagon::HvxWRRegClass
);
80 addRegisterClass(MVT::v128i16
, &Hexagon::HvxWRRegClass
);
81 addRegisterClass(MVT::v64i32
, &Hexagon::HvxWRRegClass
);
82 addRegisterClass(MVT::v32i1
, &Hexagon::HvxQRRegClass
);
83 addRegisterClass(MVT::v64i1
, &Hexagon::HvxQRRegClass
);
84 addRegisterClass(MVT::v128i1
, &Hexagon::HvxQRRegClass
);
85 if (Subtarget
.useHVXV68Ops() && Subtarget
.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32
, &Hexagon::HvxVRRegClass
);
87 addRegisterClass(MVT::v64f16
, &Hexagon::HvxVRRegClass
);
88 addRegisterClass(MVT::v64f32
, &Hexagon::HvxWRRegClass
);
89 addRegisterClass(MVT::v128f16
, &Hexagon::HvxWRRegClass
);
93 // Set up operation actions.
95 bool Use64b
= Subtarget
.useHVX64BOps();
96 ArrayRef
<MVT
> LegalV
= Use64b
? LegalV64
: LegalV128
;
97 ArrayRef
<MVT
> LegalW
= Use64b
? LegalW64
: LegalW128
;
98 MVT ByteV
= Use64b
? MVT::v64i8
: MVT::v128i8
;
99 MVT WordV
= Use64b
? MVT::v16i32
: MVT::v32i32
;
100 MVT ByteW
= Use64b
? MVT::v128i8
: MVT::v256i8
;
102 auto setPromoteTo
= [this] (unsigned Opc
, MVT FromTy
, MVT ToTy
) {
103 setOperationAction(Opc
, FromTy
, Promote
);
104 AddPromotedToType(Opc
, FromTy
, ToTy
);
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
110 setOperationAction(ISD::BITCAST
, MVT::i16
, Custom
);
111 setOperationAction(ISD::BITCAST
, MVT::i32
, Custom
);
112 setOperationAction(ISD::BITCAST
, MVT::i64
, Custom
);
113 setOperationAction(ISD::BITCAST
, MVT::v16i1
, Custom
);
114 setOperationAction(ISD::BITCAST
, MVT::v128i1
, Custom
);
115 setOperationAction(ISD::BITCAST
, MVT::i128
, Custom
);
116 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteV
, Legal
);
117 setOperationAction(ISD::VECTOR_SHUFFLE
, ByteW
, Legal
);
118 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
120 if (Subtarget
.useHVX128BOps() && Subtarget
.useHVXV68Ops() &&
121 Subtarget
.useHVXFloatingPoint()) {
123 static const MVT FloatV
[] = { MVT::v64f16
, MVT::v32f32
};
124 static const MVT FloatW
[] = { MVT::v128f16
, MVT::v64f32
};
126 for (MVT T
: FloatV
) {
127 setOperationAction(ISD::FADD
, T
, Legal
);
128 setOperationAction(ISD::FSUB
, T
, Legal
);
129 setOperationAction(ISD::FMUL
, T
, Legal
);
130 setOperationAction(ISD::FMINNUM
, T
, Legal
);
131 setOperationAction(ISD::FMAXNUM
, T
, Legal
);
133 setOperationAction(ISD::INSERT_SUBVECTOR
, T
, Custom
);
134 setOperationAction(ISD::EXTRACT_SUBVECTOR
, T
, Custom
);
136 setOperationAction(ISD::SPLAT_VECTOR
, T
, Legal
);
137 setOperationAction(ISD::SPLAT_VECTOR
, T
, Legal
);
139 setOperationAction(ISD::MLOAD
, T
, Custom
);
140 setOperationAction(ISD::MSTORE
, T
, Custom
);
141 // Custom-lower BUILD_VECTOR. The standard (target-independent)
142 // handling of it would convert it to a load, which is not always
143 // the optimal choice.
144 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
148 // BUILD_VECTOR with f16 operands cannot be promoted without
149 // promoting the result, so lower the node to vsplat or constant pool
150 setOperationAction(ISD::BUILD_VECTOR
, MVT::f16
, Custom
);
151 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::f16
, Custom
);
152 setOperationAction(ISD::SPLAT_VECTOR
, MVT::f16
, Custom
);
154 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
156 setPromoteTo(ISD::VECTOR_SHUFFLE
, MVT::v128f16
, ByteW
);
157 setPromoteTo(ISD::VECTOR_SHUFFLE
, MVT::v64f16
, ByteV
);
158 setPromoteTo(ISD::VECTOR_SHUFFLE
, MVT::v64f32
, ByteW
);
159 setPromoteTo(ISD::VECTOR_SHUFFLE
, MVT::v32f32
, ByteV
);
161 for (MVT P
: FloatW
) {
162 setOperationAction(ISD::LOAD
, P
, Custom
);
163 setOperationAction(ISD::STORE
, P
, Custom
);
164 setOperationAction(ISD::FADD
, P
, Custom
);
165 setOperationAction(ISD::FSUB
, P
, Custom
);
166 setOperationAction(ISD::FMUL
, P
, Custom
);
167 setOperationAction(ISD::FMINNUM
, P
, Custom
);
168 setOperationAction(ISD::FMAXNUM
, P
, Custom
);
169 setOperationAction(ISD::SETCC
, P
, Custom
);
170 setOperationAction(ISD::VSELECT
, P
, Custom
);
172 // Custom-lower BUILD_VECTOR. The standard (target-independent)
173 // handling of it would convert it to a load, which is not always
174 // the optimal choice.
175 setOperationAction(ISD::BUILD_VECTOR
, P
, Custom
);
176 // Make concat-vectors custom to handle concats of more than 2 vectors.
177 setOperationAction(ISD::CONCAT_VECTORS
, P
, Custom
);
179 setOperationAction(ISD::MLOAD
, P
, Custom
);
180 setOperationAction(ISD::MSTORE
, P
, Custom
);
183 if (Subtarget
.useHVXQFloatOps()) {
184 setOperationAction(ISD::FP_EXTEND
, MVT::v64f32
, Custom
);
185 setOperationAction(ISD::FP_ROUND
, MVT::v64f16
, Legal
);
186 } else if (Subtarget
.useHVXIEEEFPOps()) {
187 setOperationAction(ISD::FP_EXTEND
, MVT::v64f32
, Legal
);
188 setOperationAction(ISD::FP_ROUND
, MVT::v64f16
, Legal
);
192 for (MVT T
: LegalV
) {
193 setIndexedLoadAction(ISD::POST_INC
, T
, Legal
);
194 setIndexedStoreAction(ISD::POST_INC
, T
, Legal
);
196 setOperationAction(ISD::ABS
, T
, Legal
);
197 setOperationAction(ISD::AND
, T
, Legal
);
198 setOperationAction(ISD::OR
, T
, Legal
);
199 setOperationAction(ISD::XOR
, T
, Legal
);
200 setOperationAction(ISD::ADD
, T
, Legal
);
201 setOperationAction(ISD::SUB
, T
, Legal
);
202 setOperationAction(ISD::MUL
, T
, Legal
);
203 setOperationAction(ISD::CTPOP
, T
, Legal
);
204 setOperationAction(ISD::CTLZ
, T
, Legal
);
205 setOperationAction(ISD::SELECT
, T
, Legal
);
206 setOperationAction(ISD::SPLAT_VECTOR
, T
, Legal
);
208 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
209 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
210 setOperationAction(ISD::BSWAP
, T
, Legal
);
213 setOperationAction(ISD::SMIN
, T
, Legal
);
214 setOperationAction(ISD::SMAX
, T
, Legal
);
215 if (T
.getScalarType() != MVT::i32
) {
216 setOperationAction(ISD::UMIN
, T
, Legal
);
217 setOperationAction(ISD::UMAX
, T
, Legal
);
220 setOperationAction(ISD::CTTZ
, T
, Custom
);
221 setOperationAction(ISD::LOAD
, T
, Custom
);
222 setOperationAction(ISD::MLOAD
, T
, Custom
);
223 setOperationAction(ISD::MSTORE
, T
, Custom
);
224 if (T
.getScalarType() != MVT::i32
) {
225 setOperationAction(ISD::MULHS
, T
, Legal
);
226 setOperationAction(ISD::MULHU
, T
, Legal
);
229 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
230 // Make concat-vectors custom to handle concats of more than 2 vectors.
231 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
232 setOperationAction(ISD::INSERT_SUBVECTOR
, T
, Custom
);
233 setOperationAction(ISD::INSERT_VECTOR_ELT
, T
, Custom
);
234 setOperationAction(ISD::EXTRACT_SUBVECTOR
, T
, Custom
);
235 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, T
, Custom
);
236 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
237 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
238 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
239 setOperationAction(ISD::FSHL
, T
, Custom
);
240 setOperationAction(ISD::FSHR
, T
, Custom
);
242 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
243 // HVX only has shifts of words and halfwords.
244 setOperationAction(ISD::SRA
, T
, Custom
);
245 setOperationAction(ISD::SHL
, T
, Custom
);
246 setOperationAction(ISD::SRL
, T
, Custom
);
248 // Promote all shuffles to operate on vectors of bytes.
249 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteV
);
252 if (Subtarget
.useHVXFloatingPoint()) {
253 // Same action for both QFloat and IEEE.
254 setOperationAction(ISD::SINT_TO_FP
, T
, Custom
);
255 setOperationAction(ISD::UINT_TO_FP
, T
, Custom
);
256 setOperationAction(ISD::FP_TO_SINT
, T
, Custom
);
257 setOperationAction(ISD::FP_TO_UINT
, T
, Custom
);
260 setCondCodeAction(ISD::SETNE
, T
, Expand
);
261 setCondCodeAction(ISD::SETLE
, T
, Expand
);
262 setCondCodeAction(ISD::SETGE
, T
, Expand
);
263 setCondCodeAction(ISD::SETLT
, T
, Expand
);
264 setCondCodeAction(ISD::SETULE
, T
, Expand
);
265 setCondCodeAction(ISD::SETUGE
, T
, Expand
);
266 setCondCodeAction(ISD::SETULT
, T
, Expand
);
269 for (MVT T
: LegalW
) {
270 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
271 // independent) handling of it would convert it to a load, which is
272 // not always the optimal choice.
273 setOperationAction(ISD::BUILD_VECTOR
, T
, Custom
);
274 // Make concat-vectors custom to handle concats of more than 2 vectors.
275 setOperationAction(ISD::CONCAT_VECTORS
, T
, Custom
);
277 // Custom-lower these operations for pairs. Expand them into a concat
278 // of the corresponding operations on individual vectors.
279 setOperationAction(ISD::ANY_EXTEND
, T
, Custom
);
280 setOperationAction(ISD::SIGN_EXTEND
, T
, Custom
);
281 setOperationAction(ISD::ZERO_EXTEND
, T
, Custom
);
282 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Custom
);
283 setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG
, T
, Custom
);
284 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG
, T
, Legal
);
285 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG
, T
, Legal
);
286 setOperationAction(ISD::SPLAT_VECTOR
, T
, Custom
);
288 setOperationAction(ISD::LOAD
, T
, Custom
);
289 setOperationAction(ISD::STORE
, T
, Custom
);
290 setOperationAction(ISD::MLOAD
, T
, Custom
);
291 setOperationAction(ISD::MSTORE
, T
, Custom
);
292 setOperationAction(ISD::ABS
, T
, Custom
);
293 setOperationAction(ISD::CTLZ
, T
, Custom
);
294 setOperationAction(ISD::CTTZ
, T
, Custom
);
295 setOperationAction(ISD::CTPOP
, T
, Custom
);
297 setOperationAction(ISD::ADD
, T
, Legal
);
298 setOperationAction(ISD::SUB
, T
, Legal
);
299 setOperationAction(ISD::MUL
, T
, Custom
);
300 setOperationAction(ISD::MULHS
, T
, Custom
);
301 setOperationAction(ISD::MULHU
, T
, Custom
);
302 setOperationAction(ISD::AND
, T
, Custom
);
303 setOperationAction(ISD::OR
, T
, Custom
);
304 setOperationAction(ISD::XOR
, T
, Custom
);
305 setOperationAction(ISD::SETCC
, T
, Custom
);
306 setOperationAction(ISD::VSELECT
, T
, Custom
);
308 setOperationAction(ISD::SRA
, T
, Custom
);
309 setOperationAction(ISD::SHL
, T
, Custom
);
310 setOperationAction(ISD::SRL
, T
, Custom
);
312 // Promote all shuffles to operate on vectors of bytes.
313 setPromoteTo(ISD::VECTOR_SHUFFLE
, T
, ByteW
);
315 setOperationAction(ISD::FSHL
, T
, Custom
);
316 setOperationAction(ISD::FSHR
, T
, Custom
);
318 setOperationAction(ISD::SMIN
, T
, Custom
);
319 setOperationAction(ISD::SMAX
, T
, Custom
);
320 if (T
.getScalarType() != MVT::i32
) {
321 setOperationAction(ISD::UMIN
, T
, Custom
);
322 setOperationAction(ISD::UMAX
, T
, Custom
);
325 if (Subtarget
.useHVXFloatingPoint()) {
326 // Same action for both QFloat and IEEE.
327 setOperationAction(ISD::SINT_TO_FP
, T
, Custom
);
328 setOperationAction(ISD::UINT_TO_FP
, T
, Custom
);
329 setOperationAction(ISD::FP_TO_SINT
, T
, Custom
);
330 setOperationAction(ISD::FP_TO_UINT
, T
, Custom
);
334 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
335 setOperationAction(ISD::MULHS
, WordV
, Custom
); // -> _LOHI
336 setOperationAction(ISD::MULHU
, WordV
, Custom
); // -> _LOHI
337 setOperationAction(ISD::SMUL_LOHI
, WordV
, Custom
);
338 setOperationAction(ISD::UMUL_LOHI
, WordV
, Custom
);
340 setCondCodeAction(ISD::SETNE
, MVT::v64f16
, Expand
);
341 setCondCodeAction(ISD::SETLE
, MVT::v64f16
, Expand
);
342 setCondCodeAction(ISD::SETGE
, MVT::v64f16
, Expand
);
343 setCondCodeAction(ISD::SETLT
, MVT::v64f16
, Expand
);
344 setCondCodeAction(ISD::SETONE
, MVT::v64f16
, Expand
);
345 setCondCodeAction(ISD::SETOLE
, MVT::v64f16
, Expand
);
346 setCondCodeAction(ISD::SETOGE
, MVT::v64f16
, Expand
);
347 setCondCodeAction(ISD::SETOLT
, MVT::v64f16
, Expand
);
348 setCondCodeAction(ISD::SETUNE
, MVT::v64f16
, Expand
);
349 setCondCodeAction(ISD::SETULE
, MVT::v64f16
, Expand
);
350 setCondCodeAction(ISD::SETUGE
, MVT::v64f16
, Expand
);
351 setCondCodeAction(ISD::SETULT
, MVT::v64f16
, Expand
);
353 setCondCodeAction(ISD::SETNE
, MVT::v32f32
, Expand
);
354 setCondCodeAction(ISD::SETLE
, MVT::v32f32
, Expand
);
355 setCondCodeAction(ISD::SETGE
, MVT::v32f32
, Expand
);
356 setCondCodeAction(ISD::SETLT
, MVT::v32f32
, Expand
);
357 setCondCodeAction(ISD::SETONE
, MVT::v32f32
, Expand
);
358 setCondCodeAction(ISD::SETOLE
, MVT::v32f32
, Expand
);
359 setCondCodeAction(ISD::SETOGE
, MVT::v32f32
, Expand
);
360 setCondCodeAction(ISD::SETOLT
, MVT::v32f32
, Expand
);
361 setCondCodeAction(ISD::SETUNE
, MVT::v32f32
, Expand
);
362 setCondCodeAction(ISD::SETULE
, MVT::v32f32
, Expand
);
363 setCondCodeAction(ISD::SETUGE
, MVT::v32f32
, Expand
);
364 setCondCodeAction(ISD::SETULT
, MVT::v32f32
, Expand
);
368 for (MVT T
: LegalW
) {
369 // Boolean types for vector pairs will overlap with the boolean
370 // types for single vectors, e.g.
371 // v64i8 -> v64i1 (single)
372 // v64i16 -> v64i1 (pair)
373 // Set these actions first, and allow the single actions to overwrite
375 MVT BoolW
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
376 setOperationAction(ISD::SETCC
, BoolW
, Custom
);
377 setOperationAction(ISD::AND
, BoolW
, Custom
);
378 setOperationAction(ISD::OR
, BoolW
, Custom
);
379 setOperationAction(ISD::XOR
, BoolW
, Custom
);
380 // Masked load/store takes a mask that may need splitting.
381 setOperationAction(ISD::MLOAD
, BoolW
, Custom
);
382 setOperationAction(ISD::MSTORE
, BoolW
, Custom
);
385 for (MVT T
: LegalV
) {
386 MVT BoolV
= MVT::getVectorVT(MVT::i1
, T
.getVectorNumElements());
387 setOperationAction(ISD::BUILD_VECTOR
, BoolV
, Custom
);
388 setOperationAction(ISD::CONCAT_VECTORS
, BoolV
, Custom
);
389 setOperationAction(ISD::INSERT_SUBVECTOR
, BoolV
, Custom
);
390 setOperationAction(ISD::INSERT_VECTOR_ELT
, BoolV
, Custom
);
391 setOperationAction(ISD::EXTRACT_SUBVECTOR
, BoolV
, Custom
);
392 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, BoolV
, Custom
);
393 setOperationAction(ISD::SELECT
, BoolV
, Custom
);
394 setOperationAction(ISD::AND
, BoolV
, Legal
);
395 setOperationAction(ISD::OR
, BoolV
, Legal
);
396 setOperationAction(ISD::XOR
, BoolV
, Legal
);
400 for (MVT T
: {MVT::v32i8
, MVT::v32i16
, MVT::v16i8
, MVT::v16i16
, MVT::v16i32
})
401 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Legal
);
403 for (MVT T
: {MVT::v64i8
, MVT::v64i16
, MVT::v32i8
, MVT::v32i16
, MVT::v32i32
})
404 setOperationAction(ISD::SIGN_EXTEND_INREG
, T
, Legal
);
407 // Handle store widening for short vectors.
408 unsigned HwLen
= Subtarget
.getVectorLength();
409 for (MVT ElemTy
: Subtarget
.getHVXElementTypes()) {
410 if (ElemTy
== MVT::i1
)
412 int ElemWidth
= ElemTy
.getFixedSizeInBits();
413 int MaxElems
= (8*HwLen
) / ElemWidth
;
414 for (int N
= 2; N
< MaxElems
; N
*= 2) {
415 MVT VecTy
= MVT::getVectorVT(ElemTy
, N
);
416 auto Action
= getPreferredVectorAction(VecTy
);
417 if (Action
== TargetLoweringBase::TypeWidenVector
) {
418 setOperationAction(ISD::LOAD
, VecTy
, Custom
);
419 setOperationAction(ISD::STORE
, VecTy
, Custom
);
420 setOperationAction(ISD::SETCC
, VecTy
, Custom
);
421 setOperationAction(ISD::TRUNCATE
, VecTy
, Custom
);
422 setOperationAction(ISD::ANY_EXTEND
, VecTy
, Custom
);
423 setOperationAction(ISD::SIGN_EXTEND
, VecTy
, Custom
);
424 setOperationAction(ISD::ZERO_EXTEND
, VecTy
, Custom
);
425 if (Subtarget
.useHVXFloatingPoint()) {
426 setOperationAction(ISD::FP_TO_SINT
, VecTy
, Custom
);
427 setOperationAction(ISD::FP_TO_UINT
, VecTy
, Custom
);
428 setOperationAction(ISD::SINT_TO_FP
, VecTy
, Custom
);
429 setOperationAction(ISD::UINT_TO_FP
, VecTy
, Custom
);
432 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, N
);
433 if (!isTypeLegal(BoolTy
))
434 setOperationAction(ISD::SETCC
, BoolTy
, Custom
);
439 setTargetDAGCombine({ISD::CONCAT_VECTORS
, ISD::TRUNCATE
, ISD::VSELECT
});
// Decide how type legalization should treat an HVX-candidate vector type:
// split it, widen it, or leave it to the default handling. Oversized i1
// vectors are split; small vectors are widened based on the (optional)
// command-line threshold or on being at least half a vector register wide.
// NOTE(review): this extraction is missing interior lines -- at least the
// loop header iterating over Tys (which would bind `T` used below) and the
// function's trailing return statements. Confirm against the original.
443 HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy
) const {
444 MVT ElemTy
= VecTy
.getVectorElementType();
445 unsigned VecLen
= VecTy
.getVectorNumElements();
446 unsigned HwLen
= Subtarget
.getVectorLength();
448 // Split vectors of i1 that exceed byte vector length.
449 if (ElemTy
== MVT::i1
&& VecLen
> HwLen
)
450 return TargetLoweringBase::TypeSplitVector
;
452 ArrayRef
<MVT
> Tys
= Subtarget
.getHVXElementTypes();
453 // For shorter vectors of i1, widen them if any of the corresponding
454 // vectors of integers needs to be widened.
455 if (ElemTy
== MVT::i1
) {
457 assert(T
!= MVT::i1
);
458 auto A
= getPreferredHvxVectorAction(MVT::getVectorVT(T
, VecLen
));
465 // If the size of VecTy is at least half of the vector length,
466 // widen the vector. Note: the threshold was not selected in
467 // any scientific way.
468 if (llvm::is_contained(Tys
, ElemTy
)) {
469 unsigned VecWidth
= VecTy
.getSizeInBits();
470 unsigned HwWidth
= 8*HwLen
;
// More than 2x the register width: must be split.
471 if (VecWidth
> 2*HwWidth
)
472 return TargetLoweringBase::TypeSplitVector
;
// Only honor HvxWidenThreshold when it was set explicitly on the
// command line (getNumOccurrences() > 0); threshold is in bytes,
// VecWidth in bits, hence the 8* scaling.
474 bool HaveThreshold
= HvxWidenThreshold
.getNumOccurrences() > 0;
475 if (HaveThreshold
&& 8*HvxWidenThreshold
<= VecWidth
)
476 return TargetLoweringBase::TypeWidenVector
;
// Default heuristic: widen anything at least half a register wide.
477 if (VecWidth
>= HwWidth
/2 && VecWidth
< HwWidth
)
478 return TargetLoweringBase::TypeWidenVector
;
// Post-legalization action query for Hexagon-specific nodes: the three
// *MUL_LOHI variants need custom handling; everything else is Legal.
// NOTE(review): the `switch (Opc) {` line (and default label/closing
// braces) appear to be missing from this extraction.
486 HexagonTargetLowering::getCustomHvxOperationAction(SDNode
&Op
) const {
487 unsigned Opc
= Op
.getOpcode();
489 case HexagonISD::SMUL_LOHI
:
490 case HexagonISD::UMUL_LOHI
:
491 case HexagonISD::USMUL_LOHI
:
492 return TargetLoweringBase::Custom
;
494 return TargetLoweringBase::Legal
;
// Build an INTRINSIC_WO_CHAIN node for intrinsic IntId with result type
// ResTy: the intrinsic id (as an i32 constant) becomes operand 0, followed
// by the caller-supplied operands.
498 HexagonTargetLowering::getInt(unsigned IntId
, MVT ResTy
, ArrayRef
<SDValue
> Ops
,
499 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
500 SmallVector
<SDValue
,4> IntOps
;
501 IntOps
.push_back(DAG
.getConstant(IntId
, dl
, MVT::i32
));
502 append_range(IntOps
, Ops
);
503 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, ResTy
, IntOps
);
// Concatenate a pair of vector types: both must share the element type;
// the result is a vector of that element type whose length is the sum of
// the two input lengths.
507 HexagonTargetLowering::typeJoin(const TypePair
&Tys
) const {
508 assert(Tys
.first
.getVectorElementType() == Tys
.second
.getVectorElementType());
510 MVT ElemTy
= Tys
.first
.getVectorElementType();
511 return MVT::getVectorVT(ElemTy
, Tys
.first
.getVectorNumElements() +
512 Tys
.second
.getVectorNumElements());
// Split an even-length vector type into two identical halves, each with
// the same element type and half the element count.
515 HexagonTargetLowering::TypePair
516 HexagonTargetLowering::typeSplit(MVT VecTy
) const {
517 assert(VecTy
.isVector());
518 unsigned NumElem
= VecTy
.getVectorNumElements();
519 assert((NumElem
% 2) == 0 && "Expecting even-sized vector type");
520 MVT HalfTy
= MVT::getVectorVT(VecTy
.getVectorElementType(), NumElem
/2);
521 return { HalfTy
, HalfTy
};
// Return a vector type with the same element count, but with each
// (integer) element widened by Factor, e.g. v16i8 with Factor=2 -> v16i16.
525 HexagonTargetLowering::typeExtElem(MVT VecTy
, unsigned Factor
) const {
526 MVT ElemTy
= VecTy
.getVectorElementType();
527 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() * Factor
);
528 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Inverse of typeExtElem: same element count, each (integer) element
// narrowed by Factor, e.g. v16i16 with Factor=2 -> v16i8.
532 HexagonTargetLowering::typeTruncElem(MVT VecTy
, unsigned Factor
) const {
533 MVT ElemTy
= VecTy
.getVectorElementType();
534 MVT NewElemTy
= MVT::getIntegerVT(ElemTy
.getSizeInBits() / Factor
);
535 return MVT::getVectorVT(NewElemTy
, VecTy
.getVectorNumElements());
// Bitcast Vec to a vector with element type ElemTy (same total width).
// NOTE(review): the early `return Vec;` for the already-matching case
// (original line between the `if` and the CastTy computation) appears to
// be missing from this extraction.
539 HexagonTargetLowering::opCastElem(SDValue Vec
, MVT ElemTy
,
540 SelectionDAG
&DAG
) const {
541 if (ty(Vec
).getVectorElementType() == ElemTy
)
543 MVT CastTy
= tyVector(Vec
.getValueType().getSimpleVT(), ElemTy
);
544 return DAG
.getBitcast(CastTy
, Vec
);
// Concatenate a pair of vector values into a single CONCAT_VECTORS node
// whose type is the join (typeJoin) of the operand types.
548 HexagonTargetLowering::opJoin(const VectorPair
&Ops
, const SDLoc
&dl
,
549 SelectionDAG
&DAG
) const {
550 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, typeJoin(ty(Ops
)),
551 Ops
.first
, Ops
.second
);
// Split a vector value into its two halves. A HexagonISD::QCAT (predicate
// concatenation) is peeled apart directly into its two operands; anything
// else goes through the generic SelectionDAG::SplitVector.
554 HexagonTargetLowering::VectorPair
555 HexagonTargetLowering::opSplit(SDValue Vec
, const SDLoc
&dl
,
556 SelectionDAG
&DAG
) const {
557 TypePair Tys
= typeSplit(ty(Vec
));
558 if (Vec
.getOpcode() == HexagonISD::QCAT
)
559 return VectorPair(Vec
.getOperand(0), Vec
.getOperand(1));
560 return DAG
.SplitVector(Vec
, dl
, Tys
.first
, Tys
.second
);
// True if Ty is an HVX vector type exactly one vector register wide
// (getVectorLength() is in bytes, hence the *8 for bits).
564 HexagonTargetLowering::isHvxSingleTy(MVT Ty
) const {
565 return Subtarget
.isHVXVectorType(Ty
) &&
566 Ty
.getSizeInBits() == 8 * Subtarget
.getVectorLength();
// True if Ty is an HVX vector type exactly two vector registers wide
// (a vector pair): 16 bits per byte-of-register = 2 * 8.
570 HexagonTargetLowering::isHvxPairTy(MVT Ty
) const {
571 return Subtarget
.isHVXVectorType(Ty
) &&
572 Ty
.getSizeInBits() == 16 * Subtarget
.getVectorLength();
// True if Ty is an HVX predicate (boolean) vector type: an HVX type when
// bool vectors are included (second argument `true`) with i1 elements.
576 HexagonTargetLowering::isHvxBoolTy(MVT Ty
) const {
577 return Subtarget
.isHVXVectorType(Ty
, true) &&
578 Ty
.getVectorElementType() == MVT::i1
;
// Whether VecTy may be accessed as a single HVX memory operation:
// rejects anything wider than one vector register and anything that is
// not a (non-bool) HVX vector type.
// NOTE(review): the `return` statements of this function (false for the
// two rejecting branches, the final result) appear to be missing from
// this extraction.
581 bool HexagonTargetLowering::allowsHvxMemoryAccess(
582 MVT VecTy
, MachineMemOperand::Flags Flags
, unsigned *Fast
) const {
583 // Bool vectors are excluded by default, but make it explicit to
584 // emphasize that bool vectors cannot be loaded or stored.
585 // Also, disallow double vector stores (to prevent unnecessary
586 // store widening in DAG combiner).
587 if (VecTy
.getSizeInBits() > 8*Subtarget
.getVectorLength())
589 if (!Subtarget
.isHVXVectorType(VecTy
, /*IncludeBool=*/false))
// Whether misaligned accesses are allowed for VecTy: only HVX vector
// types qualify (the hardware has unaligned vmemu).
// NOTE(review): the `return` statements appear to be missing from this
// extraction.
596 bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
597 MVT VecTy
, MachineMemOperand::Flags Flags
, unsigned *Fast
) const {
598 if (!Subtarget
.isHVXVectorType(VecTy
))
600 // XXX Should this be false? vmemu are a bit slower than vmem.
606 void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
607 MachineInstr
&MI
, SDNode
*Node
) const {
608 unsigned Opc
= MI
.getOpcode();
609 const TargetInstrInfo
&TII
= *Subtarget
.getInstrInfo();
610 MachineBasicBlock
&MB
= *MI
.getParent();
611 MachineFunction
&MF
= *MB
.getParent();
612 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
613 DebugLoc DL
= MI
.getDebugLoc();
614 auto At
= MI
.getIterator();
617 case Hexagon::PS_vsplatib
:
618 if (Subtarget
.useHVXV62Ops()) {
619 // SplatV = A2_tfrsi #imm
620 // OutV = V6_lvsplatb SplatV
621 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
622 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_tfrsi
), SplatV
)
623 .add(MI
.getOperand(1));
624 Register OutV
= MI
.getOperand(0).getReg();
625 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatb
), OutV
)
628 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
629 // OutV = V6_lvsplatw SplatV
630 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
631 const MachineOperand
&InpOp
= MI
.getOperand(1);
632 assert(InpOp
.isImm());
633 uint32_t V
= InpOp
.getImm() & 0xFF;
634 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_tfrsi
), SplatV
)
635 .addImm(V
<< 24 | V
<< 16 | V
<< 8 | V
);
636 Register OutV
= MI
.getOperand(0).getReg();
637 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatw
), OutV
).addReg(SplatV
);
641 case Hexagon::PS_vsplatrb
:
642 if (Subtarget
.useHVXV62Ops()) {
643 // OutV = V6_lvsplatb Inp
644 Register OutV
= MI
.getOperand(0).getReg();
645 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatb
), OutV
)
646 .add(MI
.getOperand(1));
648 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
649 const MachineOperand
&InpOp
= MI
.getOperand(1);
650 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::S2_vsplatrb
), SplatV
)
651 .addReg(InpOp
.getReg(), 0, InpOp
.getSubReg());
652 Register OutV
= MI
.getOperand(0).getReg();
653 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatw
), OutV
)
658 case Hexagon::PS_vsplatih
:
659 if (Subtarget
.useHVXV62Ops()) {
660 // SplatV = A2_tfrsi #imm
661 // OutV = V6_lvsplath SplatV
662 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
663 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_tfrsi
), SplatV
)
664 .add(MI
.getOperand(1));
665 Register OutV
= MI
.getOperand(0).getReg();
666 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplath
), OutV
)
669 // SplatV = A2_tfrsi #imm:#imm
670 // OutV = V6_lvsplatw SplatV
671 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
672 const MachineOperand
&InpOp
= MI
.getOperand(1);
673 assert(InpOp
.isImm());
674 uint32_t V
= InpOp
.getImm() & 0xFFFF;
675 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_tfrsi
), SplatV
)
676 .addImm(V
<< 16 | V
);
677 Register OutV
= MI
.getOperand(0).getReg();
678 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatw
), OutV
).addReg(SplatV
);
682 case Hexagon::PS_vsplatrh
:
683 if (Subtarget
.useHVXV62Ops()) {
684 // OutV = V6_lvsplath Inp
685 Register OutV
= MI
.getOperand(0).getReg();
686 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplath
), OutV
)
687 .add(MI
.getOperand(1));
689 // SplatV = A2_combine_ll Inp, Inp
690 // OutV = V6_lvsplatw SplatV
691 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
692 const MachineOperand
&InpOp
= MI
.getOperand(1);
693 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_combine_ll
), SplatV
)
694 .addReg(InpOp
.getReg(), 0, InpOp
.getSubReg())
695 .addReg(InpOp
.getReg(), 0, InpOp
.getSubReg());
696 Register OutV
= MI
.getOperand(0).getReg();
697 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::V6_lvsplatw
), OutV
).addReg(SplatV
);
701 case Hexagon::PS_vsplatiw
:
702 case Hexagon::PS_vsplatrw
:
703 if (Opc
== Hexagon::PS_vsplatiw
) {
704 // SplatV = A2_tfrsi #imm
705 Register SplatV
= MRI
.createVirtualRegister(&Hexagon::IntRegsRegClass
);
706 BuildMI(MB
, At
, DL
, TII
.get(Hexagon::A2_tfrsi
), SplatV
)
707 .add(MI
.getOperand(1));
708 MI
.getOperand(1).ChangeToRegister(SplatV
, false);
710 // OutV = V6_lvsplatw SplatV/Inp
711 MI
.setDesc(TII
.get(Hexagon::V6_lvsplatw
));
// Convert an element index into a byte index: normalize the index to i32,
// then shift left by log2(element-size-in-bytes), i.e. multiply by the
// byte size of one element of ElemTy.
717 HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx
, MVT ElemTy
,
718 SelectionDAG
&DAG
) const {
719 if (ElemIdx
.getValueType().getSimpleVT() != MVT::i32
)
720 ElemIdx
= DAG
.getBitcast(MVT::i32
, ElemIdx
);
722 unsigned ElemWidth
= ElemTy
.getSizeInBits();
726 unsigned L
= Log2_32(ElemWidth
/8);
727 const SDLoc
&dl(ElemIdx
);
728 return DAG
.getNode(ISD::SHL
, dl
, MVT::i32
,
729 {ElemIdx
, DAG
.getConstant(L
, dl
, MVT::i32
)});
// Compute the position of an element within its containing 32-bit word:
// SubIdx = Idx & (32/ElemWidth - 1), for element widths of 8..32 bits.
// NOTE(review): the function's final return (presumably derived from
// SubIdx) appears to be missing from this extraction.
733 HexagonTargetLowering::getIndexInWord32(SDValue Idx
, MVT ElemTy
,
734 SelectionDAG
&DAG
) const {
735 unsigned ElemWidth
= ElemTy
.getSizeInBits();
736 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
740 if (ty(Idx
) != MVT::i32
)
741 Idx
= DAG
.getBitcast(MVT::i32
, Idx
);
742 const SDLoc
&dl(Idx
);
// Mask keeps the low bits that address elements inside one word:
// 3 for 8-bit elements, 1 for 16-bit, 0 for 32-bit.
743 SDValue Mask
= DAG
.getConstant(32/ElemWidth
- 1, dl
, MVT::i32
);
744 SDValue SubIdx
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, {Idx
, Mask
});
// Lower a shuffle of wider-than-byte elements to a byte shuffle: each
// mask entry M is expanded to ElemSize consecutive byte indices
// (M*ElemSize .. M*ElemSize+ElemSize-1), or ElemSize -1s for an undef
// lane, and the operands are bitcast to i8 vectors. i8 shuffles pass
// through unchanged.
// NOTE(review): the loop header iterating over Mask (binding `M`) and the
// undef-lane branch around the two inner loops appear to be missing from
// this extraction.
749 HexagonTargetLowering::getByteShuffle(const SDLoc
&dl
, SDValue Op0
,
750 SDValue Op1
, ArrayRef
<int> Mask
,
751 SelectionDAG
&DAG
) const {
753 assert(OpTy
== ty(Op1
));
755 MVT ElemTy
= OpTy
.getVectorElementType();
756 if (ElemTy
== MVT::i8
)
757 return DAG
.getVectorShuffle(OpTy
, dl
, Op0
, Op1
, Mask
);
758 assert(ElemTy
.getSizeInBits() >= 8);
760 MVT ResTy
= tyVector(OpTy
, MVT::i8
);
761 unsigned ElemSize
= ElemTy
.getSizeInBits() / 8;
763 SmallVector
<int,128> ByteMask
;
// Undef lane: emit ElemSize "don't care" (-1) byte positions.
766 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
767 ByteMask
.push_back(-1);
// Defined lane: expand to the ElemSize bytes of source element M.
769 int NewM
= M
*ElemSize
;
770 for (unsigned I
= 0; I
!= ElemSize
; ++I
)
771 ByteMask
.push_back(NewM
+I
);
774 assert(ResTy
.getVectorNumElements() == ByteMask
.size());
775 return DAG
.getVectorShuffle(ResTy
, dl
, opCastElem(Op0
, MVT::i8
, DAG
),
776 opCastElem(Op1
, MVT::i8
, DAG
), ByteMask
);
780 HexagonTargetLowering::buildHvxVectorReg(ArrayRef
<SDValue
> Values
,
781 const SDLoc
&dl
, MVT VecTy
,
782 SelectionDAG
&DAG
) const {
783 unsigned VecLen
= Values
.size();
784 MachineFunction
&MF
= DAG
.getMachineFunction();
785 MVT ElemTy
= VecTy
.getVectorElementType();
786 unsigned ElemWidth
= ElemTy
.getSizeInBits();
787 unsigned HwLen
= Subtarget
.getVectorLength();
789 unsigned ElemSize
= ElemWidth
/ 8;
790 assert(ElemSize
*VecLen
== HwLen
);
791 SmallVector
<SDValue
,32> Words
;
793 if (VecTy
.getVectorElementType() != MVT::i32
&&
794 !(Subtarget
.useHVXFloatingPoint() &&
795 VecTy
.getVectorElementType() == MVT::f32
)) {
796 assert((ElemSize
== 1 || ElemSize
== 2) && "Invalid element size");
797 unsigned OpsPerWord
= (ElemSize
== 1) ? 4 : 2;
798 MVT PartVT
= MVT::getVectorVT(VecTy
.getVectorElementType(), OpsPerWord
);
799 for (unsigned i
= 0; i
!= VecLen
; i
+= OpsPerWord
) {
800 SDValue W
= buildVector32(Values
.slice(i
, OpsPerWord
), dl
, PartVT
, DAG
);
801 Words
.push_back(DAG
.getBitcast(MVT::i32
, W
));
804 for (SDValue V
: Values
)
805 Words
.push_back(DAG
.getBitcast(MVT::i32
, V
));
807 auto isSplat
= [] (ArrayRef
<SDValue
> Values
, SDValue
&SplatV
) {
808 unsigned NumValues
= Values
.size();
809 assert(NumValues
> 0);
811 for (unsigned i
= 0; i
!= NumValues
; ++i
) {
812 if (Values
[i
].isUndef())
815 if (!SplatV
.getNode())
817 else if (SplatV
!= Values
[i
])
825 unsigned NumWords
= Words
.size();
827 bool IsSplat
= isSplat(Words
, SplatV
);
828 if (IsSplat
&& isUndef(SplatV
))
829 return DAG
.getUNDEF(VecTy
);
831 assert(SplatV
.getNode());
832 if (isNullConstant(SplatV
))
833 return getZero(dl
, VecTy
, DAG
);
834 MVT WordTy
= MVT::getVectorVT(MVT::i32
, HwLen
/4);
835 SDValue S
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, WordTy
, SplatV
);
836 return DAG
.getBitcast(VecTy
, S
);
839 // Delay recognizing constant vectors until here, so that we can generate
841 SmallVector
<ConstantInt
*, 128> Consts(VecLen
);
842 bool AllConst
= getBuildVectorConstInts(Values
, VecTy
, DAG
, Consts
);
844 ArrayRef
<Constant
*> Tmp((Constant
**)Consts
.begin(),
845 (Constant
**)Consts
.end());
846 Constant
*CV
= ConstantVector::get(Tmp
);
847 Align
Alignment(HwLen
);
849 LowerConstantPool(DAG
.getConstantPool(CV
, VecTy
, Alignment
), DAG
);
850 return DAG
.getLoad(VecTy
, dl
, DAG
.getEntryNode(), CP
,
851 MachinePointerInfo::getConstantPool(MF
), Alignment
);
854 // A special case is a situation where the vector is built entirely from
855 // elements extracted from another vector. This could be done via a shuffle
856 // more efficiently, but typically, the size of the source vector will not
857 // match the size of the vector being built (which precludes the use of a
858 // shuffle directly).
859 // This only handles a single source vector, and the vector being built
860 // should be of a sub-vector type of the source vector type.
861 auto IsBuildFromExtracts
= [this,&Values
] (SDValue
&SrcVec
,
862 SmallVectorImpl
<int> &SrcIdx
) {
864 for (SDValue V
: Values
) {
866 SrcIdx
.push_back(-1);
869 if (V
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
871 // All extracts should come from the same vector.
872 SDValue T
= V
.getOperand(0);
873 if (Vec
.getNode() != nullptr && T
.getNode() != Vec
.getNode())
876 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(V
.getOperand(1));
879 int I
= C
->getSExtValue();
880 assert(I
>= 0 && "Negative element index");
887 SmallVector
<int,128> ExtIdx
;
889 if (IsBuildFromExtracts(ExtVec
, ExtIdx
)) {
890 MVT ExtTy
= ty(ExtVec
);
891 unsigned ExtLen
= ExtTy
.getVectorNumElements();
892 if (ExtLen
== VecLen
|| ExtLen
== 2*VecLen
) {
893 // Construct a new shuffle mask that will produce a vector with the same
894 // number of elements as the input vector, and such that the vector we
895 // want will be the initial subvector of it.
896 SmallVector
<int,128> Mask
;
897 BitVector
Used(ExtLen
);
899 for (int M
: ExtIdx
) {
904 // Fill the rest of the mask with the unused elements of ExtVec in hopes
905 // that it will result in a permutation of ExtVec's elements. It's still
906 // fine if it doesn't (e.g. if undefs are present, or elements are
907 // repeated), but permutations can always be done efficiently via vdelta
909 for (unsigned I
= 0; I
!= ExtLen
; ++I
) {
910 if (Mask
.size() == ExtLen
)
916 SDValue S
= DAG
.getVectorShuffle(ExtTy
, dl
, ExtVec
,
917 DAG
.getUNDEF(ExtTy
), Mask
);
918 return ExtLen
== VecLen
? S
: LoHalf(S
, DAG
);
922 // Find most common element to initialize vector with. This is to avoid
923 // unnecessary vinsert/valign for cases where the same value is present
924 // many times. Creates a histogram of the vector's elements to find the
925 // most common element n.
926 assert(4*Words
.size() == Subtarget
.getVectorLength());
929 for (unsigned i
= 0; i
!= NumWords
; ++i
) {
931 if (Words
[i
].isUndef())
933 for (unsigned j
= i
; j
!= NumWords
; ++j
)
934 if (Words
[i
] == Words
[j
])
937 if (VecHist
[i
] > VecHist
[n
])
941 SDValue HalfV
= getZero(dl
, VecTy
, DAG
);
942 if (VecHist
[n
] > 1) {
943 SDValue SplatV
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, VecTy
, Words
[n
]);
944 HalfV
= DAG
.getNode(HexagonISD::VALIGN
, dl
, VecTy
,
945 {HalfV
, SplatV
, DAG
.getConstant(HwLen
/2, dl
, MVT::i32
)});
947 SDValue HalfV0
= HalfV
;
948 SDValue HalfV1
= HalfV
;
950 // Construct two halves in parallel, then or them together. Rn and Rm count
951 // number of rotations needed before the next element. One last rotation is
952 // performed post-loop to position the last element.
957 for (unsigned i
= 0; i
!= NumWords
/2; ++i
) {
958 // Rotate by element count since last insertion.
959 if (Words
[i
] != Words
[n
] || VecHist
[n
] <= 1) {
960 Sn
= DAG
.getConstant(Rn
, dl
, MVT::i32
);
961 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {N
, Sn
});
962 N
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
966 if (Words
[i
+NumWords
/2] != Words
[n
] || VecHist
[n
] <= 1) {
967 Sm
= DAG
.getConstant(Rm
, dl
, MVT::i32
);
968 HalfV1
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {M
, Sm
});
969 M
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
,
970 {HalfV1
, Words
[i
+NumWords
/2]});
976 // Perform last rotation.
977 Sn
= DAG
.getConstant(Rn
+HwLen
/2, dl
, MVT::i32
);
978 Sm
= DAG
.getConstant(Rm
, dl
, MVT::i32
);
979 HalfV0
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {N
, Sn
});
980 HalfV1
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {M
, Sm
});
982 SDValue T0
= DAG
.getBitcast(tyVector(VecTy
, MVT::i32
), HalfV0
);
983 SDValue T1
= DAG
.getBitcast(tyVector(VecTy
, MVT::i32
), HalfV1
);
985 SDValue DstV
= DAG
.getNode(ISD::OR
, dl
, ty(T0
), {T0
, T1
});
988 DAG
.getBitcast(tyVector(ty(DstV
), VecTy
.getVectorElementType()), DstV
);
993 HexagonTargetLowering::createHvxPrefixPred(SDValue PredV
, const SDLoc
&dl
,
994 unsigned BitBytes
, bool ZeroFill
, SelectionDAG
&DAG
) const {
995 MVT PredTy
= ty(PredV
);
996 unsigned HwLen
= Subtarget
.getVectorLength();
997 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
999 if (Subtarget
.isHVXVectorType(PredTy
, true)) {
1000 // Move the vector predicate SubV to a vector register, and scale it
1001 // down to match the representation (bytes per type element) that VecV
1002 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1003 // in general) element and put them at the front of the resulting
1004 // vector. This subvector will then be inserted into the Q2V of VecV.
1005 // To avoid having an operation that generates an illegal type (short
1006 // vector), generate a full size vector.
1008 SDValue T
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, PredV
);
1009 SmallVector
<int,128> Mask(HwLen
);
1010 // Scale = BitBytes(PredV) / Given BitBytes.
1011 unsigned Scale
= HwLen
/ (PredTy
.getVectorNumElements() * BitBytes
);
1012 unsigned BlockLen
= PredTy
.getVectorNumElements() * BitBytes
;
1014 for (unsigned i
= 0; i
!= HwLen
; ++i
) {
1015 unsigned Num
= i
% Scale
;
1016 unsigned Off
= i
/ Scale
;
1017 Mask
[BlockLen
*Num
+ Off
] = i
;
1019 SDValue S
= DAG
.getVectorShuffle(ByteTy
, dl
, T
, DAG
.getUNDEF(ByteTy
), Mask
);
1022 // Fill the bytes beyond BlockLen with 0s.
1023 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1024 // when BlockLen < HwLen.
1025 assert(BlockLen
< HwLen
&& "vsetq(v1) prerequisite");
1026 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
1027 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
1028 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
1029 SDValue M
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, Q
);
1030 return DAG
.getNode(ISD::AND
, dl
, ByteTy
, S
, M
);
1033 // Make sure that this is a valid scalar predicate.
1034 assert(PredTy
== MVT::v2i1
|| PredTy
== MVT::v4i1
|| PredTy
== MVT::v8i1
);
1036 unsigned Bytes
= 8 / PredTy
.getVectorNumElements();
1037 SmallVector
<SDValue
,4> Words
[2];
1040 SDValue W0
= isUndef(PredV
)
1041 ? DAG
.getUNDEF(MVT::i64
)
1042 : DAG
.getNode(HexagonISD::P2D
, dl
, MVT::i64
, PredV
);
1043 Words
[IdxW
].push_back(HiHalf(W0
, DAG
));
1044 Words
[IdxW
].push_back(LoHalf(W0
, DAG
));
1046 while (Bytes
< BitBytes
) {
1048 Words
[IdxW
].clear();
1051 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
1052 SDValue T
= expandPredicate(W
, dl
, DAG
);
1053 Words
[IdxW
].push_back(HiHalf(T
, DAG
));
1054 Words
[IdxW
].push_back(LoHalf(T
, DAG
));
1057 for (const SDValue
&W
: Words
[IdxW
^ 1]) {
1058 Words
[IdxW
].push_back(W
);
1059 Words
[IdxW
].push_back(W
);
1065 assert(Bytes
== BitBytes
);
1067 SDValue Vec
= ZeroFill
? getZero(dl
, ByteTy
, DAG
) : DAG
.getUNDEF(ByteTy
);
1068 SDValue S4
= DAG
.getConstant(HwLen
-4, dl
, MVT::i32
);
1069 for (const SDValue
&W
: Words
[IdxW
]) {
1070 Vec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Vec
, S4
);
1071 Vec
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, ByteTy
, Vec
, W
);
1078 HexagonTargetLowering::buildHvxVectorPred(ArrayRef
<SDValue
> Values
,
1079 const SDLoc
&dl
, MVT VecTy
,
1080 SelectionDAG
&DAG
) const {
1081 // Construct a vector V of bytes, such that a comparison V >u 0 would
1082 // produce the required vector predicate.
1083 unsigned VecLen
= Values
.size();
1084 unsigned HwLen
= Subtarget
.getVectorLength();
1085 assert(VecLen
<= HwLen
|| VecLen
== 8*HwLen
);
1086 SmallVector
<SDValue
,128> Bytes
;
1087 bool AllT
= true, AllF
= true;
1089 auto IsTrue
= [] (SDValue V
) {
1090 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
1091 return !N
->isZero();
1094 auto IsFalse
= [] (SDValue V
) {
1095 if (const auto *N
= dyn_cast
<ConstantSDNode
>(V
.getNode()))
1100 if (VecLen
<= HwLen
) {
1101 // In the hardware, each bit of a vector predicate corresponds to a byte
1102 // of a vector register. Calculate how many bytes does a bit of VecTy
1104 assert(HwLen
% VecLen
== 0);
1105 unsigned BitBytes
= HwLen
/ VecLen
;
1106 for (SDValue V
: Values
) {
1110 SDValue Ext
= !V
.isUndef() ? DAG
.getZExtOrTrunc(V
, dl
, MVT::i8
)
1111 : DAG
.getUNDEF(MVT::i8
);
1112 for (unsigned B
= 0; B
!= BitBytes
; ++B
)
1113 Bytes
.push_back(Ext
);
1116 // There are as many i1 values, as there are bits in a vector register.
1117 // Divide the values into groups of 8 and check that each group consists
1118 // of the same value (ignoring undefs).
1119 for (unsigned I
= 0; I
!= VecLen
; I
+= 8) {
1121 // Find the first non-undef value in this group.
1122 for (; B
!= 8; ++B
) {
1123 if (!Values
[I
+B
].isUndef())
1126 SDValue F
= Values
[I
+B
];
1130 SDValue Ext
= (B
< 8) ? DAG
.getZExtOrTrunc(F
, dl
, MVT::i8
)
1131 : DAG
.getUNDEF(MVT::i8
);
1132 Bytes
.push_back(Ext
);
1133 // Verify that the rest of values in the group are the same as the
1136 assert(Values
[I
+B
].isUndef() || Values
[I
+B
] == F
);
1141 return DAG
.getNode(HexagonISD::QTRUE
, dl
, VecTy
);
1143 return DAG
.getNode(HexagonISD::QFALSE
, dl
, VecTy
);
1145 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1146 SDValue ByteVec
= buildHvxVectorReg(Bytes
, dl
, ByteTy
, DAG
);
1147 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
1151 HexagonTargetLowering::extractHvxElementReg(SDValue VecV
, SDValue IdxV
,
1152 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
1153 MVT ElemTy
= ty(VecV
).getVectorElementType();
1155 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1156 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
1159 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
1160 SDValue ExWord
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
1162 if (ElemTy
== MVT::i32
)
1165 // Have an extracted word, need to extract the smaller element out of it.
1166 // 1. Extract the bits of (the original) IdxV that correspond to the index
1167 // of the desired element in the 32-bit word.
1168 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
1169 // 2. Extract the element from the word.
1170 SDValue ExVec
= DAG
.getBitcast(tyVector(ty(ExWord
), ElemTy
), ExWord
);
1171 return extractVector(ExVec
, SubIdx
, dl
, ElemTy
, MVT::i32
, DAG
);
1175 HexagonTargetLowering::extractHvxElementPred(SDValue VecV
, SDValue IdxV
,
1176 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
1177 // Implement other return types if necessary.
1178 assert(ResTy
== MVT::i1
);
1180 unsigned HwLen
= Subtarget
.getVectorLength();
1181 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1182 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
1184 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
1185 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
1186 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
1188 SDValue ExtB
= extractHvxElementReg(ByteVec
, IdxV
, dl
, MVT::i32
, DAG
);
1189 SDValue Zero
= DAG
.getTargetConstant(0, dl
, MVT::i32
);
1190 return getInstr(Hexagon::C2_cmpgtui
, dl
, MVT::i1
, {ExtB
, Zero
}, DAG
);
1194 HexagonTargetLowering::insertHvxElementReg(SDValue VecV
, SDValue IdxV
,
1195 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1196 MVT ElemTy
= ty(VecV
).getVectorElementType();
1198 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1199 assert(ElemWidth
>= 8 && ElemWidth
<= 32);
1202 auto InsertWord
= [&DAG
,&dl
,this] (SDValue VecV
, SDValue ValV
,
1204 MVT VecTy
= ty(VecV
);
1205 unsigned HwLen
= Subtarget
.getVectorLength();
1207 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
1208 {ByteIdxV
, DAG
.getSignedConstant(-4, dl
, MVT::i32
)});
1209 SDValue RotV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {VecV
, MaskV
});
1210 SDValue InsV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, VecTy
, {RotV
, ValV
});
1211 SDValue SubV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
1212 {DAG
.getConstant(HwLen
, dl
, MVT::i32
), MaskV
});
1213 SDValue TorV
= DAG
.getNode(HexagonISD::VROR
, dl
, VecTy
, {InsV
, SubV
});
1217 SDValue ByteIdx
= convertToByteIndex(IdxV
, ElemTy
, DAG
);
1218 if (ElemTy
== MVT::i32
)
1219 return InsertWord(VecV
, ValV
, ByteIdx
);
1221 // If this is not inserting a 32-bit word, convert it into such a thing.
1222 // 1. Extract the existing word from the target vector.
1223 SDValue WordIdx
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
1224 {ByteIdx
, DAG
.getConstant(2, dl
, MVT::i32
)});
1225 SDValue Ext
= extractHvxElementReg(opCastElem(VecV
, MVT::i32
, DAG
), WordIdx
,
1228 // 2. Treating the extracted word as a 32-bit vector, insert the given
1230 SDValue SubIdx
= getIndexInWord32(IdxV
, ElemTy
, DAG
);
1231 MVT SubVecTy
= tyVector(ty(Ext
), ElemTy
);
1232 SDValue Ins
= insertVector(DAG
.getBitcast(SubVecTy
, Ext
),
1233 ValV
, SubIdx
, dl
, ElemTy
, DAG
);
1235 // 3. Insert the 32-bit word back into the original vector.
1236 return InsertWord(VecV
, Ins
, ByteIdx
);
1240 HexagonTargetLowering::insertHvxElementPred(SDValue VecV
, SDValue IdxV
,
1241 SDValue ValV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1242 unsigned HwLen
= Subtarget
.getVectorLength();
1243 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1244 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
1246 unsigned Scale
= HwLen
/ ty(VecV
).getVectorNumElements();
1247 SDValue ScV
= DAG
.getConstant(Scale
, dl
, MVT::i32
);
1248 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
, ScV
);
1249 ValV
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i32
, ValV
);
1251 SDValue InsV
= insertHvxElementReg(ByteVec
, IdxV
, ValV
, dl
, DAG
);
1252 return DAG
.getNode(HexagonISD::V2Q
, dl
, ty(VecV
), InsV
);
1256 HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp
, SDValue VecV
,
1257 SDValue IdxV
, const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
1258 MVT VecTy
= ty(VecV
);
1259 unsigned HwLen
= Subtarget
.getVectorLength();
1260 unsigned Idx
= IdxV
.getNode()->getAsZExtVal();
1261 MVT ElemTy
= VecTy
.getVectorElementType();
1262 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1264 // If the source vector is a vector pair, get the single vector containing
1265 // the subvector of interest. The subvector will never overlap two single
1267 if (isHvxPairTy(VecTy
)) {
1268 if (Idx
* ElemWidth
>= 8*HwLen
)
1269 Idx
-= VecTy
.getVectorNumElements() / 2;
1272 if (typeSplit(VecTy
).first
== ResTy
)
1276 // The only meaningful subvectors of a single HVX vector are those that
1277 // fit in a scalar register.
1278 assert(ResTy
.getSizeInBits() == 32 || ResTy
.getSizeInBits() == 64);
1280 MVT WordTy
= tyVector(VecTy
, MVT::i32
);
1281 SDValue WordVec
= DAG
.getBitcast(WordTy
, VecV
);
1282 unsigned WordIdx
= (Idx
*ElemWidth
) / 32;
1284 SDValue W0Idx
= DAG
.getConstant(WordIdx
, dl
, MVT::i32
);
1285 SDValue W0
= extractHvxElementReg(WordVec
, W0Idx
, dl
, MVT::i32
, DAG
);
1286 if (ResTy
.getSizeInBits() == 32)
1287 return DAG
.getBitcast(ResTy
, W0
);
1289 SDValue W1Idx
= DAG
.getConstant(WordIdx
+1, dl
, MVT::i32
);
1290 SDValue W1
= extractHvxElementReg(WordVec
, W1Idx
, dl
, MVT::i32
, DAG
);
1291 SDValue WW
= getCombine(W1
, W0
, dl
, MVT::i64
, DAG
);
1292 return DAG
.getBitcast(ResTy
, WW
);
1296 HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV
, SDValue IdxV
,
1297 const SDLoc
&dl
, MVT ResTy
, SelectionDAG
&DAG
) const {
1298 MVT VecTy
= ty(VecV
);
1299 unsigned HwLen
= Subtarget
.getVectorLength();
1300 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1301 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
1302 // IdxV is required to be a constant.
1303 unsigned Idx
= IdxV
.getNode()->getAsZExtVal();
1305 unsigned ResLen
= ResTy
.getVectorNumElements();
1306 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
1307 unsigned Offset
= Idx
* BitBytes
;
1308 SDValue Undef
= DAG
.getUNDEF(ByteTy
);
1309 SmallVector
<int,128> Mask
;
1311 if (Subtarget
.isHVXVectorType(ResTy
, true)) {
1312 // Converting between two vector predicates. Since the result is shorter
1313 // than the source, it will correspond to a vector predicate with the
1314 // relevant bits replicated. The replication count is the ratio of the
1315 // source and target vector lengths.
1316 unsigned Rep
= VecTy
.getVectorNumElements() / ResLen
;
1317 assert(isPowerOf2_32(Rep
) && HwLen
% Rep
== 0);
1318 for (unsigned i
= 0; i
!= HwLen
/Rep
; ++i
) {
1319 for (unsigned j
= 0; j
!= Rep
; ++j
)
1320 Mask
.push_back(i
+ Offset
);
1322 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
1323 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, ShuffV
);
1326 // Converting between a vector predicate and a scalar predicate. In the
1327 // vector predicate, a group of BitBytes bits will correspond to a single
1328 // i1 element of the source vector type. Those bits will all have the same
1329 // value. The same will be true for ByteVec, where each byte corresponds
1330 // to a bit in the vector predicate.
1331 // The algorithm is to traverse the ByteVec, going over the i1 values from
1332 // the source vector, and generate the corresponding representation in an
1333 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1334 // elements so that the interesting 8 bytes will be in the low end of the
1336 unsigned Rep
= 8 / ResLen
;
1337 // Make sure the output fill the entire vector register, so repeat the
1338 // 8-byte groups as many times as necessary.
1339 for (unsigned r
= 0; r
!= HwLen
/ResLen
; ++r
) {
1340 // This will generate the indexes of the 8 interesting bytes.
1341 for (unsigned i
= 0; i
!= ResLen
; ++i
) {
1342 for (unsigned j
= 0; j
!= Rep
; ++j
)
1343 Mask
.push_back(Offset
+ i
*BitBytes
);
1347 SDValue Zero
= getZero(dl
, MVT::i32
, DAG
);
1348 SDValue ShuffV
= DAG
.getVectorShuffle(ByteTy
, dl
, ByteVec
, Undef
, Mask
);
1349 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1351 SDValue W0
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
, {ShuffV
, Zero
});
1352 SDValue W1
= DAG
.getNode(HexagonISD::VEXTRACTW
, dl
, MVT::i32
,
1353 {ShuffV
, DAG
.getConstant(4, dl
, MVT::i32
)});
1354 SDValue Vec64
= getCombine(W1
, W0
, dl
, MVT::v8i8
, DAG
);
1355 return getInstr(Hexagon::A4_vcmpbgtui
, dl
, ResTy
,
1356 {Vec64
, DAG
.getTargetConstant(0, dl
, MVT::i32
)}, DAG
);
1360 HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV
, SDValue SubV
,
1361 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1362 MVT VecTy
= ty(VecV
);
1363 MVT SubTy
= ty(SubV
);
1364 unsigned HwLen
= Subtarget
.getVectorLength();
1365 MVT ElemTy
= VecTy
.getVectorElementType();
1366 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1368 bool IsPair
= isHvxPairTy(VecTy
);
1369 MVT SingleTy
= MVT::getVectorVT(ElemTy
, (8*HwLen
)/ElemWidth
);
1370 // The two single vectors that VecV consists of, if it's a pair.
1372 SDValue SingleV
= VecV
;
1376 V0
= LoHalf(VecV
, DAG
);
1377 V1
= HiHalf(VecV
, DAG
);
1379 SDValue HalfV
= DAG
.getConstant(SingleTy
.getVectorNumElements(),
1381 PickHi
= DAG
.getSetCC(dl
, MVT::i1
, IdxV
, HalfV
, ISD::SETUGT
);
1382 if (isHvxSingleTy(SubTy
)) {
1383 if (const auto *CN
= dyn_cast
<const ConstantSDNode
>(IdxV
.getNode())) {
1384 unsigned Idx
= CN
->getZExtValue();
1385 assert(Idx
== 0 || Idx
== VecTy
.getVectorNumElements()/2);
1386 unsigned SubIdx
= (Idx
== 0) ? Hexagon::vsub_lo
: Hexagon::vsub_hi
;
1387 return DAG
.getTargetInsertSubreg(SubIdx
, dl
, VecTy
, VecV
, SubV
);
1389 // If IdxV is not a constant, generate the two variants: with the
1390 // SubV as the high and as the low subregister, and select the right
1391 // pair based on the IdxV.
1392 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SubV
, V1
});
1393 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SubV
});
1394 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
1396 // The subvector being inserted must be entirely contained in one of
1397 // the vectors V0 or V1. Set SingleV to the correct one, and update
1398 // IdxV to be the index relative to the beginning of that vector.
1399 SDValue S
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, IdxV
, HalfV
);
1400 IdxV
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, PickHi
, S
, IdxV
);
1401 SingleV
= DAG
.getNode(ISD::SELECT
, dl
, SingleTy
, PickHi
, V1
, V0
);
1404 // The only meaningful subvectors of a single HVX vector are those that
1405 // fit in a scalar register.
1406 assert(SubTy
.getSizeInBits() == 32 || SubTy
.getSizeInBits() == 64);
1407 // Convert IdxV to be index in bytes.
1408 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
1409 if (!IdxN
|| !IdxN
->isZero()) {
1410 IdxV
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
1411 DAG
.getConstant(ElemWidth
/8, dl
, MVT::i32
));
1412 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, IdxV
);
1414 // When inserting a single word, the rotation back to the original position
1415 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1416 // by (HwLen-4)-Idx.
1417 unsigned RolBase
= HwLen
;
1418 if (SubTy
.getSizeInBits() == 32) {
1419 SDValue V
= DAG
.getBitcast(MVT::i32
, SubV
);
1420 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, V
);
1422 SDValue V
= DAG
.getBitcast(MVT::i64
, SubV
);
1423 SDValue R0
= LoHalf(V
, DAG
);
1424 SDValue R1
= HiHalf(V
, DAG
);
1425 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R0
);
1426 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
,
1427 DAG
.getConstant(4, dl
, MVT::i32
));
1428 SingleV
= DAG
.getNode(HexagonISD::VINSERTW0
, dl
, SingleTy
, SingleV
, R1
);
1431 // If the vector wasn't ror'ed, don't ror it back.
1432 if (RolBase
!= 4 || !IdxN
|| !IdxN
->isZero()) {
1433 SDValue RolV
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
1434 DAG
.getConstant(RolBase
, dl
, MVT::i32
), IdxV
);
1435 SingleV
= DAG
.getNode(HexagonISD::VROR
, dl
, SingleTy
, SingleV
, RolV
);
1439 SDValue InLo
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {SingleV
, V1
});
1440 SDValue InHi
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, {V0
, SingleV
});
1441 return DAG
.getNode(ISD::SELECT
, dl
, VecTy
, PickHi
, InHi
, InLo
);
1447 HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV
, SDValue SubV
,
1448 SDValue IdxV
, const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1449 MVT VecTy
= ty(VecV
);
1450 MVT SubTy
= ty(SubV
);
1451 assert(Subtarget
.isHVXVectorType(VecTy
, true));
1452 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1453 // predicate as well, or it can be a scalar predicate.
1455 unsigned VecLen
= VecTy
.getVectorNumElements();
1456 unsigned HwLen
= Subtarget
.getVectorLength();
1457 assert(HwLen
% VecLen
== 0 && "Unexpected vector type");
1459 unsigned Scale
= VecLen
/ SubTy
.getVectorNumElements();
1460 unsigned BitBytes
= HwLen
/ VecLen
;
1461 unsigned BlockLen
= HwLen
/ Scale
;
1463 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1464 SDValue ByteVec
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, VecV
);
1465 SDValue ByteSub
= createHvxPrefixPred(SubV
, dl
, BitBytes
, false, DAG
);
1468 auto *IdxN
= dyn_cast
<ConstantSDNode
>(IdxV
.getNode());
1469 if (!IdxN
|| !IdxN
->isZero()) {
1470 ByteIdx
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
, IdxV
,
1471 DAG
.getConstant(BitBytes
, dl
, MVT::i32
));
1472 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteIdx
);
1475 // ByteVec is the target vector VecV rotated in such a way that the
1476 // subvector should be inserted at index 0. Generate a predicate mask
1477 // and use vmux to do the insertion.
1478 assert(BlockLen
< HwLen
&& "vsetq(v1) prerequisite");
1479 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
1480 SDValue Q
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
1481 {DAG
.getConstant(BlockLen
, dl
, MVT::i32
)}, DAG
);
1482 ByteVec
= getInstr(Hexagon::V6_vmux
, dl
, ByteTy
, {Q
, ByteSub
, ByteVec
}, DAG
);
1483 // Rotate ByteVec back, and convert to a vector predicate.
1484 if (!IdxN
|| !IdxN
->isZero()) {
1485 SDValue HwLenV
= DAG
.getConstant(HwLen
, dl
, MVT::i32
);
1486 SDValue ByteXdi
= DAG
.getNode(ISD::SUB
, dl
, MVT::i32
, HwLenV
, ByteIdx
);
1487 ByteVec
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, ByteVec
, ByteXdi
);
1489 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, ByteVec
);
1493 HexagonTargetLowering::extendHvxVectorPred(SDValue VecV
, const SDLoc
&dl
,
1494 MVT ResTy
, bool ZeroExt
, SelectionDAG
&DAG
) const {
1495 // Sign- and any-extending of a vector predicate to a vector register is
1496 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1497 // a vector of 1s (where the 1s are of type matching the vector type).
1498 assert(Subtarget
.isHVXVectorType(ResTy
));
1500 return DAG
.getNode(HexagonISD::Q2V
, dl
, ResTy
, VecV
);
1502 assert(ty(VecV
).getVectorNumElements() == ResTy
.getVectorNumElements());
1503 SDValue True
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1504 DAG
.getConstant(1, dl
, MVT::i32
));
1505 SDValue False
= getZero(dl
, ResTy
, DAG
);
1506 return DAG
.getSelect(dl
, ResTy
, VecV
, True
, False
);
1510 HexagonTargetLowering::compressHvxPred(SDValue VecQ
, const SDLoc
&dl
,
1511 MVT ResTy
, SelectionDAG
&DAG
) const {
1512 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1513 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1514 // vector register. The remaining bits of the vector register are
1517 MachineFunction
&MF
= DAG
.getMachineFunction();
1518 unsigned HwLen
= Subtarget
.getVectorLength();
1519 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1520 MVT PredTy
= ty(VecQ
);
1521 unsigned PredLen
= PredTy
.getVectorNumElements();
1522 assert(HwLen
% PredLen
== 0);
1523 MVT VecTy
= MVT::getVectorVT(MVT::getIntegerVT(8*HwLen
/PredLen
), PredLen
);
1525 Type
*Int8Ty
= Type::getInt8Ty(*DAG
.getContext());
1526 SmallVector
<Constant
*, 128> Tmp
;
1527 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1528 // These are bytes with the LSB rotated left with respect to their index.
1529 for (unsigned i
= 0; i
!= HwLen
/8; ++i
) {
1530 for (unsigned j
= 0; j
!= 8; ++j
)
1531 Tmp
.push_back(ConstantInt::get(Int8Ty
, 1ull << j
));
1533 Constant
*CV
= ConstantVector::get(Tmp
);
1534 Align
Alignment(HwLen
);
1536 LowerConstantPool(DAG
.getConstantPool(CV
, ByteTy
, Alignment
), DAG
);
1538 DAG
.getLoad(ByteTy
, dl
, DAG
.getEntryNode(), CP
,
1539 MachinePointerInfo::getConstantPool(MF
), Alignment
);
1541 // Select the bytes that correspond to true bits in the vector predicate.
1542 SDValue Sel
= DAG
.getSelect(dl
, VecTy
, VecQ
, DAG
.getBitcast(VecTy
, Bytes
),
1543 getZero(dl
, VecTy
, DAG
));
1544 // Calculate the OR of all bytes in each group of 8. That will compress
1545 // all the individual bits into a single byte.
1546 // First, OR groups of 4, via vrmpy with 0x01010101.
1548 DAG
.getSplatBuildVector(MVT::v4i8
, dl
, DAG
.getConstant(1, dl
, MVT::i32
));
1549 SDValue Vrmpy
= getInstr(Hexagon::V6_vrmpyub
, dl
, ByteTy
, {Sel
, All1
}, DAG
);
1550 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1551 SDValue Rot
= getInstr(Hexagon::V6_valignbi
, dl
, ByteTy
,
1552 {Vrmpy
, Vrmpy
, DAG
.getTargetConstant(4, dl
, MVT::i32
)}, DAG
);
1553 SDValue Vor
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, {Vrmpy
, Rot
});
1555 // Pick every 8th byte and coalesce them at the beginning of the output.
1556 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1558 SmallVector
<int,128> Mask
;
1559 for (unsigned i
= 0; i
!= HwLen
; ++i
)
1560 Mask
.push_back((8*i
) % HwLen
+ i
/(HwLen
/8));
1562 DAG
.getVectorShuffle(ByteTy
, dl
, Vor
, DAG
.getUNDEF(ByteTy
), Mask
);
1563 return DAG
.getBitcast(ResTy
, Collect
);
1567 HexagonTargetLowering::resizeToWidth(SDValue VecV
, MVT ResTy
, bool Signed
,
1568 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
1569 // Take a vector and resize the element type to match the given type.
1570 MVT InpTy
= ty(VecV
);
1574 unsigned InpWidth
= InpTy
.getSizeInBits();
1575 unsigned ResWidth
= ResTy
.getSizeInBits();
1577 if (InpTy
.isFloatingPoint()) {
1578 return InpWidth
< ResWidth
1579 ? DAG
.getNode(ISD::FP_EXTEND
, dl
, ResTy
, VecV
)
1580 : DAG
.getNode(ISD::FP_ROUND
, dl
, ResTy
, VecV
,
1581 DAG
.getTargetConstant(0, dl
, MVT::i32
));
1584 assert(InpTy
.isInteger());
1586 if (InpWidth
< ResWidth
) {
1587 unsigned ExtOpc
= Signed
? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
1588 return DAG
.getNode(ExtOpc
, dl
, ResTy
, VecV
);
1590 unsigned NarOpc
= Signed
? HexagonISD::SSAT
: HexagonISD::USAT
;
1591 return DAG
.getNode(NarOpc
, dl
, ResTy
, VecV
, DAG
.getValueType(ResTy
));
1596 HexagonTargetLowering::extractSubvector(SDValue Vec
, MVT SubTy
, unsigned SubIdx
,
1597 SelectionDAG
&DAG
) const {
1598 assert(ty(Vec
).getSizeInBits() % SubTy
.getSizeInBits() == 0);
1600 const SDLoc
&dl(Vec
);
1601 unsigned ElemIdx
= SubIdx
* SubTy
.getVectorNumElements();
1602 return DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, SubTy
,
1603 {Vec
, DAG
.getConstant(ElemIdx
, dl
, MVT::i32
)});
1607 HexagonTargetLowering::LowerHvxBuildVector(SDValue Op
, SelectionDAG
&DAG
)
1609 const SDLoc
&dl(Op
);
1612 unsigned Size
= Op
.getNumOperands();
1613 SmallVector
<SDValue
,128> Ops
;
1614 for (unsigned i
= 0; i
!= Size
; ++i
)
1615 Ops
.push_back(Op
.getOperand(i
));
1617 // First, split the BUILD_VECTOR for vector pairs. We could generate
1618 // some pairs directly (via splat), but splats should be generated
1619 // by the combiner prior to getting here.
1620 if (VecTy
.getSizeInBits() == 16*Subtarget
.getVectorLength()) {
1621 ArrayRef
<SDValue
> A(Ops
);
1622 MVT SingleTy
= typeSplit(VecTy
).first
;
1623 SDValue V0
= buildHvxVectorReg(A
.take_front(Size
/2), dl
, SingleTy
, DAG
);
1624 SDValue V1
= buildHvxVectorReg(A
.drop_front(Size
/2), dl
, SingleTy
, DAG
);
1625 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, VecTy
, V0
, V1
);
1628 if (VecTy
.getVectorElementType() == MVT::i1
)
1629 return buildHvxVectorPred(Ops
, dl
, VecTy
, DAG
);
1631 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1632 // not a legal type, just bitcast the node to use i16
1633 // types and bitcast the result back to f16
1634 if (VecTy
.getVectorElementType() == MVT::f16
) {
1635 SmallVector
<SDValue
,64> NewOps
;
1636 for (unsigned i
= 0; i
!= Size
; i
++)
1637 NewOps
.push_back(DAG
.getBitcast(MVT::i16
, Ops
[i
]));
1639 SDValue T0
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
,
1640 tyVector(VecTy
, MVT::i16
), NewOps
);
1641 return DAG
.getBitcast(tyVector(VecTy
, MVT::f16
), T0
);
1644 return buildHvxVectorReg(Ops
, dl
, VecTy
, DAG
);
1648 HexagonTargetLowering::LowerHvxSplatVector(SDValue Op
, SelectionDAG
&DAG
)
1650 const SDLoc
&dl(Op
);
1652 MVT ArgTy
= ty(Op
.getOperand(0));
1654 if (ArgTy
== MVT::f16
) {
1655 MVT SplatTy
= MVT::getVectorVT(MVT::i16
, VecTy
.getVectorNumElements());
1656 SDValue ToInt16
= DAG
.getBitcast(MVT::i16
, Op
.getOperand(0));
1657 SDValue ToInt32
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, ToInt16
);
1658 SDValue Splat
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, SplatTy
, ToInt32
);
1659 return DAG
.getBitcast(VecTy
, Splat
);
1666 HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op
, SelectionDAG
&DAG
)
1668 // Vector concatenation of two integer (non-bool) vectors does not need
1669 // special lowering. Custom-lower concats of bool vectors and expand
1670 // concats of more than 2 vectors.
1672 const SDLoc
&dl(Op
);
1673 unsigned NumOp
= Op
.getNumOperands();
1674 if (VecTy
.getVectorElementType() != MVT::i1
) {
1677 // Expand the other cases into a build-vector.
1678 SmallVector
<SDValue
,8> Elems
;
1679 for (SDValue V
: Op
.getNode()->ops())
1680 DAG
.ExtractVectorElements(V
, Elems
);
1681 // A vector of i16 will be broken up into a build_vector of i16's.
1682 // This is a problem, since at the time of operation legalization,
1683 // all operations are expected to be type-legalized, and i16 is not
1684 // a legal type. If any of the extracted elements is not of a valid
1685 // type, sign-extend it to a valid one.
1686 for (unsigned i
= 0, e
= Elems
.size(); i
!= e
; ++i
) {
1687 SDValue V
= Elems
[i
];
1689 if (!isTypeLegal(Ty
)) {
1690 MVT NTy
= typeLegalize(Ty
, DAG
);
1691 if (V
.getOpcode() == ISD::EXTRACT_VECTOR_ELT
) {
1692 Elems
[i
] = DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, NTy
,
1693 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, NTy
,
1694 V
.getOperand(0), V
.getOperand(1)),
1695 DAG
.getValueType(Ty
));
1698 // A few less complicated cases.
1699 switch (V
.getOpcode()) {
1701 Elems
[i
] = DAG
.getSExtOrTrunc(V
, dl
, NTy
);
1704 Elems
[i
] = DAG
.getUNDEF(NTy
);
1707 Elems
[i
] = V
.getOperand(0);
1710 llvm_unreachable("Unexpected vector element");
1714 return DAG
.getBuildVector(VecTy
, dl
, Elems
);
1717 assert(VecTy
.getVectorElementType() == MVT::i1
);
1718 unsigned HwLen
= Subtarget
.getVectorLength();
1719 assert(isPowerOf2_32(NumOp
) && HwLen
% NumOp
== 0);
1721 SDValue Op0
= Op
.getOperand(0);
1723 // If the operands are HVX types (i.e. not scalar predicates), then
1724 // defer the concatenation, and create QCAT instead.
1725 if (Subtarget
.isHVXVectorType(ty(Op0
), true)) {
1727 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, Op0
, Op
.getOperand(1));
1729 ArrayRef
<SDUse
> U(Op
.getNode()->ops());
1730 SmallVector
<SDValue
, 4> SV(U
);
1731 ArrayRef
<SDValue
> Ops(SV
);
1733 MVT HalfTy
= typeSplit(VecTy
).first
;
1734 SDValue V0
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1735 Ops
.take_front(NumOp
/2));
1736 SDValue V1
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, HalfTy
,
1737 Ops
.take_back(NumOp
/2));
1738 return DAG
.getNode(HexagonISD::QCAT
, dl
, VecTy
, V0
, V1
);
1741 // Count how many bytes (in a vector register) each bit in VecTy
1743 unsigned BitBytes
= HwLen
/ VecTy
.getVectorNumElements();
1745 SmallVector
<SDValue
,8> Prefixes
;
1746 for (SDValue V
: Op
.getNode()->op_values()) {
1747 SDValue P
= createHvxPrefixPred(V
, dl
, BitBytes
, true, DAG
);
1748 Prefixes
.push_back(P
);
1751 unsigned InpLen
= ty(Op
.getOperand(0)).getVectorNumElements();
1752 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
1753 SDValue S
= DAG
.getConstant(HwLen
- InpLen
*BitBytes
, dl
, MVT::i32
);
1754 SDValue Res
= getZero(dl
, ByteTy
, DAG
);
1755 for (unsigned i
= 0, e
= Prefixes
.size(); i
!= e
; ++i
) {
1756 Res
= DAG
.getNode(HexagonISD::VROR
, dl
, ByteTy
, Res
, S
);
1757 Res
= DAG
.getNode(ISD::OR
, dl
, ByteTy
, Res
, Prefixes
[e
-i
-1]);
1759 return DAG
.getNode(HexagonISD::V2Q
, dl
, VecTy
, Res
);
1763 HexagonTargetLowering::LowerHvxExtractElement(SDValue Op
, SelectionDAG
&DAG
)
1765 // Change the type of the extracted element to i32.
1766 SDValue VecV
= Op
.getOperand(0);
1767 MVT ElemTy
= ty(VecV
).getVectorElementType();
1768 const SDLoc
&dl(Op
);
1769 SDValue IdxV
= Op
.getOperand(1);
1770 if (ElemTy
== MVT::i1
)
1771 return extractHvxElementPred(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1773 return extractHvxElementReg(VecV
, IdxV
, dl
, ty(Op
), DAG
);
1777 HexagonTargetLowering::LowerHvxInsertElement(SDValue Op
, SelectionDAG
&DAG
)
1779 const SDLoc
&dl(Op
);
1781 SDValue VecV
= Op
.getOperand(0);
1782 SDValue ValV
= Op
.getOperand(1);
1783 SDValue IdxV
= Op
.getOperand(2);
1784 MVT ElemTy
= ty(VecV
).getVectorElementType();
1785 if (ElemTy
== MVT::i1
)
1786 return insertHvxElementPred(VecV
, IdxV
, ValV
, dl
, DAG
);
1788 if (ElemTy
== MVT::f16
) {
1789 SDValue T0
= DAG
.getNode(ISD::INSERT_VECTOR_ELT
, dl
,
1790 tyVector(VecTy
, MVT::i16
),
1791 DAG
.getBitcast(tyVector(VecTy
, MVT::i16
), VecV
),
1792 DAG
.getBitcast(MVT::i16
, ValV
), IdxV
);
1793 return DAG
.getBitcast(tyVector(VecTy
, MVT::f16
), T0
);
1796 return insertHvxElementReg(VecV
, IdxV
, ValV
, dl
, DAG
);
1800 HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op
, SelectionDAG
&DAG
)
1802 SDValue SrcV
= Op
.getOperand(0);
1803 MVT SrcTy
= ty(SrcV
);
1805 SDValue IdxV
= Op
.getOperand(1);
1806 unsigned Idx
= IdxV
.getNode()->getAsZExtVal();
1807 assert(Idx
% DstTy
.getVectorNumElements() == 0);
1809 const SDLoc
&dl(Op
);
1811 MVT ElemTy
= SrcTy
.getVectorElementType();
1812 if (ElemTy
== MVT::i1
)
1813 return extractHvxSubvectorPred(SrcV
, IdxV
, dl
, DstTy
, DAG
);
1815 return extractHvxSubvectorReg(Op
, SrcV
, IdxV
, dl
, DstTy
, DAG
);
1819 HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op
, SelectionDAG
&DAG
)
1821 // Idx does not need to be a constant.
1822 SDValue VecV
= Op
.getOperand(0);
1823 SDValue ValV
= Op
.getOperand(1);
1824 SDValue IdxV
= Op
.getOperand(2);
1826 const SDLoc
&dl(Op
);
1827 MVT VecTy
= ty(VecV
);
1828 MVT ElemTy
= VecTy
.getVectorElementType();
1829 if (ElemTy
== MVT::i1
)
1830 return insertHvxSubvectorPred(VecV
, ValV
, IdxV
, dl
, DAG
);
1832 return insertHvxSubvectorReg(VecV
, ValV
, IdxV
, dl
, DAG
);
1836 HexagonTargetLowering::LowerHvxAnyExt(SDValue Op
, SelectionDAG
&DAG
) const {
1837 // Lower any-extends of boolean vectors to sign-extends, since they
1838 // translate directly to Q2V. Zero-extending could also be done equally
1839 // fast, but Q2V is used/recognized in more places.
1840 // For all other vectors, use zero-extend.
1842 SDValue InpV
= Op
.getOperand(0);
1843 MVT ElemTy
= ty(InpV
).getVectorElementType();
1844 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1845 return LowerHvxSignExt(Op
, DAG
);
1846 return DAG
.getNode(ISD::ZERO_EXTEND
, SDLoc(Op
), ResTy
, InpV
);
1850 HexagonTargetLowering::LowerHvxSignExt(SDValue Op
, SelectionDAG
&DAG
) const {
1852 SDValue InpV
= Op
.getOperand(0);
1853 MVT ElemTy
= ty(InpV
).getVectorElementType();
1854 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1855 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), false, DAG
);
1860 HexagonTargetLowering::LowerHvxZeroExt(SDValue Op
, SelectionDAG
&DAG
) const {
1862 SDValue InpV
= Op
.getOperand(0);
1863 MVT ElemTy
= ty(InpV
).getVectorElementType();
1864 if (ElemTy
== MVT::i1
&& Subtarget
.isHVXVectorType(ResTy
))
1865 return extendHvxVectorPred(InpV
, SDLoc(Op
), ty(Op
), true, DAG
);
1870 HexagonTargetLowering::LowerHvxCttz(SDValue Op
, SelectionDAG
&DAG
) const {
1871 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1872 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1873 const SDLoc
&dl(Op
);
1875 SDValue InpV
= Op
.getOperand(0);
1876 assert(ResTy
== ty(InpV
));
1878 // Calculate the vectors of 1 and bitwidth(x).
1879 MVT ElemTy
= ty(InpV
).getVectorElementType();
1880 unsigned ElemWidth
= ElemTy
.getSizeInBits();
1882 SDValue Vec1
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1883 DAG
.getConstant(1, dl
, MVT::i32
));
1884 SDValue VecW
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1885 DAG
.getConstant(ElemWidth
, dl
, MVT::i32
));
1886 SDValue VecN1
= DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ResTy
,
1887 DAG
.getAllOnesConstant(dl
, MVT::i32
));
1889 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1890 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1891 // it separately in custom combine or selection).
1892 SDValue A
= DAG
.getNode(ISD::AND
, dl
, ResTy
,
1893 {DAG
.getNode(ISD::XOR
, dl
, ResTy
, {InpV
, VecN1
}),
1894 DAG
.getNode(ISD::SUB
, dl
, ResTy
, {InpV
, Vec1
})});
1895 return DAG
.getNode(ISD::SUB
, dl
, ResTy
,
1896 {VecW
, DAG
.getNode(ISD::CTLZ
, dl
, ResTy
, A
)});
1900 HexagonTargetLowering::LowerHvxMulh(SDValue Op
, SelectionDAG
&DAG
) const {
1901 const SDLoc
&dl(Op
);
1903 assert(ResTy
.getVectorElementType() == MVT::i32
);
1905 SDValue Vs
= Op
.getOperand(0);
1906 SDValue Vt
= Op
.getOperand(1);
1908 SDVTList ResTys
= DAG
.getVTList(ResTy
, ResTy
);
1909 unsigned Opc
= Op
.getOpcode();
1911 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1912 if (Opc
== ISD::MULHU
)
1913 return DAG
.getNode(HexagonISD::UMUL_LOHI
, dl
, ResTys
, {Vs
, Vt
}).getValue(1);
1914 if (Opc
== ISD::MULHS
)
1915 return DAG
.getNode(HexagonISD::SMUL_LOHI
, dl
, ResTys
, {Vs
, Vt
}).getValue(1);
1920 llvm_unreachable("Unexpected mulh operation");
1924 HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op
, SelectionDAG
&DAG
) const {
1925 const SDLoc
&dl(Op
);
1926 unsigned Opc
= Op
.getOpcode();
1927 SDValue Vu
= Op
.getOperand(0);
1928 SDValue Vv
= Op
.getOperand(1);
1930 // If the HI part is not used, convert it to a regular MUL.
1931 if (auto HiVal
= Op
.getValue(1); HiVal
.use_empty()) {
1932 // Need to preserve the types and the number of values.
1933 SDValue Hi
= DAG
.getUNDEF(ty(HiVal
));
1934 SDValue Lo
= DAG
.getNode(ISD::MUL
, dl
, ty(Op
), {Vu
, Vv
});
1935 return DAG
.getMergeValues({Lo
, Hi
}, dl
);
1938 bool SignedVu
= Opc
== HexagonISD::SMUL_LOHI
;
1939 bool SignedVv
= Opc
== HexagonISD::SMUL_LOHI
|| Opc
== HexagonISD::USMUL_LOHI
;
1941 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1943 if (Subtarget
.useHVXV62Ops())
1944 return emitHvxMulLoHiV62(Vu
, SignedVu
, Vv
, SignedVv
, dl
, DAG
);
1946 if (Opc
== HexagonISD::SMUL_LOHI
) {
1947 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1948 // for other signedness LOHI is cheaper.
1949 if (auto LoVal
= Op
.getValue(0); LoVal
.use_empty()) {
1950 SDValue Hi
= emitHvxMulHsV60(Vu
, Vv
, dl
, DAG
);
1951 SDValue Lo
= DAG
.getUNDEF(ty(LoVal
));
1952 return DAG
.getMergeValues({Lo
, Hi
}, dl
);
1956 return emitHvxMulLoHiV60(Vu
, SignedVu
, Vv
, SignedVv
, dl
, DAG
);
1960 HexagonTargetLowering::LowerHvxBitcast(SDValue Op
, SelectionDAG
&DAG
) const {
1961 SDValue Val
= Op
.getOperand(0);
1963 MVT ValTy
= ty(Val
);
1964 const SDLoc
&dl(Op
);
1966 if (isHvxBoolTy(ValTy
) && ResTy
.isScalarInteger()) {
1967 unsigned HwLen
= Subtarget
.getVectorLength();
1968 MVT WordTy
= MVT::getVectorVT(MVT::i32
, HwLen
/4);
1969 SDValue VQ
= compressHvxPred(Val
, dl
, WordTy
, DAG
);
1970 unsigned BitWidth
= ResTy
.getSizeInBits();
1972 if (BitWidth
< 64) {
1973 SDValue W0
= extractHvxElementReg(VQ
, DAG
.getConstant(0, dl
, MVT::i32
),
1977 assert(BitWidth
< 32u);
1978 return DAG
.getZExtOrTrunc(W0
, dl
, ResTy
);
1981 // The result is >= 64 bits. The only options are 64 or 128.
1982 assert(BitWidth
== 64 || BitWidth
== 128);
1983 SmallVector
<SDValue
,4> Words
;
1984 for (unsigned i
= 0; i
!= BitWidth
/32; ++i
) {
1985 SDValue W
= extractHvxElementReg(
1986 VQ
, DAG
.getConstant(i
, dl
, MVT::i32
), dl
, MVT::i32
, DAG
);
1989 SmallVector
<SDValue
,2> Combines
;
1990 assert(Words
.size() % 2 == 0);
1991 for (unsigned i
= 0, e
= Words
.size(); i
< e
; i
+= 2) {
1992 SDValue C
= getCombine(Words
[i
+1], Words
[i
], dl
, MVT::i64
, DAG
);
1993 Combines
.push_back(C
);
1999 return DAG
.getNode(ISD::BUILD_PAIR
, dl
, ResTy
, Combines
);
2001 if (isHvxBoolTy(ResTy
) && ValTy
.isScalarInteger()) {
2002 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2003 unsigned BitWidth
= ValTy
.getSizeInBits();
2004 unsigned HwLen
= Subtarget
.getVectorLength();
2005 assert(BitWidth
== HwLen
);
2007 MVT ValAsVecTy
= MVT::getVectorVT(MVT::i8
, BitWidth
/ 8);
2008 SDValue ValAsVec
= DAG
.getBitcast(ValAsVecTy
, Val
);
2009 // Splat each byte of Val 8 times.
2010 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2011 // where b0, b1,..., b15 are least to most significant bytes of I.
2012 SmallVector
<SDValue
, 128> Bytes
;
2013 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2014 // These are bytes with the LSB rotated left with respect to their index.
2015 SmallVector
<SDValue
, 128> Tmp
;
2016 for (unsigned I
= 0; I
!= HwLen
/ 8; ++I
) {
2017 SDValue Idx
= DAG
.getConstant(I
, dl
, MVT::i32
);
2019 DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::i8
, ValAsVec
, Idx
);
2020 for (unsigned J
= 0; J
!= 8; ++J
) {
2021 Bytes
.push_back(Byte
);
2022 Tmp
.push_back(DAG
.getConstant(1ull << J
, dl
, MVT::i8
));
2026 MVT ConstantVecTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
2027 SDValue ConstantVec
= DAG
.getBuildVector(ConstantVecTy
, dl
, Tmp
);
2028 SDValue I2V
= buildHvxVectorReg(Bytes
, dl
, ConstantVecTy
, DAG
);
2030 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2031 I2V
= DAG
.getNode(ISD::AND
, dl
, ConstantVecTy
, {I2V
, ConstantVec
});
2032 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, I2V
);
2039 HexagonTargetLowering::LowerHvxExtend(SDValue Op
, SelectionDAG
&DAG
) const {
2040 // Sign- and zero-extends are legal.
2041 assert(Op
.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
);
2042 return DAG
.getNode(ISD::ZERO_EXTEND_VECTOR_INREG
, SDLoc(Op
), ty(Op
),
2047 HexagonTargetLowering::LowerHvxSelect(SDValue Op
, SelectionDAG
&DAG
) const {
2049 if (ResTy
.getVectorElementType() != MVT::i1
)
2052 const SDLoc
&dl(Op
);
2053 unsigned HwLen
= Subtarget
.getVectorLength();
2054 unsigned VecLen
= ResTy
.getVectorNumElements();
2055 assert(HwLen
% VecLen
== 0);
2056 unsigned ElemSize
= HwLen
/ VecLen
;
2058 MVT VecTy
= MVT::getVectorVT(MVT::getIntegerVT(ElemSize
* 8), VecLen
);
2060 DAG
.getNode(ISD::SELECT
, dl
, VecTy
, Op
.getOperand(0),
2061 DAG
.getNode(HexagonISD::Q2V
, dl
, VecTy
, Op
.getOperand(1)),
2062 DAG
.getNode(HexagonISD::Q2V
, dl
, VecTy
, Op
.getOperand(2)));
2063 return DAG
.getNode(HexagonISD::V2Q
, dl
, ResTy
, S
);
2067 HexagonTargetLowering::LowerHvxShift(SDValue Op
, SelectionDAG
&DAG
) const {
2068 if (SDValue S
= getVectorShiftByInt(Op
, DAG
))
2074 HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op
,
2075 SelectionDAG
&DAG
) const {
2076 unsigned Opc
= Op
.getOpcode();
2077 assert(Opc
== ISD::FSHL
|| Opc
== ISD::FSHR
);
2079 // Make sure the shift amount is within the range of the bitwidth
2080 // of the element type.
2081 SDValue A
= Op
.getOperand(0);
2082 SDValue B
= Op
.getOperand(1);
2083 SDValue S
= Op
.getOperand(2);
2086 MVT ElemTy
= InpTy
.getVectorElementType();
2088 const SDLoc
&dl(Op
);
2089 unsigned ElemWidth
= ElemTy
.getSizeInBits();
2090 bool IsLeft
= Opc
== ISD::FSHL
;
2092 // The expansion into regular shifts produces worse code for i8 and for
2093 // right shift of i32 on v65+.
2094 bool UseShifts
= ElemTy
!= MVT::i8
;
2095 if (Subtarget
.useHVXV65Ops() && ElemTy
== MVT::i32
)
2098 if (SDValue SplatV
= getSplatValue(S
, DAG
); SplatV
&& UseShifts
) {
2099 // If this is a funnel shift by a scalar, lower it into regular shifts.
2100 SDValue Mask
= DAG
.getConstant(ElemWidth
- 1, dl
, MVT::i32
);
2102 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
2103 {DAG
.getZExtOrTrunc(SplatV
, dl
, MVT::i32
), Mask
});
2105 DAG
.getNode(ISD::SUB
, dl
, MVT::i32
,
2106 {DAG
.getConstant(ElemWidth
, dl
, MVT::i32
), ModS
});
2108 DAG
.getSetCC(dl
, MVT::i1
, ModS
, getZero(dl
, MVT::i32
, DAG
), ISD::SETEQ
);
2109 // FSHL A, B => A << | B >>n
2110 // FSHR A, B => A <<n | B >>
2112 DAG
.getNode(HexagonISD::VASL
, dl
, InpTy
, {A
, IsLeft
? ModS
: NegS
});
2114 DAG
.getNode(HexagonISD::VLSR
, dl
, InpTy
, {B
, IsLeft
? NegS
: ModS
});
2115 SDValue Or
= DAG
.getNode(ISD::OR
, dl
, InpTy
, {Part1
, Part2
});
2116 // If the shift amount was 0, pick A or B, depending on the direction.
2117 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2118 return DAG
.getNode(ISD::SELECT
, dl
, InpTy
, {IsZero
, (IsLeft
? A
: B
), Or
});
2121 SDValue Mask
= DAG
.getSplatBuildVector(
2122 InpTy
, dl
, DAG
.getConstant(ElemWidth
- 1, dl
, ElemTy
));
2124 unsigned MOpc
= Opc
== ISD::FSHL
? HexagonISD::MFSHL
: HexagonISD::MFSHR
;
2125 return DAG
.getNode(MOpc
, dl
, ty(Op
),
2126 {A
, B
, DAG
.getNode(ISD::AND
, dl
, InpTy
, {S
, Mask
})});
2130 HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op
, SelectionDAG
&DAG
) const {
2131 const SDLoc
&dl(Op
);
2132 unsigned IntNo
= Op
.getConstantOperandVal(0);
2133 SmallVector
<SDValue
> Ops(Op
->ops());
2135 auto Swap
= [&](SDValue P
) {
2136 return DAG
.getMergeValues({P
.getValue(1), P
.getValue(0)}, dl
);
2140 case Intrinsic::hexagon_V6_pred_typecast
:
2141 case Intrinsic::hexagon_V6_pred_typecast_128B
: {
2142 MVT ResTy
= ty(Op
), InpTy
= ty(Ops
[1]);
2143 if (isHvxBoolTy(ResTy
) && isHvxBoolTy(InpTy
)) {
2146 return DAG
.getNode(HexagonISD::TYPECAST
, dl
, ResTy
, Ops
[1]);
2150 case Intrinsic::hexagon_V6_vmpyss_parts
:
2151 case Intrinsic::hexagon_V6_vmpyss_parts_128B
:
2152 return Swap(DAG
.getNode(HexagonISD::SMUL_LOHI
, dl
, Op
->getVTList(),
2154 case Intrinsic::hexagon_V6_vmpyuu_parts
:
2155 case Intrinsic::hexagon_V6_vmpyuu_parts_128B
:
2156 return Swap(DAG
.getNode(HexagonISD::UMUL_LOHI
, dl
, Op
->getVTList(),
2158 case Intrinsic::hexagon_V6_vmpyus_parts
:
2159 case Intrinsic::hexagon_V6_vmpyus_parts_128B
: {
2160 return Swap(DAG
.getNode(HexagonISD::USMUL_LOHI
, dl
, Op
->getVTList(),
2169 HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op
, SelectionDAG
&DAG
) const {
2170 const SDLoc
&dl(Op
);
2171 unsigned HwLen
= Subtarget
.getVectorLength();
2172 MachineFunction
&MF
= DAG
.getMachineFunction();
2173 auto *MaskN
= cast
<MaskedLoadStoreSDNode
>(Op
.getNode());
2174 SDValue Mask
= MaskN
->getMask();
2175 SDValue Chain
= MaskN
->getChain();
2176 SDValue Base
= MaskN
->getBasePtr();
2177 auto *MemOp
= MF
.getMachineMemOperand(MaskN
->getMemOperand(), 0, HwLen
);
2179 unsigned Opc
= Op
->getOpcode();
2180 assert(Opc
== ISD::MLOAD
|| Opc
== ISD::MSTORE
);
2182 if (Opc
== ISD::MLOAD
) {
2184 SDValue Load
= DAG
.getLoad(ValTy
, dl
, Chain
, Base
, MemOp
);
2185 SDValue Thru
= cast
<MaskedLoadSDNode
>(MaskN
)->getPassThru();
2188 SDValue VSel
= DAG
.getNode(ISD::VSELECT
, dl
, ValTy
, Mask
, Load
, Thru
);
2189 return DAG
.getMergeValues({VSel
, Load
.getValue(1)}, dl
);
2193 // HVX only has aligned masked stores.
2195 // TODO: Fold negations of the mask into the store.
2196 unsigned StoreOpc
= Hexagon::V6_vS32b_qpred_ai
;
2197 SDValue Value
= cast
<MaskedStoreSDNode
>(MaskN
)->getValue();
2198 SDValue Offset0
= DAG
.getTargetConstant(0, dl
, ty(Base
));
2200 if (MaskN
->getAlign().value() % HwLen
== 0) {
2201 SDValue Store
= getInstr(StoreOpc
, dl
, MVT::Other
,
2202 {Mask
, Base
, Offset0
, Value
, Chain
}, DAG
);
2203 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(Store
.getNode()), {MemOp
});
2208 auto StoreAlign
= [&](SDValue V
, SDValue A
) {
2209 SDValue Z
= getZero(dl
, ty(V
), DAG
);
2210 // TODO: use funnel shifts?
2211 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2213 SDValue LoV
= getInstr(Hexagon::V6_vlalignb
, dl
, ty(V
), {V
, Z
, A
}, DAG
);
2214 SDValue HiV
= getInstr(Hexagon::V6_vlalignb
, dl
, ty(V
), {Z
, V
, A
}, DAG
);
2215 return std::make_pair(LoV
, HiV
);
2218 MVT ByteTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
2219 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
2220 SDValue MaskV
= DAG
.getNode(HexagonISD::Q2V
, dl
, ByteTy
, Mask
);
2221 VectorPair Tmp
= StoreAlign(MaskV
, Base
);
2222 VectorPair MaskU
= {DAG
.getNode(HexagonISD::V2Q
, dl
, BoolTy
, Tmp
.first
),
2223 DAG
.getNode(HexagonISD::V2Q
, dl
, BoolTy
, Tmp
.second
)};
2224 VectorPair ValueU
= StoreAlign(Value
, Base
);
2226 SDValue Offset1
= DAG
.getTargetConstant(HwLen
, dl
, MVT::i32
);
2228 getInstr(StoreOpc
, dl
, MVT::Other
,
2229 {MaskU
.first
, Base
, Offset0
, ValueU
.first
, Chain
}, DAG
);
2231 getInstr(StoreOpc
, dl
, MVT::Other
,
2232 {MaskU
.second
, Base
, Offset1
, ValueU
.second
, Chain
}, DAG
);
2233 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(StoreLo
.getNode()), {MemOp
});
2234 DAG
.setNodeMemRefs(cast
<MachineSDNode
>(StoreHi
.getNode()), {MemOp
});
2235 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, {StoreLo
, StoreHi
});
2238 SDValue
HexagonTargetLowering::LowerHvxFpExtend(SDValue Op
,
2239 SelectionDAG
&DAG
) const {
2240 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2241 // is legal (done via a pattern).
2242 assert(Subtarget
.useHVXQFloatOps());
2244 assert(Op
->getOpcode() == ISD::FP_EXTEND
);
2247 MVT ArgTy
= ty(Op
.getOperand(0));
2248 const SDLoc
&dl(Op
);
2249 assert(VecTy
== MVT::v64f32
&& ArgTy
== MVT::v64f16
);
2251 SDValue F16Vec
= Op
.getOperand(0);
2253 APFloat FloatVal
= APFloat(1.0f
);
2255 FloatVal
.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven
, &Ignored
);
2256 SDValue Fp16Ones
= DAG
.getConstantFP(FloatVal
, dl
, ArgTy
);
2258 getInstr(Hexagon::V6_vmpy_qf32_hf
, dl
, VecTy
, {F16Vec
, Fp16Ones
}, DAG
);
2260 MVT HalfTy
= typeSplit(VecTy
).first
;
2261 VectorPair Pair
= opSplit(VmpyVec
, dl
, DAG
);
2263 getInstr(Hexagon::V6_vconv_sf_qf32
, dl
, HalfTy
, {Pair
.first
}, DAG
);
2265 getInstr(Hexagon::V6_vconv_sf_qf32
, dl
, HalfTy
, {Pair
.second
}, DAG
);
2268 getInstr(Hexagon::V6_vshuffvdd
, dl
, VecTy
,
2269 {HiVec
, LoVec
, DAG
.getSignedConstant(-4, dl
, MVT::i32
)}, DAG
);
2275 HexagonTargetLowering::LowerHvxFpToInt(SDValue Op
, SelectionDAG
&DAG
) const {
2276 // Catch invalid conversion ops (just in case).
2277 assert(Op
.getOpcode() == ISD::FP_TO_SINT
||
2278 Op
.getOpcode() == ISD::FP_TO_UINT
);
2281 MVT FpTy
= ty(Op
.getOperand(0)).getVectorElementType();
2282 MVT IntTy
= ResTy
.getVectorElementType();
2284 if (Subtarget
.useHVXIEEEFPOps()) {
2285 // There are only conversions from f16.
2286 if (FpTy
== MVT::f16
) {
2287 // Other int types aren't legal in HVX, so we shouldn't see them here.
2288 assert(IntTy
== MVT::i8
|| IntTy
== MVT::i16
|| IntTy
== MVT::i32
);
2289 // Conversions to i8 and i16 are legal.
2290 if (IntTy
== MVT::i8
|| IntTy
== MVT::i16
)
2295 if (IntTy
.getSizeInBits() != FpTy
.getSizeInBits())
2296 return EqualizeFpIntConversion(Op
, DAG
);
2298 return ExpandHvxFpToInt(Op
, DAG
);
2302 HexagonTargetLowering::LowerHvxIntToFp(SDValue Op
, SelectionDAG
&DAG
) const {
2303 // Catch invalid conversion ops (just in case).
2304 assert(Op
.getOpcode() == ISD::SINT_TO_FP
||
2305 Op
.getOpcode() == ISD::UINT_TO_FP
);
2308 MVT IntTy
= ty(Op
.getOperand(0)).getVectorElementType();
2309 MVT FpTy
= ResTy
.getVectorElementType();
2311 if (Subtarget
.useHVXIEEEFPOps()) {
2312 // There are only conversions to f16.
2313 if (FpTy
== MVT::f16
) {
2314 // Other int types aren't legal in HVX, so we shouldn't see them here.
2315 assert(IntTy
== MVT::i8
|| IntTy
== MVT::i16
|| IntTy
== MVT::i32
);
2316 // i8, i16 -> f16 is legal.
2317 if (IntTy
== MVT::i8
|| IntTy
== MVT::i16
)
2322 if (IntTy
.getSizeInBits() != FpTy
.getSizeInBits())
2323 return EqualizeFpIntConversion(Op
, DAG
);
2325 return ExpandHvxIntToFp(Op
, DAG
);
2328 HexagonTargetLowering::TypePair
2329 HexagonTargetLowering::typeExtendToWider(MVT Ty0
, MVT Ty1
) const {
2330 // Compare the widths of elements of the two types, and extend the narrower
2331 // type to match the with of the wider type. For vector types, apply this
2332 // to the element type.
2333 assert(Ty0
.isVector() == Ty1
.isVector());
2335 MVT ElemTy0
= Ty0
.getScalarType();
2336 MVT ElemTy1
= Ty1
.getScalarType();
2338 unsigned Width0
= ElemTy0
.getSizeInBits();
2339 unsigned Width1
= ElemTy1
.getSizeInBits();
2340 unsigned MaxWidth
= std::max(Width0
, Width1
);
2342 auto getScalarWithWidth
= [](MVT ScalarTy
, unsigned Width
) {
2343 if (ScalarTy
.isInteger())
2344 return MVT::getIntegerVT(Width
);
2345 assert(ScalarTy
.isFloatingPoint());
2346 return MVT::getFloatingPointVT(Width
);
2349 MVT WideETy0
= getScalarWithWidth(ElemTy0
, MaxWidth
);
2350 MVT WideETy1
= getScalarWithWidth(ElemTy1
, MaxWidth
);
2352 if (!Ty0
.isVector()) {
2353 // Both types are scalars.
2354 return {WideETy0
, WideETy1
};
2358 unsigned NumElem
= Ty0
.getVectorNumElements();
2359 assert(NumElem
== Ty1
.getVectorNumElements());
2361 return {MVT::getVectorVT(WideETy0
, NumElem
),
2362 MVT::getVectorVT(WideETy1
, NumElem
)};
2365 HexagonTargetLowering::TypePair
2366 HexagonTargetLowering::typeWidenToWider(MVT Ty0
, MVT Ty1
) const {
2367 // Compare the numbers of elements of two vector types, and widen the
2368 // narrower one to match the number of elements in the wider one.
2369 assert(Ty0
.isVector() && Ty1
.isVector());
2371 unsigned Len0
= Ty0
.getVectorNumElements();
2372 unsigned Len1
= Ty1
.getVectorNumElements();
2376 unsigned MaxLen
= std::max(Len0
, Len1
);
2377 return {MVT::getVectorVT(Ty0
.getVectorElementType(), MaxLen
),
2378 MVT::getVectorVT(Ty1
.getVectorElementType(), MaxLen
)};
2382 HexagonTargetLowering::typeLegalize(MVT Ty
, SelectionDAG
&DAG
) const {
2383 EVT LegalTy
= getTypeToTransformTo(*DAG
.getContext(), Ty
);
2384 assert(LegalTy
.isSimple());
2385 return LegalTy
.getSimpleVT();
2389 HexagonTargetLowering::typeWidenToHvx(MVT Ty
) const {
2390 unsigned HwWidth
= 8 * Subtarget
.getVectorLength();
2391 assert(Ty
.getSizeInBits() <= HwWidth
);
2392 if (Ty
.getSizeInBits() == HwWidth
)
2395 MVT ElemTy
= Ty
.getScalarType();
2396 return MVT::getVectorVT(ElemTy
, HwWidth
/ ElemTy
.getSizeInBits());
2399 HexagonTargetLowering::VectorPair
2400 HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A
, SDValue B
,
2401 const SDLoc
&dl
, bool Signed
, SelectionDAG
&DAG
) const {
2402 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2403 // whether an overflow has occured.
2405 assert(ResTy
== ty(B
));
2406 MVT PredTy
= MVT::getVectorVT(MVT::i1
, ResTy
.getVectorNumElements());
2409 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2410 // save any instructions.
2411 SDValue Add
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {A
, B
});
2412 SDValue Ovf
= DAG
.getSetCC(dl
, PredTy
, Add
, A
, ISD::SETULT
);
2416 // Signed overflow has happened, if:
2417 // (A, B have the same sign) and (A+B has a different sign from either)
2418 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2419 SDValue Add
= DAG
.getNode(ISD::ADD
, dl
, ResTy
, {A
, B
});
2421 DAG
.getNode(ISD::XOR
, dl
, ResTy
, {A
, DAG
.getAllOnesConstant(dl
, ResTy
)});
2422 SDValue Xor0
= DAG
.getNode(ISD::XOR
, dl
, ResTy
, {NotA
, B
});
2423 SDValue Xor1
= DAG
.getNode(ISD::XOR
, dl
, ResTy
, {Add
, B
});
2424 SDValue And
= DAG
.getNode(ISD::AND
, dl
, ResTy
, {Xor0
, Xor1
});
2426 DAG
.getSetCC(dl
, PredTy
, And
, getZero(dl
, ResTy
, DAG
), ISD::SETLT
);
2430 HexagonTargetLowering::VectorPair
2431 HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val
, unsigned Amt
,
2432 bool Signed
, SelectionDAG
&DAG
) const {
2433 // Shift Val right by Amt bits, round the result to the nearest integer,
2434 // tie-break by rounding halves to even integer.
2436 const SDLoc
&dl(Val
);
2437 MVT ValTy
= ty(Val
);
2439 // This should also work for signed integers.
2441 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2442 // bool ovf = (inp > tmp0);
2443 // uint rup = inp & (1 << (Amt+1));
2445 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2446 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2447 // uint tmp3 = tmp2 + rup;
2448 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2449 unsigned ElemWidth
= ValTy
.getVectorElementType().getSizeInBits();
2450 MVT ElemTy
= MVT::getIntegerVT(ElemWidth
);
2451 MVT IntTy
= tyVector(ValTy
, ElemTy
);
2452 MVT PredTy
= MVT::getVectorVT(MVT::i1
, IntTy
.getVectorNumElements());
2453 unsigned ShRight
= Signed
? ISD::SRA
: ISD::SRL
;
2455 SDValue Inp
= DAG
.getBitcast(IntTy
, Val
);
2456 SDValue LowBits
= DAG
.getConstant((1ull << (Amt
- 1)) - 1, dl
, IntTy
);
2458 SDValue AmtP1
= DAG
.getConstant(1ull << Amt
, dl
, IntTy
);
2459 SDValue And
= DAG
.getNode(ISD::AND
, dl
, IntTy
, {Inp
, AmtP1
});
2460 SDValue Zero
= getZero(dl
, IntTy
, DAG
);
2461 SDValue Bit
= DAG
.getSetCC(dl
, PredTy
, And
, Zero
, ISD::SETNE
);
2462 SDValue Rup
= DAG
.getZExtOrTrunc(Bit
, dl
, IntTy
);
2463 auto [Tmp0
, Ovf
] = emitHvxAddWithOverflow(Inp
, LowBits
, dl
, Signed
, DAG
);
2465 SDValue AmtM1
= DAG
.getConstant(Amt
- 1, dl
, IntTy
);
2466 SDValue Tmp1
= DAG
.getNode(ShRight
, dl
, IntTy
, Inp
, AmtM1
);
2467 SDValue Tmp2
= DAG
.getNode(ShRight
, dl
, IntTy
, Tmp0
, AmtM1
);
2468 SDValue Tmp3
= DAG
.getNode(ISD::ADD
, dl
, IntTy
, Tmp2
, Rup
);
2470 SDValue Eq
= DAG
.getSetCC(dl
, PredTy
, Tmp1
, Tmp2
, ISD::SETEQ
);
2471 SDValue One
= DAG
.getConstant(1, dl
, IntTy
);
2472 SDValue Tmp4
= DAG
.getNode(ShRight
, dl
, IntTy
, {Tmp2
, One
});
2473 SDValue Tmp5
= DAG
.getNode(ShRight
, dl
, IntTy
, {Tmp3
, One
});
2474 SDValue Mux
= DAG
.getNode(ISD::VSELECT
, dl
, IntTy
, {Eq
, Tmp5
, Tmp4
});
2479 HexagonTargetLowering::emitHvxMulHsV60(SDValue A
, SDValue B
, const SDLoc
&dl
,
2480 SelectionDAG
&DAG
) const {
2482 MVT PairTy
= typeJoin({VecTy
, VecTy
});
2483 assert(VecTy
.getVectorElementType() == MVT::i32
);
2485 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
2488 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2489 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2490 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2491 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2492 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2493 // anything, so it cannot produce any carry over to higher bits),
2494 // so everything in [] can be shifted by 16 without loss of precision.
2495 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2496 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2497 // The final additions need to make sure to properly maintain any carry-
2503 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2504 // Hi(B)*Lo(A) | + dropping the low 16 bits
2508 SDValue T0
= getInstr(Hexagon::V6_vmpyewuh
, dl
, VecTy
, {B
, A
}, DAG
);
2509 // T1 = get Hi(A) into low halves.
2510 SDValue T1
= getInstr(Hexagon::V6_vasrw
, dl
, VecTy
, {A
, S16
}, DAG
);
2511 // P0 = interleaved T1.h*B.uh (full precision product)
2512 SDValue P0
= getInstr(Hexagon::V6_vmpyhus
, dl
, PairTy
, {T1
, B
}, DAG
);
2513 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2514 SDValue T2
= LoHalf(P0
, DAG
);
2515 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2516 // added to the final sum.
2517 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2518 SDValue P1
= getInstr(Hexagon::V6_vadduhw
, dl
, PairTy
, {T0
, T2
}, DAG
);
2519 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2520 SDValue P2
= getInstr(Hexagon::V6_vaddhw
, dl
, PairTy
, {T0
, T2
}, DAG
);
2521 // T3 = full-precision(T0+T2) >> 16
2522 // The low halves are added-unsigned, the high ones are added-signed.
2523 SDValue T3
= getInstr(Hexagon::V6_vasrw_acc
, dl
, VecTy
,
2524 {HiHalf(P2
, DAG
), LoHalf(P1
, DAG
), S16
}, DAG
);
2525 SDValue T4
= getInstr(Hexagon::V6_vasrw
, dl
, VecTy
, {B
, S16
}, DAG
);
2526 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2527 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2528 SDValue P3
= getInstr(Hexagon::V6_vmpyhv
, dl
, PairTy
, {T1
, T4
}, DAG
);
2529 SDValue T5
= LoHalf(P3
, DAG
);
2531 SDValue T6
= DAG
.getNode(ISD::ADD
, dl
, VecTy
, {T3
, T5
});
2536 HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A
, bool SignedA
, SDValue B
,
2537 bool SignedB
, const SDLoc
&dl
,
2538 SelectionDAG
&DAG
) const {
2540 MVT PairTy
= typeJoin({VecTy
, VecTy
});
2541 assert(VecTy
.getVectorElementType() == MVT::i32
);
2543 SDValue S16
= DAG
.getConstant(16, dl
, MVT::i32
);
2545 if (SignedA
&& !SignedB
) {
2546 // Make A:unsigned, B:signed.
2548 std::swap(SignedA
, SignedB
);
2551 // Do halfword-wise multiplications for unsigned*unsigned product, then
2552 // add corrections for signed and unsigned*signed.
2556 // P0:lo = (uu) products of low halves of A and B,
2557 // P0:hi = (uu) products of high halves.
2558 SDValue P0
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {A
, B
}, DAG
);
2560 // Swap low/high halves in B
2561 SDValue T0
= getInstr(Hexagon::V6_lvsplatw
, dl
, VecTy
,
2562 {DAG
.getConstant(0x02020202, dl
, MVT::i32
)}, DAG
);
2563 SDValue T1
= getInstr(Hexagon::V6_vdelta
, dl
, VecTy
, {B
, T0
}, DAG
);
2564 // P1 = products of even/odd halfwords.
2565 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2566 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2567 SDValue P1
= getInstr(Hexagon::V6_vmpyuhv
, dl
, PairTy
, {A
, T1
}, DAG
);
2569 // P2:lo = low halves of P1:lo + P1:hi,
2570 // P2:hi = high halves of P1:lo + P1:hi.
2571 SDValue P2
= getInstr(Hexagon::V6_vadduhw
, dl
, PairTy
,
2572 {HiHalf(P1
, DAG
), LoHalf(P1
, DAG
)}, DAG
);
2573 // Still need to add the high halves of P0:lo to P2:lo
2575 getInstr(Hexagon::V6_vlsrw
, dl
, VecTy
, {LoHalf(P0
, DAG
), S16
}, DAG
);
2576 SDValue T3
= DAG
.getNode(ISD::ADD
, dl
, VecTy
, {LoHalf(P2
, DAG
), T2
});
2578 // The high halves of T3 will contribute to the HI part of LOHI.
2579 SDValue T4
= getInstr(Hexagon::V6_vasrw_acc
, dl
, VecTy
,
2580 {HiHalf(P2
, DAG
), T3
, S16
}, DAG
);
2582 // The low halves of P2 need to be added to high halves of the LO part.
2583 Lo
= getInstr(Hexagon::V6_vaslw_acc
, dl
, VecTy
,
2584 {LoHalf(P0
, DAG
), LoHalf(P2
, DAG
), S16
}, DAG
);
2585 Hi
= DAG
.getNode(ISD::ADD
, dl
, VecTy
, {HiHalf(P0
, DAG
), T4
});
2588 assert(SignedB
&& "Signed A and unsigned B should have been inverted");
2590 MVT PredTy
= MVT::getVectorVT(MVT::i1
, VecTy
.getVectorNumElements());
2591 SDValue Zero
= getZero(dl
, VecTy
, DAG
);
2592 SDValue Q0
= DAG
.getSetCC(dl
, PredTy
, A
, Zero
, ISD::SETLT
);
2593 SDValue Q1
= DAG
.getSetCC(dl
, PredTy
, B
, Zero
, ISD::SETLT
);
2594 SDValue X0
= DAG
.getNode(ISD::VSELECT
, dl
, VecTy
, {Q0
, B
, Zero
});
2595 SDValue X1
= getInstr(Hexagon::V6_vaddwq
, dl
, VecTy
, {Q1
, X0
, A
}, DAG
);
2596 Hi
= getInstr(Hexagon::V6_vsubw
, dl
, VecTy
, {Hi
, X1
}, DAG
);
2597 } else if (SignedB
) {
2598 // Same correction as for mulhus:
2599 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2600 MVT PredTy
= MVT::getVectorVT(MVT::i1
, VecTy
.getVectorNumElements());
2601 SDValue Zero
= getZero(dl
, VecTy
, DAG
);
2602 SDValue Q1
= DAG
.getSetCC(dl
, PredTy
, B
, Zero
, ISD::SETLT
);
2603 Hi
= getInstr(Hexagon::V6_vsubwq
, dl
, VecTy
, {Q1
, Hi
, A
}, DAG
);
2605 assert(!SignedA
&& !SignedB
);
2608 return DAG
.getMergeValues({Lo
, Hi
}, dl
);
2612 HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A
, bool SignedA
,
2613 SDValue B
, bool SignedB
,
2615 SelectionDAG
&DAG
) const {
2617 MVT PairTy
= typeJoin({VecTy
, VecTy
});
2618 assert(VecTy
.getVectorElementType() == MVT::i32
);
2620 if (SignedA
&& !SignedB
) {
2621 // Make A:unsigned, B:signed.
2623 std::swap(SignedA
, SignedB
);
2626 // Do S*S first, then make corrections for U*S or U*U if needed.
2627 SDValue P0
= getInstr(Hexagon::V6_vmpyewuh_64
, dl
, PairTy
, {A
, B
}, DAG
);
2629 getInstr(Hexagon::V6_vmpyowh_64_acc
, dl
, PairTy
, {P0
, A
, B
}, DAG
);
2630 SDValue Lo
= LoHalf(P1
, DAG
);
2631 SDValue Hi
= HiHalf(P1
, DAG
);
2634 assert(!SignedA
&& "Signed A and unsigned B should have been inverted");
2635 SDValue Zero
= getZero(dl
, VecTy
, DAG
);
2636 MVT PredTy
= MVT::getVectorVT(MVT::i1
, VecTy
.getVectorNumElements());
2638 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2639 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2640 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2641 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2642 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2644 SDValue Q0
= DAG
.getSetCC(dl
, PredTy
, A
, Zero
, ISD::SETLT
);
2645 SDValue Q1
= DAG
.getSetCC(dl
, PredTy
, B
, Zero
, ISD::SETLT
);
2646 SDValue T0
= getInstr(Hexagon::V6_vandvqv
, dl
, VecTy
, {Q0
, B
}, DAG
);
2647 SDValue T1
= getInstr(Hexagon::V6_vaddwq
, dl
, VecTy
, {Q1
, T0
, A
}, DAG
);
2648 Hi
= getInstr(Hexagon::V6_vaddw
, dl
, VecTy
, {Hi
, T1
}, DAG
);
2649 } else if (!SignedA
) {
2650 SDValue Zero
= getZero(dl
, VecTy
, DAG
);
2651 MVT PredTy
= MVT::getVectorVT(MVT::i1
, VecTy
.getVectorNumElements());
2653 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2654 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2655 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2656 // (HiHalf (Muls64O $A, $B)),
2658 SDValue Q0
= DAG
.getSetCC(dl
, PredTy
, A
, Zero
, ISD::SETLT
);
2659 Hi
= getInstr(Hexagon::V6_vaddwq
, dl
, VecTy
, {Q0
, Hi
, B
}, DAG
);
2662 return DAG
.getMergeValues({Lo
, Hi
}, dl
);
2666 HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op
, SelectionDAG
&DAG
)
2668 // Rewrite conversion between integer and floating-point in such a way that
2669 // the integer type is extended/narrowed to match the bitwidth of the
2670 // floating-point type, combined with additional integer-integer extensions
2671 // or narrowings to match the original input/result types.
2672 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2674 // The input/result types are not required to be legal, but if they are
2675 // legal, this function should not introduce illegal types.
2677 unsigned Opc
= Op
.getOpcode();
2678 assert(Opc
== ISD::FP_TO_SINT
|| Opc
== ISD::FP_TO_UINT
||
2679 Opc
== ISD::SINT_TO_FP
|| Opc
== ISD::UINT_TO_FP
);
2681 SDValue Inp
= Op
.getOperand(0);
2682 MVT InpTy
= ty(Inp
);
2688 const SDLoc
&dl(Op
);
2689 bool Signed
= Opc
== ISD::FP_TO_SINT
|| Opc
== ISD::SINT_TO_FP
;
2691 auto [WInpTy
, WResTy
] = typeExtendToWider(InpTy
, ResTy
);
2692 SDValue WInp
= resizeToWidth(Inp
, WInpTy
, Signed
, dl
, DAG
);
2693 SDValue Conv
= DAG
.getNode(Opc
, dl
, WResTy
, WInp
);
2694 SDValue Res
= resizeToWidth(Conv
, ResTy
, Signed
, dl
, DAG
);
2699 HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op
, SelectionDAG
&DAG
) const {
2700 unsigned Opc
= Op
.getOpcode();
2701 assert(Opc
== ISD::FP_TO_SINT
|| Opc
== ISD::FP_TO_UINT
);
2703 const SDLoc
&dl(Op
);
2704 SDValue Op0
= Op
.getOperand(0);
2705 MVT InpTy
= ty(Op0
);
2707 assert(InpTy
.changeTypeToInteger() == ResTy
);
2709 // int32_t conv_f32_to_i32(uint32_t inp) {
2710 // // s | exp8 | frac23
2712 // int neg = (int32_t)inp < 0;
2714 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2715 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2716 // // produce a large positive "expm1", which will result in max u/int.
2717 // // In all IEEE formats, bias is the largest positive number that can be
2718 // // represented in bias-width bits (i.e. 011..1).
2719 // int32_t expm1 = (inp << 1) - 0x80000000;
2722 // // Always insert the "implicit 1". Subnormal numbers will become 0
2724 // uint32_t frac = (inp << 8) | 0x80000000;
2726 // // "frac" is the fraction part represented as Q1.31. If it was
2727 // // interpreted as uint32_t, it would be the fraction part multiplied
2730 // // Calculate the amount of right shift, since shifting further to the
2731 // // left would lose significant bits. Limit it to 32, because we want
2732 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2733 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2734 // // left by 31). "rsh" can be negative.
2735 // int32_t rsh = min(31 - (expm1 + 1), 32);
2737 // frac >>= rsh; // rsh == 32 will produce 0
2739 // // Everything up to this point is the same for conversion to signed
2740 // // unsigned integer.
2742 // if (neg) // Only for signed int
2744 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2745 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2746 // if (rsh <= 0 && !neg) //
2747 // frac = 0x7fffffff; //
2749 // if (neg) // Only for unsigned int
2751 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2752 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2757 MVT PredTy
= MVT::getVectorVT(MVT::i1
, ResTy
.getVectorElementCount());
2760 // Neg = V6_vgtw(Zero, Inp);
2761 // One = V6_lvsplatw(1);
2762 // M80 = V6_lvsplatw(0x80000000);
2763 // Exp00 = V6_vaslwv(Inp, One);
2764 // Exp01 = V6_vsubw(Exp00, M80);
2765 // ExpM1 = V6_vasrw(Exp01, 24);
2766 // Frc00 = V6_vaslw(Inp, 8);
2767 // Frc01 = V6_vor(Frc00, M80);
2768 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2769 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2770 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2773 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2774 // Pos = V6_vgtw(Rsh01, Zero);
2775 // Frc13 = V6_vsubw(Zero, Frc02);
2776 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2777 // Int = V6_vmux(Pos, Frc14, Bnd);
2780 // Rsn = V6_vgtw(Zero, Rsh01)
2781 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2782 // Int = V6_vmux(Neg, Zero, Frc23)
2784 auto [ExpWidth
, ExpBias
, FracWidth
] = getIEEEProperties(InpTy
);
2785 unsigned ElemWidth
= 1 + ExpWidth
+ FracWidth
;
2786 assert((1ull << (ExpWidth
- 1)) == (1 + ExpBias
));
2788 SDValue Inp
= DAG
.getBitcast(ResTy
, Op0
);
2789 SDValue Zero
= getZero(dl
, ResTy
, DAG
);
2790 SDValue Neg
= DAG
.getSetCC(dl
, PredTy
, Inp
, Zero
, ISD::SETLT
);
2791 SDValue M80
= DAG
.getConstant(1ull << (ElemWidth
- 1), dl
, ResTy
);
2792 SDValue M7F
= DAG
.getConstant((1ull << (ElemWidth
- 1)) - 1, dl
, ResTy
);
2793 SDValue One
= DAG
.getConstant(1, dl
, ResTy
);
2794 SDValue Exp00
= DAG
.getNode(ISD::SHL
, dl
, ResTy
, {Inp
, One
});
2795 SDValue Exp01
= DAG
.getNode(ISD::SUB
, dl
, ResTy
, {Exp00
, M80
});
2796 SDValue MNE
= DAG
.getConstant(ElemWidth
- ExpWidth
, dl
, ResTy
);
2797 SDValue ExpM1
= DAG
.getNode(ISD::SRA
, dl
, ResTy
, {Exp01
, MNE
});
2799 SDValue ExpW
= DAG
.getConstant(ExpWidth
, dl
, ResTy
);
2800 SDValue Frc00
= DAG
.getNode(ISD::SHL
, dl
, ResTy
, {Inp
, ExpW
});
2801 SDValue Frc01
= DAG
.getNode(ISD::OR
, dl
, ResTy
, {Frc00
, M80
});
2803 SDValue MN2
= DAG
.getConstant(ElemWidth
- 2, dl
, ResTy
);
2804 SDValue Rsh00
= DAG
.getNode(ISD::SUB
, dl
, ResTy
, {MN2
, ExpM1
});
2805 SDValue MW
= DAG
.getConstant(ElemWidth
, dl
, ResTy
);
2806 SDValue Rsh01
= DAG
.getNode(ISD::SMIN
, dl
, ResTy
, {Rsh00
, MW
});
2807 SDValue Frc02
= DAG
.getNode(ISD::SRL
, dl
, ResTy
, {Frc01
, Rsh01
});
2811 if (Opc
== ISD::FP_TO_SINT
) {
2812 SDValue Bnd
= DAG
.getNode(ISD::VSELECT
, dl
, ResTy
, {Neg
, M80
, M7F
});
2813 SDValue Pos
= DAG
.getSetCC(dl
, PredTy
, Rsh01
, Zero
, ISD::SETGT
);
2814 SDValue Frc13
= DAG
.getNode(ISD::SUB
, dl
, ResTy
, {Zero
, Frc02
});
2815 SDValue Frc14
= DAG
.getNode(ISD::VSELECT
, dl
, ResTy
, {Neg
, Frc13
, Frc02
});
2816 Int
= DAG
.getNode(ISD::VSELECT
, dl
, ResTy
, {Pos
, Frc14
, Bnd
});
2818 assert(Opc
== ISD::FP_TO_UINT
);
2819 SDValue Rsn
= DAG
.getSetCC(dl
, PredTy
, Rsh01
, Zero
, ISD::SETLT
);
2820 SDValue Frc23
= DAG
.getNode(ISD::VSELECT
, dl
, ResTy
, Rsn
, M7F
, Frc02
);
2821 Int
= DAG
.getNode(ISD::VSELECT
, dl
, ResTy
, Neg
, Zero
, Frc23
);
2828 HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op
, SelectionDAG
&DAG
) const {
2829 unsigned Opc
= Op
.getOpcode();
2830 assert(Opc
== ISD::SINT_TO_FP
|| Opc
== ISD::UINT_TO_FP
);
2832 const SDLoc
&dl(Op
);
2833 SDValue Op0
= Op
.getOperand(0);
2834 MVT InpTy
= ty(Op0
);
2836 assert(ResTy
.changeTypeToInteger() == InpTy
);
2838 // uint32_t vnoc1_rnd(int32_t w) {
2839 // int32_t iszero = w == 0;
2840 // int32_t isneg = w < 0;
2841 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
2843 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2844 // uint32_t frac0 = (uint64_t)u << norm_left;
2847 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
2848 // uint32_t renorm = (frac0 > frac1);
2849 // uint32_t rup = (int)(frac0 << 22) < 0;
2851 // uint32_t frac2 = frac0 >> 8;
2852 // uint32_t frac3 = frac1 >> 8;
2853 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2855 // int32_t exp = 32 - norm_left + renorm + 127;
2858 // uint32_t sign = 0x80000000 * isneg;
2859 // uint32_t f = sign | exp | frac;
2860 // return iszero ? 0 : f;
2863 MVT PredTy
= MVT::getVectorVT(MVT::i1
, InpTy
.getVectorElementCount());
2864 bool Signed
= Opc
== ISD::SINT_TO_FP
;
2866 auto [ExpWidth
, ExpBias
, FracWidth
] = getIEEEProperties(ResTy
);
2867 unsigned ElemWidth
= 1 + ExpWidth
+ FracWidth
;
2869 SDValue Zero
= getZero(dl
, InpTy
, DAG
);
2870 SDValue One
= DAG
.getConstant(1, dl
, InpTy
);
2871 SDValue IsZero
= DAG
.getSetCC(dl
, PredTy
, Op0
, Zero
, ISD::SETEQ
);
2872 SDValue Abs
= Signed
? DAG
.getNode(ISD::ABS
, dl
, InpTy
, Op0
) : Op0
;
2873 SDValue Clz
= DAG
.getNode(ISD::CTLZ
, dl
, InpTy
, Abs
);
2874 SDValue NLeft
= DAG
.getNode(ISD::ADD
, dl
, InpTy
, {Clz
, One
});
2875 SDValue Frac0
= DAG
.getNode(ISD::SHL
, dl
, InpTy
, {Abs
, NLeft
});
2877 auto [Frac
, Ovf
] = emitHvxShiftRightRnd(Frac0
, ExpWidth
+ 1, false, DAG
);
2879 SDValue IsNeg
= DAG
.getSetCC(dl
, PredTy
, Op0
, Zero
, ISD::SETLT
);
2880 SDValue M80
= DAG
.getConstant(1ull << (ElemWidth
- 1), dl
, InpTy
);
2881 SDValue Sign
= DAG
.getNode(ISD::VSELECT
, dl
, InpTy
, {IsNeg
, M80
, Zero
});
2882 Frac
= DAG
.getNode(ISD::OR
, dl
, InpTy
, {Sign
, Frac
});
2885 SDValue Rnrm
= DAG
.getZExtOrTrunc(Ovf
, dl
, InpTy
);
2886 SDValue Exp0
= DAG
.getConstant(ElemWidth
+ ExpBias
, dl
, InpTy
);
2887 SDValue Exp1
= DAG
.getNode(ISD::ADD
, dl
, InpTy
, {Rnrm
, Exp0
});
2888 SDValue Exp2
= DAG
.getNode(ISD::SUB
, dl
, InpTy
, {Exp1
, NLeft
});
2889 SDValue Exp3
= DAG
.getNode(ISD::SHL
, dl
, InpTy
,
2890 {Exp2
, DAG
.getConstant(FracWidth
, dl
, InpTy
)});
2891 SDValue Flt0
= DAG
.getNode(ISD::OR
, dl
, InpTy
, {Frac
, Exp3
});
2892 SDValue Flt1
= DAG
.getNode(ISD::VSELECT
, dl
, InpTy
, {IsZero
, Zero
, Flt0
});
2893 SDValue Flt
= DAG
.getBitcast(ResTy
, Flt1
);
2899 HexagonTargetLowering::CreateTLWrapper(SDValue Op
, SelectionDAG
&DAG
) const {
2900 unsigned Opc
= Op
.getOpcode();
2903 case ISD::ANY_EXTEND
:
2904 case ISD::SIGN_EXTEND
:
2905 case ISD::ZERO_EXTEND
:
2906 TLOpc
= HexagonISD::TL_EXTEND
;
2909 TLOpc
= HexagonISD::TL_TRUNCATE
;
2914 llvm_unreachable("Unepected operator");
2917 const SDLoc
&dl(Op
);
2918 return DAG
.getNode(TLOpc
, dl
, ty(Op
), Op
.getOperand(0),
2919 DAG
.getUNDEF(MVT::i128
), // illegal type
2920 DAG
.getConstant(Opc
, dl
, MVT::i32
));
2924 HexagonTargetLowering::RemoveTLWrapper(SDValue Op
, SelectionDAG
&DAG
) const {
2925 assert(Op
.getOpcode() == HexagonISD::TL_EXTEND
||
2926 Op
.getOpcode() == HexagonISD::TL_TRUNCATE
);
2927 unsigned Opc
= Op
.getConstantOperandVal(2);
2928 return DAG
.getNode(Opc
, SDLoc(Op
), ty(Op
), Op
.getOperand(0));
2931 HexagonTargetLowering::VectorPair
2932 HexagonTargetLowering::SplitVectorOp(SDValue Op
, SelectionDAG
&DAG
) const {
2933 assert(!Op
.isMachineOpcode());
2934 SmallVector
<SDValue
, 2> OpsL
, OpsH
;
2935 const SDLoc
&dl(Op
);
2937 auto SplitVTNode
= [&DAG
, this](const VTSDNode
*N
) {
2938 MVT Ty
= typeSplit(N
->getVT().getSimpleVT()).first
;
2939 SDValue TV
= DAG
.getValueType(Ty
);
2940 return std::make_pair(TV
, TV
);
2943 for (SDValue A
: Op
.getNode()->ops()) {
2945 ty(A
).isVector() ? opSplit(A
, dl
, DAG
) : std::make_pair(A
, A
);
2946 // Special case for type operand.
2947 switch (Op
.getOpcode()) {
2948 case ISD::SIGN_EXTEND_INREG
:
2949 case HexagonISD::SSAT
:
2950 case HexagonISD::USAT
:
2951 if (const auto *N
= dyn_cast
<const VTSDNode
>(A
.getNode()))
2952 std::tie(Lo
, Hi
) = SplitVTNode(N
);
2960 MVT HalfTy
= typeSplit(ResTy
).first
;
2961 SDValue L
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsL
);
2962 SDValue H
= DAG
.getNode(Op
.getOpcode(), dl
, HalfTy
, OpsH
);
2967 HexagonTargetLowering::SplitHvxMemOp(SDValue Op
, SelectionDAG
&DAG
) const {
2968 auto *MemN
= cast
<MemSDNode
>(Op
.getNode());
2970 MVT MemTy
= MemN
->getMemoryVT().getSimpleVT();
2971 if (!isHvxPairTy(MemTy
))
2974 const SDLoc
&dl(Op
);
2975 unsigned HwLen
= Subtarget
.getVectorLength();
2976 MVT SingleTy
= typeSplit(MemTy
).first
;
2977 SDValue Chain
= MemN
->getChain();
2978 SDValue Base0
= MemN
->getBasePtr();
2980 DAG
.getMemBasePlusOffset(Base0
, TypeSize::getFixed(HwLen
), dl
);
2981 unsigned MemOpc
= MemN
->getOpcode();
2983 MachineMemOperand
*MOp0
= nullptr, *MOp1
= nullptr;
2984 if (MachineMemOperand
*MMO
= MemN
->getMemOperand()) {
2985 MachineFunction
&MF
= DAG
.getMachineFunction();
2986 uint64_t MemSize
= (MemOpc
== ISD::MLOAD
|| MemOpc
== ISD::MSTORE
)
2987 ? (uint64_t)MemoryLocation::UnknownSize
2989 MOp0
= MF
.getMachineMemOperand(MMO
, 0, MemSize
);
2990 MOp1
= MF
.getMachineMemOperand(MMO
, HwLen
, MemSize
);
2993 if (MemOpc
== ISD::LOAD
) {
2994 assert(cast
<LoadSDNode
>(Op
)->isUnindexed());
2995 SDValue Load0
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base0
, MOp0
);
2996 SDValue Load1
= DAG
.getLoad(SingleTy
, dl
, Chain
, Base1
, MOp1
);
2997 return DAG
.getMergeValues(
2998 { DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, MemTy
, Load0
, Load1
),
2999 DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
3000 Load0
.getValue(1), Load1
.getValue(1)) }, dl
);
3002 if (MemOpc
== ISD::STORE
) {
3003 assert(cast
<StoreSDNode
>(Op
)->isUnindexed());
3004 VectorPair Vals
= opSplit(cast
<StoreSDNode
>(Op
)->getValue(), dl
, DAG
);
3005 SDValue Store0
= DAG
.getStore(Chain
, dl
, Vals
.first
, Base0
, MOp0
);
3006 SDValue Store1
= DAG
.getStore(Chain
, dl
, Vals
.second
, Base1
, MOp1
);
3007 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Store0
, Store1
);
3010 assert(MemOpc
== ISD::MLOAD
|| MemOpc
== ISD::MSTORE
);
3012 auto MaskN
= cast
<MaskedLoadStoreSDNode
>(Op
);
3013 assert(MaskN
->isUnindexed());
3014 VectorPair Masks
= opSplit(MaskN
->getMask(), dl
, DAG
);
3015 SDValue Offset
= DAG
.getUNDEF(MVT::i32
);
3017 if (MemOpc
== ISD::MLOAD
) {
3019 opSplit(cast
<MaskedLoadSDNode
>(Op
)->getPassThru(), dl
, DAG
);
3021 DAG
.getMaskedLoad(SingleTy
, dl
, Chain
, Base0
, Offset
, Masks
.first
,
3022 Thru
.first
, SingleTy
, MOp0
, ISD::UNINDEXED
,
3023 ISD::NON_EXTLOAD
, false);
3025 DAG
.getMaskedLoad(SingleTy
, dl
, Chain
, Base1
, Offset
, Masks
.second
,
3026 Thru
.second
, SingleTy
, MOp1
, ISD::UNINDEXED
,
3027 ISD::NON_EXTLOAD
, false);
3028 return DAG
.getMergeValues(
3029 { DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, MemTy
, MLoad0
, MLoad1
),
3030 DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
3031 MLoad0
.getValue(1), MLoad1
.getValue(1)) }, dl
);
3033 if (MemOpc
== ISD::MSTORE
) {
3034 VectorPair Vals
= opSplit(cast
<MaskedStoreSDNode
>(Op
)->getValue(), dl
, DAG
);
3035 SDValue MStore0
= DAG
.getMaskedStore(Chain
, dl
, Vals
.first
, Base0
, Offset
,
3036 Masks
.first
, SingleTy
, MOp0
,
3037 ISD::UNINDEXED
, false, false);
3038 SDValue MStore1
= DAG
.getMaskedStore(Chain
, dl
, Vals
.second
, Base1
, Offset
,
3039 Masks
.second
, SingleTy
, MOp1
,
3040 ISD::UNINDEXED
, false, false);
3041 return DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MStore0
, MStore1
);
3044 std::string Name
= "Unexpected operation: " + Op
->getOperationName(&DAG
);
3045 llvm_unreachable(Name
.c_str());
3049 HexagonTargetLowering::WidenHvxLoad(SDValue Op
, SelectionDAG
&DAG
) const {
3050 const SDLoc
&dl(Op
);
3051 auto *LoadN
= cast
<LoadSDNode
>(Op
.getNode());
3052 assert(LoadN
->isUnindexed() && "Not widening indexed loads yet");
3053 assert(LoadN
->getMemoryVT().getVectorElementType() != MVT::i1
&&
3054 "Not widening loads of i1 yet");
3056 SDValue Chain
= LoadN
->getChain();
3057 SDValue Base
= LoadN
->getBasePtr();
3058 SDValue Offset
= DAG
.getUNDEF(MVT::i32
);
3061 unsigned HwLen
= Subtarget
.getVectorLength();
3062 unsigned ResLen
= ResTy
.getStoreSize();
3063 assert(ResLen
< HwLen
&& "vsetq(v1) prerequisite");
3065 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
3066 SDValue Mask
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
3067 {DAG
.getConstant(ResLen
, dl
, MVT::i32
)}, DAG
);
3069 MVT LoadTy
= MVT::getVectorVT(MVT::i8
, HwLen
);
3070 MachineFunction
&MF
= DAG
.getMachineFunction();
3071 auto *MemOp
= MF
.getMachineMemOperand(LoadN
->getMemOperand(), 0, HwLen
);
3073 SDValue Load
= DAG
.getMaskedLoad(LoadTy
, dl
, Chain
, Base
, Offset
, Mask
,
3074 DAG
.getUNDEF(LoadTy
), LoadTy
, MemOp
,
3075 ISD::UNINDEXED
, ISD::NON_EXTLOAD
, false);
3076 SDValue Value
= opCastElem(Load
, ResTy
.getVectorElementType(), DAG
);
3077 return DAG
.getMergeValues({Value
, Load
.getValue(1)}, dl
);
3081 HexagonTargetLowering::WidenHvxStore(SDValue Op
, SelectionDAG
&DAG
) const {
3082 const SDLoc
&dl(Op
);
3083 auto *StoreN
= cast
<StoreSDNode
>(Op
.getNode());
3084 assert(StoreN
->isUnindexed() && "Not widening indexed stores yet");
3085 assert(StoreN
->getMemoryVT().getVectorElementType() != MVT::i1
&&
3086 "Not widening stores of i1 yet");
3088 SDValue Chain
= StoreN
->getChain();
3089 SDValue Base
= StoreN
->getBasePtr();
3090 SDValue Offset
= DAG
.getUNDEF(MVT::i32
);
3092 SDValue Value
= opCastElem(StoreN
->getValue(), MVT::i8
, DAG
);
3093 MVT ValueTy
= ty(Value
);
3094 unsigned ValueLen
= ValueTy
.getVectorNumElements();
3095 unsigned HwLen
= Subtarget
.getVectorLength();
3096 assert(isPowerOf2_32(ValueLen
));
3098 for (unsigned Len
= ValueLen
; Len
< HwLen
; ) {
3099 Value
= opJoin({Value
, DAG
.getUNDEF(ty(Value
))}, dl
, DAG
);
3100 Len
= ty(Value
).getVectorNumElements(); // This is Len *= 2
3102 assert(ty(Value
).getVectorNumElements() == HwLen
); // Paranoia
3104 assert(ValueLen
< HwLen
&& "vsetq(v1) prerequisite");
3105 MVT BoolTy
= MVT::getVectorVT(MVT::i1
, HwLen
);
3106 SDValue Mask
= getInstr(Hexagon::V6_pred_scalar2
, dl
, BoolTy
,
3107 {DAG
.getConstant(ValueLen
, dl
, MVT::i32
)}, DAG
);
3108 MachineFunction
&MF
= DAG
.getMachineFunction();
3109 auto *MemOp
= MF
.getMachineMemOperand(StoreN
->getMemOperand(), 0, HwLen
);
3110 return DAG
.getMaskedStore(Chain
, dl
, Value
, Base
, Offset
, Mask
, ty(Value
),
3111 MemOp
, ISD::UNINDEXED
, false, false);
3115 HexagonTargetLowering::WidenHvxSetCC(SDValue Op
, SelectionDAG
&DAG
) const {
3116 const SDLoc
&dl(Op
);
3117 SDValue Op0
= Op
.getOperand(0), Op1
= Op
.getOperand(1);
3118 MVT ElemTy
= ty(Op0
).getVectorElementType();
3119 unsigned HwLen
= Subtarget
.getVectorLength();
3121 unsigned WideOpLen
= (8 * HwLen
) / ElemTy
.getSizeInBits();
3122 assert(WideOpLen
* ElemTy
.getSizeInBits() == 8 * HwLen
);
3123 MVT WideOpTy
= MVT::getVectorVT(ElemTy
, WideOpLen
);
3124 if (!Subtarget
.isHVXVectorType(WideOpTy
, true))
3127 SDValue WideOp0
= appendUndef(Op0
, WideOpTy
, DAG
);
3128 SDValue WideOp1
= appendUndef(Op1
, WideOpTy
, DAG
);
3130 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), WideOpTy
);
3131 SDValue SetCC
= DAG
.getNode(ISD::SETCC
, dl
, ResTy
,
3132 {WideOp0
, WideOp1
, Op
.getOperand(2)});
3134 EVT RetTy
= typeLegalize(ty(Op
), DAG
);
3135 return DAG
.getNode(ISD::EXTRACT_SUBVECTOR
, dl
, RetTy
,
3136 {SetCC
, getZero(dl
, MVT::i32
, DAG
)});
3140 HexagonTargetLowering::LowerHvxOperation(SDValue Op
, SelectionDAG
&DAG
) const {
3141 unsigned Opc
= Op
.getOpcode();
3142 bool IsPairOp
= isHvxPairTy(ty(Op
)) ||
3143 llvm::any_of(Op
.getNode()->ops(), [this] (SDValue V
) {
3144 return isHvxPairTy(ty(V
));
3155 return SplitHvxMemOp(Op
, DAG
);
3156 case ISD::SINT_TO_FP
:
3157 case ISD::UINT_TO_FP
:
3158 case ISD::FP_TO_SINT
:
3159 case ISD::FP_TO_UINT
:
3160 if (ty(Op
).getSizeInBits() == ty(Op
.getOperand(0)).getSizeInBits())
3161 return opJoin(SplitVectorOp(Op
, DAG
), SDLoc(Op
), DAG
);
3189 case ISD::SIGN_EXTEND_INREG
:
3190 case ISD::SPLAT_VECTOR
:
3191 return opJoin(SplitVectorOp(Op
, DAG
), SDLoc(Op
), DAG
);
3192 case ISD::SIGN_EXTEND
:
3193 case ISD::ZERO_EXTEND
:
3194 // In general, sign- and zero-extends can't be split and still
3195 // be legal. The only exception is extending bool vectors.
3196 if (ty(Op
.getOperand(0)).getVectorElementType() == MVT::i1
)
3197 return opJoin(SplitVectorOp(Op
, DAG
), SDLoc(Op
), DAG
);
3205 case ISD::BUILD_VECTOR
: return LowerHvxBuildVector(Op
, DAG
);
3206 case ISD::SPLAT_VECTOR
: return LowerHvxSplatVector(Op
, DAG
);
3207 case ISD::CONCAT_VECTORS
: return LowerHvxConcatVectors(Op
, DAG
);
3208 case ISD::INSERT_SUBVECTOR
: return LowerHvxInsertSubvector(Op
, DAG
);
3209 case ISD::INSERT_VECTOR_ELT
: return LowerHvxInsertElement(Op
, DAG
);
3210 case ISD::EXTRACT_SUBVECTOR
: return LowerHvxExtractSubvector(Op
, DAG
);
3211 case ISD::EXTRACT_VECTOR_ELT
: return LowerHvxExtractElement(Op
, DAG
);
3212 case ISD::BITCAST
: return LowerHvxBitcast(Op
, DAG
);
3213 case ISD::ANY_EXTEND
: return LowerHvxAnyExt(Op
, DAG
);
3214 case ISD::SIGN_EXTEND
: return LowerHvxSignExt(Op
, DAG
);
3215 case ISD::ZERO_EXTEND
: return LowerHvxZeroExt(Op
, DAG
);
3216 case ISD::CTTZ
: return LowerHvxCttz(Op
, DAG
);
3217 case ISD::SELECT
: return LowerHvxSelect(Op
, DAG
);
3220 case ISD::SRL
: return LowerHvxShift(Op
, DAG
);
3222 case ISD::FSHR
: return LowerHvxFunnelShift(Op
, DAG
);
3224 case ISD::MULHU
: return LowerHvxMulh(Op
, DAG
);
3225 case ISD::SMUL_LOHI
:
3226 case ISD::UMUL_LOHI
: return LowerHvxMulLoHi(Op
, DAG
);
3227 case ISD::ANY_EXTEND_VECTOR_INREG
: return LowerHvxExtend(Op
, DAG
);
3229 case ISD::INTRINSIC_VOID
: return Op
;
3230 case ISD::INTRINSIC_WO_CHAIN
: return LowerHvxIntrinsic(Op
, DAG
);
3232 case ISD::MSTORE
: return LowerHvxMaskedOp(Op
, DAG
);
3233 // Unaligned loads will be handled by the default lowering.
3234 case ISD::LOAD
: return SDValue();
3235 case ISD::FP_EXTEND
: return LowerHvxFpExtend(Op
, DAG
);
3236 case ISD::FP_TO_SINT
:
3237 case ISD::FP_TO_UINT
: return LowerHvxFpToInt(Op
, DAG
);
3238 case ISD::SINT_TO_FP
:
3239 case ISD::UINT_TO_FP
: return LowerHvxIntToFp(Op
, DAG
);
3242 case HexagonISD::SMUL_LOHI
:
3243 case HexagonISD::UMUL_LOHI
:
3244 case HexagonISD::USMUL_LOHI
: return LowerHvxMulLoHi(Op
, DAG
);
3249 llvm_unreachable("Unhandled HVX operation");
3253 HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op
, SelectionDAG
&DAG
)
3255 // Rewrite the extension/truncation/saturation op into steps where each
3256 // step changes the type widths by a factor of 2.
3257 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3259 // Some of the vector types in Op may not be legal.
3261 unsigned Opc
= Op
.getOpcode();
3263 case HexagonISD::SSAT
:
3264 case HexagonISD::USAT
:
3265 case HexagonISD::TL_EXTEND
:
3266 case HexagonISD::TL_TRUNCATE
:
3268 case ISD::ANY_EXTEND
:
3269 case ISD::ZERO_EXTEND
:
3270 case ISD::SIGN_EXTEND
:
3272 llvm_unreachable("ISD:: ops will be auto-folded");
3277 llvm_unreachable("Unexpected operation");
3280 SDValue Inp
= Op
.getOperand(0);
3281 MVT InpTy
= ty(Inp
);
3284 unsigned InpWidth
= InpTy
.getVectorElementType().getSizeInBits();
3285 unsigned ResWidth
= ResTy
.getVectorElementType().getSizeInBits();
3286 assert(InpWidth
!= ResWidth
);
3288 if (InpWidth
== 2 * ResWidth
|| ResWidth
== 2 * InpWidth
)
3291 const SDLoc
&dl(Op
);
3292 unsigned NumElems
= InpTy
.getVectorNumElements();
3293 assert(NumElems
== ResTy
.getVectorNumElements());
3295 auto repeatOp
= [&](unsigned NewWidth
, SDValue Arg
) {
3296 MVT Ty
= MVT::getVectorVT(MVT::getIntegerVT(NewWidth
), NumElems
);
3298 case HexagonISD::SSAT
:
3299 case HexagonISD::USAT
:
3300 return DAG
.getNode(Opc
, dl
, Ty
, {Arg
, DAG
.getValueType(Ty
)});
3301 case HexagonISD::TL_EXTEND
:
3302 case HexagonISD::TL_TRUNCATE
:
3303 return DAG
.getNode(Opc
, dl
, Ty
, {Arg
, Op
.getOperand(1), Op
.getOperand(2)});
3305 llvm_unreachable("Unexpected opcode");
3310 if (InpWidth
< ResWidth
) {
3311 assert(ResWidth
% InpWidth
== 0 && isPowerOf2_32(ResWidth
/ InpWidth
));
3312 while (InpWidth
* 2 <= ResWidth
)
3313 S
= repeatOp(InpWidth
*= 2, S
);
3315 // InpWidth > ResWidth
3316 assert(InpWidth
% ResWidth
== 0 && isPowerOf2_32(InpWidth
/ ResWidth
));
3317 while (InpWidth
/ 2 >= ResWidth
)
3318 S
= repeatOp(InpWidth
/= 2, S
);
3324 HexagonTargetLowering::LegalizeHvxResize(SDValue Op
, SelectionDAG
&DAG
) const {
3325 SDValue Inp0
= Op
.getOperand(0);
3326 MVT InpTy
= ty(Inp0
);
3328 unsigned InpWidth
= InpTy
.getSizeInBits();
3329 unsigned ResWidth
= ResTy
.getSizeInBits();
3330 unsigned Opc
= Op
.getOpcode();
3332 if (shouldWidenToHvx(InpTy
, DAG
) || shouldWidenToHvx(ResTy
, DAG
)) {
3333 // First, make sure that the narrower type is widened to HVX.
3334 // This may cause the result to be wider than what the legalizer
3335 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3337 auto [WInpTy
, WResTy
] =
3338 InpWidth
< ResWidth
? typeWidenToWider(typeWidenToHvx(InpTy
), ResTy
)
3339 : typeWidenToWider(InpTy
, typeWidenToHvx(ResTy
));
3340 SDValue W
= appendUndef(Inp0
, WInpTy
, DAG
);
3342 if (Opc
== HexagonISD::TL_EXTEND
|| Opc
== HexagonISD::TL_TRUNCATE
) {
3343 S
= DAG
.getNode(Opc
, SDLoc(Op
), WResTy
, W
, Op
.getOperand(1),
3346 S
= DAG
.getNode(Opc
, SDLoc(Op
), WResTy
, W
, DAG
.getValueType(WResTy
));
3348 SDValue T
= ExpandHvxResizeIntoSteps(S
, DAG
);
3349 return extractSubvector(T
, typeLegalize(ResTy
, DAG
), 0, DAG
);
3350 } else if (shouldSplitToHvx(InpWidth
< ResWidth
? ResTy
: InpTy
, DAG
)) {
3351 return opJoin(SplitVectorOp(Op
, DAG
), SDLoc(Op
), DAG
);
3353 assert(isTypeLegal(InpTy
) && isTypeLegal(ResTy
));
3354 return RemoveTLWrapper(Op
, DAG
);
3356 llvm_unreachable("Unexpected situation");
3360 HexagonTargetLowering::LowerHvxOperationWrapper(SDNode
*N
,
3361 SmallVectorImpl
<SDValue
> &Results
, SelectionDAG
&DAG
) const {
3362 unsigned Opc
= N
->getOpcode();
3364 SDValue Inp0
; // Optional first argument.
3365 if (N
->getNumOperands() > 0)
3366 Inp0
= Op
.getOperand(0);
3369 case ISD::ANY_EXTEND
:
3370 case ISD::SIGN_EXTEND
:
3371 case ISD::ZERO_EXTEND
:
3373 if (Subtarget
.isHVXElementType(ty(Op
)) &&
3374 Subtarget
.isHVXElementType(ty(Inp0
))) {
3375 Results
.push_back(CreateTLWrapper(Op
, DAG
));
3379 if (shouldWidenToHvx(ty(Inp0
), DAG
)) {
3380 if (SDValue T
= WidenHvxSetCC(Op
, DAG
))
3381 Results
.push_back(T
);
3385 if (shouldWidenToHvx(ty(cast
<StoreSDNode
>(N
)->getValue()), DAG
)) {
3386 SDValue Store
= WidenHvxStore(Op
, DAG
);
3387 Results
.push_back(Store
);
3392 if (isHvxPairTy(ty(Op
))) {
3393 SDValue S
= SplitHvxMemOp(Op
, DAG
);
3394 assert(S
->getOpcode() == ISD::MERGE_VALUES
);
3395 Results
.push_back(S
.getOperand(0));
3396 Results
.push_back(S
.getOperand(1));
3400 if (isHvxPairTy(ty(Op
->getOperand(1)))) { // Stored value
3401 SDValue S
= SplitHvxMemOp(Op
, DAG
);
3402 Results
.push_back(S
);
3405 case ISD::SINT_TO_FP
:
3406 case ISD::UINT_TO_FP
:
3407 case ISD::FP_TO_SINT
:
3408 case ISD::FP_TO_UINT
:
3409 if (ty(Op
).getSizeInBits() != ty(Inp0
).getSizeInBits()) {
3410 SDValue T
= EqualizeFpIntConversion(Op
, DAG
);
3411 Results
.push_back(T
);
3414 case HexagonISD::SSAT
:
3415 case HexagonISD::USAT
:
3416 case HexagonISD::TL_EXTEND
:
3417 case HexagonISD::TL_TRUNCATE
:
3418 Results
.push_back(LegalizeHvxResize(Op
, DAG
));
3426 HexagonTargetLowering::ReplaceHvxNodeResults(SDNode
*N
,
3427 SmallVectorImpl
<SDValue
> &Results
, SelectionDAG
&DAG
) const {
3428 unsigned Opc
= N
->getOpcode();
3430 SDValue Inp0
; // Optional first argument.
3431 if (N
->getNumOperands() > 0)
3432 Inp0
= Op
.getOperand(0);
3435 case ISD::ANY_EXTEND
:
3436 case ISD::SIGN_EXTEND
:
3437 case ISD::ZERO_EXTEND
:
3439 if (Subtarget
.isHVXElementType(ty(Op
)) &&
3440 Subtarget
.isHVXElementType(ty(Inp0
))) {
3441 Results
.push_back(CreateTLWrapper(Op
, DAG
));
3445 if (shouldWidenToHvx(ty(Op
), DAG
)) {
3446 if (SDValue T
= WidenHvxSetCC(Op
, DAG
))
3447 Results
.push_back(T
);
3451 if (shouldWidenToHvx(ty(Op
), DAG
)) {
3452 SDValue Load
= WidenHvxLoad(Op
, DAG
);
3453 assert(Load
->getOpcode() == ISD::MERGE_VALUES
);
3454 Results
.push_back(Load
.getOperand(0));
3455 Results
.push_back(Load
.getOperand(1));
3460 if (isHvxBoolTy(ty(Inp0
))) {
3461 SDValue C
= LowerHvxBitcast(Op
, DAG
);
3462 Results
.push_back(C
);
3465 case ISD::FP_TO_SINT
:
3466 case ISD::FP_TO_UINT
:
3467 if (ty(Op
).getSizeInBits() != ty(Inp0
).getSizeInBits()) {
3468 SDValue T
= EqualizeFpIntConversion(Op
, DAG
);
3469 Results
.push_back(T
);
3472 case HexagonISD::SSAT
:
3473 case HexagonISD::USAT
:
3474 case HexagonISD::TL_EXTEND
:
3475 case HexagonISD::TL_TRUNCATE
:
3476 Results
.push_back(LegalizeHvxResize(Op
, DAG
));
3484 HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op
,
3485 DAGCombinerInfo
&DCI
) const {
3486 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3487 // to extract-subvector (shuffle V, pick even, pick odd)
3489 assert(Op
.getOpcode() == ISD::TRUNCATE
);
3490 SelectionDAG
&DAG
= DCI
.DAG
;
3491 const SDLoc
&dl(Op
);
3493 if (Op
.getOperand(0).getOpcode() == ISD::BITCAST
)
3495 SDValue Cast
= Op
.getOperand(0);
3496 SDValue Src
= Cast
.getOperand(0);
3498 EVT TruncTy
= Op
.getValueType();
3499 EVT CastTy
= Cast
.getValueType();
3500 EVT SrcTy
= Src
.getValueType();
3501 if (SrcTy
.isSimple())
3503 if (SrcTy
.getVectorElementType() != TruncTy
.getVectorElementType())
3505 unsigned SrcLen
= SrcTy
.getVectorNumElements();
3506 unsigned CastLen
= CastTy
.getVectorNumElements();
3507 if (2 * CastLen
!= SrcLen
)
3510 SmallVector
<int, 128> Mask(SrcLen
);
3511 for (int i
= 0; i
!= static_cast<int>(CastLen
); ++i
) {
3513 Mask
[i
+ CastLen
] = 2 * i
+ 1;
3516 DAG
.getVectorShuffle(SrcTy
, dl
, Src
, DAG
.getUNDEF(SrcTy
), Mask
);
3517 return opSplit(Deal
, dl
, DAG
).first
;
3521 HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3522 SDValue Op
, DAGCombinerInfo
&DCI
) const {
3524 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3526 // shuffle (concat x, y), undef, m3
3527 if (Op
.getNumOperands() != 2)
3530 SelectionDAG
&DAG
= DCI
.DAG
;
3531 const SDLoc
&dl(Op
);
3532 SDValue V0
= Op
.getOperand(0);
3533 SDValue V1
= Op
.getOperand(1);
3535 if (V0
.getOpcode() != ISD::VECTOR_SHUFFLE
)
3537 if (V1
.getOpcode() != ISD::VECTOR_SHUFFLE
)
3540 SetVector
<SDValue
> Order
;
3541 Order
.insert(V0
.getOperand(0));
3542 Order
.insert(V0
.getOperand(1));
3543 Order
.insert(V1
.getOperand(0));
3544 Order
.insert(V1
.getOperand(1));
3546 if (Order
.size() > 2)
3549 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3550 // result must be the same.
3551 EVT InpTy
= V0
.getValueType();
3552 assert(InpTy
.isVector());
3553 unsigned InpLen
= InpTy
.getVectorNumElements();
3555 SmallVector
<int, 128> LongMask
;
3556 auto AppendToMask
= [&](SDValue Shuffle
) {
3557 auto *SV
= cast
<ShuffleVectorSDNode
>(Shuffle
.getNode());
3558 ArrayRef
<int> Mask
= SV
->getMask();
3559 SDValue X
= Shuffle
.getOperand(0);
3560 SDValue Y
= Shuffle
.getOperand(1);
3561 for (int M
: Mask
) {
3563 LongMask
.push_back(M
);
3566 SDValue Src
= static_cast<unsigned>(M
) < InpLen
? X
: Y
;
3567 if (static_cast<unsigned>(M
) >= InpLen
)
3570 int OutOffset
= Order
[0] == Src
? 0 : InpLen
;
3571 LongMask
.push_back(M
+ OutOffset
);
3578 SDValue C0
= Order
.front();
3579 SDValue C1
= Order
.back(); // Can be same as front
3580 EVT LongTy
= InpTy
.getDoubleNumVectorElementsVT(*DAG
.getContext());
3582 SDValue Cat
= DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, LongTy
, {C0
, C1
});
3583 return DAG
.getVectorShuffle(LongTy
, dl
, Cat
, DAG
.getUNDEF(LongTy
), LongMask
);
3587 HexagonTargetLowering::PerformHvxDAGCombine(SDNode
*N
, DAGCombinerInfo
&DCI
)
3590 SelectionDAG
&DAG
= DCI
.DAG
;
3592 unsigned Opc
= Op
.getOpcode();
3594 SmallVector
<SDValue
, 4> Ops(N
->ops());
3596 if (Opc
== ISD::TRUNCATE
)
3597 return combineTruncateBeforeLegal(Op
, DCI
);
3598 if (Opc
== ISD::CONCAT_VECTORS
)
3599 return combineConcatVectorsBeforeLegal(Op
, DCI
);
3601 if (DCI
.isBeforeLegalizeOps())
3605 case ISD::VSELECT
: {
3606 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3607 SDValue Cond
= Ops
[0];
3608 if (Cond
->getOpcode() == ISD::XOR
) {
3609 SDValue C0
= Cond
.getOperand(0), C1
= Cond
.getOperand(1);
3610 if (C1
->getOpcode() == HexagonISD::QTRUE
)
3611 return DAG
.getNode(ISD::VSELECT
, dl
, ty(Op
), C0
, Ops
[2], Ops
[1]);
3615 case HexagonISD::V2Q
:
3616 if (Ops
[0].getOpcode() == ISD::SPLAT_VECTOR
) {
3617 if (const auto *C
= dyn_cast
<ConstantSDNode
>(Ops
[0].getOperand(0)))
3618 return C
->isZero() ? DAG
.getNode(HexagonISD::QFALSE
, dl
, ty(Op
))
3619 : DAG
.getNode(HexagonISD::QTRUE
, dl
, ty(Op
));
3622 case HexagonISD::Q2V
:
3623 if (Ops
[0].getOpcode() == HexagonISD::QTRUE
)
3624 return DAG
.getNode(ISD::SPLAT_VECTOR
, dl
, ty(Op
),
3625 DAG
.getAllOnesConstant(dl
, MVT::i32
));
3626 if (Ops
[0].getOpcode() == HexagonISD::QFALSE
)
3627 return getZero(dl
, ty(Op
), DAG
);
3629 case HexagonISD::VINSERTW0
:
3630 if (isUndef(Ops
[1]))
3633 case HexagonISD::VROR
: {
3634 if (Ops
[0].getOpcode() == HexagonISD::VROR
) {
3635 SDValue Vec
= Ops
[0].getOperand(0);
3636 SDValue Rot0
= Ops
[1], Rot1
= Ops
[0].getOperand(1);
3637 SDValue Rot
= DAG
.getNode(ISD::ADD
, dl
, ty(Rot0
), {Rot0
, Rot1
});
3638 return DAG
.getNode(HexagonISD::VROR
, dl
, ty(Op
), {Vec
, Rot
});
3648 HexagonTargetLowering::shouldSplitToHvx(MVT Ty
, SelectionDAG
&DAG
) const {
3649 if (Subtarget
.isHVXVectorType(Ty
, true))
3651 auto Action
= getPreferredHvxVectorAction(Ty
);
3652 if (Action
== TargetLoweringBase::TypeSplitVector
)
3653 return Subtarget
.isHVXVectorType(typeLegalize(Ty
, DAG
), true);
3658 HexagonTargetLowering::shouldWidenToHvx(MVT Ty
, SelectionDAG
&DAG
) const {
3659 if (Subtarget
.isHVXVectorType(Ty
, true))
3661 auto Action
= getPreferredHvxVectorAction(Ty
);
3662 if (Action
== TargetLoweringBase::TypeWidenVector
)
3663 return Subtarget
.isHVXVectorType(typeLegalize(Ty
, DAG
), true);
3668 HexagonTargetLowering::isHvxOperation(SDNode
*N
, SelectionDAG
&DAG
) const {
3669 if (!Subtarget
.useHVXOps())
3671 // If the type of any result, or any operand type are HVX vector types,
3672 // this is an HVX operation.
3673 auto IsHvxTy
= [this](EVT Ty
) {
3674 return Ty
.isSimple() && Subtarget
.isHVXVectorType(Ty
.getSimpleVT(), true);
3676 auto IsHvxOp
= [this](SDValue Op
) {
3677 return Op
.getValueType().isSimple() &&
3678 Subtarget
.isHVXVectorType(ty(Op
), true);
3680 if (llvm::any_of(N
->values(), IsHvxTy
) || llvm::any_of(N
->ops(), IsHvxOp
))
3683 // Check if this could be an HVX operation after type widening.
3684 auto IsWidenedToHvx
= [this, &DAG
](SDValue Op
) {
3685 if (!Op
.getValueType().isSimple())
3688 return ValTy
.isVector() && shouldWidenToHvx(ValTy
, DAG
);
3691 for (int i
= 0, e
= N
->getNumValues(); i
!= e
; ++i
) {
3692 if (IsWidenedToHvx(SDValue(N
, i
)))
3695 return llvm::any_of(N
->ops(), IsWidenedToHvx
);