//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/CodeGen/ISDOpcodes.h"
33 #include "llvm/CodeGen/MachineMemOperand.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/TargetLowering.h"
37 #include "llvm/CodeGen/ValueTypes.h"
38 #include "llvm/IR/DataLayout.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MachineValueType.h"
44 #include "llvm/Support/MathExtras.h"
52 #define DEBUG_TYPE "legalizevectorops"
56 class VectorLegalizer
{
58 const TargetLowering
&TLI
;
59 bool Changed
= false; // Keep track of whether anything changed
61 /// For nodes that are of legal width, and that have more than one use, this
62 /// map indicates what regularized operand to use. This allows us to avoid
63 /// legalizing the same thing more than once.
64 SmallDenseMap
<SDValue
, SDValue
, 64> LegalizedNodes
;
66 /// Adds a node to the translation cache.
67 void AddLegalizedOperand(SDValue From
, SDValue To
) {
68 LegalizedNodes
.insert(std::make_pair(From
, To
));
69 // If someone requests legalization of the new node, return itself.
71 LegalizedNodes
.insert(std::make_pair(To
, To
));
74 /// Legalizes the given node.
75 SDValue
LegalizeOp(SDValue Op
);
77 /// Assuming the node is legal, "legalize" the results.
78 SDValue
TranslateLegalizeResults(SDValue Op
, SDNode
*Result
);
80 /// Make sure Results are legal and update the translation cache.
81 SDValue
RecursivelyLegalizeResults(SDValue Op
,
82 MutableArrayRef
<SDValue
> Results
);
84 /// Wrapper to interface LowerOperation with a vector of Results.
85 /// Returns false if the target wants to use default expansion. Otherwise
86 /// returns true. If return is true and the Results are empty, then the
87 /// target wants to keep the input node as is.
88 bool LowerOperationWrapper(SDNode
*N
, SmallVectorImpl
<SDValue
> &Results
);
90 /// Implements unrolling a VSETCC.
91 SDValue
UnrollVSETCC(SDNode
*Node
);
93 /// Implement expand-based legalization of vector operations.
95 /// This is just a high-level routine to dispatch to specific code paths for
96 /// operations to legalize them.
97 void Expand(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
99 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
100 /// FP_TO_SINT isn't legal.
101 void ExpandFP_TO_UINT(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
103 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
104 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
105 void ExpandUINT_TO_FLOAT(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
107 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
108 SDValue
ExpandSEXTINREG(SDNode
*Node
);
110 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
112 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
113 /// type. The contents of the bits in the extended part of each element are
115 SDValue
ExpandANY_EXTEND_VECTOR_INREG(SDNode
*Node
);
117 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
119 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
120 /// type, then shifts left and arithmetic shifts right to introduce a sign
122 SDValue
ExpandSIGN_EXTEND_VECTOR_INREG(SDNode
*Node
);
124 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
126 /// Shuffles the low lanes of the operand into place and blends zeros into
127 /// the remaining lanes, finally bitcasting to the proper type.
128 SDValue
ExpandZERO_EXTEND_VECTOR_INREG(SDNode
*Node
);
130 /// Expand bswap of vectors into a shuffle if legal.
131 SDValue
ExpandBSWAP(SDNode
*Node
);
133 /// Implement vselect in terms of XOR, AND, OR when blend is not
134 /// supported by the target.
135 SDValue
ExpandVSELECT(SDNode
*Node
);
136 SDValue
ExpandVP_SELECT(SDNode
*Node
);
137 SDValue
ExpandSELECT(SDNode
*Node
);
138 std::pair
<SDValue
, SDValue
> ExpandLoad(SDNode
*N
);
139 SDValue
ExpandStore(SDNode
*N
);
140 SDValue
ExpandFNEG(SDNode
*Node
);
141 void ExpandFSUB(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
142 void ExpandSETCC(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
143 void ExpandBITREVERSE(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
144 void ExpandUADDSUBO(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
145 void ExpandSADDSUBO(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
146 void ExpandMULO(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
147 void ExpandFixedPointDiv(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
148 void ExpandStrictFPOp(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
149 void ExpandREM(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
151 void UnrollStrictFPOp(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
153 /// Implements vector promotion.
155 /// This is essentially just bitcasting the operands to a different type and
156 /// bitcasting the result back to the original type.
157 void Promote(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
159 /// Implements [SU]INT_TO_FP vector promotion.
161 /// This is a [zs]ext of the input operand to a larger integer type.
162 void PromoteINT_TO_FP(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
164 /// Implements FP_TO_[SU]INT vector promotion of the result type.
166 /// It is promoted to a larger integer type. The result is then
167 /// truncated back to the original type.
168 void PromoteFP_TO_INT(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
);
171 VectorLegalizer(SelectionDAG
& dag
) :
172 DAG(dag
), TLI(dag
.getTargetLoweringInfo()) {}
174 /// Begin legalizer the vector operations in the DAG.
178 } // end anonymous namespace
180 bool VectorLegalizer::Run() {
181 // Before we start legalizing vector nodes, check if there are any vectors.
182 bool HasVectors
= false;
183 for (SelectionDAG::allnodes_iterator I
= DAG
.allnodes_begin(),
184 E
= std::prev(DAG
.allnodes_end()); I
!= std::next(E
); ++I
) {
185 // Check if the values of the nodes contain vectors. We don't need to check
186 // the operands because we are going to check their values at some point.
187 HasVectors
= llvm::any_of(I
->values(), [](EVT T
) { return T
.isVector(); });
189 // If we found a vector node we can start the legalization.
194 // If this basic block has no vectors then no need to legalize vectors.
198 // The legalize process is inherently a bottom-up recursive process (users
199 // legalize their uses before themselves). Given infinite stack space, we
200 // could just start legalizing on the root and traverse the whole graph. In
201 // practice however, this causes us to run out of stack space on large basic
202 // blocks. To avoid this problem, compute an ordering of the nodes where each
203 // node is only legalized after all of its operands are legalized.
204 DAG
.AssignTopologicalOrder();
205 for (SelectionDAG::allnodes_iterator I
= DAG
.allnodes_begin(),
206 E
= std::prev(DAG
.allnodes_end()); I
!= std::next(E
); ++I
)
207 LegalizeOp(SDValue(&*I
, 0));
209 // Finally, it's possible the root changed. Get the new root.
210 SDValue OldRoot
= DAG
.getRoot();
211 assert(LegalizedNodes
.count(OldRoot
) && "Root didn't get legalized?");
212 DAG
.setRoot(LegalizedNodes
[OldRoot
]);
214 LegalizedNodes
.clear();
216 // Remove dead nodes now.
217 DAG
.RemoveDeadNodes();
222 SDValue
VectorLegalizer::TranslateLegalizeResults(SDValue Op
, SDNode
*Result
) {
223 assert(Op
->getNumValues() == Result
->getNumValues() &&
224 "Unexpected number of results");
225 // Generic legalization: just pass the operand through.
226 for (unsigned i
= 0, e
= Op
->getNumValues(); i
!= e
; ++i
)
227 AddLegalizedOperand(Op
.getValue(i
), SDValue(Result
, i
));
228 return SDValue(Result
, Op
.getResNo());
232 VectorLegalizer::RecursivelyLegalizeResults(SDValue Op
,
233 MutableArrayRef
<SDValue
> Results
) {
234 assert(Results
.size() == Op
->getNumValues() &&
235 "Unexpected number of results");
236 // Make sure that the generated code is itself legal.
237 for (unsigned i
= 0, e
= Results
.size(); i
!= e
; ++i
) {
238 Results
[i
] = LegalizeOp(Results
[i
]);
239 AddLegalizedOperand(Op
.getValue(i
), Results
[i
]);
242 return Results
[Op
.getResNo()];
245 SDValue
VectorLegalizer::LegalizeOp(SDValue Op
) {
246 // Note that LegalizeOp may be reentered even from single-use nodes, which
247 // means that we always must cache transformed nodes.
248 DenseMap
<SDValue
, SDValue
>::iterator I
= LegalizedNodes
.find(Op
);
249 if (I
!= LegalizedNodes
.end()) return I
->second
;
251 // Legalize the operands
252 SmallVector
<SDValue
, 8> Ops
;
253 for (const SDValue
&Oper
: Op
->op_values())
254 Ops
.push_back(LegalizeOp(Oper
));
256 SDNode
*Node
= DAG
.UpdateNodeOperands(Op
.getNode(), Ops
);
258 bool HasVectorValueOrOp
=
259 llvm::any_of(Node
->values(), [](EVT T
) { return T
.isVector(); }) ||
260 llvm::any_of(Node
->op_values(),
261 [](SDValue O
) { return O
.getValueType().isVector(); });
262 if (!HasVectorValueOrOp
)
263 return TranslateLegalizeResults(Op
, Node
);
265 TargetLowering::LegalizeAction Action
= TargetLowering::Legal
;
267 switch (Op
.getOpcode()) {
269 return TranslateLegalizeResults(Op
, Node
);
271 LoadSDNode
*LD
= cast
<LoadSDNode
>(Node
);
272 ISD::LoadExtType ExtType
= LD
->getExtensionType();
273 EVT LoadedVT
= LD
->getMemoryVT();
274 if (LoadedVT
.isVector() && ExtType
!= ISD::NON_EXTLOAD
)
275 Action
= TLI
.getLoadExtAction(ExtType
, LD
->getValueType(0), LoadedVT
);
279 StoreSDNode
*ST
= cast
<StoreSDNode
>(Node
);
280 EVT StVT
= ST
->getMemoryVT();
281 MVT ValVT
= ST
->getValue().getSimpleValueType();
282 if (StVT
.isVector() && ST
->isTruncatingStore())
283 Action
= TLI
.getTruncStoreAction(ValVT
, StVT
);
286 case ISD::MERGE_VALUES
:
287 Action
= TLI
.getOperationAction(Node
->getOpcode(), Node
->getValueType(0));
288 // This operation lies about being legal: when it claims to be legal,
289 // it should actually be expanded.
290 if (Action
== TargetLowering::Legal
)
291 Action
= TargetLowering::Expand
;
293 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
294 case ISD::STRICT_##DAGN:
295 #include "llvm/IR/ConstrainedOps.def"
296 ValVT
= Node
->getValueType(0);
297 if (Op
.getOpcode() == ISD::STRICT_SINT_TO_FP
||
298 Op
.getOpcode() == ISD::STRICT_UINT_TO_FP
)
299 ValVT
= Node
->getOperand(1).getValueType();
300 Action
= TLI
.getOperationAction(Node
->getOpcode(), ValVT
);
301 // If we're asked to expand a strict vector floating-point operation,
302 // by default we're going to simply unroll it. That is usually the
303 // best approach, except in the case where the resulting strict (scalar)
304 // operations would themselves use the fallback mutation to non-strict.
305 // In that specific case, just do the fallback on the vector op.
306 if (Action
== TargetLowering::Expand
&& !TLI
.isStrictFPEnabled() &&
307 TLI
.getStrictFPOperationAction(Node
->getOpcode(), ValVT
) ==
308 TargetLowering::Legal
) {
309 EVT EltVT
= ValVT
.getVectorElementType();
310 if (TLI
.getOperationAction(Node
->getOpcode(), EltVT
)
311 == TargetLowering::Expand
&&
312 TLI
.getStrictFPOperationAction(Node
->getOpcode(), EltVT
)
313 == TargetLowering::Legal
)
314 Action
= TargetLowering::Legal
;
345 case ISD::BITREVERSE
:
348 case ISD::CTLZ_ZERO_UNDEF
:
349 case ISD::CTTZ_ZERO_UNDEF
:
354 case ISD::ZERO_EXTEND
:
355 case ISD::ANY_EXTEND
:
357 case ISD::SIGN_EXTEND
:
358 case ISD::FP_TO_SINT
:
359 case ISD::FP_TO_UINT
:
364 case ISD::FMINNUM_IEEE
:
365 case ISD::FMAXNUM_IEEE
:
382 case ISD::FNEARBYINT
:
384 case ISD::FROUNDEVEN
:
389 case ISD::SIGN_EXTEND_INREG
:
390 case ISD::ANY_EXTEND_VECTOR_INREG
:
391 case ISD::SIGN_EXTEND_VECTOR_INREG
:
392 case ISD::ZERO_EXTEND_VECTOR_INREG
:
405 case ISD::FCANONICALIZE
:
412 case ISD::FP_TO_SINT_SAT
:
413 case ISD::FP_TO_UINT_SAT
:
415 Action
= TLI
.getOperationAction(Node
->getOpcode(), Node
->getValueType(0));
418 case ISD::SMULFIXSAT
:
420 case ISD::UMULFIXSAT
:
422 case ISD::SDIVFIXSAT
:
424 case ISD::UDIVFIXSAT
: {
425 unsigned Scale
= Node
->getConstantOperandVal(2);
426 Action
= TLI
.getFixedPointOperationAction(Node
->getOpcode(),
427 Node
->getValueType(0), Scale
);
430 case ISD::SINT_TO_FP
:
431 case ISD::UINT_TO_FP
:
432 case ISD::VECREDUCE_ADD
:
433 case ISD::VECREDUCE_MUL
:
434 case ISD::VECREDUCE_AND
:
435 case ISD::VECREDUCE_OR
:
436 case ISD::VECREDUCE_XOR
:
437 case ISD::VECREDUCE_SMAX
:
438 case ISD::VECREDUCE_SMIN
:
439 case ISD::VECREDUCE_UMAX
:
440 case ISD::VECREDUCE_UMIN
:
441 case ISD::VECREDUCE_FADD
:
442 case ISD::VECREDUCE_FMUL
:
443 case ISD::VECREDUCE_FMAX
:
444 case ISD::VECREDUCE_FMIN
:
445 Action
= TLI
.getOperationAction(Node
->getOpcode(),
446 Node
->getOperand(0).getValueType());
448 case ISD::VECREDUCE_SEQ_FADD
:
449 case ISD::VECREDUCE_SEQ_FMUL
:
450 Action
= TLI
.getOperationAction(Node
->getOpcode(),
451 Node
->getOperand(1).getValueType());
454 MVT OpVT
= Node
->getOperand(0).getSimpleValueType();
455 ISD::CondCode CCCode
= cast
<CondCodeSDNode
>(Node
->getOperand(2))->get();
456 Action
= TLI
.getCondCodeAction(CCCode
, OpVT
);
457 if (Action
== TargetLowering::Legal
)
458 Action
= TLI
.getOperationAction(Node
->getOpcode(), Node
->getValueType(0));
462 #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
464 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
465 : Node->getOperand(LEGALPOS).getValueType(); \
466 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
468 #include "llvm/IR/VPIntrinsics.def"
471 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node
->dump(&DAG
));
473 SmallVector
<SDValue
, 8> ResultVals
;
475 default: llvm_unreachable("This action is not supported yet!");
476 case TargetLowering::Promote
:
477 assert((Op
.getOpcode() != ISD::LOAD
&& Op
.getOpcode() != ISD::STORE
) &&
478 "This action is not supported yet!");
479 LLVM_DEBUG(dbgs() << "Promoting\n");
480 Promote(Node
, ResultVals
);
481 assert(!ResultVals
.empty() && "No results for promotion?");
483 case TargetLowering::Legal
:
484 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
486 case TargetLowering::Custom
:
487 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
488 if (LowerOperationWrapper(Node
, ResultVals
))
490 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
492 case TargetLowering::Expand
:
493 LLVM_DEBUG(dbgs() << "Expanding\n");
494 Expand(Node
, ResultVals
);
498 if (ResultVals
.empty())
499 return TranslateLegalizeResults(Op
, Node
);
502 return RecursivelyLegalizeResults(Op
, ResultVals
);
505 // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
506 // merge them somehow?
507 bool VectorLegalizer::LowerOperationWrapper(SDNode
*Node
,
508 SmallVectorImpl
<SDValue
> &Results
) {
509 SDValue Res
= TLI
.LowerOperation(SDValue(Node
, 0), DAG
);
514 if (Res
== SDValue(Node
, 0))
517 // If the original node has one result, take the return value from
518 // LowerOperation as is. It might not be result number 0.
519 if (Node
->getNumValues() == 1) {
520 Results
.push_back(Res
);
524 // If the original node has multiple results, then the return node should
525 // have the same number of results.
526 assert((Node
->getNumValues() == Res
->getNumValues()) &&
527 "Lowering returned the wrong number of results!");
529 // Places new result values base on N result number.
530 for (unsigned I
= 0, E
= Node
->getNumValues(); I
!= E
; ++I
)
531 Results
.push_back(Res
.getValue(I
));
536 void VectorLegalizer::Promote(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
) {
537 // For a few operations there is a specific concept for promotion based on
538 // the operand's type.
539 switch (Node
->getOpcode()) {
540 case ISD::SINT_TO_FP
:
541 case ISD::UINT_TO_FP
:
542 case ISD::STRICT_SINT_TO_FP
:
543 case ISD::STRICT_UINT_TO_FP
:
544 // "Promote" the operation by extending the operand.
545 PromoteINT_TO_FP(Node
, Results
);
547 case ISD::FP_TO_UINT
:
548 case ISD::FP_TO_SINT
:
549 case ISD::STRICT_FP_TO_UINT
:
550 case ISD::STRICT_FP_TO_SINT
:
551 // Promote the operation by extending the operand.
552 PromoteFP_TO_INT(Node
, Results
);
556 // These operations are used to do promotion so they can't be promoted
558 llvm_unreachable("Don't know how to promote this operation!");
561 // There are currently two cases of vector promotion:
562 // 1) Bitcasting a vector of integers to a different type to a vector of the
563 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
564 // 2) Extending a vector of floats to a vector of the same number of larger
565 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
566 assert(Node
->getNumValues() == 1 &&
567 "Can't promote a vector with multiple results!");
568 MVT VT
= Node
->getSimpleValueType(0);
569 MVT NVT
= TLI
.getTypeToPromoteTo(Node
->getOpcode(), VT
);
571 SmallVector
<SDValue
, 4> Operands(Node
->getNumOperands());
573 for (unsigned j
= 0; j
!= Node
->getNumOperands(); ++j
) {
574 if (Node
->getOperand(j
).getValueType().isVector())
575 if (Node
->getOperand(j
)
577 .getVectorElementType()
578 .isFloatingPoint() &&
579 NVT
.isVector() && NVT
.getVectorElementType().isFloatingPoint())
580 Operands
[j
] = DAG
.getNode(ISD::FP_EXTEND
, dl
, NVT
, Node
->getOperand(j
));
582 Operands
[j
] = DAG
.getNode(ISD::BITCAST
, dl
, NVT
, Node
->getOperand(j
));
584 Operands
[j
] = Node
->getOperand(j
);
588 DAG
.getNode(Node
->getOpcode(), dl
, NVT
, Operands
, Node
->getFlags());
590 if ((VT
.isFloatingPoint() && NVT
.isFloatingPoint()) ||
591 (VT
.isVector() && VT
.getVectorElementType().isFloatingPoint() &&
592 NVT
.isVector() && NVT
.getVectorElementType().isFloatingPoint()))
593 Res
= DAG
.getNode(ISD::FP_ROUND
, dl
, VT
, Res
, DAG
.getIntPtrConstant(0, dl
));
595 Res
= DAG
.getNode(ISD::BITCAST
, dl
, VT
, Res
);
597 Results
.push_back(Res
);
600 void VectorLegalizer::PromoteINT_TO_FP(SDNode
*Node
,
601 SmallVectorImpl
<SDValue
> &Results
) {
602 // INT_TO_FP operations may require the input operand be promoted even
603 // when the type is otherwise legal.
604 bool IsStrict
= Node
->isStrictFPOpcode();
605 MVT VT
= Node
->getOperand(IsStrict
? 1 : 0).getSimpleValueType();
606 MVT NVT
= TLI
.getTypeToPromoteTo(Node
->getOpcode(), VT
);
607 assert(NVT
.getVectorNumElements() == VT
.getVectorNumElements() &&
608 "Vectors have different number of elements!");
611 SmallVector
<SDValue
, 4> Operands(Node
->getNumOperands());
613 unsigned Opc
= (Node
->getOpcode() == ISD::UINT_TO_FP
||
614 Node
->getOpcode() == ISD::STRICT_UINT_TO_FP
)
617 for (unsigned j
= 0; j
!= Node
->getNumOperands(); ++j
) {
618 if (Node
->getOperand(j
).getValueType().isVector())
619 Operands
[j
] = DAG
.getNode(Opc
, dl
, NVT
, Node
->getOperand(j
));
621 Operands
[j
] = Node
->getOperand(j
);
625 SDValue Res
= DAG
.getNode(Node
->getOpcode(), dl
,
626 {Node
->getValueType(0), MVT::Other
}, Operands
);
627 Results
.push_back(Res
);
628 Results
.push_back(Res
.getValue(1));
633 DAG
.getNode(Node
->getOpcode(), dl
, Node
->getValueType(0), Operands
);
634 Results
.push_back(Res
);
637 // For FP_TO_INT we promote the result type to a vector type with wider
638 // elements and then truncate the result. This is different from the default
639 // PromoteVector which uses bitcast to promote thus assumning that the
640 // promoted vector type has the same overall size.
641 void VectorLegalizer::PromoteFP_TO_INT(SDNode
*Node
,
642 SmallVectorImpl
<SDValue
> &Results
) {
643 MVT VT
= Node
->getSimpleValueType(0);
644 MVT NVT
= TLI
.getTypeToPromoteTo(Node
->getOpcode(), VT
);
645 bool IsStrict
= Node
->isStrictFPOpcode();
646 assert(NVT
.getVectorNumElements() == VT
.getVectorNumElements() &&
647 "Vectors have different number of elements!");
649 unsigned NewOpc
= Node
->getOpcode();
650 // Change FP_TO_UINT to FP_TO_SINT if possible.
651 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
652 if (NewOpc
== ISD::FP_TO_UINT
&&
653 TLI
.isOperationLegalOrCustom(ISD::FP_TO_SINT
, NVT
))
654 NewOpc
= ISD::FP_TO_SINT
;
656 if (NewOpc
== ISD::STRICT_FP_TO_UINT
&&
657 TLI
.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT
, NVT
))
658 NewOpc
= ISD::STRICT_FP_TO_SINT
;
661 SDValue Promoted
, Chain
;
663 Promoted
= DAG
.getNode(NewOpc
, dl
, {NVT
, MVT::Other
},
664 {Node
->getOperand(0), Node
->getOperand(1)});
665 Chain
= Promoted
.getValue(1);
667 Promoted
= DAG
.getNode(NewOpc
, dl
, NVT
, Node
->getOperand(0));
669 // Assert that the converted value fits in the original type. If it doesn't
670 // (eg: because the value being converted is too big), then the result of the
671 // original operation was undefined anyway, so the assert is still correct.
672 if (Node
->getOpcode() == ISD::FP_TO_UINT
||
673 Node
->getOpcode() == ISD::STRICT_FP_TO_UINT
)
674 NewOpc
= ISD::AssertZext
;
676 NewOpc
= ISD::AssertSext
;
678 Promoted
= DAG
.getNode(NewOpc
, dl
, NVT
, Promoted
,
679 DAG
.getValueType(VT
.getScalarType()));
680 Promoted
= DAG
.getNode(ISD::TRUNCATE
, dl
, VT
, Promoted
);
681 Results
.push_back(Promoted
);
683 Results
.push_back(Chain
);
686 std::pair
<SDValue
, SDValue
> VectorLegalizer::ExpandLoad(SDNode
*N
) {
687 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
688 return TLI
.scalarizeVectorLoad(LD
, DAG
);
691 SDValue
VectorLegalizer::ExpandStore(SDNode
*N
) {
692 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
693 SDValue TF
= TLI
.scalarizeVectorStore(ST
, DAG
);
697 void VectorLegalizer::Expand(SDNode
*Node
, SmallVectorImpl
<SDValue
> &Results
) {
698 switch (Node
->getOpcode()) {
700 std::pair
<SDValue
, SDValue
> Tmp
= ExpandLoad(Node
);
701 Results
.push_back(Tmp
.first
);
702 Results
.push_back(Tmp
.second
);
706 Results
.push_back(ExpandStore(Node
));
708 case ISD::MERGE_VALUES
:
709 for (unsigned i
= 0, e
= Node
->getNumValues(); i
!= e
; ++i
)
710 Results
.push_back(Node
->getOperand(i
));
712 case ISD::SIGN_EXTEND_INREG
:
713 Results
.push_back(ExpandSEXTINREG(Node
));
715 case ISD::ANY_EXTEND_VECTOR_INREG
:
716 Results
.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node
));
718 case ISD::SIGN_EXTEND_VECTOR_INREG
:
719 Results
.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node
));
721 case ISD::ZERO_EXTEND_VECTOR_INREG
:
722 Results
.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node
));
725 Results
.push_back(ExpandBSWAP(Node
));
728 Results
.push_back(ExpandVSELECT(Node
));
731 Results
.push_back(ExpandVP_SELECT(Node
));
734 Results
.push_back(ExpandSELECT(Node
));
736 case ISD::FP_TO_UINT
:
737 ExpandFP_TO_UINT(Node
, Results
);
739 case ISD::UINT_TO_FP
:
740 ExpandUINT_TO_FLOAT(Node
, Results
);
743 Results
.push_back(ExpandFNEG(Node
));
746 ExpandFSUB(Node
, Results
);
749 ExpandSETCC(Node
, Results
);
752 if (SDValue Expanded
= TLI
.expandABS(Node
, DAG
)) {
753 Results
.push_back(Expanded
);
757 case ISD::BITREVERSE
:
758 ExpandBITREVERSE(Node
, Results
);
761 if (SDValue Expanded
= TLI
.expandCTPOP(Node
, DAG
)) {
762 Results
.push_back(Expanded
);
767 case ISD::CTLZ_ZERO_UNDEF
:
768 if (SDValue Expanded
= TLI
.expandCTLZ(Node
, DAG
)) {
769 Results
.push_back(Expanded
);
774 case ISD::CTTZ_ZERO_UNDEF
:
775 if (SDValue Expanded
= TLI
.expandCTTZ(Node
, DAG
)) {
776 Results
.push_back(Expanded
);
782 if (SDValue Expanded
= TLI
.expandFunnelShift(Node
, DAG
)) {
783 Results
.push_back(Expanded
);
789 if (SDValue Expanded
= TLI
.expandROT(Node
, false /*AllowVectorOps*/, DAG
)) {
790 Results
.push_back(Expanded
);
796 if (SDValue Expanded
= TLI
.expandFMINNUM_FMAXNUM(Node
, DAG
)) {
797 Results
.push_back(Expanded
);
805 if (SDValue Expanded
= TLI
.expandIntMINMAX(Node
, DAG
)) {
806 Results
.push_back(Expanded
);
812 ExpandUADDSUBO(Node
, Results
);
816 ExpandSADDSUBO(Node
, Results
);
820 ExpandMULO(Node
, Results
);
826 if (SDValue Expanded
= TLI
.expandAddSubSat(Node
, DAG
)) {
827 Results
.push_back(Expanded
);
833 if (SDValue Expanded
= TLI
.expandFixedPointMul(Node
, DAG
)) {
834 Results
.push_back(Expanded
);
838 case ISD::SMULFIXSAT
:
839 case ISD::UMULFIXSAT
:
840 // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
841 // why. Maybe it results in worse codegen compared to the unroll for some
842 // targets? This should probably be investigated. And if we still prefer to
843 // unroll an explanation could be helpful.
847 ExpandFixedPointDiv(Node
, Results
);
849 case ISD::SDIVFIXSAT
:
850 case ISD::UDIVFIXSAT
:
852 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
853 case ISD::STRICT_##DAGN:
854 #include "llvm/IR/ConstrainedOps.def"
855 ExpandStrictFPOp(Node
, Results
);
857 case ISD::VECREDUCE_ADD
:
858 case ISD::VECREDUCE_MUL
:
859 case ISD::VECREDUCE_AND
:
860 case ISD::VECREDUCE_OR
:
861 case ISD::VECREDUCE_XOR
:
862 case ISD::VECREDUCE_SMAX
:
863 case ISD::VECREDUCE_SMIN
:
864 case ISD::VECREDUCE_UMAX
:
865 case ISD::VECREDUCE_UMIN
:
866 case ISD::VECREDUCE_FADD
:
867 case ISD::VECREDUCE_FMUL
:
868 case ISD::VECREDUCE_FMAX
:
869 case ISD::VECREDUCE_FMIN
:
870 Results
.push_back(TLI
.expandVecReduce(Node
, DAG
));
872 case ISD::VECREDUCE_SEQ_FADD
:
873 case ISD::VECREDUCE_SEQ_FMUL
:
874 Results
.push_back(TLI
.expandVecReduceSeq(Node
, DAG
));
878 ExpandREM(Node
, Results
);
882 Results
.push_back(DAG
.UnrollVectorOp(Node
));
885 SDValue
VectorLegalizer::ExpandSELECT(SDNode
*Node
) {
886 // Lower a select instruction where the condition is a scalar and the
887 // operands are vectors. Lower this select to VSELECT and implement it
888 // using XOR AND OR. The selector bit is broadcasted.
889 EVT VT
= Node
->getValueType(0);
892 SDValue Mask
= Node
->getOperand(0);
893 SDValue Op1
= Node
->getOperand(1);
894 SDValue Op2
= Node
->getOperand(2);
896 assert(VT
.isVector() && !Mask
.getValueType().isVector()
897 && Op1
.getValueType() == Op2
.getValueType() && "Invalid type");
899 // If we can't even use the basic vector operations of
900 // AND,OR,XOR, we will have to scalarize the op.
901 // Notice that the operation may be 'promoted' which means that it is
902 // 'bitcasted' to another type which is handled.
903 // Also, we need to be able to construct a splat vector using either
904 // BUILD_VECTOR or SPLAT_VECTOR.
905 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
907 if (TLI
.getOperationAction(ISD::AND
, VT
) == TargetLowering::Expand
||
908 TLI
.getOperationAction(ISD::XOR
, VT
) == TargetLowering::Expand
||
909 TLI
.getOperationAction(ISD::OR
, VT
) == TargetLowering::Expand
||
910 TLI
.getOperationAction(VT
.isFixedLengthVector() ? ISD::BUILD_VECTOR
912 VT
) == TargetLowering::Expand
)
913 return DAG
.UnrollVectorOp(Node
);
915 // Generate a mask operand.
916 EVT MaskTy
= VT
.changeVectorElementTypeToInteger();
918 // What is the size of each element in the vector mask.
919 EVT BitTy
= MaskTy
.getScalarType();
921 Mask
= DAG
.getSelect(DL
, BitTy
, Mask
, DAG
.getAllOnesConstant(DL
, BitTy
),
922 DAG
.getConstant(0, DL
, BitTy
));
924 // Broadcast the mask so that the entire vector is all one or all zero.
925 if (VT
.isFixedLengthVector())
926 Mask
= DAG
.getSplatBuildVector(MaskTy
, DL
, Mask
);
928 Mask
= DAG
.getSplatVector(MaskTy
, DL
, Mask
);
930 // Bitcast the operands to be the same type as the mask.
931 // This is needed when we select between FP types because
932 // the mask is a vector of integers.
933 Op1
= DAG
.getNode(ISD::BITCAST
, DL
, MaskTy
, Op1
);
934 Op2
= DAG
.getNode(ISD::BITCAST
, DL
, MaskTy
, Op2
);
936 SDValue NotMask
= DAG
.getNOT(DL
, Mask
, MaskTy
);
938 Op1
= DAG
.getNode(ISD::AND
, DL
, MaskTy
, Op1
, Mask
);
939 Op2
= DAG
.getNode(ISD::AND
, DL
, MaskTy
, Op2
, NotMask
);
940 SDValue Val
= DAG
.getNode(ISD::OR
, DL
, MaskTy
, Op1
, Op2
);
941 return DAG
.getNode(ISD::BITCAST
, DL
, Node
->getValueType(0), Val
);
944 SDValue
VectorLegalizer::ExpandSEXTINREG(SDNode
*Node
) {
945 EVT VT
= Node
->getValueType(0);
947 // Make sure that the SRA and SHL instructions are available.
948 if (TLI
.getOperationAction(ISD::SRA
, VT
) == TargetLowering::Expand
||
949 TLI
.getOperationAction(ISD::SHL
, VT
) == TargetLowering::Expand
)
950 return DAG
.UnrollVectorOp(Node
);
953 EVT OrigTy
= cast
<VTSDNode
>(Node
->getOperand(1))->getVT();
955 unsigned BW
= VT
.getScalarSizeInBits();
956 unsigned OrigBW
= OrigTy
.getScalarSizeInBits();
957 SDValue ShiftSz
= DAG
.getConstant(BW
- OrigBW
, DL
, VT
);
959 SDValue Op
= DAG
.getNode(ISD::SHL
, DL
, VT
, Node
->getOperand(0), ShiftSz
);
960 return DAG
.getNode(ISD::SRA
, DL
, VT
, Op
, ShiftSz
);
963 // Generically expand a vector anyext in register to a shuffle of the relevant
964 // lanes into the appropriate locations, with other lanes left undef.
965 SDValue
VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode
*Node
) {
967 EVT VT
= Node
->getValueType(0);
968 int NumElements
= VT
.getVectorNumElements();
969 SDValue Src
= Node
->getOperand(0);
970 EVT SrcVT
= Src
.getValueType();
971 int NumSrcElements
= SrcVT
.getVectorNumElements();
973 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
974 // into a larger vector type.
975 if (SrcVT
.bitsLE(VT
)) {
976 assert((VT
.getSizeInBits() % SrcVT
.getScalarSizeInBits()) == 0 &&
977 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
978 NumSrcElements
= VT
.getSizeInBits() / SrcVT
.getScalarSizeInBits();
979 SrcVT
= EVT::getVectorVT(*DAG
.getContext(), SrcVT
.getScalarType(),
981 Src
= DAG
.getNode(ISD::INSERT_SUBVECTOR
, DL
, SrcVT
, DAG
.getUNDEF(SrcVT
),
982 Src
, DAG
.getVectorIdxConstant(0, DL
));
985 // Build a base mask of undef shuffles.
986 SmallVector
<int, 16> ShuffleMask
;
987 ShuffleMask
.resize(NumSrcElements
, -1);
989 // Place the extended lanes into the correct locations.
990 int ExtLaneScale
= NumSrcElements
/ NumElements
;
991 int EndianOffset
= DAG
.getDataLayout().isBigEndian() ? ExtLaneScale
- 1 : 0;
992 for (int i
= 0; i
< NumElements
; ++i
)
993 ShuffleMask
[i
* ExtLaneScale
+ EndianOffset
] = i
;
996 ISD::BITCAST
, DL
, VT
,
997 DAG
.getVectorShuffle(SrcVT
, DL
, Src
, DAG
.getUNDEF(SrcVT
), ShuffleMask
));
// Expand a vector sext-in-register by any-extending the lanes into place and
// then materializing the sign bits with a shift-left / arithmetic-shift-right
// pair.
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // First build an any-extend node which can be legalized above when we
  // recurse through it.
  SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);

  // Now we need sign extend. Do this by shifting the elements. Even if these
  // aren't legal operations, they have a better chance of being legalized
  // without full scalarization than the sign extension does.
  unsigned EltWidth = VT.getScalarSizeInBits();
  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  // Shift the narrow value up to the top of the wide element, then shift it
  // back down arithmetically so the sign bit is replicated through the high
  // bits.
  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  return DAG.getNode(ISD::SRA, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
                     ShiftAmount);
}
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    // Widen the source to the result's total bit size, padding with undef
    // lanes via INSERT_SUBVECTOR at index 0.
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
                      Src, DAG.getVectorIdxConstant(0, DL));
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(0, DL, SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.reserve(NumSrcElements);
  // Default every lane to the corresponding lane of the first shuffle input
  // (the zero vector).
  for (int i = 0; i < NumSrcElements; ++i)
    ShuffleMask.push_back(i);

  int ExtLaneScale = NumSrcElements / NumElements;
  // Big-endian targets use the opposite end of each ExtLaneScale-wide group
  // of source lanes for the significant sub-lane.
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  // Indices >= NumSrcElements select from the second shuffle input (Src).
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}
1063 static void createBSWAPShuffleMask(EVT VT
, SmallVectorImpl
<int> &ShuffleMask
) {
1064 int ScalarSizeInBytes
= VT
.getScalarSizeInBits() / 8;
1065 for (int I
= 0, E
= VT
.getVectorNumElements(); I
!= E
; ++I
)
1066 for (int J
= ScalarSizeInBytes
- 1; J
>= 0; --J
)
1067 ShuffleMask
.push_back((I
* ScalarSizeInBytes
) + J
);
// Expand a vector BSWAP, preferring (in order): the target-independent
// expansion for scalable vectors, a byte shuffle when legal, the shift/mask
// expansion when the required bit ops are available, and finally full
// scalarization.
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Scalable vectors can't use shuffle expansion.
  if (VT.isScalableVector())
    return TLI.expandBSWAP(Node, DAG);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    SDLoc DL(Node);
    // Bitcast to bytes, shuffle the bytes within each element, and bitcast
    // back to the original element type.
    SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
    Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
                              ShuffleMask);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    return TLI.expandBSWAP(Node, DAG);

  // Otherwise unroll.
  return DAG.UnrollVectorOp(Node);
}
// Expand a vector BITREVERSE, trying cheaper strategies first: the
// target-independent expansion for scalable vectors, per-element unrolling
// when the scalar op is legal, a BSWAP byte shuffle + byte-wise BITREVERSE
// when possible, the generic shift/mask expansion, and finally unrolling.
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);

  // We can't unroll or use shuffles for scalable vectors.
  if (VT.isScalableVector()) {
    Results.push_back(TLI.expandBITREVERSE(Node, DAG));
    return;
  }

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
    SDValue Tmp = DAG.UnrollVectorOp(Node);
    Results.push_back(Tmp);
    return;
  }

  // If the vector element width is a whole number of bytes, test if its legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
    // The byte-level BITREVERSE must itself be legal/custom, or at least
    // expandable via the byte-vector shift/mask ops.
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
      SDLoc DL(Node);
      // bswap the bytes of each element, then bit-reverse within each byte.
      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
                                BSWAPMask);
      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      Results.push_back(Op);
      return;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
    Results.push_back(TLI.expandBITREVERSE(Node, DAG));
    return;
  }

  // Otherwise unroll.
  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}
// Expand VSELECT as (Op1 & Mask) | (Op2 & ~Mask) when the required bit ops
// are available and the boolean representation makes that safe; otherwise
// scalarize the select.
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return DAG.UnrollVectorOp(Node);

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return DAG.UnrollVectorOp(Node);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);

  SDValue NotMask = DAG.getNOT(DL, Mask, VT);

  // Blend: lanes selected by Mask come from Op1, the rest from Op2.
  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
// Expand VP_SELECT as (Op1 & Mask) | (Op2 & ~Mask) using the VP logic ops,
// threading the mask and explicit vector length (EVL) through each node.
SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
  // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
  // do not support it natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe when the operands aren't also booleans.
  if (Op1.getValueType().getVectorElementType() != MVT::i1)
    return DAG.UnrollVectorOp(Node);

  // ~Mask computed as Mask ^ all-ones, predicated on Mask/EVL.
  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);

  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
}
1241 void VectorLegalizer::ExpandFP_TO_UINT(SDNode
*Node
,
1242 SmallVectorImpl
<SDValue
> &Results
) {
1243 // Attempt to expand using TargetLowering.
1244 SDValue Result
, Chain
;
1245 if (TLI
.expandFP_TO_UINT(Node
, Result
, Chain
, DAG
)) {
1246 Results
.push_back(Result
);
1247 if (Node
->isStrictFPOpcode())
1248 Results
.push_back(Chain
);
1252 // Otherwise go ahead and unroll.
1253 if (Node
->isStrictFPOpcode()) {
1254 UnrollStrictFPOp(Node
, Results
);
1258 Results
.push_back(DAG
.UnrollVectorOp(Node
));
// Expand a vector UINT_TO_FP / STRICT_UINT_TO_FP. Tries the TargetLowering
// expansion first; otherwise splits each element into two half-words,
// converts both halves with SINT_TO_FP (safe because each half fits the
// signed range), and recombines as fHI * 2^(BW/2) + fLO.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  // Strict nodes carry the chain as operand 0; the source value follows it.
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT VT = Src.getValueType();
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(DAG.UnrollVectorOp(Node));
    return;
  }

  unsigned BW = VT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);

  // Two to the power of half-word-size.
  SDValue TWOHW =
      DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), LO});
    // Join the chains of the two independent conversions.
    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
                             fLO.getValue(1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
                    {TF, fHI, fLO});

    Results.push_back(Result);
    Results.push_back(Result.getValue(1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
  fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);

  // Add the two halves
  Results.push_back(
      DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}
1352 SDValue
VectorLegalizer::ExpandFNEG(SDNode
*Node
) {
1353 if (TLI
.isOperationLegalOrCustom(ISD::FSUB
, Node
->getValueType(0))) {
1355 SDValue Zero
= DAG
.getConstantFP(-0.0, DL
, Node
->getValueType(0));
1356 // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1357 return DAG
.getNode(ISD::FSUB
, DL
, Node
->getValueType(0), Zero
,
1358 Node
->getOperand(0));
1360 return DAG
.UnrollVectorOp(Node
);
1363 void VectorLegalizer::ExpandFSUB(SDNode
*Node
,
1364 SmallVectorImpl
<SDValue
> &Results
) {
1365 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1366 // we can defer this to operation legalization where it will be lowered as
1368 EVT VT
= Node
->getValueType(0);
1369 if (TLI
.isOperationLegalOrCustom(ISD::FNEG
, VT
) &&
1370 TLI
.isOperationLegalOrCustom(ISD::FADD
, VT
))
1371 return; // Defer to LegalizeDAG
1373 SDValue Tmp
= DAG
.UnrollVectorOp(Node
);
1374 Results
.push_back(Tmp
);
// Expand a vector SETCC whose condition code is not natively supported:
// unroll when the condition code itself is fine, otherwise let
// TargetLowering legalize the condition (possibly swapping operands or
// inverting the predicate) and rebuild the node, falling back to SELECT_CC.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  SDLoc dl(Node);
  MVT OpVT = Node->getOperand(0).getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();

  // The condition code is supported; the SETCC itself must be expanded, so
  // scalarize it element by element.
  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  SDValue Chain;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  // LegalizeSetCCCondCode may rewrite LHS/RHS/CC in place and set NeedInvert.
  bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
                                             RHS, CC, NeedInvert, dl, Chain);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                        Node->getFlags());

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert)
      LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
  } else {
    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(0);
    LHS =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                    DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                    DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(LHS);
}
1421 void VectorLegalizer::ExpandUADDSUBO(SDNode
*Node
,
1422 SmallVectorImpl
<SDValue
> &Results
) {
1423 SDValue Result
, Overflow
;
1424 TLI
.expandUADDSUBO(Node
, Result
, Overflow
, DAG
);
1425 Results
.push_back(Result
);
1426 Results
.push_back(Overflow
);
1429 void VectorLegalizer::ExpandSADDSUBO(SDNode
*Node
,
1430 SmallVectorImpl
<SDValue
> &Results
) {
1431 SDValue Result
, Overflow
;
1432 TLI
.expandSADDSUBO(Node
, Result
, Overflow
, DAG
);
1433 Results
.push_back(Result
);
1434 Results
.push_back(Overflow
);
1437 void VectorLegalizer::ExpandMULO(SDNode
*Node
,
1438 SmallVectorImpl
<SDValue
> &Results
) {
1439 SDValue Result
, Overflow
;
1440 if (!TLI
.expandMULO(Node
, Result
, Overflow
, DAG
))
1441 std::tie(Result
, Overflow
) = DAG
.UnrollVectorOverflowOp(Node
);
1443 Results
.push_back(Result
);
1444 Results
.push_back(Overflow
);
1447 void VectorLegalizer::ExpandFixedPointDiv(SDNode
*Node
,
1448 SmallVectorImpl
<SDValue
> &Results
) {
1450 if (SDValue Expanded
= TLI
.expandFixedPointDiv(N
->getOpcode(), SDLoc(N
),
1451 N
->getOperand(0), N
->getOperand(1), N
->getConstantOperandVal(2), DAG
))
1452 Results
.push_back(Expanded
);
1455 void VectorLegalizer::ExpandStrictFPOp(SDNode
*Node
,
1456 SmallVectorImpl
<SDValue
> &Results
) {
1457 if (Node
->getOpcode() == ISD::STRICT_UINT_TO_FP
) {
1458 ExpandUINT_TO_FLOAT(Node
, Results
);
1461 if (Node
->getOpcode() == ISD::STRICT_FP_TO_UINT
) {
1462 ExpandFP_TO_UINT(Node
, Results
);
1466 UnrollStrictFPOp(Node
, Results
);
1469 void VectorLegalizer::ExpandREM(SDNode
*Node
,
1470 SmallVectorImpl
<SDValue
> &Results
) {
1471 assert((Node
->getOpcode() == ISD::SREM
|| Node
->getOpcode() == ISD::UREM
) &&
1472 "Expected REM node");
1475 if (!TLI
.expandREM(Node
, Result
, DAG
))
1476 Result
= DAG
.UnrollVectorOp(Node
);
1477 Results
.push_back(Result
);
// Scalarize a strict FP vector op: extract each lane, emit the scalar strict
// op (threading the incoming chain into every element), rebuild the value
// result with BUILD_VECTOR, and merge the per-element chains with a
// TokenFactor. Pushes the value result followed by the new chain.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict FP compares produce a setcc-typed scalar result, not EltVT.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                      *DAG.getContext(), TmpEltVT);

  // Each scalar op yields a value and a chain.
  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(i, dl);

    // The Chain is the first operand.
    Opers.push_back(Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(j);
      EVT OperVT = Oper.getValueType();

      // Vector operands are extracted per lane; scalar operands (e.g.
      // condition codes) are passed through unchanged.
      if (OperVT.isVector())
        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                           OperVT.getVectorElementType(), Oper, Idx);

      Opers.push_back(Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(0);
    SDValue ScalarChain = ScalarOp.getValue(1);

    // Compares: widen the boolean to an all-ones/zero mask of EltVT so it
    // can be packed back into the result vector.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
                                   DAG.getAllOnesConstant(dl, EltVT),
                                   DAG.getConstant(0, dl, EltVT));

    OpValues.push_back(ScalarResult);
    OpChains.push_back(ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Result);
  Results.push_back(NewChain);
}
// Scalarize a vector SETCC: compare each pair of lanes with a scalar SETCC,
// widen the boolean to an all-ones/zero element, and rebuild the result
// vector with BUILD_VECTOR.
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  // Element type of the compare operands (may differ from the result EltVT).
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
                                  DAG.getVectorIdxConstant(i, dl));
    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
                                  DAG.getVectorIdxConstant(i, dl));
    // Scalar compare in the target's preferred setcc result type.
    Ops[i] = DAG.getNode(ISD::SETCC, dl,
                         TLI.getSetCCResultType(DAG.getDataLayout(),
                                                *DAG.getContext(), TmpEltVT),
                         LHSElem, RHSElem, CC);
    // Normalize the boolean to all-ones (true) / zero (false) in EltVT.
    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
                           DAG.getAllOnesConstant(dl, EltVT),
                           DAG.getConstant(0, dl, EltVT));
  }
  return DAG.getBuildVector(VT, dl, Ops);
}
1565 bool SelectionDAG::LegalizeVectors() {
1566 return VectorLegalizer(*this).Run();