//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//
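//
// As an illustrative sketch (not tied to any particular target's output), the
// v2i64 ISD::SDIV example above unrolls into roughly:
//
//   a0 = extract_vector_elt v2i64 A, 0     b0 = extract_vector_elt v2i64 B, 0
//   a1 = extract_vector_elt v2i64 A, 1     b1 = extract_vector_elt v2i64 B, 1
//   d0 = sdiv i64 a0, b0
//   d1 = sdiv i64 a1, b1
//   r  = BUILD_VECTOR v2i64 d0, d1
//
// On x86-32 the scalar i64 sdiv nodes then carry an illegal type and are
// handled by the later legalization steps, as described above.
//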
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "legalizevectorops"
namespace {

class VectorLegalizer {
  SelectionDAG& DAG;
  const TargetLowering &TLI;
  bool Changed = false; // Keep track of whether anything changed

  /// For nodes that are of legal width, and that have more than one use, this
  /// map indicates what regularized operand to use. This allows us to avoid
  /// legalizing the same thing more than once.
  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
  /// Adds a node to the translation cache.
  void AddLegalizedOperand(SDValue From, SDValue To) {
    LegalizedNodes.insert(std::make_pair(From, To));
    // If someone requests legalization of the new node, return itself.
    if (From != To)
      LegalizedNodes.insert(std::make_pair(To, To));
  }
  /// Legalizes the given node.
  SDValue LegalizeOp(SDValue Op);

  /// Assuming the node is legal, "legalize" the results.
  SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);

  /// Make sure Results are legal and update the translation cache.
  SDValue RecursivelyLegalizeResults(SDValue Op,
                                     MutableArrayRef<SDValue> Results);

  /// Wrapper to interface LowerOperation with a vector of Results.
  /// Returns false if the target wants to use default expansion. Otherwise
  /// returns true. If return is true and the Results are empty, then the
  /// target wants to keep the input node as is.
  bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);

  /// Implements unrolling a VSETCC.
  SDValue UnrollVSETCC(SDNode *Node);

  /// Implement expand-based legalization of vector operations.
  ///
  /// This is just a high-level routine to dispatch to specific code paths for
  /// operations to legalize them.
  void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  /// FP_TO_SINT isn't legal.
  void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  /// SINT_TO_FLOAT and SHR on vectors aren't legal.
  void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
  SDValue ExpandSEXTINREG(SDNode *Node);
  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and bitcasts to the
  /// proper type. The contents of the bits in the extended part of each
  /// element are undefined.
  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
  /// type, then shifts left and arithmetic shifts right to introduce a sign
  /// extension.
  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and blends zeros into
  /// the remaining lanes, finally bitcasting to the proper type.
  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Expand bswap of vectors into a shuffle if legal.
  SDValue ExpandBSWAP(SDNode *Node);

  /// Implement vselect in terms of XOR, AND, OR when blend is not
  /// supported by the target.
  SDValue ExpandVSELECT(SDNode *Node);
  SDValue ExpandSELECT(SDNode *Node);
  std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
  SDValue ExpandStore(SDNode *N);
  SDValue ExpandFNEG(SDNode *Node);
  void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  /// Implements vector promotion.
  ///
  /// This is essentially just bitcasting the operands to a different type and
  /// bitcasting the result back to the original type.
  void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements [SU]INT_TO_FP vector promotion.
  ///
  /// This is a [zs]ext of the input operand to a larger integer type.
  void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements FP_TO_[SU]INT vector promotion of the result type.
  ///
  /// It is promoted to a larger integer type. The result is then
  /// truncated back to the original type.
  void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
public:
  VectorLegalizer(SelectionDAG& dag) :
      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}

  /// Begin legalizing the vector operations in the DAG.
  bool Run();
};

} // end anonymous namespace
bool VectorLegalizer::Run() {
  // Before we start legalizing vector nodes, check if there are any vectors.
  bool HasVectors = false;
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
    // Check if the values of the nodes contain vectors. We don't need to check
    // the operands because we are going to check their values at some point.
    HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });

    // If we found a vector node we can start the legalization.
    if (HasVectors)
      break;
  }

  // If this basic block has no vectors then no need to legalize vectors.
  if (!HasVectors)
    return false;

  // The legalize process is inherently a bottom-up recursive process (users
  // legalize their uses before themselves). Given infinite stack space, we
  // could just start legalizing on the root and traverse the whole graph. In
  // practice however, this causes us to run out of stack space on large basic
  // blocks. To avoid this problem, compute an ordering of the nodes where each
  // node is only legalized after all of its operands are legalized.
  DAG.AssignTopologicalOrder();
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
    LegalizeOp(SDValue(&*I, 0));

  // Finally, it's possible the root changed. Get the new root.
  SDValue OldRoot = DAG.getRoot();
  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
  DAG.setRoot(LegalizedNodes[OldRoot]);

  LegalizedNodes.clear();

  // Remove dead nodes now.
  DAG.RemoveDeadNodes();

  return Changed;
}
SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
  assert(Op->getNumValues() == Result->getNumValues() &&
         "Unexpected number of results");
  // Generic legalization: just pass the operand through.
  for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
    AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
  return SDValue(Result, Op.getResNo());
}
SDValue
VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
                                            MutableArrayRef<SDValue> Results) {
  assert(Results.size() == Op->getNumValues() &&
         "Unexpected number of results");
  // Make sure that the generated code is itself legal.
  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
    Results[i] = LegalizeOp(Results[i]);
    AddLegalizedOperand(Op.getValue(i), Results[i]);
  }

  return Results[Op.getResNo()];
}
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  // Note that LegalizeOp may be reentered even from single-use nodes, which
  // means that we always must cache transformed nodes.
  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  if (I != LegalizedNodes.end()) return I->second;

  // Legalize the operands
  SmallVector<SDValue, 8> Ops;
  for (const SDValue &Oper : Op->op_values())
    Ops.push_back(LegalizeOp(Oper));

  SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);

  if (Op.getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = cast<LoadSDNode>(Node);
    ISD::LoadExtType ExtType = LD->getExtensionType();
    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
      LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
                 Node->dump(&DAG));
      switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
                                   LD->getMemoryVT())) {
      default: llvm_unreachable("This action is not supported yet!");
      case TargetLowering::Legal:
        return TranslateLegalizeResults(Op, Node);
      case TargetLowering::Custom: {
        SmallVector<SDValue, 2> ResultVals;
        if (LowerOperationWrapper(Node, ResultVals)) {
          if (ResultVals.empty())
            return TranslateLegalizeResults(Op, Node);

          Changed = true;
          return RecursivelyLegalizeResults(Op, ResultVals);
        }
        LLVM_FALLTHROUGH;
      }
      case TargetLowering::Expand: {
        Changed = true;
        std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
        AddLegalizedOperand(Op.getValue(0), Tmp.first);
        AddLegalizedOperand(Op.getValue(1), Tmp.second);
        return Op.getResNo() ? Tmp.first : Tmp.second;
      }
      }
    }
  } else if (Op.getOpcode() == ISD::STORE) {
    StoreSDNode *ST = cast<StoreSDNode>(Node);
    EVT StVT = ST->getMemoryVT();
    MVT ValVT = ST->getValue().getSimpleValueType();
    if (StVT.isVector() && ST->isTruncatingStore()) {
      LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
                 Node->dump(&DAG));
      switch (TLI.getTruncStoreAction(ValVT, StVT)) {
      default: llvm_unreachable("This action is not supported yet!");
      case TargetLowering::Legal:
        return TranslateLegalizeResults(Op, Node);
      case TargetLowering::Custom: {
        SmallVector<SDValue, 1> ResultVals;
        if (LowerOperationWrapper(Node, ResultVals)) {
          if (ResultVals.empty())
            return TranslateLegalizeResults(Op, Node);

          Changed = true;
          return RecursivelyLegalizeResults(Op, ResultVals);
        }
        LLVM_FALLTHROUGH;
      }
      case TargetLowering::Expand: {
        Changed = true;
        SDValue Chain = ExpandStore(Node);
        AddLegalizedOperand(Op, Chain);
        return Chain;
      }
      }
    }
  }
  bool HasVectorValueOrOp =
      llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
      llvm::any_of(Node->op_values(),
                   [](SDValue O) { return O.getValueType().isVector(); });
  if (!HasVectorValueOrOp)
    return TranslateLegalizeResults(Op, Node);

  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  EVT ValVT;
  switch (Op.getOpcode()) {
  default:
    return TranslateLegalizeResults(Op, Node);
  case ISD::MERGE_VALUES:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    // This operation lies about being legal: when it claims to be legal,
    // it should actually be expanded.
    if (Action == TargetLowering::Legal)
      Action = TargetLowering::Expand;
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)              \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ValVT = Node->getValueType(0);
    if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
        Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
      ValVT = Node->getOperand(1).getValueType();
    Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
    // If we're asked to expand a strict vector floating-point operation,
    // by default we're going to simply unroll it. That is usually the
    // best approach, except in the case where the resulting strict (scalar)
    // operations would themselves use the fallback mutation to non-strict.
    // In that specific case, just do the fallback on the vector op.
    if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
        TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
            TargetLowering::Legal) {
      EVT EltVT = ValVT.getVectorElementType();
      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
              == TargetLowering::Expand &&
          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
              == TargetLowering::Legal)
        Action = TargetLowering::Legal;
    }
    break;
  case ISD::BITREVERSE:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FNEARBYINT:
  case ISD::FROUNDEVEN:
  case ISD::SIGN_EXTEND_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::FCANONICALIZE:
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT: {
    unsigned Scale = Node->getConstantOperandVal(2);
    Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                              Node->getValueType(0), Scale);
    break;
  }
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(1).getValueType());
    break;
  case ISD::SETCC: {
    MVT OpVT = Node->getOperand(0).getSimpleValueType();
    ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
    Action = TLI.getCondCodeAction(CCCode, OpVT);
    if (Action == TargetLowering::Legal)
      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  }
  }

  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
  SmallVector<SDValue, 8> ResultVals;
  switch (Action) {
  default: llvm_unreachable("This action is not supported yet!");
  case TargetLowering::Promote:
    LLVM_DEBUG(dbgs() << "Promoting\n");
    Promote(Node, ResultVals);
    assert(!ResultVals.empty() && "No results for promotion?");
    break;
  case TargetLowering::Legal:
    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
    break;
  case TargetLowering::Custom:
    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
    if (LowerOperationWrapper(Node, ResultVals))
      break;
    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
    LLVM_FALLTHROUGH;
  case TargetLowering::Expand:
    LLVM_DEBUG(dbgs() << "Expanding\n");
    Expand(Node, ResultVals);
    break;
  }

  if (ResultVals.empty())
    return TranslateLegalizeResults(Op, Node);

  Changed = true;
  return RecursivelyLegalizeResults(Op, ResultVals);
}
// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
                                            SmallVectorImpl<SDValue> &Results) {
  SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);

  if (!Res.getNode())
    return false;

  if (Res == SDValue(Node, 0))
    return true;

  // If the original node has one result, take the return value from
  // LowerOperation as is. It might not be result number 0.
  if (Node->getNumValues() == 1) {
    Results.push_back(Res);
    return true;
  }

  // If the original node has multiple results, then the return node should
  // have the same number of results.
  assert((Node->getNumValues() == Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Place new result values based on the result number of the original node.
  for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));

  return true;
}
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // For a few operations there is a specific concept for promotion based on
  // the operand's type.
  switch (Node->getOpcode()) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    // "Promote" the operation by extending the operand.
    PromoteINT_TO_FP(Node, Results);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
    // Promote the operation by extending the operand.
    PromoteFP_TO_INT(Node, Results);
    return;
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
    // These operations are used to do promotion so they can't be promoted
    // themselves.
    llvm_unreachable("Don't know how to promote this operation!");
  }

  // There are currently two cases of vector promotion:
  // 1) Bitcasting a vector of integers to a different type to a vector of the
  //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
  // 2) Extending a vector of floats to a vector of the same number of larger
  //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
  assert(Node->getNumValues() == 1 &&
         "Can't promote a vector with multiple results!");
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      if (Node->getOperand(j)
              .getValueType()
              .getVectorElementType()
              .isFloatingPoint() &&
          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
        Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
      else
        Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());

  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
    Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
  else
    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

  Results.push_back(Res);
}
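
// Illustrative sketch of the generic path above (types taken from the comment,
// not from any particular target configuration):
//
//   Case 1: ISD::AND v2i32 promoted to v1i64
//     t0 = bitcast v2i32 A to v1i64
//     t1 = bitcast v2i32 B to v1i64
//     t2 = and v1i64 t0, t1
//     r  = bitcast v1i64 t2 to v2i32
//
//   Case 2: ISD::FADD v4f16 promoted to v4f32
//     t0 = fp_extend v4f16 A to v4f32
//     t1 = fp_extend v4f16 B to v4f32
//     t2 = fadd v4f32 t0, t1
//     r  = fp_round v4f32 t2 to v4f16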
void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // INT_TO_FP operations may require the input operand be promoted even
  // when the type is otherwise legal.
  bool IsStrict = Node->isStrictFPOpcode();
  MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
                  Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
                     ? ISD::ZERO_EXTEND
                     : ISD::SIGN_EXTEND;
  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  if (IsStrict) {
    SDValue Res = DAG.getNode(Node->getOpcode(), dl,
                              {Node->getValueType(0), MVT::Other}, Operands);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    return;
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
  Results.push_back(Res);
}
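
// For example (assuming a target where this promotion is requested): a
// UINT_TO_FP from v4i16 to v4f32, with the v4i16 operand type promoted to
// v4i32, becomes
//   t0 = zero_extend v4i16 X to v4i32
//   r  = uint_to_fp v4i32 t0 to v4f32
// SINT_TO_FP uses sign_extend instead, and the strict variants additionally
// carry the chain through unchanged.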
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  if (IsStrict) {
    Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
                           {Node->getOperand(0), Node->getOperand(1)});
    Chain = Promoted.getValue(1);
  } else
    Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
                         DAG.getValueType(VT.getScalarType()));
  Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
  Results.push_back(Promoted);
  if (IsStrict)
    Results.push_back(Chain);
}
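
// For example (types chosen for exposition): an FP_TO_UINT producing v4i16,
// with the result type promoted to v4i32, becomes
//   t0 = fp_to_uint v4f32 X to v4i32    (or fp_to_sint if that is legal)
//   t1 = AssertZext t0, i16
//   r  = truncate v4i32 t1 to v4i16
// The AssertZext/AssertSext records that the extra bits are known zero/sign
// bits, which is sound for the reason given in the comment above.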
std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  return TLI.scalarizeVectorLoad(LD, DAG);
}

SDValue VectorLegalizer::ExpandStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
  return TF;
}
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  SDValue Tmp;
  switch (Node->getOpcode()) {
  case ISD::MERGE_VALUES:
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    Results.push_back(ExpandSEXTINREG(Node));
    return;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    Results.push_back(ExpandBSWAP(Node));
    return;
  case ISD::VSELECT:
    Results.push_back(ExpandVSELECT(Node));
    return;
  case ISD::SELECT:
    Results.push_back(ExpandSELECT(Node));
    return;
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    Results.push_back(ExpandFNEG(Node));
    return;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (TLI.expandABS(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::BITREVERSE:
    ExpandBITREVERSE(Node, Results);
    return;
  case ISD::CTPOP:
    if (TLI.expandCTPOP(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (TLI.expandCTLZ(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (TLI.expandCTTZ(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::FSHR:
    if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::SMULO:
  case ISD::UMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)              \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    Results.push_back(TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
                                                      : ISD::SPLAT_VECTOR,
                             VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  Mask = DAG.getSelect(DL, BitTy, Mask,
          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
                          BitTy),
          DAG.getConstant(0, DL, BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  if (VT.isFixedLengthVector())
    Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
  else
    Mask = DAG.getSplatVector(MaskTy, DL, Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);

  SDValue AllOnes = DAG.getConstant(
      APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);

  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
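
// Illustrative sketch: for a scalar condition C selecting between v4f32
// operands A and B (types chosen for exposition), the expansion above produces
//   M  = select C, 0xFFFFFFFF, 0          ; i32
//   MV = splat v4i32 M
//   r  = bitcast ((bitcast A & MV) | (bitcast B & ~MV)) to v4f32
// i.e. an all-ones or all-zeros mask vector blended in with AND/OR/XOR.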
SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Make sure that the SRA and SHL instructions are available.
  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  SDLoc DL(Node);
  EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();

  unsigned BW = VT.getScalarSizeInBits();
  unsigned OrigBW = OrigTy.getScalarSizeInBits();
  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);

  SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
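
// For example, a v4i32 SIGN_EXTEND_INREG from i8 (types chosen for
// exposition) becomes
//   t = shl v4i32 X, splat(24)
//   r = sra v4i32 t, splat(24)
// assuming SHL and SRA are available for v4i32 on the target.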
// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
                      Src, DAG.getVectorIdxConstant(0, DL));
  }

  // Build a base mask of undef shuffles.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.resize(NumSrcElements, -1);

  // Place the extended lanes into the correct locations.
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;

  return DAG.getNode(
      ISD::BITCAST, DL, VT,
      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
}
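
// For example, any-extending the low four i16 lanes of a v8i16 into a v4i32
// result (little-endian, types chosen for exposition) becomes
//   t = vector_shuffle v8i16 X, undef, <0,u,1,u,2,u,3,u>
//   r = bitcast v8i16 t to v4i32
// so each source element lands in the low half of its i32 lane and the high
// bits are left undefined, as the comment above describes.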
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // First build an any-extend node which can be legalized above when we
  // recurse through it.
  SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);

  // Now we need sign extend. Do this by shifting the elements. Even if these
  // aren't legal operations, they have a better chance of being legalized
  // without full scalarization than the sign extension does.
  unsigned EltWidth = VT.getScalarSizeInBits();
  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  return DAG.getNode(ISD::SRA, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
                     ShiftAmount);
}
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
                      Src, DAG.getVectorIdxConstant(0, DL));
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(0, DL, SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.reserve(NumSrcElements);
  for (int i = 0; i < NumSrcElements; ++i)
    ShuffleMask.push_back(i);

  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}
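
// The zero-extending variant of the example above pulls zeros into the
// remaining lanes instead of leaving them undefined (little-endian, types
// chosen for exposition):
//   z = zero vector of type v8i16
//   t = vector_shuffle v8i16 z, X, <8,1,9,3,10,5,11,7>
//   r = bitcast v8i16 t to v4i32
// Lanes 8..11 select the source elements; the odd lanes select (zero)
// elements of z, which become the high halves of the i32 lanes.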
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
      ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
}
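
// For example, for VT = v2i32 this produces the byte shuffle mask
//   <3,2,1,0, 7,6,5,4>
// i.e. each 4-byte element has its bytes reversed when the mask is applied
// to the value bitcast to v8i8.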
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
    return DAG.UnrollVectorOp(Node);

  SDLoc DL(Node);
  SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
    SDValue Tmp = DAG.UnrollVectorOp(Node);
    Results.push_back(Tmp);
    return;
  }

  // If the vector element width is a whole number of bytes, test if it's legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
      SDLoc DL(Node);
      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
                                BSWAPMask);
      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      Results.push_back(Op);
      return;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    // Let LegalizeDAG handle this later.
    return;

  // Otherwise unroll.
  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return DAG.UnrollVectorOp(Node);

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return DAG.UnrollVectorOp(Node);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);

  SDValue AllOnes = DAG.getConstant(
      APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);

  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
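
// Concretely, with ZeroOrOneBooleanContent and, say, i32 select operands, the
// mask lanes are 0 or 1 rather than 0 or ~0, so (Op1 & Mask) | (Op2 & ~Mask)
// would only blend the low bit of each lane. That is why the code above bails
// out to unrolling in that case, unless the operands are i1 themselves.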
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // Attempt to expand using TargetLowering.
  SDValue Result, Chain;
  if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (Node->isStrictFPOpcode())
      Results.push_back(Chain);
    return;
  }

  // Otherwise go ahead and unroll.
  if (Node->isStrictFPOpcode()) {
    UnrollStrictFPOp(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT VT = Src.getValueType();
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(DAG.UnrollVectorOp(Node));
    return;
  }

  unsigned BW = VT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster than using two shifts.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);

  // Two to the power of half-word-size.
  SDValue TWOHW =
      DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL,
                      {Node->getValueType(0), MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), LO});

    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
                             fLO.getValue(1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
                    {TF, fHI, fLO});

    Results.push_back(Result);
    Results.push_back(Result.getValue(1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
  fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);

  // Add the two halves
  Results.push_back(
      DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}
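
// As a scalar sketch of the non-strict path above, for a 32-bit unsigned x
// converted to floating point:
//   hi = x >> 16;  lo = x & 0xFFFF;
//   result = (fp)hi * 65536.0 + (fp)lo;
// Both halves fit comfortably in the signed range, so SINT_TO_FP is safe to
// use even though the original value was unsigned.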
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
    SDLoc DL(Node);
    SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
    // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
                       Node->getOperand(0));
  }
  return DAG.UnrollVectorOp(Node);
}
void VectorLegalizer::ExpandFSUB(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
  // we can defer this to operation legalization where it will be lowered as
  // a+(-b).
  EVT VT = Node->getValueType(0);
  if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
      TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    return; // Defer to LegalizeDAG

  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  SDLoc dl(Node);
  MVT OpVT = Node->getOperand(0).getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();

  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  SDValue Chain;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
                                             RHS, CC, NeedInvert, dl, Chain);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                        Node->getFlags());

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert)
      LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
  } else {
    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(0);
    LHS =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                    DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                    DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(LHS);
}
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandMULO(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  if (!TLI.expandMULO(Node, Result, Overflow, DAG))
    std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);

  Results.push_back(Result);
  Results.push_back(Overflow);
}
void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  SDNode *N = Node;
  if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
          N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
    Results.push_back(Expanded);
}
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  }
  if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
    ExpandFP_TO_UINT(Node, Results);
    return;
  }

  UnrollStrictFPOp(Node, Results);
}
void VectorLegalizer::ExpandREM(SDNode *Node,
                                SmallVectorImpl<SDValue> &Results) {
  assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
         "Expected REM node");

  SDValue Result;
  if (!TLI.expandREM(Node, Result, DAG))
    Result = DAG.UnrollVectorOp(Node);
  Results.push_back(Result);
}
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                      *DAG.getContext(), TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(i, dl);

    // The Chain is the first operand.
    Opers.push_back(Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(j);
      EVT OperVT = Oper.getValueType();

      if (OperVT.isVector())
        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                           OperVT.getVectorElementType(), Oper, Idx);

      Opers.push_back(Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(0);
    SDValue ScalarChain = ScalarOp.getValue(1);

    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
                                   DAG.getConstant(APInt::getAllOnesValue
                                                   (EltVT.getSizeInBits()),
                                                   dl, EltVT),
                                   DAG.getConstant(0, dl, EltVT));

    OpValues.push_back(ScalarResult);
    OpChains.push_back(ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Result);
  Results.push_back(NewChain);
}
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
                                  DAG.getVectorIdxConstant(i, dl));
    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
                                  DAG.getVectorIdxConstant(i, dl));
    Ops[i] = DAG.getNode(ISD::SETCC, dl,
                         TLI.getSetCCResultType(DAG.getDataLayout(),
                                                *DAG.getContext(), TmpEltVT),
                         LHSElem, RHSElem, CC);
    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
                           DAG.getConstant(APInt::getAllOnesValue
                                           (EltVT.getSizeInBits()), dl, EltVT),
                           DAG.getConstant(0, dl, EltVT));
  }
  return DAG.getBuildVector(VT, dl, Ops);
}
bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}