//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SelectionDAG::LegalizeVectors method.
//
// The vector legalizer looks for vector operations which might need to be
// scalarized and legalizes them. This is a separate step from Legalize because
// scalarizing can introduce illegal types. For example, suppose we have an
// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
// operation, which introduces nodes with the illegal type i64 which must be
// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
// the operation must be unrolled, which introduces nodes with the illegal
// type i8 which must be promoted.
//
// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
// or operations that happen to take a vector which are custom-lowered;
// the legalization for such operations never produces nodes
// with illegal types, so it's okay to put off legalizing them until
// SelectionDAG::Legalize runs.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "legalizevectorops"

namespace {

class VectorLegalizer {
  SelectionDAG& DAG;
  const TargetLowering &TLI;
  bool Changed = false; // Keep track of whether anything changed

  /// For nodes that are of legal width, and that have more than one use, this
  /// map indicates what regularized operand to use. This allows us to avoid
  /// legalizing the same thing more than once.
  SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;

  /// Adds a node to the translation cache.
  void AddLegalizedOperand(SDValue From, SDValue To) {
    LegalizedNodes.insert(std::make_pair(From, To));
    // If someone requests legalization of the new node, return itself.
    if (From != To)
      LegalizedNodes.insert(std::make_pair(To, To));
  }

  /// Legalizes the given node.
  SDValue LegalizeOp(SDValue Op);

  /// Assuming the node is legal, "legalize" the results.
  SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);

  /// Make sure Results are legal and update the translation cache.
  SDValue RecursivelyLegalizeResults(SDValue Op,
                                     MutableArrayRef<SDValue> Results);

  /// Wrapper to interface LowerOperation with a vector of Results.
  /// Returns false if the target wants to use default expansion. Otherwise
  /// returns true. If return is true and the Results are empty, then the
  /// target wants to keep the input node as is.
  bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);

  /// Implements unrolling a VSETCC.
  SDValue UnrollVSETCC(SDNode *Node);

  /// Implement expand-based legalization of vector operations.
  ///
  /// This is just a high-level routine to dispatch to specific code paths for
  /// operations to legalize them.
  void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  /// FP_TO_SINT isn't legal.
  void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  /// SINT_TO_FLOAT and SHR on vectors aren't legal.
  void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
  SDValue ExpandSEXTINREG(SDNode *Node);

  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and bitcasts to the
  /// proper type. The contents of the bits in the extended part of each
  /// element are undef.
  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
  /// type, then shifts left and arithmetic shifts right to introduce a sign
  /// extension.
  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
  ///
  /// Shuffles the low lanes of the operand into place and blends zeros into
  /// the remaining lanes, finally bitcasting to the proper type.
  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);

  /// Expand bswap of vectors into a shuffle if legal.
  SDValue ExpandBSWAP(SDNode *Node);

  /// Implement vselect in terms of XOR, AND, OR when blend is not
  /// supported by the target.
  SDValue ExpandVSELECT(SDNode *Node);
  SDValue ExpandSELECT(SDNode *Node);
  std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
  SDValue ExpandStore(SDNode *N);
  SDValue ExpandFNEG(SDNode *Node);
  void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements vector promotion.
  ///
  /// This is essentially just bitcasting the operands to a different type and
  /// bitcasting the result back to the original type.
  void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements [SU]INT_TO_FP vector promotion.
  ///
  /// This is a [zs]ext of the input operand to a larger integer type.
  void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);

  /// Implements FP_TO_[SU]INT vector promotion of the result type.
  ///
  /// It is promoted to a larger integer type. The result is then
  /// truncated back to the original type.
  void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);

public:
  VectorLegalizer(SelectionDAG& dag) :
      DAG(dag), TLI(dag.getTargetLoweringInfo()) {}

  /// Begin legalizing the vector operations in the DAG.
  bool Run();
};

} // end anonymous namespace

bool VectorLegalizer::Run() {
  // Before we start legalizing vector nodes, check if there are any vectors.
  bool HasVectors = false;
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
    // Check if the values of the nodes contain vectors. We don't need to check
    // the operands because we are going to check their values at some point.
    HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });

    // If we found a vector node we can start the legalization.
    if (HasVectors)
      break;
  }

  // If this basic block has no vectors then no need to legalize vectors.
  if (!HasVectors)
    return false;

  // The legalize process is inherently a bottom-up recursive process (users
  // legalize their uses before themselves). Given infinite stack space, we
  // could just start legalizing on the root and traverse the whole graph. In
  // practice however, this causes us to run out of stack space on large basic
  // blocks. To avoid this problem, compute an ordering of the nodes where each
  // node is only legalized after all of its operands are legalized.
  DAG.AssignTopologicalOrder();
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
    LegalizeOp(SDValue(&*I, 0));

  // Finally, it's possible the root changed. Get the new root.
  SDValue OldRoot = DAG.getRoot();
  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
  DAG.setRoot(LegalizedNodes[OldRoot]);

  LegalizedNodes.clear();

  // Remove dead nodes now.
  DAG.RemoveDeadNodes();

  return Changed;
}

SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
  assert(Op->getNumValues() == Result->getNumValues() &&
         "Unexpected number of results");
  // Generic legalization: just pass the operand through.
  for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
    AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
  return SDValue(Result, Op.getResNo());
}

SDValue
VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
                                            MutableArrayRef<SDValue> Results) {
  assert(Results.size() == Op->getNumValues() &&
         "Unexpected number of results");
  // Make sure that the generated code is itself legal.
  for (unsigned i = 0, e = Results.size(); i != e; ++i) {
    Results[i] = LegalizeOp(Results[i]);
    AddLegalizedOperand(Op.getValue(i), Results[i]);
  }

  return Results[Op.getResNo()];
}

SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  // Note that LegalizeOp may be reentered even from single-use nodes, which
  // means that we always must cache transformed nodes.
  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  if (I != LegalizedNodes.end()) return I->second;

  // Legalize the operands
  SmallVector<SDValue, 8> Ops;
  for (const SDValue &Oper : Op->op_values())
    Ops.push_back(LegalizeOp(Oper));

  SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);

  if (Op.getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = cast<LoadSDNode>(Node);
    ISD::LoadExtType ExtType = LD->getExtensionType();
    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
      LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
                 Node->dump(&DAG));
      switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
                                   LD->getMemoryVT())) {
      default: llvm_unreachable("This action is not supported yet!");
      case TargetLowering::Legal:
        return TranslateLegalizeResults(Op, Node);
      case TargetLowering::Custom: {
        SmallVector<SDValue, 2> ResultVals;
        if (LowerOperationWrapper(Node, ResultVals)) {
          if (ResultVals.empty())
            return TranslateLegalizeResults(Op, Node);

          Changed = true;
          return RecursivelyLegalizeResults(Op, ResultVals);
        }
        LLVM_FALLTHROUGH;
      }
      case TargetLowering::Expand: {
        Changed = true;
        std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
        AddLegalizedOperand(Op.getValue(0), Tmp.first);
        AddLegalizedOperand(Op.getValue(1), Tmp.second);
        return Op.getResNo() ? Tmp.first : Tmp.second;
      }
      }
    }
  } else if (Op.getOpcode() == ISD::STORE) {
    StoreSDNode *ST = cast<StoreSDNode>(Node);
    EVT StVT = ST->getMemoryVT();
    MVT ValVT = ST->getValue().getSimpleValueType();
    if (StVT.isVector() && ST->isTruncatingStore()) {
      LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
                 Node->dump(&DAG));
      switch (TLI.getTruncStoreAction(ValVT, StVT)) {
      default: llvm_unreachable("This action is not supported yet!");
      case TargetLowering::Legal:
        return TranslateLegalizeResults(Op, Node);
      case TargetLowering::Custom: {
        SmallVector<SDValue, 1> ResultVals;
        if (LowerOperationWrapper(Node, ResultVals)) {
          if (ResultVals.empty())
            return TranslateLegalizeResults(Op, Node);

          Changed = true;
          return RecursivelyLegalizeResults(Op, ResultVals);
        }
        LLVM_FALLTHROUGH;
      }
      case TargetLowering::Expand: {
        Changed = true;
        SDValue Chain = ExpandStore(Node);
        AddLegalizedOperand(Op, Chain);
        return Chain;
      }
      }
    }
  }

  bool HasVectorValueOrOp =
      llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
      llvm::any_of(Node->op_values(),
                   [](SDValue O) { return O.getValueType().isVector(); });
  if (!HasVectorValueOrOp)
    return TranslateLegalizeResults(Op, Node);

  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  EVT ValVT;
  switch (Op.getOpcode()) {
  default:
    return TranslateLegalizeResults(Op, Node);
  case ISD::MERGE_VALUES:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    // This operation lies about being legal: when it claims to be legal,
    // it should actually be expanded.
    if (Action == TargetLowering::Legal)
      Action = TargetLowering::Expand;
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)              \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ValVT = Node->getValueType(0);
    if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
        Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
      ValVT = Node->getOperand(1).getValueType();
    Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
    // If we're asked to expand a strict vector floating-point operation,
    // by default we're going to simply unroll it. That is usually the
    // best approach, except in the case where the resulting strict (scalar)
    // operations would themselves use the fallback mutation to non-strict.
    // In that specific case, just do the fallback on the vector op.
    if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
        TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
            TargetLowering::Legal) {
      EVT EltVT = ValVT.getVectorElementType();
      if (TLI.getOperationAction(Node->getOpcode(), EltVT)
              == TargetLowering::Expand &&
          TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
              == TargetLowering::Legal)
        Action = TargetLowering::Legal;
    }
    break;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
  case ISD::SDIVREM:
  case ISD::UDIVREM:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::FSHL:
  case ISD::FSHR:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::ABS:
  case ISD::BSWAP:
  case ISD::BITREVERSE:
  case ISD::CTLZ:
  case ISD::CTTZ:
  case ISD::CTLZ_ZERO_UNDEF:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTPOP:
  case ISD::SELECT:
  case ISD::VSELECT:
  case ISD::SELECT_CC:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FNEG:
  case ISD::FABS:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FCOPYSIGN:
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FFLOOR:
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
  case ISD::FMA:
  case ISD::SIGN_EXTEND_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
  case ISD::FCANONICALIZE:
  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::MGATHER:
    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT: {
    unsigned Scale = Node->getConstantOperandVal(2);
    Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
                                              Node->getValueType(0), Scale);
    break;
  }
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(1).getValueType());
    break;
  case ISD::SETCC: {
    MVT OpVT = Node->getOperand(0).getSimpleValueType();
    ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
    Action = TLI.getCondCodeAction(CCCode, OpVT);
    if (Action == TargetLowering::Legal)
      Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
    break;
  }
  }

  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

  SmallVector<SDValue, 8> ResultVals;
  switch (Action) {
  default: llvm_unreachable("This action is not supported yet!");
  case TargetLowering::Promote:
    LLVM_DEBUG(dbgs() << "Promoting\n");
    Promote(Node, ResultVals);
    assert(!ResultVals.empty() && "No results for promotion?");
    break;
  case TargetLowering::Legal:
    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
    break;
  case TargetLowering::Custom:
    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
    if (LowerOperationWrapper(Node, ResultVals))
      break;
    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
    LLVM_FALLTHROUGH;
  case TargetLowering::Expand:
    LLVM_DEBUG(dbgs() << "Expanding\n");
    Expand(Node, ResultVals);
    break;
  }

  if (ResultVals.empty())
    return TranslateLegalizeResults(Op, Node);

  Changed = true;
  return RecursivelyLegalizeResults(Op, ResultVals);
}

// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
                                            SmallVectorImpl<SDValue> &Results) {
  SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);

  if (!Res.getNode())
    return false;

  if (Res == SDValue(Node, 0))
    return true;

  // If the original node has one result, take the return value from
  // LowerOperation as is. It might not be result number 0.
  if (Node->getNumValues() == 1) {
    Results.push_back(Res);
    return true;
  }

  // If the original node has multiple results, then the return node should
  // have the same number of results.
  assert((Node->getNumValues() == Res->getNumValues()) &&
         "Lowering returned the wrong number of results!");

  // Place the new result values based on the result number of N.
  for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
    Results.push_back(Res.getValue(I));

  return true;
}

void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // For a few operations there is a specific concept for promotion based on
  // the operand's type.
  switch (Node->getOpcode()) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    // "Promote" the operation by extending the operand.
    PromoteINT_TO_FP(Node, Results);
    return;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::STRICT_FP_TO_SINT:
    // Promote the operation by extending the operand.
    PromoteFP_TO_INT(Node, Results);
    return;
  case ISD::FP_ROUND:
  case ISD::FP_EXTEND:
    // These operations are used to do promotion so they can't be promoted
    // themselves.
    llvm_unreachable("Don't know how to promote this operation!");
  }

  // There are currently two cases of vector promotion:
  // 1) Bitcasting a vector of integers to a vector with a different element
  //    type but the same overall bit width. For example, x86 promotes
  //    ISD::AND on v2i32 to v1i64.
  // 2) Extending a vector of floats to a vector of the same number of larger
  //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
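  //
  // As an illustrative sketch of case 2 (the types here are only an example),
  // an FADD that is Promoted from v4f16 to v4f32 is rewritten as:
  //   t1: v4f16 = fadd a, b
  // -->
  //   a32: v4f32 = fp_extend a
  //   b32: v4f32 = fp_extend b
  //   t32: v4f32 = fadd a32, b32
  //   t1:  v4f16 = fp_round t32, 0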
  assert(Node->getNumValues() == 1 &&
         "Can't promote a vector with multiple results!");
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      if (Node->getOperand(j)
              .getValueType()
              .getVectorElementType()
              .isFloatingPoint() &&
          NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
        Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
      else
        Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());

  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
    Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
  else
    Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);

  Results.push_back(Res);
}

void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // INT_TO_FP operations may require the input operand be promoted even
  // when the type is otherwise legal.
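  // As an illustrative sketch (the types are just an example, not a specific
  // target): a uint_to_fp whose v2i16 source is promoted to v2i32 becomes
  //   t: v2f32 = uint_to_fp (zero_extend x to v2i32)
  // Signed conversions use sign_extend instead of zero_extend.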
  bool IsStrict = Node->isStrictFPOpcode();
  MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
                  Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
                     ? ISD::ZERO_EXTEND
                     : ISD::SIGN_EXTEND;
  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    if (Node->getOperand(j).getValueType().isVector())
      Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
    else
      Operands[j] = Node->getOperand(j);
  }

  if (IsStrict) {
    SDValue Res = DAG.getNode(Node->getOpcode(), dl,
                              {Node->getValueType(0), MVT::Other}, Operands);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
    return;
  }

  SDValue Res =
      DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
  Results.push_back(Res);
}

// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
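// As an illustrative sketch (the types are only an example): an fp_to_uint
// from v4f32 to v4i8 whose result type is promoted to v4i32 becomes
//   t32: v4i32 = fp_to_sint x        (if FP_TO_SINT is legal for v4i32)
//   t32: v4i32 = AssertZext t32, i8
//   t:   v4i8  = truncate t32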
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(0);
  MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  if (IsStrict) {
    Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
                           {Node->getOperand(0), Node->getOperand(1)});
    Chain = Promoted.getValue(1);
  } else
    Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
                         DAG.getValueType(VT.getScalarType()));
  Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
  Results.push_back(Promoted);
  if (IsStrict)
    Results.push_back(Chain);
}

std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  return TLI.scalarizeVectorLoad(LD, DAG);
}

SDValue VectorLegalizer::ExpandStore(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
  return TF;
}

void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  SDValue Tmp;
  switch (Node->getOpcode()) {
  case ISD::MERGE_VALUES:
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    Results.push_back(ExpandSEXTINREG(Node));
    return;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    Results.push_back(ExpandBSWAP(Node));
    return;
  case ISD::VSELECT:
    Results.push_back(ExpandVSELECT(Node));
    return;
  case ISD::SELECT:
    Results.push_back(ExpandSELECT(Node));
    return;
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    Results.push_back(ExpandFNEG(Node));
    return;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (TLI.expandABS(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::BITREVERSE:
    ExpandBITREVERSE(Node, Results);
    return;
  case ISD::CTPOP:
    if (TLI.expandCTPOP(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (TLI.expandCTLZ(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (TLI.expandCTTZ(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::FSHR:
    if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
      Results.push_back(Tmp);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)              \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    Results.push_back(TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}

SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
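  //
  // Illustrative sketch of the resulting pattern (the types are only an
  // example): for "select i1 %c, v4i32 %a, v4i32 %b" we build a mask M that
  // is all-ones when %c is true and all-zeros otherwise, splat it across the
  // vector, and compute (%a & M) | (%b & ~M).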
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
                                                      : ISD::SPLAT_VECTOR,
                             VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  Mask = DAG.getSelect(DL, BitTy, Mask,
          DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
                          BitTy),
          DAG.getConstant(0, DL, BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  if (VT.isFixedLengthVector())
    Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
  else
    Mask = DAG.getSplatVector(MaskTy, DL, Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);

  SDValue AllOnes = DAG.getConstant(
      APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);

  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
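
// SIGN_EXTEND_INREG is expanded as a shift-left / arithmetic-shift-right
// pair. As an illustrative sketch (the types are only an example), sign
// extending the low i8 of each lane of a v4i32 becomes:
//   t: v4i32 = sra (shl x, 24), 24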
SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Make sure that the SRA and SHL instructions are available.
  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  SDLoc DL(Node);
  EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();

  unsigned BW = VT.getScalarSizeInBits();
  unsigned OrigBW = OrigTy.getScalarSizeInBits();
  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);

  SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}

// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
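// As an illustrative sketch (little-endian, types are only an example): an
// any_extend_vector_inreg from v4i16 to v4i32 first widens the source to
// v8i16, then shuffles it with mask <0,u,1,u,2,u,3,u> and bitcasts the v8i16
// shuffle result to v4i32; the odd i16 lanes become the undef high halves.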
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
                      Src, DAG.getVectorIdxConstant(0, DL));
  }

  // Build a base mask of undef shuffles.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.resize(NumSrcElements, -1);

  // Place the extended lanes into the correct locations.
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;

  return DAG.getNode(
      ISD::BITCAST, DL, VT,
      DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
}
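
// SIGN_EXTEND_VECTOR_INREG is expanded as an ANY_EXTEND_VECTOR_INREG followed
// by a shift pair on the wide elements. As an illustrative sketch (the types
// are only an example), for v4i16 -> v4i32:
//   t: v4i32 = sra (shl (any_extend_vector_inreg x), 16), 16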
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // First build an any-extend node which can be legalized above when we
  // recurse through it.
  SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);

  // Now we need sign extend. Do this by shifting the elements. Even if these
  // aren't legal operations, they have a better chance of being legalized
  // without full scalarization than the sign extension does.
  unsigned EltWidth = VT.getScalarSizeInBits();
  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  return DAG.getNode(ISD::SRA, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
                     ShiftAmount);
}

// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
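// As an illustrative sketch (little-endian, types are only an example): a
// zero_extend_vector_inreg from v4i16 to v4i32 widens the source to v8i16,
// starts from a v8i16 zero vector, and shuffles with mask <8,1,9,3,10,5,11,7>
// (source lanes are numbered 8..15 as the second shuffle operand), so each
// i32 lane gets a source i16 in its low half and zero bits in its high half.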
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                             NumSrcElements);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
                      Src, DAG.getVectorIdxConstant(0, DL));
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(0, DL, SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.reserve(NumSrcElements);
  for (int i = 0; i < NumSrcElements; ++i)
    ShuffleMask.push_back(i);

  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}
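
// Build a per-byte shuffle mask that reverses the bytes within each vector
// element. For example (illustrative), for v2i32 the mask is
// <3,2,1,0,7,6,5,4>.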
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
    for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
      ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
}

SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
    return DAG.UnrollVectorOp(Node);

  SDLoc DL(Node);
  SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}

void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
    SDValue Tmp = DAG.UnrollVectorOp(Node);
    Results.push_back(Tmp);
    return;
  }

  // If the vector element width is a whole number of bytes, test if it's legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
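  // For example (an illustrative sketch, types are only an example), a v4i32
  // bitreverse can become a v16i8 shuffle that swaps the bytes within each
  // i32 element, followed by a v16i8 BITREVERSE of the individual bytes and a
  // bitcast back to v4i32.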
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
      SDLoc DL(Node);
      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
                                BSWAPMask);
      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      Results.push_back(Op);
      return;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    // Let LegalizeDAG handle this later.
    return;

  // Otherwise unroll.
  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}

SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
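  // For this to be correct the mask must cover every significant bit of the
  // operands: either the target's vector booleans are all-ones/all-zeros per
  // lane, or the operands themselves are i1 vectors so a 0/1 mask is enough;
  // the boolean-contents check below bails out otherwise. The blend is then
  // computed as (Op1 & Mask) | (Op2 & ~Mask).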
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Node);

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return DAG.UnrollVectorOp(Node);

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return DAG.UnrollVectorOp(Node);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);

  SDValue AllOnes = DAG.getConstant(
      APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);

  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}

void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // Attempt to expand using TargetLowering.
  SDValue Result, Chain;
  if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (Node->isStrictFPOpcode())
      Results.push_back(Chain);
    return;
  }

  // Otherwise go ahead and unroll.
  if (Node->isStrictFPOpcode()) {
    UnrollStrictFPOp(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}
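
// If TLI.expandUINT_TO_FP does not handle the node and vector SINT_TO_FP and
// SRL are available, the fallback below splits each element into two halves
// and converts each half with a signed conversion, i.e. (illustrative, for
// element width BW):
//   uint_to_fp(x) = sint_to_fp(x >> (BW/2)) * 2^(BW/2)
//                 + sint_to_fp(x & ((1 << (BW/2)) - 1))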
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT VT = Src.getValueType();
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(DAG.UnrollVectorOp(Node));
    return;
  }

  unsigned BW = VT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);

  // Two to the power of half-word-size.
  SDValue TWOHW =
      DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), LO});

    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
                             fLO.getValue(1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
                    {TF, fHI, fLO});

    Results.push_back(Result);
    Results.push_back(Result.getValue(1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
  fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);

  // Add the two halves
  Results.push_back(
      DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}
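
// Expand FNEG as a subtraction from negative zero when a vector FSUB is
// available, i.e. fneg x -> fsub -0.0, x; otherwise unroll the operation.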
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
    SDLoc DL(Node);
    SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
    // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
                       Node->getOperand(0));
  }
  return DAG.UnrollVectorOp(Node);
}

void VectorLegalizer::ExpandFSUB(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
  // we can defer this to operation legalization where it will be lowered as
  // a+(-b).
  EVT VT = Node->getValueType(0);
  if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
      TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    return; // Defer to LegalizeDAG

  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}

void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  SDLoc dl(Node);
  MVT OpVT = Node->getOperand(0).getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();

  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  SDValue Chain;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
                                             RHS, CC, NeedInvert, dl, Chain);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                        Node->getFlags());

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert)
      LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
  } else {
    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(0);
    LHS =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                    DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                    DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(LHS);
}

void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandMULO(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  if (!TLI.expandMULO(Node, Result, Overflow, DAG))
    std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);

  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  SDNode *N = Node;
  if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
          N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
    Results.push_back(Expanded);
}

void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  }
  if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
    ExpandFP_TO_UINT(Node, Results);
    return;
  }

  UnrollStrictFPOp(Node, Results);
}

void VectorLegalizer::ExpandREM(SDNode *Node,
                                SmallVectorImpl<SDValue> &Results) {
  assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
         "Expected REM node");

  SDValue Result;
  if (!TLI.expandREM(Node, Result, DAG))
    Result = DAG.UnrollVectorOp(Node);
  Results.push_back(Result);
}
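
// Scalarize a strict FP operation: each element is computed with its own
// strict scalar node that carries the incoming chain, and the per-element
// output chains are merged with a TokenFactor at the end. STRICT_FSETCC(S)
// results are additionally canonicalized to all-ones/all-zeros lanes.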
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                      *DAG.getContext(), TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(i, dl);

    // The Chain is the first operand.
    Opers.push_back(Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(j);
      EVT OperVT = Oper.getValueType();

      if (OperVT.isVector())
        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                           OperVT.getVectorElementType(), Oper, Idx);

      Opers.push_back(Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(0);
    SDValue ScalarChain = ScalarOp.getValue(1);

    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
                         DAG.getConstant(APInt::getAllOnesValue
                                         (EltVT.getSizeInBits()), dl, EltVT),
                         DAG.getConstant(0, dl, EltVT));

    OpValues.push_back(ScalarResult);
    OpChains.push_back(ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Result);
  Results.push_back(NewChain);
}
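
// Unroll a vector SETCC by comparing each pair of elements with a scalar
// SETCC and then selecting an all-ones or all-zeros scalar for the result
// lane, finally rebuilding the vector with BUILD_VECTOR.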
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
                                  DAG.getVectorIdxConstant(i, dl));
    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
                                  DAG.getVectorIdxConstant(i, dl));
    Ops[i] = DAG.getNode(ISD::SETCC, dl,
                         TLI.getSetCCResultType(DAG.getDataLayout(),
                                                *DAG.getContext(), TmpEltVT),
                         LHSElem, RHSElem, CC);
    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
                           DAG.getConstant(APInt::getAllOnesValue
                                           (EltVT.getSizeInBits()), dl, EltVT),
                           DAG.getConstant(0, dl, EltVT));
  }
  return DAG.getBuildVector(VT, dl, Ops);
}

bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}