1 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements routines for translating from LLVM IR into SelectionDAG IR.
11 //===----------------------------------------------------------------------===//
13 #include "SelectionDAGBuilder.h"
14 #include "SDNodeDbgValue.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/ADT/Triple.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/AliasAnalysis.h"
27 #include "llvm/Analysis/BlockFrequencyInfo.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/Analysis/ConstantFolding.h"
30 #include "llvm/Analysis/EHPersonalities.h"
31 #include "llvm/Analysis/Loads.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/ProfileSummaryInfo.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/ValueTracking.h"
36 #include "llvm/Analysis/VectorUtils.h"
37 #include "llvm/CodeGen/Analysis.h"
38 #include "llvm/CodeGen/FunctionLoweringInfo.h"
39 #include "llvm/CodeGen/GCMetadata.h"
40 #include "llvm/CodeGen/MachineBasicBlock.h"
41 #include "llvm/CodeGen/MachineFrameInfo.h"
42 #include "llvm/CodeGen/MachineFunction.h"
43 #include "llvm/CodeGen/MachineInstr.h"
44 #include "llvm/CodeGen/MachineInstrBuilder.h"
45 #include "llvm/CodeGen/MachineJumpTableInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
53 #include "llvm/CodeGen/StackMaps.h"
54 #include "llvm/CodeGen/SwiftErrorValueTracking.h"
55 #include "llvm/CodeGen/TargetFrameLowering.h"
56 #include "llvm/CodeGen/TargetInstrInfo.h"
57 #include "llvm/CodeGen/TargetOpcodes.h"
58 #include "llvm/CodeGen/TargetRegisterInfo.h"
59 #include "llvm/CodeGen/TargetSubtargetInfo.h"
60 #include "llvm/CodeGen/WinEHFuncInfo.h"
61 #include "llvm/IR/Argument.h"
62 #include "llvm/IR/Attributes.h"
63 #include "llvm/IR/BasicBlock.h"
64 #include "llvm/IR/CFG.h"
65 #include "llvm/IR/CallingConv.h"
66 #include "llvm/IR/Constant.h"
67 #include "llvm/IR/ConstantRange.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DebugInfoMetadata.h"
71 #include "llvm/IR/DerivedTypes.h"
72 #include "llvm/IR/DiagnosticInfo.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/GetElementPtrTypeIterator.h"
75 #include "llvm/IR/InlineAsm.h"
76 #include "llvm/IR/InstrTypes.h"
77 #include "llvm/IR/Instructions.h"
78 #include "llvm/IR/IntrinsicInst.h"
79 #include "llvm/IR/Intrinsics.h"
80 #include "llvm/IR/IntrinsicsAArch64.h"
81 #include "llvm/IR/IntrinsicsWebAssembly.h"
82 #include "llvm/IR/LLVMContext.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/IR/Module.h"
85 #include "llvm/IR/Operator.h"
86 #include "llvm/IR/PatternMatch.h"
87 #include "llvm/IR/Statepoint.h"
88 #include "llvm/IR/Type.h"
89 #include "llvm/IR/User.h"
90 #include "llvm/IR/Value.h"
91 #include "llvm/MC/MCContext.h"
92 #include "llvm/MC/MCSymbol.h"
93 #include "llvm/Support/AtomicOrdering.h"
94 #include "llvm/Support/Casting.h"
95 #include "llvm/Support/CommandLine.h"
96 #include "llvm/Support/Compiler.h"
97 #include "llvm/Support/Debug.h"
98 #include "llvm/Support/MathExtras.h"
99 #include "llvm/Support/raw_ostream.h"
100 #include "llvm/Target/TargetIntrinsicInfo.h"
101 #include "llvm/Target/TargetMachine.h"
102 #include "llvm/Target/TargetOptions.h"
103 #include "llvm/Transforms/Utils/Local.h"
104 #include <cstddef>
105 #include <cstring>
106 #include <iterator>
107 #include <limits>
108 #include <numeric>
109 #include <tuple>
111 using namespace llvm;
112 using namespace PatternMatch;
113 using namespace SwitchCG;
115 #define DEBUG_TYPE "isel"
117 /// LimitFloatPrecision - Generate low-precision inline sequences for
118 /// some float libcalls (6, 8 or 12 bits).
119 static unsigned LimitFloatPrecision;
121 static cl::opt<bool>
122 InsertAssertAlign("insert-assert-align", cl::init(true),
123 cl::desc("Insert the experimental `assertalign` node."),
124 cl::ReallyHidden);
126 static cl::opt<unsigned, true>
127 LimitFPPrecision("limit-float-precision",
128 cl::desc("Generate low-precision inline sequences "
129 "for some float libcalls"),
130 cl::location(LimitFloatPrecision), cl::Hidden,
131 cl::init(0));
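// Usage sketch (illustrative, not from the original source): passing
// -limit-float-precision=6 to llc requests the roughly 6-bit-accurate inline
// expansions for calls such as llvm.log2.f32 instead of the full-precision
// lowering; the default of 0 leaves the low-precision expansions disabled.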
133 static cl::opt<unsigned> SwitchPeelThreshold(
134 "switch-peel-threshold", cl::Hidden, cl::init(66),
135 cl::desc("Set the case probability threshold for peeling the case from a "
136 "switch statement. A value greater than 100 will void this "
137 "optimization"));
139 // Limit the width of DAG chains. This is important in general to prevent
140 // DAG-based analysis from blowing up. For example, alias analysis and
141 // load clustering may not complete in reasonable time. It is difficult to
142 // recognize and avoid this situation within each individual analysis, and
143 // future analyses are likely to have the same behavior. Limiting DAG width is
144 // the safe approach and will be especially important with global DAGs.
146 // MaxParallelChains default is arbitrarily high to avoid affecting
147 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st
148 // sequence over this should have been converted to llvm.memcpy by the
149 // frontend. It is easy to induce this behavior with .ll code such as:
150 // %buffer = alloca [4096 x i8]
151 // %data = load [4096 x i8]* %argPtr
152 // store [4096 x i8] %data, [4096 x i8]* %buffer
153 static const unsigned MaxParallelChains = 64;
155 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
156 const SDValue *Parts, unsigned NumParts,
157 MVT PartVT, EVT ValueVT, const Value *V,
158 Optional<CallingConv::ID> CC);
160 /// getCopyFromParts - Create a value that contains the specified legal parts
161 /// combined into the value they represent. If the parts combine to a type
162 /// larger than ValueVT then AssertOp can be used to specify whether the extra
163 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
164 /// (ISD::AssertSext).
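///
/// Illustrative example (not from the original source): on a 32-bit target, an
/// i64 value that was passed in two i32 registers arrives here with
/// NumParts == 2 and PartVT == MVT::i32; the two parts become Lo and Hi
/// (swapped on big-endian layouts) and are reassembled as
///   Val = BUILD_PAIR i64, Lo, Hi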
165 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
166 const SDValue *Parts, unsigned NumParts,
167 MVT PartVT, EVT ValueVT, const Value *V,
168 Optional<CallingConv::ID> CC = None,
169 Optional<ISD::NodeType> AssertOp = None) {
170 // Let the target assemble the parts if it wants to
171 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
172 if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
173 PartVT, ValueVT, CC))
174 return Val;
176 if (ValueVT.isVector())
177 return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
178 CC);
180 assert(NumParts > 0 && "No parts to assemble!");
181 SDValue Val = Parts[0];
183 if (NumParts > 1) {
184 // Assemble the value from multiple parts.
185 if (ValueVT.isInteger()) {
186 unsigned PartBits = PartVT.getSizeInBits();
187 unsigned ValueBits = ValueVT.getSizeInBits();
189 // Assemble the power of 2 part.
190 unsigned RoundParts =
191 (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
192 unsigned RoundBits = PartBits * RoundParts;
193 EVT RoundVT = RoundBits == ValueBits ?
194 ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
195 SDValue Lo, Hi;
197 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
199 if (RoundParts > 2) {
200 Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
201 PartVT, HalfVT, V);
202 Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
203 RoundParts / 2, PartVT, HalfVT, V);
204 } else {
205 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
206 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
209 if (DAG.getDataLayout().isBigEndian())
210 std::swap(Lo, Hi);
212 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
214 if (RoundParts < NumParts) {
215 // Assemble the trailing non-power-of-2 part.
216 unsigned OddParts = NumParts - RoundParts;
217 EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
218 Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
219 OddVT, V, CC);
221 // Combine the round and odd parts.
222 Lo = Val;
223 if (DAG.getDataLayout().isBigEndian())
224 std::swap(Lo, Hi);
225 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
226 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
227 Hi =
228 DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
229 DAG.getConstant(Lo.getValueSizeInBits(), DL,
230 TLI.getPointerTy(DAG.getDataLayout())));
231 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
232 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
234 } else if (PartVT.isFloatingPoint()) {
235 // FP split into multiple FP parts (for ppcf128)
236 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
237 "Unexpected split");
238 SDValue Lo, Hi;
239 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
240 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
241 if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
242 std::swap(Lo, Hi);
243 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
244 } else {
245 // FP split into integer parts (soft fp)
246 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
247 !PartVT.isVector() && "Unexpected split");
248 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
249 Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
253 // There is now one part, held in Val. Correct it to match ValueVT.
254 // PartEVT is the type of the register class that holds the value.
255 // ValueVT is the type of the inline asm operation.
256 EVT PartEVT = Val.getValueType();
258 if (PartEVT == ValueVT)
259 return Val;
261 if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
262 ValueVT.bitsLT(PartEVT)) {
263 // For an FP value in an integer part, we need to truncate to the right
264 // width first.
265 PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
266 Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
269 // Handle types that have the same size.
270 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
271 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
273 // Handle types with different sizes.
274 if (PartEVT.isInteger() && ValueVT.isInteger()) {
275 if (ValueVT.bitsLT(PartEVT)) {
276 // For a truncate, see if we have any information to
277 // indicate whether the truncated bits will always be
278 // zero or sign-extended.
279 if (AssertOp.hasValue())
280 Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
281 DAG.getValueType(ValueVT));
282 return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
284 return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
287 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
288 // FP_ROUNDs are always exact here.
289 if (ValueVT.bitsLT(Val.getValueType()))
290 return DAG.getNode(
291 ISD::FP_ROUND, DL, ValueVT, Val,
292 DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
294 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
297 // Handle MMX to a narrower integer type by bitcasting MMX to integer and
298 // then truncating.
299 if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
300 ValueVT.bitsLT(PartEVT)) {
301 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
302 return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
305 report_fatal_error("Unknown mismatch in getCopyFromParts!");
308 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
309 const Twine &ErrMsg) {
310 const Instruction *I = dyn_cast_or_null<Instruction>(V);
311 if (!V)
312 return Ctx.emitError(ErrMsg);
314 const char *AsmError = ", possible invalid constraint for vector type";
315 if (const CallInst *CI = dyn_cast<CallInst>(I))
316 if (CI->isInlineAsm())
317 return Ctx.emitError(I, ErrMsg + AsmError);
319 return Ctx.emitError(I, ErrMsg);
322 /// getCopyFromPartsVector - Create a value that contains the specified legal
323 /// parts combined into the value they represent. If the parts combine to a
324 /// type larger than ValueVT then AssertOp can be used to specify whether the
325 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from
326 /// ValueVT (ISD::AssertSext).
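///
/// Illustrative example (not from the original source): a <4 x i32> value that
/// a target split across two <2 x i32> registers arrives with NumParts == 2;
/// the vector type breakdown then reports NumIntermediates == 2 with
/// IntermediateVT == <2 x i32>, and the parts are rebuilt with
///   Val = CONCAT_VECTORS <4 x i32>, Ops[0], Ops[1]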
327 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
328 const SDValue *Parts, unsigned NumParts,
329 MVT PartVT, EVT ValueVT, const Value *V,
330 Optional<CallingConv::ID> CallConv) {
331 assert(ValueVT.isVector() && "Not a vector value");
332 assert(NumParts > 0 && "No parts to assemble!");
333 const bool IsABIRegCopy = CallConv.hasValue();
335 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
336 SDValue Val = Parts[0];
338 // Handle a multi-element vector.
339 if (NumParts > 1) {
340 EVT IntermediateVT;
341 MVT RegisterVT;
342 unsigned NumIntermediates;
343 unsigned NumRegs;
345 if (IsABIRegCopy) {
346 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
347 *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
348 NumIntermediates, RegisterVT);
349 } else {
350 NumRegs =
351 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
352 NumIntermediates, RegisterVT);
355 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
356 NumParts = NumRegs; // Silence a compiler warning.
357 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
358 assert(RegisterVT.getSizeInBits() ==
359 Parts[0].getSimpleValueType().getSizeInBits() &&
360 "Part type sizes don't match!");
362 // Assemble the parts into intermediate operands.
363 SmallVector<SDValue, 8> Ops(NumIntermediates);
364 if (NumIntermediates == NumParts) {
365 // If the register was not expanded, truncate or copy the value,
366 // as appropriate.
367 for (unsigned i = 0; i != NumParts; ++i)
368 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
369 PartVT, IntermediateVT, V, CallConv);
370 } else if (NumParts > 0) {
371 // If the intermediate type was expanded, build the intermediate
372 // operands from the parts.
373 assert(NumParts % NumIntermediates == 0 &&
374 "Must expand into a divisible number of parts!");
375 unsigned Factor = NumParts / NumIntermediates;
376 for (unsigned i = 0; i != NumIntermediates; ++i)
377 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
378 PartVT, IntermediateVT, V, CallConv);
381 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
382 // intermediate operands.
383 EVT BuiltVectorTy =
384 IntermediateVT.isVector()
385 ? EVT::getVectorVT(
386 *DAG.getContext(), IntermediateVT.getScalarType(),
387 IntermediateVT.getVectorElementCount() * NumParts)
388 : EVT::getVectorVT(*DAG.getContext(),
389 IntermediateVT.getScalarType(),
390 NumIntermediates);
391 Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
392 : ISD::BUILD_VECTOR,
393 DL, BuiltVectorTy, Ops);
396 // There is now one part, held in Val. Correct it to match ValueVT.
397 EVT PartEVT = Val.getValueType();
399 if (PartEVT == ValueVT)
400 return Val;
402 if (PartEVT.isVector()) {
403 // Vector/Vector bitcast.
404 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
405 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
407 // If the element type of the source/dest vectors is the same, but the
408 // parts vector has more elements than the value vector, then we have a
409 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
410 // elements we want.
411 if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
412 assert((PartEVT.getVectorElementCount().getKnownMinValue() >
413 ValueVT.getVectorElementCount().getKnownMinValue()) &&
414 (PartEVT.getVectorElementCount().isScalable() ==
415 ValueVT.getVectorElementCount().isScalable()) &&
416 "Cannot narrow, it would be a lossy transformation");
417 PartEVT =
418 EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
419 ValueVT.getVectorElementCount());
420 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
421 DAG.getVectorIdxConstant(0, DL));
422 if (PartEVT == ValueVT)
423 return Val;
426 // Promoted vector extract
427 return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
430 // Trivial bitcast if the types are the same size and the destination
431 // vector type is legal.
432 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
433 TLI.isTypeLegal(ValueVT))
434 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
436 if (ValueVT.getVectorNumElements() != 1) {
437 // Certain ABIs require that vectors are passed as integers. For vectors
438 // that are the same size, this is an obvious bitcast.
439 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
440 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
441 } else if (ValueVT.bitsLT(PartEVT)) {
442 const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
443 EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
444 // Drop the extra bits.
445 Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
446 return DAG.getBitcast(ValueVT, Val);
449 diagnosePossiblyInvalidConstraint(
450 *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
451 return DAG.getUNDEF(ValueVT);
454 // Handle cases such as i8 -> <1 x i1>
455 EVT ValueSVT = ValueVT.getVectorElementType();
456 if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
457 if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
458 Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
459 else
460 Val = ValueVT.isFloatingPoint()
461 ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
462 : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
465 return DAG.getBuildVector(ValueVT, DL, Val);
468 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
469 SDValue Val, SDValue *Parts, unsigned NumParts,
470 MVT PartVT, const Value *V,
471 Optional<CallingConv::ID> CallConv);
473 /// getCopyToParts - Create a series of nodes that contain the specified value
474 /// split into legal parts. If the parts contain more bits than Val, then, for
475 /// integers, ExtendKind can be used to specify how to generate the extra bits.
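///
/// Illustrative example (not from the original source): splitting an i64 value
/// into two i32 parts on a 32-bit target bitcasts the value to i64 and then
/// bisects it with
///   Part1 = EXTRACT_ELEMENT i32, Val, 1   (upper half)
///   Part0 = EXTRACT_ELEMENT i32, Val, 0   (lower half)
/// with the resulting parts reversed afterwards on big-endian layouts.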
476 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
477 SDValue *Parts, unsigned NumParts, MVT PartVT,
478 const Value *V,
479 Optional<CallingConv::ID> CallConv = None,
480 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
481 // Let the target split the parts if it wants to
482 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
483 if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
484 CallConv))
485 return;
486 EVT ValueVT = Val.getValueType();
488 // Handle the vector case separately.
489 if (ValueVT.isVector())
490 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
491 CallConv);
493 unsigned PartBits = PartVT.getSizeInBits();
494 unsigned OrigNumParts = NumParts;
495 assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
496 "Copying to an illegal type!");
498 if (NumParts == 0)
499 return;
501 assert(!ValueVT.isVector() && "Vector case handled elsewhere");
502 EVT PartEVT = PartVT;
503 if (PartEVT == ValueVT) {
504 assert(NumParts == 1 && "No-op copy with multiple parts!");
505 Parts[0] = Val;
506 return;
509 if (NumParts * PartBits > ValueVT.getSizeInBits()) {
510 // If the parts cover more bits than the value has, promote the value.
511 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
512 assert(NumParts == 1 && "Do not know what to promote to!");
513 Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
514 } else {
515 if (ValueVT.isFloatingPoint()) {
516 // FP values need to be bitcast, then extended if they are being put
517 // into a larger container.
518 ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
519 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
521 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
522 ValueVT.isInteger() &&
523 "Unknown mismatch!");
524 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
525 Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
526 if (PartVT == MVT::x86mmx)
527 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
529 } else if (PartBits == ValueVT.getSizeInBits()) {
530 // Different types of the same size.
531 assert(NumParts == 1 && PartEVT != ValueVT);
532 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
533 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
534 // If the parts cover fewer bits than the value has, truncate the value.
535 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
536 ValueVT.isInteger() &&
537 "Unknown mismatch!");
538 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
539 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
540 if (PartVT == MVT::x86mmx)
541 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
544 // The value may have changed - recompute ValueVT.
545 ValueVT = Val.getValueType();
546 assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
547 "Failed to tile the value with PartVT!");
549 if (NumParts == 1) {
550 if (PartEVT != ValueVT) {
551 diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
552 "scalar-to-vector conversion failed");
553 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
556 Parts[0] = Val;
557 return;
560 // Expand the value into multiple parts.
561 if (NumParts & (NumParts - 1)) {
562 // The number of parts is not a power of 2. Split off and copy the tail.
563 assert(PartVT.isInteger() && ValueVT.isInteger() &&
564 "Do not know what to expand to!");
565 unsigned RoundParts = 1 << Log2_32(NumParts);
566 unsigned RoundBits = RoundParts * PartBits;
567 unsigned OddParts = NumParts - RoundParts;
568 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
569 DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
571 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
572 CallConv);
574 if (DAG.getDataLayout().isBigEndian())
575 // The odd parts were reversed by getCopyToParts - unreverse them.
576 std::reverse(Parts + RoundParts, Parts + NumParts);
578 NumParts = RoundParts;
579 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
580 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
583 // The number of parts is a power of 2. Repeatedly bisect the value using
584 // EXTRACT_ELEMENT.
585 Parts[0] = DAG.getNode(ISD::BITCAST, DL,
586 EVT::getIntegerVT(*DAG.getContext(),
587 ValueVT.getSizeInBits()),
588 Val);
590 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
591 for (unsigned i = 0; i < NumParts; i += StepSize) {
592 unsigned ThisBits = StepSize * PartBits / 2;
593 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
594 SDValue &Part0 = Parts[i];
595 SDValue &Part1 = Parts[i+StepSize/2];
597 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
598 ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
599 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
600 ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
602 if (ThisBits == PartBits && ThisVT != PartVT) {
603 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
604 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
609 if (DAG.getDataLayout().isBigEndian())
610 std::reverse(Parts, Parts + OrigNumParts);
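/// Attempt to widen the vector value \p Val to the vector part type \p PartVT.
/// Returns an empty SDValue unless PartVT is a wider vector with the same
/// element type and fixed/scalable property; scalable vectors are widened with
/// INSERT_SUBVECTOR into an undef vector, and fixed-width vectors by appending
/// undef elements to the extracted elements.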
613 static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
614 const SDLoc &DL, EVT PartVT) {
615 if (!PartVT.isVector())
616 return SDValue();
618 EVT ValueVT = Val.getValueType();
619 ElementCount PartNumElts = PartVT.getVectorElementCount();
620 ElementCount ValueNumElts = ValueVT.getVectorElementCount();
622 // We only support widening vectors with equivalent element types and
623 // fixed/scalable properties. If a target needs to widen a fixed-length type
624 // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
625 if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
626 PartNumElts.isScalable() != ValueNumElts.isScalable() ||
627 PartVT.getVectorElementType() != ValueVT.getVectorElementType())
628 return SDValue();
630 // Widening a scalable vector to another scalable vector is done by inserting
631 // the vector into a larger undef one.
632 if (PartNumElts.isScalable())
633 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
634 Val, DAG.getVectorIdxConstant(0, DL));
636 EVT ElementVT = PartVT.getVectorElementType();
637 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
638 // undef elements.
639 SmallVector<SDValue, 16> Ops;
640 DAG.ExtractVectorElements(Val, Ops);
641 SDValue EltUndef = DAG.getUNDEF(ElementVT);
642 Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
644 // FIXME: Use CONCAT for 2x -> 4x.
645 return DAG.getBuildVector(PartVT, DL, Ops);
648 /// getCopyToPartsVector - Create a series of nodes that contain the specified
649 /// value split into legal parts.
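///
/// Illustrative example (not from the original source): a <2 x float> value
/// destined for a single <4 x float> register part is first widened by
/// widenVectorToPartType, which appends two undef elements, and the widened
/// vector is then stored as the one and only part.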
650 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
651 SDValue Val, SDValue *Parts, unsigned NumParts,
652 MVT PartVT, const Value *V,
653 Optional<CallingConv::ID> CallConv) {
654 EVT ValueVT = Val.getValueType();
655 assert(ValueVT.isVector() && "Not a vector");
656 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
657 const bool IsABIRegCopy = CallConv.hasValue();
659 if (NumParts == 1) {
660 EVT PartEVT = PartVT;
661 if (PartEVT == ValueVT) {
662 // Nothing to do.
663 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
664 // Bitconvert vector->vector case.
665 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
666 } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
667 Val = Widened;
668 } else if (PartVT.isVector() &&
669 PartEVT.getVectorElementType().bitsGE(
670 ValueVT.getVectorElementType()) &&
671 PartEVT.getVectorElementCount() ==
672 ValueVT.getVectorElementCount()) {
674 // Promoted vector extract
675 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
676 } else if (PartEVT.isVector() &&
677 PartEVT.getVectorElementType() !=
678 ValueVT.getVectorElementType() &&
679 TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
680 TargetLowering::TypeWidenVector) {
681 // Combination of widening and promotion.
682 EVT WidenVT =
683 EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
684 PartVT.getVectorElementCount());
685 SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
686 Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
687 } else {
688 if (ValueVT.getVectorElementCount().isScalar()) {
689 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
690 DAG.getVectorIdxConstant(0, DL));
691 } else {
692 uint64_t ValueSize = ValueVT.getFixedSizeInBits();
693 assert(PartVT.getFixedSizeInBits() > ValueSize &&
694 "lossy conversion of vector to scalar type");
695 EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
696 Val = DAG.getBitcast(IntermediateType, Val);
697 Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
701 assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
702 Parts[0] = Val;
703 return;
706 // Handle a multi-element vector.
707 EVT IntermediateVT;
708 MVT RegisterVT;
709 unsigned NumIntermediates;
710 unsigned NumRegs;
711 if (IsABIRegCopy) {
712 NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
713 *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
714 NumIntermediates, RegisterVT);
715 } else {
716 NumRegs =
717 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
718 NumIntermediates, RegisterVT);
721 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
722 NumParts = NumRegs; // Silence a compiler warning.
723 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
725 assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
726 "Mixing scalable and fixed vectors when copying in parts");
728 Optional<ElementCount> DestEltCnt;
730 if (IntermediateVT.isVector())
731 DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
732 else
733 DestEltCnt = ElementCount::getFixed(NumIntermediates);
735 EVT BuiltVectorTy = EVT::getVectorVT(
736 *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
738 if (ValueVT == BuiltVectorTy) {
739 // Nothing to do.
740 } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
741 // Bitconvert vector->vector case.
742 Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
743 } else {
744 if (BuiltVectorTy.getVectorElementType().bitsGT(
745 ValueVT.getVectorElementType())) {
746 // Integer promotion.
747 ValueVT = EVT::getVectorVT(*DAG.getContext(),
748 BuiltVectorTy.getVectorElementType(),
749 ValueVT.getVectorElementCount());
750 Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
753 if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
754 Val = Widened;
758 assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
760 // Split the vector into intermediate operands.
761 SmallVector<SDValue, 8> Ops(NumIntermediates);
762 for (unsigned i = 0; i != NumIntermediates; ++i) {
763 if (IntermediateVT.isVector()) {
764 // This does something sensible for scalable vectors - see the
765 // definition of EXTRACT_SUBVECTOR for further details.
766 unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
767 Ops[i] =
768 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
769 DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
770 } else {
771 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
772 DAG.getVectorIdxConstant(i, DL));
776 // Split the intermediate operands into legal parts.
777 if (NumParts == NumIntermediates) {
778 // If the register was not expanded, promote or copy the value,
779 // as appropriate.
780 for (unsigned i = 0; i != NumParts; ++i)
781 getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
782 } else if (NumParts > 0) {
783 // If the intermediate type was expanded, split each intermediate value into
784 // legal parts.
785 assert(NumIntermediates != 0 && "division by zero");
786 assert(NumParts % NumIntermediates == 0 &&
787 "Must expand into a divisible number of parts!");
788 unsigned Factor = NumParts / NumIntermediates;
789 for (unsigned i = 0; i != NumIntermediates; ++i)
790 getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
791 CallConv);
795 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
796 EVT valuevt, Optional<CallingConv::ID> CC)
797 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
798 RegCount(1, regs.size()), CallConv(CC) {}
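// This constructor computes the full register breakdown for a value of type
// Ty starting at virtual register Reg. Illustrative sketch (assuming a typical
// 64-bit target): an i128 value usually yields ValueVTs = {i128},
// RegVTs = {i64}, RegCount = {2} and Regs = {Reg, Reg + 1}.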
800 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
801 const DataLayout &DL, unsigned Reg, Type *Ty,
802 Optional<CallingConv::ID> CC) {
803 ComputeValueVTs(TLI, DL, Ty, ValueVTs);
805 CallConv = CC;
807 for (EVT ValueVT : ValueVTs) {
808 unsigned NumRegs =
809 isABIMangled()
810 ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
811 : TLI.getNumRegisters(Context, ValueVT);
812 MVT RegisterVT =
813 isABIMangled()
814 ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
815 : TLI.getRegisterType(Context, ValueVT);
816 for (unsigned i = 0; i != NumRegs; ++i)
817 Regs.push_back(Reg + i);
818 RegVTs.push_back(RegisterVT);
819 RegCount.push_back(NumRegs);
820 Reg += NumRegs;
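/// Emit a series of CopyFromReg nodes that copy this value out of its
/// registers and reassemble it as a value of the original ValueVTs. Chain (and
/// Flag, when non-null) are used as the incoming dependencies and are updated
/// for the outputs; known zero/sign bits recorded for virtual registers are
/// re-expressed as AssertZext/AssertSext nodes on the copied parts.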
824 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
825 FunctionLoweringInfo &FuncInfo,
826 const SDLoc &dl, SDValue &Chain,
827 SDValue *Flag, const Value *V) const {
828 // A Value with type {} or [0 x %t] needs no registers.
829 if (ValueVTs.empty())
830 return SDValue();
832 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
834 // Assemble the legal parts into the final values.
835 SmallVector<SDValue, 4> Values(ValueVTs.size());
836 SmallVector<SDValue, 8> Parts;
837 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
838 // Copy the legal parts from the registers.
839 EVT ValueVT = ValueVTs[Value];
840 unsigned NumRegs = RegCount[Value];
841 MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
842 *DAG.getContext(),
843 CallConv.getValue(), RegVTs[Value])
844 : RegVTs[Value];
846 Parts.resize(NumRegs);
847 for (unsigned i = 0; i != NumRegs; ++i) {
848 SDValue P;
849 if (!Flag) {
850 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
851 } else {
852 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
853 *Flag = P.getValue(2);
856 Chain = P.getValue(1);
857 Parts[i] = P;
859 // If the source register was virtual and if we know something about it,
860 // add an assert node.
861 if (!Register::isVirtualRegister(Regs[Part + i]) ||
862 !RegisterVT.isInteger())
863 continue;
865 const FunctionLoweringInfo::LiveOutInfo *LOI =
866 FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
867 if (!LOI)
868 continue;
870 unsigned RegSize = RegisterVT.getScalarSizeInBits();
871 unsigned NumSignBits = LOI->NumSignBits;
872 unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
874 if (NumZeroBits == RegSize) {
875 // The current value is a zero.
876 // Explicitly express that as it would be easier for
877 // optimizations to kick in.
878 Parts[i] = DAG.getConstant(0, dl, RegisterVT);
879 continue;
882 // FIXME: We capture more information than the dag can represent. For
883 // now, just use the tightest assertzext/assertsext possible.
884 bool isSExt;
885 EVT FromVT(MVT::Other);
886 if (NumZeroBits) {
887 FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
888 isSExt = false;
889 } else if (NumSignBits > 1) {
890 FromVT =
891 EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
892 isSExt = true;
893 } else {
894 continue;
896 // Add an assertion node.
897 assert(FromVT != MVT::Other);
898 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
899 RegisterVT, P, DAG.getValueType(FromVT));
902 Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
903 RegisterVT, ValueVT, V, CallConv);
904 Part += NumRegs;
905 Parts.clear();
908 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
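/// Emit a series of CopyToReg nodes that copy the specified value into the
/// registers described by this RegsForValue. Chain (and Flag, when non-null)
/// are used as the incoming dependencies and updated for the outputs; when a
/// flag is requested, the CopyToReg chain results are deliberately not merged
/// into a TokenFactor (see the comment at the end of the function).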
911 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
912 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
913 const Value *V,
914 ISD::NodeType PreferredExtendType) const {
915 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
916 ISD::NodeType ExtendKind = PreferredExtendType;
918 // Get the list of the value's legal parts.
919 unsigned NumRegs = Regs.size();
920 SmallVector<SDValue, 8> Parts(NumRegs);
921 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
922 unsigned NumParts = RegCount[Value];
924 MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
925 *DAG.getContext(),
926 CallConv.getValue(), RegVTs[Value])
927 : RegVTs[Value];
929 if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
930 ExtendKind = ISD::ZERO_EXTEND;
932 getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
933 NumParts, RegisterVT, V, CallConv, ExtendKind);
934 Part += NumParts;
937 // Copy the parts into the registers.
938 SmallVector<SDValue, 8> Chains(NumRegs);
939 for (unsigned i = 0; i != NumRegs; ++i) {
940 SDValue Part;
941 if (!Flag) {
942 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
943 } else {
944 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
945 *Flag = Part.getValue(1);
948 Chains[i] = Part.getValue(0);
951 if (NumRegs == 1 || Flag)
952 // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
953 // flagged to it. That is the CopyToReg nodes and the user are considered
954 // a single scheduling unit. If we create a TokenFactor and return it as
955 // chain, then the TokenFactor is both a predecessor (operand) of the
956 // user as well as a successor (the TF operands are flagged to the user).
957 // c1, f1 = CopyToReg
958 // c2, f2 = CopyToReg
959 // c3 = TokenFactor c1, c2
960 // ...
961 // = op c3, ..., f2
962 Chain = Chains[NumRegs-1];
963 else
964 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
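/// Add this value's registers to an inline-asm node's operand list: first a
/// flag-word target constant describing the operand kind and register count
/// (plus matching-operand or register-class information where applicable),
/// followed by one register operand per register.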
967 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
968 unsigned MatchingIdx, const SDLoc &dl,
969 SelectionDAG &DAG,
970 std::vector<SDValue> &Ops) const {
971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
973 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
974 if (HasMatching)
975 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
976 else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
977 // Put the register class of the virtual registers in the flag word. That
978 // way, later passes can recompute register class constraints for inline
979 // assembly as well as normal instructions.
980 // Don't do this for tied operands that can use the regclass information
981 // from the def.
982 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
983 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
984 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
987 SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
988 Ops.push_back(Res);
990 if (Code == InlineAsm::Kind_Clobber) {
991 // Clobbers should always have a 1:1 mapping with registers, and may
992 // reference registers that have illegal (e.g. vector) types. Hence, we
993 // shouldn't try to apply any sort of splitting logic to them.
994 assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
995 "No 1:1 mapping from clobbers to regs?");
996 Register SP = TLI.getStackPointerRegisterToSaveRestore();
997 (void)SP;
998 for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
999 Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
1000 assert(
1001 (Regs[I] != SP ||
1002 DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
1003 "If we clobbered the stack pointer, MFI should know about it.");
1005 return;
1008 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
1009 MVT RegisterVT = RegVTs[Value];
1010 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
1011 RegisterVT);
1012 for (unsigned i = 0; i != NumRegs; ++i) {
1013 assert(Reg < Regs.size() && "Mismatch in # registers expected");
1014 unsigned TheReg = Regs[Reg++];
1015 Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
1020 SmallVector<std::pair<unsigned, TypeSize>, 4>
1021 RegsForValue::getRegsAndSizes() const {
1022 SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
1023 unsigned I = 0;
1024 for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
1025 unsigned RegCount = std::get<0>(CountAndVT);
1026 MVT RegisterVT = std::get<1>(CountAndVT);
1027 TypeSize RegisterSize = RegisterVT.getSizeInBits();
1028 for (unsigned E = I + RegCount; I != E; ++I)
1029 OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
1031 return OutVec;
1034 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
1035 const TargetLibraryInfo *li) {
1036 AA = aa;
1037 GFI = gfi;
1038 LibInfo = li;
1039 Context = DAG.getContext();
1040 LPadToCallSiteMap.clear();
1041 SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
1044 void SelectionDAGBuilder::clear() {
1045 NodeMap.clear();
1046 UnusedArgNodeMap.clear();
1047 PendingLoads.clear();
1048 PendingExports.clear();
1049 PendingConstrainedFP.clear();
1050 PendingConstrainedFPStrict.clear();
1051 CurInst = nullptr;
1052 HasTailCall = false;
1053 SDNodeOrder = LowestSDNodeOrder;
1054 StatepointLowering.clear();
1057 void SelectionDAGBuilder::clearDanglingDebugInfo() {
1058 DanglingDebugInfoMap.clear();
1061 // Update DAG root to include dependencies on Pending chains.
1062 SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
1063 SDValue Root = DAG.getRoot();
1065 if (Pending.empty())
1066 return Root;
1068 // Add current root to PendingChains, unless we already indirectly
1069 // depend on it.
1070 if (Root.getOpcode() != ISD::EntryToken) {
1071 unsigned i = 0, e = Pending.size();
1072 for (; i != e; ++i) {
1073 assert(Pending[i].getNode()->getNumOperands() > 1);
1074 if (Pending[i].getNode()->getOperand(0) == Root)
1075 break; // Don't add the root if we already indirectly depend on it.
1078 if (i == e)
1079 Pending.push_back(Root);
1082 if (Pending.size() == 1)
1083 Root = Pending[0];
1084 else
1085 Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
1087 DAG.setRoot(Root);
1088 Pending.clear();
1089 return Root;
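// The three root accessors below differ only in which pending chains they fold
// into the DAG root: getMemoryRoot() flushes PendingLoads, getRoot()
// additionally folds in the pending constrained-FP chains, and
// getControlRoot() flushes PendingExports together with the fpexcept.strict
// constrained chains.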
1092 SDValue SelectionDAGBuilder::getMemoryRoot() {
1093 return updateRoot(PendingLoads);
1096 SDValue SelectionDAGBuilder::getRoot() {
1097 // Chain up all pending constrained intrinsics together with all
1098 // pending loads, by simply appending them to PendingLoads and
1099 // then calling getMemoryRoot().
1100 PendingLoads.reserve(PendingLoads.size() +
1101 PendingConstrainedFP.size() +
1102 PendingConstrainedFPStrict.size());
1103 PendingLoads.append(PendingConstrainedFP.begin(),
1104 PendingConstrainedFP.end());
1105 PendingLoads.append(PendingConstrainedFPStrict.begin(),
1106 PendingConstrainedFPStrict.end());
1107 PendingConstrainedFP.clear();
1108 PendingConstrainedFPStrict.clear();
1109 return getMemoryRoot();
1112 SDValue SelectionDAGBuilder::getControlRoot() {
1113 // We need to emit pending fpexcept.strict constrained intrinsics,
1114 // so append them to the PendingExports list.
1115 PendingExports.append(PendingConstrainedFPStrict.begin(),
1116 PendingConstrainedFPStrict.end());
1117 PendingConstrainedFPStrict.clear();
1118 return updateRoot(PendingExports);
1121 void SelectionDAGBuilder::visit(const Instruction &I) {
1122 // Set up outgoing PHI node register values before emitting the terminator.
1123 if (I.isTerminator()) {
1124 HandlePHINodesInSuccessorBlocks(I.getParent());
1127 // Increase the SDNodeOrder if dealing with a non-debug instruction.
1128 if (!isa<DbgInfoIntrinsic>(I))
1129 ++SDNodeOrder;
1131 CurInst = &I;
1133 visit(I.getOpcode(), I);
1135 if (!I.isTerminator() && !HasTailCall &&
1136 !isa<GCStatepointInst>(I)) // statepoints handle their exports internally
1137 CopyToExportRegsIfNeeded(&I);
1139 CurInst = nullptr;
1142 void SelectionDAGBuilder::visitPHI(const PHINode &) {
1143 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1146 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1147 // Note: this doesn't use InstVisitor, because it has to work with
1148 // ConstantExpr's in addition to instructions.
1149 switch (Opcode) {
1150 default: llvm_unreachable("Unknown instruction type encountered!");
1151 // Build the switch statement using the Instruction.def file.
1152 #define HANDLE_INST(NUM, OPCODE, CLASS) \
1153 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1154 #include "llvm/IR/Instruction.def"
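// Illustrative expansion of one generated case (e.g. the Add entry in
// Instruction.def):
//   case Instruction::Add: visitAdd((const BinaryOperator&)I); break;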
1158 void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
1159 DebugLoc DL, unsigned Order) {
1160 // We treat variadic dbg_values differently at this stage.
1161 if (DI->hasArgList()) {
1162 // For variadic dbg_values we will now insert an undef.
1163 // FIXME: We can potentially recover these!
1164 SmallVector<SDDbgOperand, 2> Locs;
1165 for (const Value *V : DI->getValues()) {
1166 auto Undef = UndefValue::get(V->getType());
1167 Locs.push_back(SDDbgOperand::fromConst(Undef));
1169 SDDbgValue *SDV = DAG.getDbgValueList(
1170 DI->getVariable(), DI->getExpression(), Locs, {},
1171 /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
1172 DAG.AddDbgValue(SDV, /*isParameter=*/false);
1173 } else {
1174 // TODO: Dangling debug info will eventually either be resolved or produce
1175 // an Undef DBG_VALUE. However in the resolution case, a gap may appear
1176 // between the original dbg.value location and its resolved DBG_VALUE,
1177 // which we should ideally fill with an extra Undef DBG_VALUE.
1178 assert(DI->getNumVariableLocationOps() == 1 &&
1179 "DbgValueInst without an ArgList should have a single location "
1180 "operand.");
1181 DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
1185 void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
1186 const DIExpression *Expr) {
1187 auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
1188 const DbgValueInst *DI = DDI.getDI();
1189 DIVariable *DanglingVariable = DI->getVariable();
1190 DIExpression *DanglingExpr = DI->getExpression();
1191 if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
1192 LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
1193 return true;
1195 return false;
1198 for (auto &DDIMI : DanglingDebugInfoMap) {
1199 DanglingDebugInfoVector &DDIV = DDIMI.second;
1201 // If debug info is to be dropped, run it through final checks to see
1202 // whether it can be salvaged.
1203 for (auto &DDI : DDIV)
1204 if (isMatchingDbgValue(DDI))
1205 salvageUnresolvedDbgValue(DDI);
1207 erase_if(DDIV, isMatchingDbgValue);
1211 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1212 // generate the debug data structures now that we've seen its definition.
1213 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
1214 SDValue Val) {
1215 auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
1216 if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
1217 return;
1219 DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
1220 for (auto &DDI : DDIV) {
1221 const DbgValueInst *DI = DDI.getDI();
1222 assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
1223 assert(DI && "Ill-formed DanglingDebugInfo");
1224 DebugLoc dl = DDI.getdl();
1225 unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
1226 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1227 DILocalVariable *Variable = DI->getVariable();
1228 DIExpression *Expr = DI->getExpression();
1229 assert(Variable->isValidLocationForIntrinsic(dl) &&
1230 "Expected inlined-at fields to agree");
1231 SDDbgValue *SDV;
1232 if (Val.getNode()) {
1233 // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
1234 // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
1235 // we couldn't resolve it directly when examining the DbgValue intrinsic
1236 // in the first place we should not be more successful here). Unless we
1237 have some test case that proves this to be correct, we should avoid
1238 // calling EmitFuncArgumentDbgValue here.
1239 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
1240 LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
1241 << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
1242 LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
1243 // Increase the SDNodeOrder for the DbgValue here to make sure it is
1244 // inserted after the definition of Val when emitting the instructions
1245 // after ISel. An alternative could be to teach
1246 // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
1247 LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
1248 << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
1249 << ValSDNodeOrder << "\n");
1250 SDV = getDbgValue(Val, Variable, Expr, dl,
1251 std::max(DbgSDNodeOrder, ValSDNodeOrder));
1252 DAG.AddDbgValue(SDV, false);
1253 } else
1254 LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
1255 << "in EmitFuncArgumentDbgValue\n");
1256 } else {
1257 LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1258 auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
1259 auto SDV =
1260 DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
1261 DAG.AddDbgValue(SDV, false);
1264 DDIV.clear();
1267 void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
1268 // TODO: For the variadic implementation, instead of only checking the fail
1269 // state of `handleDebugValue`, we need to know specifically which values were
1270 // invalid, so that we attempt to salvage only those values when processing
1271 // a DIArgList.
1272 assert(!DDI.getDI()->hasArgList() &&
1273 "Not implemented for variadic dbg_values");
1274 Value *V = DDI.getDI()->getValue(0);
1275 DILocalVariable *Var = DDI.getDI()->getVariable();
1276 DIExpression *Expr = DDI.getDI()->getExpression();
1277 DebugLoc DL = DDI.getdl();
1278 DebugLoc InstDL = DDI.getDI()->getDebugLoc();
1279 unsigned SDOrder = DDI.getSDNodeOrder();
1280 // Currently we consider only dbg.value intrinsics -- we tell the salvager
1281 // that DW_OP_stack_value is desired.
1282 assert(isa<DbgValueInst>(DDI.getDI()));
1283 bool StackValue = true;
1285 // Can this Value be encoded without any further work?
1286 if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
1287 return;
1289 // Attempt to salvage back through as many instructions as possible. Bail if
1290 // a non-instruction is seen, such as a constant expression or global
1291 // variable. FIXME: Further work could recover those too.
1292 while (isa<Instruction>(V)) {
1293 Instruction &VAsInst = *cast<Instruction>(V);
1294 // Temporary "0", awaiting real implementation.
1295 SmallVector<uint64_t, 16> Ops;
1296 SmallVector<Value *, 4> AdditionalValues;
1297 V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
1298 AdditionalValues);
1299 // If we cannot salvage any further, and haven't yet found a suitable debug
1300 // expression, bail out.
1301 if (!V)
1302 break;
1304 // TODO: If AdditionalValues isn't empty, then the salvage can only be
1305 // represented with a DBG_VALUE_LIST, so we give up. When we have support
1306 // here for variadic dbg_values, remove that condition.
1307 if (!AdditionalValues.empty())
1308 break;
1310 // New value and expr now represent this debuginfo.
1311 Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
1313 // Some kind of simplification occurred: check whether the operand of the
1314 // salvaged debug expression can be encoded in this DAG.
1315 if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
1316 /*IsVariadic=*/false)) {
1317 LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
1318 << DDI.getDI() << "\nBy stripping back to:\n " << V);
1319 return;
1323 // This was the final opportunity to salvage this debug information, and it
1324 // couldn't be done. Place an undef DBG_VALUE at this location to terminate
1325 // any earlier variable location.
1326 auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
1327 auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
1328 DAG.AddDbgValue(SDV, false);
1330 LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
1331 << "\n");
1332 LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
1333 << "\n");
1336 bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
1337 DILocalVariable *Var,
1338 DIExpression *Expr, DebugLoc dl,
1339 DebugLoc InstDL, unsigned Order,
1340 bool IsVariadic) {
1341 if (Values.empty())
1342 return true;
1343 SmallVector<SDDbgOperand> LocationOps;
1344 SmallVector<SDNode *> Dependencies;
1345 for (const Value *V : Values) {
1346 // Constant value.
1347 if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
1348 isa<ConstantPointerNull>(V)) {
1349 LocationOps.emplace_back(SDDbgOperand::fromConst(V));
1350 continue;
1353 // If the Value is a frame index, we can create a FrameIndex debug value
1354 // without relying on the DAG at all.
1355 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1356 auto SI = FuncInfo.StaticAllocaMap.find(AI);
1357 if (SI != FuncInfo.StaticAllocaMap.end()) {
1358 LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
1359 continue;
1363 // Do not use getValue() in here; we don't want to generate code at
1364 // this point if it hasn't been done yet.
1365 SDValue N = NodeMap[V];
1366 if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
1367 N = UnusedArgNodeMap[V];
1368 if (N.getNode()) {
1369 // Only emit func arg dbg value for non-variadic dbg.values for now.
1370 if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
1371 return true;
1372 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
1373 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
1374 // describe stack slot locations.
1376 // Consider "int x = 0; int *px = &x;". There are two kinds of
1377 // interesting debug values here after optimization:
1379 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
1380 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
1382 // Both describe the direct values of their associated variables.
1383 Dependencies.push_back(N.getNode());
1384 LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
1385 continue;
1387 LocationOps.emplace_back(
1388 SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
1389 continue;
1392 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1393 // Special rules apply for the first dbg.values of parameter variables in a
1394 // function. Identify them by the fact they reference Argument Values, that
1395 // they're parameters, and they are parameters of the current function. We
1396 // need to let them dangle until they get an SDNode.
1397 bool IsParamOfFunc =
1398 isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
1399 if (IsParamOfFunc)
1400 return false;
1402 // The value is not used in this block yet (or it would have an SDNode).
1403 // We still want the value to appear for the user if possible -- if it has
1404 // an associated VReg, we can refer to that instead.
1405 auto VMI = FuncInfo.ValueMap.find(V);
1406 if (VMI != FuncInfo.ValueMap.end()) {
1407 unsigned Reg = VMI->second;
1408 // If this is a PHI node, it may be split up into several MI PHI nodes
1409 // (in FunctionLoweringInfo::set).
1410 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
1411 V->getType(), None);
1412 if (RFV.occupiesMultipleRegs()) {
1413 // FIXME: We could potentially support variadic dbg_values here.
1414 if (IsVariadic)
1415 return false;
1416 unsigned Offset = 0;
1417 unsigned BitsToDescribe = 0;
1418 if (auto VarSize = Var->getSizeInBits())
1419 BitsToDescribe = *VarSize;
1420 if (auto Fragment = Expr->getFragmentInfo())
1421 BitsToDescribe = Fragment->SizeInBits;
1422 for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
1423 // Bail out if all bits are described already.
1424 if (Offset >= BitsToDescribe)
1425 break;
1426 // TODO: handle scalable vectors.
1427 unsigned RegisterSize = RegAndSize.second;
1428 unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
1429 ? BitsToDescribe - Offset
1430 : RegisterSize;
1431 auto FragmentExpr = DIExpression::createFragmentExpression(
1432 Expr, Offset, FragmentSize);
1433 if (!FragmentExpr)
1434 continue;
1435 SDDbgValue *SDV = DAG.getVRegDbgValue(
1436 Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
1437 DAG.AddDbgValue(SDV, false);
1438 Offset += RegisterSize;
1440 return true;
1442 // We can use simple vreg locations for variadic dbg_values as well.
1443 LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
1444 continue;
1446 // We failed to create a SDDbgOperand for V.
1447 return false;
1450 // We have created a SDDbgOperand for each Value in Values.
1451 // Should use Order instead of SDNodeOrder?
1452 assert(!LocationOps.empty());
1453 SDDbgValue *SDV =
1454 DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
1455 /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
1456 DAG.AddDbgValue(SDV, /*isParameter=*/false);
1457 return true;
1460 void SelectionDAGBuilder::resolveOrClearDbgInfo() {
1461 // Try to fixup any remaining dangling debug info -- and drop it if we can't.
1462 for (auto &Pair : DanglingDebugInfoMap)
1463 for (auto &DDI : Pair.second)
1464 salvageUnresolvedDbgValue(DDI);
1465 clearDanglingDebugInfo();
1468 /// getCopyFromRegs - If there was a virtual register allocated for the value V
1469 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
1470 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
1471 DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
1472 SDValue Result;
1474 if (It != FuncInfo.ValueMap.end()) {
1475 Register InReg = It->second;
1477 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1478 DAG.getDataLayout(), InReg, Ty,
1479 None); // This is not an ABI copy.
1480 SDValue Chain = DAG.getEntryNode();
1481 Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
1483 resolveDanglingDebugInfo(V, Result);
1486 return Result;
1489 /// getValue - Return an SDValue for the given Value.
1490 SDValue SelectionDAGBuilder::getValue(const Value *V) {
1491 // If we already have an SDValue for this value, use it. It's important
1492 // to do this first, so that we don't create a CopyFromReg if we already
1493 // have a regular SDValue.
1494 SDValue &N = NodeMap[V];
1495 if (N.getNode()) return N;
1497 // If there's a virtual register allocated and initialized for this
1498 // value, use it.
1499 if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
1500 return copyFromReg;
1502 // Otherwise create a new SDValue and remember it.
1503 SDValue Val = getValueImpl(V);
1504 NodeMap[V] = Val;
1505 resolveDanglingDebugInfo(V, Val);
1506 return Val;
1509 /// getNonRegisterValue - Return an SDValue for the given Value, but
1510 /// don't look in FuncInfo.ValueMap for a virtual register.
1511 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1512 // If we already have an SDValue for this value, use it.
1513 SDValue &N = NodeMap[V];
1514 if (N.getNode()) {
1515 if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1516 // Remove the debug location from the node as the node is about to be used
1517 // in a location which may differ from the original debug location. This
1518 // is relevant to Constant and ConstantFP nodes because they can appear
1519 // as constant expressions inside PHI nodes.
1520 N->setDebugLoc(DebugLoc());
1522 return N;
1525 // Otherwise create a new SDValue and remember it.
1526 SDValue Val = getValueImpl(V);
1527 NodeMap[V] = Val;
1528 resolveDanglingDebugInfo(V, Val);
1529 return Val;
1532 /// getValueImpl - Helper function for getValue and getNonRegisterValue.
1533 /// Create an SDValue for the given value.
1534 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1535 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1537 if (const Constant *C = dyn_cast<Constant>(V)) {
1538 EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1540 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1541 return DAG.getConstant(*CI, getCurSDLoc(), VT);
1543 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1544 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1546 if (isa<ConstantPointerNull>(C)) {
1547 unsigned AS = V->getType()->getPointerAddressSpace();
1548 return DAG.getConstant(0, getCurSDLoc(),
1549 TLI.getPointerTy(DAG.getDataLayout(), AS));
1552 if (match(C, m_VScale(DAG.getDataLayout())))
1553 return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
1555 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1556 return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1558 if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1559 return DAG.getUNDEF(VT);
1561 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1562 visit(CE->getOpcode(), *CE);
1563 SDValue N1 = NodeMap[V];
1564 assert(N1.getNode() && "visit didn't populate the NodeMap!");
1565 return N1;
1568 if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
1569 SmallVector<SDValue, 4> Constants;
1570 for (const Use &U : C->operands()) {
1571 SDNode *Val = getValue(U).getNode();
1572 // If the operand is an empty aggregate, there are no values.
1573 if (!Val) continue;
1574 // Add each leaf value from the operand to the Constants list
1575 // to form a flattened list of all the values.
1576 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1577 Constants.push_back(SDValue(Val, i));
1580 return DAG.getMergeValues(Constants, getCurSDLoc());
1583 if (const ConstantDataSequential *CDS =
1584 dyn_cast<ConstantDataSequential>(C)) {
1585 SmallVector<SDValue, 4> Ops;
1586 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1587 SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1588 // Add each leaf value from the operand to the Constants list
1589 // to form a flattened list of all the values.
1590 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1591 Ops.push_back(SDValue(Val, i));
1594 if (isa<ArrayType>(CDS->getType()))
1595 return DAG.getMergeValues(Ops, getCurSDLoc());
1596 return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1599 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1600 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1601 "Unknown struct or array constant!");
1603 SmallVector<EVT, 4> ValueVTs;
1604 ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1605 unsigned NumElts = ValueVTs.size();
1606 if (NumElts == 0)
1607 return SDValue(); // empty struct
1608 SmallVector<SDValue, 4> Constants(NumElts);
1609 for (unsigned i = 0; i != NumElts; ++i) {
1610 EVT EltVT = ValueVTs[i];
1611 if (isa<UndefValue>(C))
1612 Constants[i] = DAG.getUNDEF(EltVT);
1613 else if (EltVT.isFloatingPoint())
1614 Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1615 else
1616 Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1619 return DAG.getMergeValues(Constants, getCurSDLoc());
1622 if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1623 return DAG.getBlockAddress(BA, VT);
1625 if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
1626 return getValue(Equiv->getGlobalValue());
1628 if (const auto *NC = dyn_cast<NoCFIValue>(C))
1629 return getValue(NC->getGlobalValue());
1631 VectorType *VecTy = cast<VectorType>(V->getType());
1633 // Now that we know the number and type of the elements, get that number of
1634 // elements into the Ops array based on what kind of constant it is.
1635 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1636 SmallVector<SDValue, 16> Ops;
1637 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1638 for (unsigned i = 0; i != NumElements; ++i)
1639 Ops.push_back(getValue(CV->getOperand(i)));
1641 return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1642 } else if (isa<ConstantAggregateZero>(C)) {
1643 EVT EltVT =
1644 TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1646 SDValue Op;
1647 if (EltVT.isFloatingPoint())
1648 Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1649 else
1650 Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1652 if (isa<ScalableVectorType>(VecTy))
1653 return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
1654 else {
1655 SmallVector<SDValue, 16> Ops;
1656 Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
1657 return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1660 llvm_unreachable("Unknown vector constant");
1663 // If this is a static alloca, generate it as the frameindex instead of
1664 // computation.
1665 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1666 DenseMap<const AllocaInst*, int>::iterator SI =
1667 FuncInfo.StaticAllocaMap.find(AI);
1668 if (SI != FuncInfo.StaticAllocaMap.end())
1669 return DAG.getFrameIndex(SI->second,
1670 TLI.getFrameIndexTy(DAG.getDataLayout()));
1673 // If this is an instruction which fast-isel has deferred, select it now.
1674 if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1675 unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1677 RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1678 Inst->getType(), None);
1679 SDValue Chain = DAG.getEntryNode();
1680 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1683 if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
1684 return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
1686 if (const auto *BB = dyn_cast<BasicBlock>(V))
1687 return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
1688 llvm_unreachable("Can't get register for value!");
1691 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1692 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1693 bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1694 bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1695 bool IsSEH = isAsynchronousEHPersonality(Pers);
1696 MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1697 if (!IsSEH)
1698 CatchPadMBB->setIsEHScopeEntry();
1699 // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1700 if (IsMSVCCXX || IsCoreCLR)
1701 CatchPadMBB->setIsEHFuncletEntry();
1704 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1705 // Update machine-CFG edge.
1706 MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1707 FuncInfo.MBB->addSuccessor(TargetMBB);
1708 TargetMBB->setIsEHCatchretTarget(true);
1709 DAG.getMachineFunction().setHasEHCatchret(true);
1711 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1712 bool IsSEH = isAsynchronousEHPersonality(Pers);
1713 if (IsSEH) {
1714 // If this is not a fall-through branch or optimizations are switched off,
1715 // emit the branch.
1716 if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1717 TM.getOptLevel() == CodeGenOpt::None)
1718 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1719 getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1720 return;
1723 // Figure out the funclet membership for the catchret's successor.
1724 // This will be used by the FuncletLayout pass to determine how to order the
1725 // BB's.
1726 // A 'catchret' returns to the outer scope's color.
1727 Value *ParentPad = I.getCatchSwitchParentPad();
1728 const BasicBlock *SuccessorColor;
1729 if (isa<ConstantTokenNone>(ParentPad))
1730 SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1731 else
1732 SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1733 assert(SuccessorColor && "No parent funclet for catchret!");
1734 MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1735 assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1737 // Create the terminator node.
1738 SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1739 getControlRoot(), DAG.getBasicBlock(TargetMBB),
1740 DAG.getBasicBlock(SuccessorColorMBB));
1741 DAG.setRoot(Ret);
1744 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1745 // Don't emit any special code for the cleanuppad instruction. It just marks
1746 // the start of an EH scope/funclet.
1747 FuncInfo.MBB->setIsEHScopeEntry();
1748 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1749 if (Pers != EHPersonality::Wasm_CXX) {
1750 FuncInfo.MBB->setIsEHFuncletEntry();
1751 FuncInfo.MBB->setIsCleanupFuncletEntry();
1755 // In wasm EH, even though a catchpad may not catch an exception if a tag does
1756 // not match, it is OK to add only the first unwind destination catchpad to the
1757 // successors, because there will be at least one invoke instruction within the
1758 // catch scope that points to the next unwind destination, if one exists, so
1759 // CFGSort cannot mess up the BB sorting order.
1760 // (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
1761 // call within them, and catchpads only consisting of 'catch (...)' have a
1762 // '__cxa_end_catch' call within them, both of which generate invokes in case
1763 // the next unwind destination exists, i.e., the next unwind destination is not
1764 // the caller.)
1766 // Having at most one EH pad successor is also simpler and helps later
1767 // transformations.
1769 // For example,
1770 // current:
1771 // invoke void @foo to ... unwind label %catch.dispatch
1772 // catch.dispatch:
1773 // %0 = catchswitch within ... [label %catch.start] unwind label %next
1774 // catch.start:
1775 // ...
1776 // ... in this BB or some other child BB dominated by this BB there will be an
1777 // invoke that points to 'next' BB as an unwind destination
1779 // next: ; We don't need to add this to 'current' BB's successors
1780 // ...
1781 static void findWasmUnwindDestinations(
1782 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1783 BranchProbability Prob,
1784 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1785 &UnwindDests) {
1786 while (EHPadBB) {
1787 const Instruction *Pad = EHPadBB->getFirstNonPHI();
1788 if (isa<CleanupPadInst>(Pad)) {
1789 // Stop on cleanup pads.
1790 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1791 UnwindDests.back().first->setIsEHScopeEntry();
1792 break;
1793 } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1794 // Add the catchpad handlers to the possible destinations. We don't
1795 // continue to the unwind destination of the catchswitch for wasm.
1796 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1797 UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1798 UnwindDests.back().first->setIsEHScopeEntry();
1800 break;
1801 } else {
1802 continue;
1807 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1808 /// many places it could ultimately go. In the IR, we have a single unwind
1809 /// destination, but in the machine CFG, we enumerate all the possible blocks.
1810 /// This function skips over imaginary basic blocks that hold catchswitch
1811 /// instructions, and finds all the "real" machine
1812 /// basic block destinations. As those destinations may not be successors of
1813 /// EHPadBB, here we also calculate the edge probability to those destinations.
1814 /// The passed-in Prob is the edge probability to EHPadBB.
1815 static void findUnwindDestinations(
1816 FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1817 BranchProbability Prob,
1818 SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1819 &UnwindDests) {
1820 EHPersonality Personality =
1821 classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1822 bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1823 bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1824 bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
1825 bool IsSEH = isAsynchronousEHPersonality(Personality);
1827 if (IsWasmCXX) {
1828 findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
1829 assert(UnwindDests.size() <= 1 &&
1830 "There should be at most one unwind destination for wasm");
1831 return;
1834 while (EHPadBB) {
1835 const Instruction *Pad = EHPadBB->getFirstNonPHI();
1836 BasicBlock *NewEHPadBB = nullptr;
1837 if (isa<LandingPadInst>(Pad)) {
1838 // Stop on landingpads. They are not funclets.
1839 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1840 break;
1841 } else if (isa<CleanupPadInst>(Pad)) {
1842 // Stop on cleanup pads. Cleanups are always funclet entries for all known
1843 // personalities.
1844 UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1845 UnwindDests.back().first->setIsEHScopeEntry();
1846 UnwindDests.back().first->setIsEHFuncletEntry();
1847 break;
1848 } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1849 // Add the catchpad handlers to the possible destinations.
1850 for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1851 UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1852 // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1853 if (IsMSVCCXX || IsCoreCLR)
1854 UnwindDests.back().first->setIsEHFuncletEntry();
1855 if (!IsSEH)
1856 UnwindDests.back().first->setIsEHScopeEntry();
1858 NewEHPadBB = CatchSwitch->getUnwindDest();
1859 } else {
1860 continue;
1863 BranchProbabilityInfo *BPI = FuncInfo.BPI;
1864 if (BPI && NewEHPadBB)
1865 Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1866 EHPadBB = NewEHPadBB;
1870 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1871 // Update successor info.
1872 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
1873 auto UnwindDest = I.getUnwindDest();
1874 BranchProbabilityInfo *BPI = FuncInfo.BPI;
1875 BranchProbability UnwindDestProb =
1876 (BPI && UnwindDest)
1877 ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1878 : BranchProbability::getZero();
1879 findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
1880 for (auto &UnwindDest : UnwindDests) {
1881 UnwindDest.first->setIsEHPad();
1882 addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1884 FuncInfo.MBB->normalizeSuccProbs();
1886 // Create the terminator node.
1887 SDValue Ret =
1888 DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1889 DAG.setRoot(Ret);
1892 void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1893 report_fatal_error("visitCatchSwitch not yet implemented!");
1896 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1897 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1898 auto &DL = DAG.getDataLayout();
1899 SDValue Chain = getControlRoot();
1900 SmallVector<ISD::OutputArg, 8> Outs;
1901 SmallVector<SDValue, 8> OutVals;
1903 // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1904 // lower
1906 // %val = call <ty> @llvm.experimental.deoptimize()
1907 // ret <ty> %val
1909 // differently.
1910 if (I.getParent()->getTerminatingDeoptimizeCall()) {
1911 LowerDeoptimizingReturn();
1912 return;
1915 if (!FuncInfo.CanLowerReturn) {
1916 unsigned DemoteReg = FuncInfo.DemoteRegister;
1917 const Function *F = I.getParent()->getParent();
1919 // Emit a store of the return value through the virtual register.
1920 // Leave Outs empty so that LowerReturn won't try to load return
1921 // registers the usual way.
1922 SmallVector<EVT, 1> PtrValueVTs;
1923 ComputeValueVTs(TLI, DL,
1924 F->getReturnType()->getPointerTo(
1925 DAG.getDataLayout().getAllocaAddrSpace()),
1926 PtrValueVTs);
1928 SDValue RetPtr =
1929 DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
1930 SDValue RetOp = getValue(I.getOperand(0));
1932 SmallVector<EVT, 4> ValueVTs, MemVTs;
1933 SmallVector<uint64_t, 4> Offsets;
1934 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
1935 &Offsets);
1936 unsigned NumValues = ValueVTs.size();
1938 SmallVector<SDValue, 4> Chains(NumValues);
1939 Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
1940 for (unsigned i = 0; i != NumValues; ++i) {
1941 // An aggregate return value cannot wrap around the address space, so
1942 // offsets to its parts don't wrap either.
1943 SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
1944 TypeSize::Fixed(Offsets[i]));
1946 SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
1947 if (MemVTs[i] != ValueVTs[i])
1948 Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
1949 Chains[i] = DAG.getStore(
1950 Chain, getCurSDLoc(), Val,
1951 // FIXME: better loc info would be nice.
1952 Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
1953 commonAlignment(BaseAlign, Offsets[i]));
1956 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1957 MVT::Other, Chains);
1958 } else if (I.getNumOperands() != 0) {
1959 SmallVector<EVT, 4> ValueVTs;
1960 ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1961 unsigned NumValues = ValueVTs.size();
1962 if (NumValues) {
1963 SDValue RetOp = getValue(I.getOperand(0));
1965 const Function *F = I.getParent()->getParent();
1967 bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
1968 I.getOperand(0)->getType(), F->getCallingConv(),
1969 /*IsVarArg*/ false, DL);
1971 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1972 if (F->getAttributes().hasRetAttr(Attribute::SExt))
1973 ExtendKind = ISD::SIGN_EXTEND;
1974 else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
1975 ExtendKind = ISD::ZERO_EXTEND;
1977 LLVMContext &Context = F->getContext();
1978 bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
1980 for (unsigned j = 0; j != NumValues; ++j) {
1981 EVT VT = ValueVTs[j];
1983 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1984 VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1986 CallingConv::ID CC = F->getCallingConv();
1988 unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
1989 MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
1990 SmallVector<SDValue, 4> Parts(NumParts);
1991 getCopyToParts(DAG, getCurSDLoc(),
1992 SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1993 &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
1995 // 'inreg' on function refers to return value
1996 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1997 if (RetInReg)
1998 Flags.setInReg();
2000 if (I.getOperand(0)->getType()->isPointerTy()) {
2001 Flags.setPointer();
2002 Flags.setPointerAddrSpace(
2003 cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
2006 if (NeedsRegBlock) {
2007 Flags.setInConsecutiveRegs();
2008 if (j == NumValues - 1)
2009 Flags.setInConsecutiveRegsLast();
2012 // Propagate extension type if any
2013 if (ExtendKind == ISD::SIGN_EXTEND)
2014 Flags.setSExt();
2015 else if (ExtendKind == ISD::ZERO_EXTEND)
2016 Flags.setZExt();
2018 for (unsigned i = 0; i < NumParts; ++i) {
2019 Outs.push_back(ISD::OutputArg(Flags,
2020 Parts[i].getValueType().getSimpleVT(),
2021 VT, /*isfixed=*/true, 0, 0));
2022 OutVals.push_back(Parts[i]);
2028 // Push the swifterror virtual register as the last element of Outs. This makes
2029 // sure the swifterror virtual register will be returned in the swifterror
2030 // physical register.
2031 const Function *F = I.getParent()->getParent();
2032 if (TLI.supportSwiftError() &&
2033 F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
2034 assert(SwiftError.getFunctionArg() && "Need a swift error argument");
2035 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
2036 Flags.setSwiftError();
2037 Outs.push_back(ISD::OutputArg(
2038 Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
2039 /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
2040 // Create SDNode for the swifterror virtual register.
2041 OutVals.push_back(
2042 DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
2043 &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
2044 EVT(TLI.getPointerTy(DL))));
2047 bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
2048 CallingConv::ID CallConv =
2049 DAG.getMachineFunction().getFunction().getCallingConv();
2050 Chain = DAG.getTargetLoweringInfo().LowerReturn(
2051 Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
2053 // Verify that the target's LowerReturn behaved as expected.
2054 assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
2055 "LowerReturn didn't return a valid chain!");
2057 // Update the DAG with the new chain value resulting from return lowering.
2058 DAG.setRoot(Chain);
2061 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
2062 /// created for it, emit nodes to copy the value into the virtual
2063 /// registers.
2064 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
2065 // Skip empty types
2066 if (V->getType()->isEmptyTy())
2067 return;
2069 DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
2070 if (VMI != FuncInfo.ValueMap.end()) {
2071 assert(!V->use_empty() && "Unused value assigned virtual registers!");
2072 CopyValueToVirtualRegister(V, VMI->second);
2076 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
2077 /// the current basic block, add it to ValueMap now so that we'll get a
2078 /// CopyTo/FromReg.
2079 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
2080 // No need to export constants.
2081 if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
2083 // Already exported?
2084 if (FuncInfo.isExportedInst(V)) return;
2086 unsigned Reg = FuncInfo.InitializeRegForValue(V);
2087 CopyValueToVirtualRegister(V, Reg);
2090 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
2091 const BasicBlock *FromBB) {
2092 // The operands of the setcc have to be in this block. We don't know
2093 // how to export them from some other block.
2094 if (const Instruction *VI = dyn_cast<Instruction>(V)) {
2095 // Can export from current BB.
2096 if (VI->getParent() == FromBB)
2097 return true;
2099 // Is already exported, noop.
2100 return FuncInfo.isExportedInst(V);
2103 // If this is an argument, we can export it if the BB is the entry block or
2104 // if it is already exported.
2105 if (isa<Argument>(V)) {
2106 if (FromBB->isEntryBlock())
2107 return true;
2109 // Otherwise, can only export this if it is already exported.
2110 return FuncInfo.isExportedInst(V);
2113 // Otherwise, constants can always be exported.
2114 return true;
2117 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
2118 BranchProbability
2119 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
2120 const MachineBasicBlock *Dst) const {
2121 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2122 const BasicBlock *SrcBB = Src->getBasicBlock();
2123 const BasicBlock *DstBB = Dst->getBasicBlock();
2124 if (!BPI) {
2125 // If BPI is not available, set the default probability as 1 / N, where N is
2126 // the number of successors.
2127 auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
2128 return BranchProbability(1, SuccSize);
2130 return BPI->getEdgeProbability(SrcBB, DstBB);
2133 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
2134 MachineBasicBlock *Dst,
2135 BranchProbability Prob) {
2136 if (!FuncInfo.BPI)
2137 Src->addSuccessorWithoutProb(Dst);
2138 else {
2139 if (Prob.isUnknown())
2140 Prob = getEdgeProbability(Src, Dst);
2141 Src->addSuccessor(Dst, Prob);
2145 static bool InBlock(const Value *V, const BasicBlock *BB) {
2146 if (const Instruction *I = dyn_cast<Instruction>(V))
2147 return I->getParent() == BB;
2148 return true;
2151 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
2152 /// This function emits a branch and is used at the leaves of an OR or an
2153 /// AND operator tree.
2154 void
2155 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
2156 MachineBasicBlock *TBB,
2157 MachineBasicBlock *FBB,
2158 MachineBasicBlock *CurBB,
2159 MachineBasicBlock *SwitchBB,
2160 BranchProbability TProb,
2161 BranchProbability FProb,
2162 bool InvertCond) {
2163 const BasicBlock *BB = CurBB->getBasicBlock();
2165 // If the leaf of the tree is a comparison, merge the condition into
2166 // the caseblock.
2167 if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
2168 // The operands of the cmp have to be in this block. We don't know
2169 // how to export them from some other block. If this is the first block
2170 // of the sequence, no exporting is needed.
2171 if (CurBB == SwitchBB ||
2172 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
2173 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
2174 ISD::CondCode Condition;
2175 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
2176 ICmpInst::Predicate Pred =
2177 InvertCond ? IC->getInversePredicate() : IC->getPredicate();
2178 Condition = getICmpCondCode(Pred);
2179 } else {
2180 const FCmpInst *FC = cast<FCmpInst>(Cond);
2181 FCmpInst::Predicate Pred =
2182 InvertCond ? FC->getInversePredicate() : FC->getPredicate();
2183 Condition = getFCmpCondCode(Pred);
2184 if (TM.Options.NoNaNsFPMath)
2185 Condition = getFCmpCodeWithoutNaN(Condition);
2188 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
2189 TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2190 SL->SwitchCases.push_back(CB);
2191 return;
2195 // Create a CaseBlock record representing this branch.
2196 ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
2197 CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
2198 nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
2199 SL->SwitchCases.push_back(CB);
2202 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
2203 MachineBasicBlock *TBB,
2204 MachineBasicBlock *FBB,
2205 MachineBasicBlock *CurBB,
2206 MachineBasicBlock *SwitchBB,
2207 Instruction::BinaryOps Opc,
2208 BranchProbability TProb,
2209 BranchProbability FProb,
2210 bool InvertCond) {
2211 // Skip over a 'not' and remember to invert the op and operands at the
2212 // next level.
2213 Value *NotCond;
2214 if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
2215 InBlock(NotCond, CurBB->getBasicBlock())) {
2216 FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
2217 !InvertCond);
2218 return;
2221 const Instruction *BOp = dyn_cast<Instruction>(Cond);
2222 const Value *BOpOp0, *BOpOp1;
2223 // Compute the effective opcode for Cond, taking into account whether it needs
2224 // to be inverted, e.g.
2225 // and (not (or A, B)), C
2226 // gets lowered as
2227 // and (and (not A, not B), C)
2228 Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
2229 if (BOp) {
2230 BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
2231 ? Instruction::And
2232 : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
2233 ? Instruction::Or
2234 : (Instruction::BinaryOps)0);
2235 if (InvertCond) {
2236 if (BOpc == Instruction::And)
2237 BOpc = Instruction::Or;
2238 else if (BOpc == Instruction::Or)
2239 BOpc = Instruction::And;
2243 // If this node is not part of the or/and tree, emit it as a branch.
2244 // Note that all nodes in the tree should have the same opcode.
2245 bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
2246 if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
2247 !InBlock(BOpOp0, CurBB->getBasicBlock()) ||
2248 !InBlock(BOpOp1, CurBB->getBasicBlock())) {
2249 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
2250 TProb, FProb, InvertCond);
2251 return;
2254 // Create TmpBB after CurBB.
2255 MachineFunction::iterator BBI(CurBB);
2256 MachineFunction &MF = DAG.getMachineFunction();
2257 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
2258 CurBB->getParent()->insert(++BBI, TmpBB);
2260 if (Opc == Instruction::Or) {
2261 // Codegen X | Y as:
2262 // BB1:
2263 // jmp_if_X TBB
2264 // jmp TmpBB
2265 // TmpBB:
2266 // jmp_if_Y TBB
2267 // jmp FBB
2270 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2271 // The requirement is that
2272 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
2273 // = TrueProb for original BB.
2274 // Assuming the original probabilities are A and B, one choice is to set
2275 // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
2276 // A/(1+B) and 2B/(1+B). This choice assumes that
2277 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
2278 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
2279 // TmpBB, but the math is more complicated.
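// As a worked illustration (numbers chosen only for this example): with
// A = 3/4 and B = 1/4, BB1 gets probabilities 3/8 and 5/8, and TmpBB gets
// (3/4)/(5/4) = 3/5 and (1/2)/(5/4) = 2/5 after normalization; the invariant
// holds since 3/8 + 5/8 * 3/5 = 3/4 = A.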
2281 auto NewTrueProb = TProb / 2;
2282 auto NewFalseProb = TProb / 2 + FProb;
2283 // Emit the LHS condition.
2284 FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
2285 NewFalseProb, InvertCond);
2287 // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
2288 SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
2289 BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
2290 // Emit the RHS condition into TmpBB.
2291 FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
2292 Probs[1], InvertCond);
2293 } else {
2294 assert(Opc == Instruction::And && "Unknown merge op!");
2295 // Codegen X & Y as:
2296 // BB1:
2297 // jmp_if_X TmpBB
2298 // jmp FBB
2299 // TmpBB:
2300 // jmp_if_Y TBB
2301 // jmp FBB
2303 // This requires creation of TmpBB after CurBB.
2305 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
2306 // The requirement is that
2307 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
2308 // = FalseProb for original BB.
2309 // Assuming the original probabilities are A and B, one choice is to set
2310 // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
2311 // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
2312 // TrueProb for BB1 * FalseProb for TmpBB.
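// As a worked illustration (example numbers only): with A = 3/4 and B = 1/4,
// BB1 gets probabilities 7/8 and 1/8, and TmpBB gets 6/7 and 1/7 after
// normalization; the invariant holds since 1/8 + 7/8 * 1/7 = 1/4 = B.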
2314 auto NewTrueProb = TProb + FProb / 2;
2315 auto NewFalseProb = FProb / 2;
2316 // Emit the LHS condition.
2317 FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
2318 NewFalseProb, InvertCond);
2320 // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
2321 SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
2322 BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
2323 // Emit the RHS condition into TmpBB.
2324 FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
2325 Probs[1], InvertCond);
2329 /// If the set of cases should be emitted as a series of branches, return true.
2330 /// If we should emit this as a bunch of and/or'd together conditions, return
2331 /// false.
2332 bool
2333 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
2334 if (Cases.size() != 2) return true;
2336 // If this is two comparisons of the same values or'd or and'd together, they
2337 // will get folded into a single comparison, so don't emit two blocks.
2338 if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
2339 Cases[0].CmpRHS == Cases[1].CmpRHS) ||
2340 (Cases[0].CmpRHS == Cases[1].CmpLHS &&
2341 Cases[0].CmpLHS == Cases[1].CmpRHS)) {
2342 return false;
2345 // Handle: (X != null) | (Y != null) --> (X|Y) != 0
2346 // Handle: (X == null) & (Y == null) --> (X|Y) == 0
2347 if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
2348 Cases[0].CC == Cases[1].CC &&
2349 isa<Constant>(Cases[0].CmpRHS) &&
2350 cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
2351 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
2352 return false;
2353 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
2354 return false;
2357 return true;
2360 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
2361 MachineBasicBlock *BrMBB = FuncInfo.MBB;
2363 // Update machine-CFG edges.
2364 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
2366 if (I.isUnconditional()) {
2367 // Update machine-CFG edges.
2368 BrMBB->addSuccessor(Succ0MBB);
2370 // If this is not a fall-through branch or optimizations are switched off,
2371 // emit the branch.
2372 if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
2373 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2374 MVT::Other, getControlRoot(),
2375 DAG.getBasicBlock(Succ0MBB)));
2377 return;
2380 // If this condition is one of the special cases we handle, do special stuff
2381 // now.
2382 const Value *CondVal = I.getCondition();
2383 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
2385 // If this is a series of conditions that are or'd or and'd together, emit
2386 // this as a sequence of branches instead of setcc's with and/or operations.
2387 // As long as jumps are not expensive (exceptions for multi-use logic ops,
2388 // unpredictable branches, and vector extracts because those jumps are likely
2389 // expensive for any target), this should improve performance.
2390 // For example, instead of something like:
2391 // cmp A, B
2392 // C = seteq
2393 // cmp D, E
2394 // F = setle
2395 // or C, F
2396 // jnz foo
2397 // Emit:
2398 // cmp A, B
2399 // je foo
2400 // cmp D, E
2401 // jle foo
2402 const Instruction *BOp = dyn_cast<Instruction>(CondVal);
2403 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
2404 BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
2405 Value *Vec;
2406 const Value *BOp0, *BOp1;
2407 Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
2408 if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
2409 Opcode = Instruction::And;
2410 else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
2411 Opcode = Instruction::Or;
2413 if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
2414 match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
2415 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
2416 getEdgeProbability(BrMBB, Succ0MBB),
2417 getEdgeProbability(BrMBB, Succ1MBB),
2418 /*InvertCond=*/false);
2419 // If the compares in later blocks need to use values not currently
2420 // exported from this block, export them now. This block should always
2421 // be the first entry.
2422 assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2424 // Allow some cases to be rejected.
2425 if (ShouldEmitAsBranches(SL->SwitchCases)) {
2426 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
2427 ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
2428 ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
2431 // Emit the branch for this block.
2432 visitSwitchCase(SL->SwitchCases[0], BrMBB);
2433 SL->SwitchCases.erase(SL->SwitchCases.begin());
2434 return;
2437 // Okay, we decided not to do this, remove any inserted MBB's and clear
2438 // SwitchCases.
2439 for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
2440 FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
2442 SL->SwitchCases.clear();
2446 // Create a CaseBlock record representing this branch.
2447 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
2448 nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2450 // Use visitSwitchCase to actually insert the fast branch sequence for this
2451 // cond branch.
2452 visitSwitchCase(CB, BrMBB);
2455 /// visitSwitchCase - Emits the necessary code to represent a single node in
2456 /// the binary search tree resulting from lowering a switch instruction.
2457 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2458 MachineBasicBlock *SwitchBB) {
2459 SDValue Cond;
2460 SDValue CondLHS = getValue(CB.CmpLHS);
2461 SDLoc dl = CB.DL;
2463 if (CB.CC == ISD::SETTRUE) {
2464 // Branch or fall through to TrueBB.
2465 addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2466 SwitchBB->normalizeSuccProbs();
2467 if (CB.TrueBB != NextBlock(SwitchBB)) {
2468 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
2469 DAG.getBasicBlock(CB.TrueBB)));
2471 return;
2474 auto &TLI = DAG.getTargetLoweringInfo();
2475 EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
2477 // Build the setcc now.
2478 if (!CB.CmpMHS) {
2479 // Fold "(X == true)" to X and "(X == false)" to !X to
2480 // handle common cases produced by branch lowering.
2481 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
2482 CB.CC == ISD::SETEQ)
2483 Cond = CondLHS;
2484 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
2485 CB.CC == ISD::SETEQ) {
2486 SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
2487 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
2488 } else {
2489 SDValue CondRHS = getValue(CB.CmpRHS);
2491 // If a pointer's DAG type is larger than its memory type then the DAG
2492 // values are zero-extended. This breaks signed comparisons so truncate
2493 // back to the underlying type before doing the compare.
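// (This can arise, for example, on targets whose pointers occupy 32 bits in
// memory but are held zero-extended in 64-bit DAG values; the comparison must
// then be done on the truncated 32-bit values.)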
2494 if (CondLHS.getValueType() != MemVT) {
2495 CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
2496 CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
2498 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
2500 } else {
2501 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2503 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2504 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2506 SDValue CmpOp = getValue(CB.CmpMHS);
2507 EVT VT = CmpOp.getValueType();
2509 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2510 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2511 ISD::SETLE);
2512 } else {
2513 SDValue SUB = DAG.getNode(ISD::SUB, dl,
2514 VT, CmpOp, DAG.getConstant(Low, dl, VT));
2515 Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2516 DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
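// Illustrative example (values invented for exposition): for the range
// [5, 12] this emits (CmpOp - 5) <=u 7, which is true exactly when
// 5 <= CmpOp <= 12, since any CmpOp below Low wraps around to a large
// unsigned value.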
2520 // Update successor info
2521 addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2522 // TrueBB and FalseBB are always different unless the incoming IR is
2523 // degenerate. This only happens when running llc on weird IR.
2524 if (CB.TrueBB != CB.FalseBB)
2525 addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2526 SwitchBB->normalizeSuccProbs();
2528 // If the lhs block is the next block, invert the condition so that we can
2529 // fall through to the lhs instead of the rhs block.
2530 if (CB.TrueBB == NextBlock(SwitchBB)) {
2531 std::swap(CB.TrueBB, CB.FalseBB);
2532 SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2533 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2536 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2537 MVT::Other, getControlRoot(), Cond,
2538 DAG.getBasicBlock(CB.TrueBB));
2540 // Insert the false branch. Do this even if it's a fall-through branch;
2541 // this makes it easier to do DAG optimizations which require inverting
2542 // the branch condition.
2543 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2544 DAG.getBasicBlock(CB.FalseBB));
2546 DAG.setRoot(BrCond);
2549 /// visitJumpTable - Emit JumpTable node in the current MBB
2550 void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
2551 // Emit the code for the jump table
2552 assert(JT.Reg != -1U && "Should lower JT Header first!");
2553 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2554 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2555 JT.Reg, PTy);
2556 SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2557 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2558 MVT::Other, Index.getValue(1),
2559 Table, Index);
2560 DAG.setRoot(BrJumpTable);
2563 /// visitJumpTableHeader - This function emits the code necessary to produce an
2564 /// index into the JumpTable from the value being switched on.
2565 void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
2566 JumpTableHeader &JTH,
2567 MachineBasicBlock *SwitchBB) {
2568 SDLoc dl = getCurSDLoc();
2570 // Subtract the lowest switch case value from the value being switched on.
2571 SDValue SwitchOp = getValue(JTH.SValue);
2572 EVT VT = SwitchOp.getValueType();
2573 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2574 DAG.getConstant(JTH.First, dl, VT));
2576 // The SDNode we just created, which holds the value being switched on minus
2577 // the smallest case value, needs to be copied to a virtual register so it
2578 // can be used as an index into the jump table in a subsequent basic block.
2579 // This value may be smaller or larger than the target's pointer type, and
2580 // therefore require extension or truncation.
2581 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2582 SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2584 unsigned JumpTableReg =
2585 FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2586 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2587 JumpTableReg, SwitchOp);
2588 JT.Reg = JumpTableReg;
2590 if (!JTH.FallthroughUnreachable) {
2591 // Emit the range check for the jump table, and branch to the default block
2592 // for the switch statement if the value being switched on exceeds the
2593 // largest case in the switch.
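// Illustrative example (case values invented): for a table covering cases
// 10..14, Sub is SwitchVal - 10 and the check below is Sub >u 4, branching
// to the default block whenever the original value lies outside [10, 14].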
2594 SDValue CMP = DAG.getSetCC(
2595 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2596 Sub.getValueType()),
2597 Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2599 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2600 MVT::Other, CopyTo, CMP,
2601 DAG.getBasicBlock(JT.Default));
2603 // Avoid emitting unnecessary branches to the next block.
2604 if (JT.MBB != NextBlock(SwitchBB))
2605 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2606 DAG.getBasicBlock(JT.MBB));
2608 DAG.setRoot(BrCond);
2609 } else {
2610 // Avoid emitting unnecessary branches to the next block.
2611 if (JT.MBB != NextBlock(SwitchBB))
2612 DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
2613 DAG.getBasicBlock(JT.MBB)));
2614 else
2615 DAG.setRoot(CopyTo);
2619 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2620 /// variable if there exists one.
2621 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2622 SDValue &Chain) {
2623 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2624 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2625 EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
2626 MachineFunction &MF = DAG.getMachineFunction();
2627 Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2628 MachineSDNode *Node =
2629 DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2630 if (Global) {
2631 MachinePointerInfo MPInfo(Global);
2632 auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2633 MachineMemOperand::MODereferenceable;
2634 MachineMemOperand *MemRef = MF.getMachineMemOperand(
2635 MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
2636 DAG.setNodeMemRefs(Node, {MemRef});
2638 if (PtrTy != PtrMemTy)
2639 return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
2640 return SDValue(Node, 0);
2643 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2644 /// tail spliced into a stack protector check success bb.
2646 /// For a high level explanation of how this fits into the stack protector
2647 /// generation see the comment on the declaration of class
2648 /// StackProtectorDescriptor.
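/// At a high level, the lowered check reloads the guard value from the stack
/// slot and then either calls the target-provided guard check function with
/// that value, or compares it against the stack guard and branches to the
/// failure block on mismatch.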
2649 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2650 MachineBasicBlock *ParentBB) {
2652 // First create the loads to the guard/stack slot for the comparison.
2653 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2654 EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2655 EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
2657 MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2658 int FI = MFI.getStackProtectorIndex();
2660 SDValue Guard;
2661 SDLoc dl = getCurSDLoc();
2662 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2663 const Module &M = *ParentBB->getParent()->getFunction().getParent();
2664 Align Align =
2665 DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
2667 // Generate code to load the content of the guard slot.
2668 SDValue GuardVal = DAG.getLoad(
2669 PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
2670 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2671 MachineMemOperand::MOVolatile);
2673 if (TLI.useStackGuardXorFP())
2674 GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2676 // Retrieve the guard check function; it is nullptr if the instrumentation is inlined.
2677 if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
2678 // The target provides a guard check function to validate the guard value.
2679 // Generate a call to that function with the content of the guard slot as
2680 // argument.
2681 FunctionType *FnTy = GuardCheckFn->getFunctionType();
2682 assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2684 TargetLowering::ArgListTy Args;
2685 TargetLowering::ArgListEntry Entry;
2686 Entry.Node = GuardVal;
2687 Entry.Ty = FnTy->getParamType(0);
2688 if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
2689 Entry.IsInReg = true;
2690 Args.push_back(Entry);
2692 TargetLowering::CallLoweringInfo CLI(DAG);
2693 CLI.setDebugLoc(getCurSDLoc())
2694 .setChain(DAG.getEntryNode())
2695 .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
2696 getValue(GuardCheckFn), std::move(Args));
2698 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2699 DAG.setRoot(Result.second);
2700 return;
2703 // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2704 // Otherwise, emit a volatile load to retrieve the stack guard value.
2705 SDValue Chain = DAG.getEntryNode();
2706 if (TLI.useLoadStackGuardNode()) {
2707 Guard = getLoadStackGuard(DAG, dl, Chain);
2708 } else {
2709 const Value *IRGuard = TLI.getSDagStackGuard(M);
2710 SDValue GuardPtr = getValue(IRGuard);
2712 Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
2713 MachinePointerInfo(IRGuard, 0), Align,
2714 MachineMemOperand::MOVolatile);
2717 // Perform the comparison via a SETCC node.
2718 SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2719 *DAG.getContext(),
2720 Guard.getValueType()),
2721 Guard, GuardVal, ISD::SETNE);
2723 // If the guard and stack slot contents are not equal, branch to the failure MBB.
2724 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2725 MVT::Other, GuardVal.getOperand(0),
2726 Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2727 // Otherwise branch to success MBB.
2728 SDValue Br = DAG.getNode(ISD::BR, dl,
2729 MVT::Other, BrCond,
2730 DAG.getBasicBlock(SPD.getSuccessMBB()));
2732 DAG.setRoot(Br);
2735 /// Codegen the failure basic block for a stack protector check.
2737 /// A failure stack protector machine basic block consists simply of a call to
2738 /// __stack_chk_fail().
2740 /// For a high level explanation of how this fits into the stack protector
2741 /// generation see the comment on the declaration of class
2742 /// StackProtectorDescriptor.
2743 void
2744 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2745 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2746 TargetLowering::MakeLibCallOptions CallOptions;
2747 CallOptions.setDiscardResult(true);
2748 SDValue Chain =
2749 TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2750 None, CallOptions, getCurSDLoc()).second;
2751 // On PS4, the "return address" must still be within the calling function,
2752 // even if it's at the very end, so emit an explicit TRAP here.
2753 // Passing 'true' for doesNotReturn above won't generate the trap for us.
2754 if (TM.getTargetTriple().isPS4CPU())
2755 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
2756 // WebAssembly needs an unreachable instruction after a non-returning call,
2757 // because the function return type can be different from __stack_chk_fail's
2758 // return type (void).
2759 if (TM.getTargetTriple().isWasm())
2760 Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
2762 DAG.setRoot(Chain);
2765 /// visitBitTestHeader - This function emits the code necessary to produce a
2766 /// value suitable for "bit tests".
2767 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2768 MachineBasicBlock *SwitchBB) {
2769 SDLoc dl = getCurSDLoc();
2771 // Subtract the minimum value.
2772 SDValue SwitchOp = getValue(B.SValue);
2773 EVT VT = SwitchOp.getValueType();
2774 SDValue RangeSub =
2775 DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
2777 // Determine the type of the test operands.
2778 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2779 bool UsePtrType = false;
2780 if (!TLI.isTypeLegal(VT)) {
2781 UsePtrType = true;
2782 } else {
2783 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2784 if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2785 // Switch table case ranges are encoded into a series of masks.
2786 // Just use pointer type, it's guaranteed to fit.
2787 UsePtrType = true;
2788 break;
2791 SDValue Sub = RangeSub;
2792 if (UsePtrType) {
2793 VT = TLI.getPointerTy(DAG.getDataLayout());
2794 Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2797 B.RegVT = VT.getSimpleVT();
2798 B.Reg = FuncInfo.CreateReg(B.RegVT);
2799 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2801 MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2803 if (!B.FallthroughUnreachable)
2804 addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2805 addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2806 SwitchBB->normalizeSuccProbs();
2808 SDValue Root = CopyTo;
2809 if (!B.FallthroughUnreachable) {
2810 // Conditional branch to the default block.
2811 SDValue RangeCmp = DAG.getSetCC(dl,
2812 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2813 RangeSub.getValueType()),
2814 RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
2815 ISD::SETUGT);
2817 Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
2818 DAG.getBasicBlock(B.Default));
2821 // Avoid emitting unnecessary branches to the next block.
2822 if (MBB != NextBlock(SwitchBB))
2823 Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
2825 DAG.setRoot(Root);
2828 /// visitBitTestCase - This function produces one "bit test".
2829 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2830 MachineBasicBlock* NextMBB,
2831 BranchProbability BranchProbToNext,
2832 unsigned Reg,
2833 BitTestCase &B,
2834 MachineBasicBlock *SwitchBB) {
2835 SDLoc dl = getCurSDLoc();
2836 MVT VT = BB.RegVT;
2837 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2838 SDValue Cmp;
2839 unsigned PopCount = countPopulation(B.Mask);
2840 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2841 if (PopCount == 1) {
2842 // Testing for a single bit; just compare the shift count with what it
2843 // would need to be to shift a 1 bit in that position.
2844 Cmp = DAG.getSetCC(
2845 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2846 ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2847 ISD::SETEQ);
2848 } else if (PopCount == BB.Range) {
2849 // There is only one zero bit in the range, test for it directly.
2850 Cmp = DAG.getSetCC(
2851 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2852 ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2853 ISD::SETNE);
2854 } else {
2855 // Make desired shift
2856 SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2857 DAG.getConstant(1, dl, VT), ShiftOp);
2859 // Emit bit tests and jumps
2860 SDValue AndOp = DAG.getNode(ISD::AND, dl,
2861 VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2862 Cmp = DAG.getSetCC(
2863 dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2864 AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
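// Illustrative example (mask invented for exposition): with B.Mask == 0b1011,
// a shift value of 3 yields (1 << 3) & 0b1011 == 0b1000 != 0, so the branch
// to the target block is taken; a shift value of 2 yields 0 and falls through
// to the next test.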
2867 // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2868 addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2869 // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2870 addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2871 // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2872 // one, as they are relative probabilities (and thus work more like weights),
2873 // so we need to normalize them so that they sum to one.
2874 SwitchBB->normalizeSuccProbs();
2876 SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2877 MVT::Other, getControlRoot(),
2878 Cmp, DAG.getBasicBlock(B.TargetBB));
2880 // Avoid emitting unnecessary branches to the next block.
2881 if (NextMBB != NextBlock(SwitchBB))
2882 BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2883 DAG.getBasicBlock(NextMBB));
2885 DAG.setRoot(BrAnd);
2888 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2889 MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2891 // Retrieve successors. Look through artificial IR level blocks like
2892 // catchswitch for successors.
2893 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2894 const BasicBlock *EHPadBB = I.getSuccessor(1);
2896 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2897 // have to do anything here to lower funclet bundles.
2898 assert(!I.hasOperandBundlesOtherThan(
2899 {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
2900 LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
2901 LLVMContext::OB_cfguardtarget,
2902 LLVMContext::OB_clang_arc_attachedcall}) &&
2903 "Cannot lower invokes with arbitrary operand bundles yet!");
2905 const Value *Callee(I.getCalledOperand());
2906 const Function *Fn = dyn_cast<Function>(Callee);
2907 if (isa<InlineAsm>(Callee))
2908 visitInlineAsm(I, EHPadBB);
2909 else if (Fn && Fn->isIntrinsic()) {
2910 switch (Fn->getIntrinsicID()) {
2911 default:
2912 llvm_unreachable("Cannot invoke this intrinsic");
2913 case Intrinsic::donothing:
2914 // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2915 case Intrinsic::seh_try_begin:
2916 case Intrinsic::seh_scope_begin:
2917 case Intrinsic::seh_try_end:
2918 case Intrinsic::seh_scope_end:
2919 break;
2920 case Intrinsic::experimental_patchpoint_void:
2921 case Intrinsic::experimental_patchpoint_i64:
2922 visitPatchpoint(I, EHPadBB);
2923 break;
2924 case Intrinsic::experimental_gc_statepoint:
2925 LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
2926 break;
2927 case Intrinsic::wasm_rethrow: {
2928 // This is usually done in visitTargetIntrinsic, but this intrinsic is
2929 // special because it can be invoked, so we manually lower it to a DAG
2930 // node here.
2931 SmallVector<SDValue, 8> Ops;
2932 Ops.push_back(getRoot()); // inchain
2933 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2934 Ops.push_back(
2935 DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
2936 TLI.getPointerTy(DAG.getDataLayout())));
2937 SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
2938 DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
2939 break;
2942 } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2943 // Currently we do not lower any intrinsic calls with deopt operand bundles.
2944 // Eventually we will support lowering the @llvm.experimental.deoptimize
2945 // intrinsic, and right now there are no plans to support other intrinsics
2946 // with deopt state.
2947 LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2948 } else {
2949 LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
2952 // If the value of the invoke is used outside of its defining block, make it
2953 // available as a virtual register.
2954 // We already took care of the exported value for the statepoint instruction
2955 // during the call to LowerStatepoint.
2956 if (!isa<GCStatepointInst>(I)) {
2957 CopyToExportRegsIfNeeded(&I);
2960 SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2961 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2962 BranchProbability EHPadBBProb =
2963 BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2964 : BranchProbability::getZero();
2965 findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2967 // Update successor info.
2968 addSuccessorWithProb(InvokeMBB, Return);
2969 for (auto &UnwindDest : UnwindDests) {
2970 UnwindDest.first->setIsEHPad();
2971 addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2973 InvokeMBB->normalizeSuccProbs();
2975 // Drop into normal successor.
2976 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
2977 DAG.getBasicBlock(Return)));
2980 void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
2981 MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
2983 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2984 // have to do anything here to lower funclet bundles.
2985 assert(!I.hasOperandBundlesOtherThan(
2986 {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2987 "Cannot lower callbrs with arbitrary operand bundles yet!");
2989 assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
2990 visitInlineAsm(I);
2991 CopyToExportRegsIfNeeded(&I);
2993 // Retrieve successors.
2994 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
2996 // Update successor info.
2997 addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
2998 for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
2999 MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
3000 addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
3001 Target->setIsInlineAsmBrIndirectTarget();
3003 CallBrMBB->normalizeSuccProbs();
3005 // Drop into default successor.
3006 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
3007 MVT::Other, getControlRoot(),
3008 DAG.getBasicBlock(Return)));
3011 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
3012 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
3015 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
3016 assert(FuncInfo.MBB->isEHPad() &&
3017 "Call to landingpad not in landing pad!");
3019 // If there aren't registers to copy the values into (e.g., during SjLj
3020 // exceptions), then don't bother to create these DAG nodes.
3021 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3022 const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
3023 if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
3024 TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
3025 return;
3027 // If landingpad's return type is token type, we don't create DAG nodes
3028 // for its exception pointer and selector value. The extraction of exception
3029 // pointer or selector value from token type landingpads is not currently
3030 // supported.
3031 if (LP.getType()->isTokenTy())
3032 return;
3034 SmallVector<EVT, 2> ValueVTs;
3035 SDLoc dl = getCurSDLoc();
3036 ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
3037 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
3039 // Get the two live-in registers as SDValues. The physregs have already been
3040 // copied into virtual registers.
3041 SDValue Ops[2];
3042 if (FuncInfo.ExceptionPointerVirtReg) {
3043 Ops[0] = DAG.getZExtOrTrunc(
3044 DAG.getCopyFromReg(DAG.getEntryNode(), dl,
3045 FuncInfo.ExceptionPointerVirtReg,
3046 TLI.getPointerTy(DAG.getDataLayout())),
3047 dl, ValueVTs[0]);
3048 } else {
3049 Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
3051 Ops[1] = DAG.getZExtOrTrunc(
3052 DAG.getCopyFromReg(DAG.getEntryNode(), dl,
3053 FuncInfo.ExceptionSelectorVirtReg,
3054 TLI.getPointerTy(DAG.getDataLayout())),
3055 dl, ValueVTs[1]);
3057 // Merge into one.
3058 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
3059 DAG.getVTList(ValueVTs), Ops);
3060 setValue(&LP, Res);
3063 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
3064 MachineBasicBlock *Last) {
3065 // Update JTCases.
3066 for (JumpTableBlock &JTB : SL->JTCases)
3067 if (JTB.first.HeaderBB == First)
3068 JTB.first.HeaderBB = Last;
3070 // Update BitTestCases.
3071 for (BitTestBlock &BTB : SL->BitTestCases)
3072 if (BTB.Parent == First)
3073 BTB.Parent = Last;
3076 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
3077 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
3079 // Update machine-CFG edges with unique successors.
3080 SmallSet<BasicBlock*, 32> Done;
3081 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
3082 BasicBlock *BB = I.getSuccessor(i);
3083 bool Inserted = Done.insert(BB).second;
3084 if (!Inserted)
3085 continue;
3087 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
3088 addSuccessorWithProb(IndirectBrMBB, Succ);
3090 IndirectBrMBB->normalizeSuccProbs();
3092 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
3093 MVT::Other, getControlRoot(),
3094 getValue(I.getAddress())));
3097 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
3098 if (!DAG.getTarget().Options.TrapUnreachable)
3099 return;
3101 // We may be able to ignore unreachable behind a noreturn call.
3102 if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
3103 const BasicBlock &BB = *I.getParent();
3104 if (&I != &BB.front()) {
3105 BasicBlock::const_iterator PredI =
3106 std::prev(BasicBlock::const_iterator(&I));
3107 if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
3108 if (Call->doesNotReturn())
3109 return;
3114 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
3117 void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
3118 SDNodeFlags Flags;
3119 if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
3120 Flags.copyFMF(*FPOp);
3122 SDValue Op = getValue(I.getOperand(0));
3123 SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
3124 Op, Flags);
3125 setValue(&I, UnNodeValue);
3128 void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
3129 SDNodeFlags Flags;
3130 if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
3131 Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
3132 Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
3134 if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
3135 Flags.setExact(ExactOp->isExact());
3136 if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
3137 Flags.copyFMF(*FPOp);
3139 SDValue Op1 = getValue(I.getOperand(0));
3140 SDValue Op2 = getValue(I.getOperand(1));
3141 SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
3142 Op1, Op2, Flags);
3143 setValue(&I, BinNodeValue);
3146 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
3147 SDValue Op1 = getValue(I.getOperand(0));
3148 SDValue Op2 = getValue(I.getOperand(1));
3150 EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
3151 Op1.getValueType(), DAG.getDataLayout());
3153 // Coerce the shift amount to the right type if we can.
3154 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
3155 unsigned ShiftSize = ShiftTy.getSizeInBits();
3156 unsigned Op2Size = Op2.getValueSizeInBits();
3157 SDLoc DL = getCurSDLoc();
3159 // If the operand is smaller than the shift count type, promote it.
3160 if (ShiftSize > Op2Size)
3161 Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
3163 // If the operand is larger than the shift count type but the shift
3164 // count type has enough bits to represent any shift value, truncate
3165 // it now. This is a common case and it exposes the truncate to
3166 // optimization early.
3167 else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
3168 Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
3169 // Otherwise we'll need to temporarily settle for some other convenient
3170 // type. Type legalization will make adjustments once the shiftee is split.
3171 else
3172 Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
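// Illustrative examples of the cases above (the exact shift-amount type is
// target dependent): with a 32-bit shift-amount type, an 'shl i8' promotes
// its i8 amount via ZERO_EXTEND, while an 'shl i128' truncates its i128
// amount, since 32 bits are enough to encode any in-range shift of a 128-bit
// value.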
3175 bool nuw = false;
3176 bool nsw = false;
3177 bool exact = false;
3179 if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
3181 if (const OverflowingBinaryOperator *OFBinOp =
3182 dyn_cast<const OverflowingBinaryOperator>(&I)) {
3183 nuw = OFBinOp->hasNoUnsignedWrap();
3184 nsw = OFBinOp->hasNoSignedWrap();
3186 if (const PossiblyExactOperator *ExactOp =
3187 dyn_cast<const PossiblyExactOperator>(&I))
3188 exact = ExactOp->isExact();
3190 SDNodeFlags Flags;
3191 Flags.setExact(exact);
3192 Flags.setNoSignedWrap(nsw);
3193 Flags.setNoUnsignedWrap(nuw);
3194 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
3195 Flags);
3196 setValue(&I, Res);
3199 void SelectionDAGBuilder::visitSDiv(const User &I) {
3200 SDValue Op1 = getValue(I.getOperand(0));
3201 SDValue Op2 = getValue(I.getOperand(1));
3203 SDNodeFlags Flags;
3204 Flags.setExact(isa<PossiblyExactOperator>(&I) &&
3205 cast<PossiblyExactOperator>(&I)->isExact());
3206 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
3207 Op2, Flags));
3210 void SelectionDAGBuilder::visitICmp(const User &I) {
3211 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
3212 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
3213 predicate = IC->getPredicate();
3214 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
3215 predicate = ICmpInst::Predicate(IC->getPredicate());
3216 SDValue Op1 = getValue(I.getOperand(0));
3217 SDValue Op2 = getValue(I.getOperand(1));
3218 ISD::CondCode Opcode = getICmpCondCode(predicate);
3220 auto &TLI = DAG.getTargetLoweringInfo();
3221 EVT MemVT =
3222 TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
3224 // If a pointer's DAG type is larger than its memory type then the DAG values
3225 // are zero-extended. This breaks signed comparisons so truncate back to the
3226 // underlying type before doing the compare.
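// For instance, on a target where pointers are 32 bits in memory but are
// carried zero-extended in 64-bit registers, a signed comparison of two such
// pointers must be done on the truncated 32-bit values; comparing the
// zero-extended 64-bit values would give the wrong answer for pointers with
// the high bit set.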
3227 if (Op1.getValueType() != MemVT) {
3228 Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
3229 Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
3232 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3233 I.getType());
3234 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
3237 void SelectionDAGBuilder::visitFCmp(const User &I) {
3238 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
3239 if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
3240 predicate = FC->getPredicate();
3241 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
3242 predicate = FCmpInst::Predicate(FC->getPredicate());
3243 SDValue Op1 = getValue(I.getOperand(0));
3244 SDValue Op2 = getValue(I.getOperand(1));
3246 ISD::CondCode Condition = getFCmpCondCode(predicate);
3247 auto *FPMO = cast<FPMathOperator>(&I);
3248 if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
3249 Condition = getFCmpCodeWithoutNaN(Condition);
3251 SDNodeFlags Flags;
3252 Flags.copyFMF(*FPMO);
3253 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
3255 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3256 I.getType());
3257 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
3260 // Check whether all users of the select's condition are themselves select
3261 // instructions, so the condition has no other consumers.
3262 static bool hasOnlySelectUsers(const Value *Cond) {
3263 return llvm::all_of(Cond->users(), [](const Value *V) {
3264 return isa<SelectInst>(V);
3268 void SelectionDAGBuilder::visitSelect(const User &I) {
3269 SmallVector<EVT, 4> ValueVTs;
3270 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
3271 ValueVTs);
3272 unsigned NumValues = ValueVTs.size();
3273 if (NumValues == 0) return;
3275 SmallVector<SDValue, 4> Values(NumValues);
3276 SDValue Cond = getValue(I.getOperand(0));
3277 SDValue LHSVal = getValue(I.getOperand(1));
3278 SDValue RHSVal = getValue(I.getOperand(2));
3279 SmallVector<SDValue, 1> BaseOps(1, Cond);
3280 ISD::NodeType OpCode =
3281 Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
3283 bool IsUnaryAbs = false;
3284 bool Negate = false;
3286 SDNodeFlags Flags;
3287 if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
3288 Flags.copyFMF(*FPOp);
3290 // Min/max matching is only viable if all output VTs are the same.
3291 if (is_splat(ValueVTs)) {
3292 EVT VT = ValueVTs[0];
3293 LLVMContext &Ctx = *DAG.getContext();
3294 auto &TLI = DAG.getTargetLoweringInfo();
3296 // We care about the legality of the operation after it has been type
3297 // legalized.
3298 while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
3299 VT = TLI.getTypeToTransformTo(Ctx, VT);
3301 // If the vselect is legal, assume we want to leave this as a vector setcc +
3302 // vselect. Otherwise, if this is going to be scalarized, we want to see if
3303 // min/max is legal on the scalar type.
3304 bool UseScalarMinMax = VT.isVector() &&
3305 !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
3307 Value *LHS, *RHS;
3308 auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
3309 ISD::NodeType Opc = ISD::DELETED_NODE;
3310 switch (SPR.Flavor) {
3311 case SPF_UMAX: Opc = ISD::UMAX; break;
3312 case SPF_UMIN: Opc = ISD::UMIN; break;
3313 case SPF_SMAX: Opc = ISD::SMAX; break;
3314 case SPF_SMIN: Opc = ISD::SMIN; break;
3315 case SPF_FMINNUM:
3316 switch (SPR.NaNBehavior) {
3317 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3318 case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
3319 case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
3320 case SPNB_RETURNS_ANY: {
3321 if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
3322 Opc = ISD::FMINNUM;
3323 else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
3324 Opc = ISD::FMINIMUM;
3325 else if (UseScalarMinMax)
3326 Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
3327 ISD::FMINNUM : ISD::FMINIMUM;
3328 break;
3331 break;
3332 case SPF_FMAXNUM:
3333 switch (SPR.NaNBehavior) {
3334 case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3335 case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
3336 case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
3337 case SPNB_RETURNS_ANY:
3339 if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
3340 Opc = ISD::FMAXNUM;
3341 else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
3342 Opc = ISD::FMAXIMUM;
3343 else if (UseScalarMinMax)
3344 Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
3345 ISD::FMAXNUM : ISD::FMAXIMUM;
3346 break;
3348 break;
3349 case SPF_NABS:
3350 Negate = true;
3351 LLVM_FALLTHROUGH;
3352 case SPF_ABS:
3353 IsUnaryAbs = true;
3354 Opc = ISD::ABS;
3355 break;
3356 default: break;
3359 if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
3360 (TLI.isOperationLegalOrCustom(Opc, VT) ||
3361 (UseScalarMinMax &&
3362 TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
3363 // If the underlying comparison instruction is used by any other
3364 // instruction, the consumed instructions won't be destroyed, so it is
3365 // not profitable to convert to a min/max.
3366 hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
3367 OpCode = Opc;
3368 LHSVal = getValue(LHS);
3369 RHSVal = getValue(RHS);
3370 BaseOps.clear();
3373 if (IsUnaryAbs) {
3374 OpCode = Opc;
3375 LHSVal = getValue(LHS);
3376 BaseOps.clear();
3380 if (IsUnaryAbs) {
3381 for (unsigned i = 0; i != NumValues; ++i) {
3382 SDLoc dl = getCurSDLoc();
3383 EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
3384 Values[i] =
3385 DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
3386 if (Negate)
3387 Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
3388 Values[i]);
3390 } else {
3391 for (unsigned i = 0; i != NumValues; ++i) {
3392 SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
3393 Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
3394 Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
3395 Values[i] = DAG.getNode(
3396 OpCode, getCurSDLoc(),
3397 LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
3401 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3402 DAG.getVTList(ValueVTs), Values));
3405 void SelectionDAGBuilder::visitTrunc(const User &I) {
3406 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3407 SDValue N = getValue(I.getOperand(0));
3408 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3409 I.getType());
3410 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3413 void SelectionDAGBuilder::visitZExt(const User &I) {
3414 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3415 // ZExt also can't be a cast to bool for the same reason, so nothing much to do.
3416 SDValue N = getValue(I.getOperand(0));
3417 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3418 I.getType());
3419 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3422 void SelectionDAGBuilder::visitSExt(const User &I) {
3423 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3424 // SExt also can't be a cast to bool for the same reason, so nothing much to do.
3425 SDValue N = getValue(I.getOperand(0));
3426 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3427 I.getType());
3428 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3431 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3432 // FPTrunc is never a no-op cast, no need to check
3433 SDValue N = getValue(I.getOperand(0));
3434 SDLoc dl = getCurSDLoc();
3435 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3436 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3437 setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3438 DAG.getTargetConstant(
3439 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3442 void SelectionDAGBuilder::visitFPExt(const User &I) {
3443 // FPExt is never a no-op cast, no need to check
3444 SDValue N = getValue(I.getOperand(0));
3445 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3446 I.getType());
3447 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3450 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3451 // FPToUI is never a no-op cast, no need to check
3452 SDValue N = getValue(I.getOperand(0));
3453 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3454 I.getType());
3455 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3458 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3459 // FPToSI is never a no-op cast, no need to check
3460 SDValue N = getValue(I.getOperand(0));
3461 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3462 I.getType());
3463 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3466 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3467 // UIToFP is never a no-op cast, no need to check
3468 SDValue N = getValue(I.getOperand(0));
3469 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3470 I.getType());
3471 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3474 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3475 // SIToFP is never a no-op cast, no need to check
3476 SDValue N = getValue(I.getOperand(0));
3477 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3478 I.getType());
3479 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3482 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3483 // What to do depends on the size of the integer and the size of the pointer.
3484 // We can either truncate, zero extend, or no-op, accordingly.
3485 SDValue N = getValue(I.getOperand(0));
3486 auto &TLI = DAG.getTargetLoweringInfo();
3487 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3488 I.getType());
3489 EVT PtrMemVT =
3490 TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
3491 N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
3492 N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
3493 setValue(&I, N);
3496 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3497 // What to do depends on the size of the integer and the size of the pointer.
3498 // We can either truncate, zero extend, or no-op, accordingly.
3499 SDValue N = getValue(I.getOperand(0));
3500 auto &TLI = DAG.getTargetLoweringInfo();
3501 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3502 EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
3503 N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
3504 N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
3505 setValue(&I, N);
3508 void SelectionDAGBuilder::visitBitCast(const User &I) {
3509 SDValue N = getValue(I.getOperand(0));
3510 SDLoc dl = getCurSDLoc();
3511 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3512 I.getType());
3514 // BitCast assures us that source and destination are the same size so this is
3515 // either a BITCAST or a no-op.
3516 if (DestVT != N.getValueType())
3517 setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3518 DestVT, N)); // convert types.
3519 // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3520 // might fold any kind of constant expression to an integer constant and that
3521 // is not what we are looking for. Only recognize a bitcast of a genuine
3522 // constant integer as an opaque constant.
3523 else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3524 setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3525 /*isOpaque=*/true));
3526 else
3527 setValue(&I, N); // noop cast.
3530 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3531 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3532 const Value *SV = I.getOperand(0);
3533 SDValue N = getValue(SV);
3534 EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3536 unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3537 unsigned DestAS = I.getType()->getPointerAddressSpace();
3539 if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
3540 N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3542 setValue(&I, N);
3545 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3546 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3547 SDValue InVec = getValue(I.getOperand(0));
3548 SDValue InVal = getValue(I.getOperand(1));
3549 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3550 TLI.getVectorIdxTy(DAG.getDataLayout()));
3551 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3552 TLI.getValueType(DAG.getDataLayout(), I.getType()),
3553 InVec, InVal, InIdx));
3556 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3557 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3558 SDValue InVec = getValue(I.getOperand(0));
3559 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3560 TLI.getVectorIdxTy(DAG.getDataLayout()));
3561 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3562 TLI.getValueType(DAG.getDataLayout(), I.getType()),
3563 InVec, InIdx));
3566 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3567 SDValue Src1 = getValue(I.getOperand(0));
3568 SDValue Src2 = getValue(I.getOperand(1));
3569 ArrayRef<int> Mask;
3570 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
3571 Mask = SVI->getShuffleMask();
3572 else
3573 Mask = cast<ConstantExpr>(I).getShuffleMask();
3574 SDLoc DL = getCurSDLoc();
3575 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3576 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3577 EVT SrcVT = Src1.getValueType();
3579 if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
3580 VT.isScalableVector()) {
3581 // Canonical splat form of first element of first input vector.
3582 SDValue FirstElt =
3583 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
3584 DAG.getVectorIdxConstant(0, DL));
3585 setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
3586 return;
3589 // For now, we only handle splats for scalable vectors.
3590 // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
3591 // for targets that support a SPLAT_VECTOR for non-scalable vector types.
3592 assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
3594 unsigned SrcNumElts = SrcVT.getVectorNumElements();
3595 unsigned MaskNumElts = Mask.size();
3597 if (SrcNumElts == MaskNumElts) {
3598 setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3599 return;
3602 // Normalize the shuffle vector since mask and vector length don't match.
3603 if (SrcNumElts < MaskNumElts) {
3604 // The mask is longer than the source vectors. We can use concatenation to
3605 // make the mask and vector lengths match.
3607 if (MaskNumElts % SrcNumElts == 0) {
3608 // Mask length is a multiple of the source vector length.
3609 // Check if the shuffle is some kind of concatenation of the input
3610 // vectors.
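// E.g. (illustrative IR):
//   shufflevector <4 x i32> %a, <4 x i32> %b,
//                 <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// picks each source exactly once, in order, so it can be lowered as
// CONCAT_VECTORS(%a, %b) instead of a real shuffle.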
3611 unsigned NumConcat = MaskNumElts / SrcNumElts;
3612 bool IsConcat = true;
3613 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3614 for (unsigned i = 0; i != MaskNumElts; ++i) {
3615 int Idx = Mask[i];
3616 if (Idx < 0)
3617 continue;
3618 // Ensure the indices in each SrcVT sized piece are sequential and that
3619 // the same source is used for the whole piece.
3620 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3621 (ConcatSrcs[i / SrcNumElts] >= 0 &&
3622 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3623 IsConcat = false;
3624 break;
3626 // Remember which source this index came from.
3627 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3630 // The shuffle is concatenating multiple vectors together. Just emit
3631 // a CONCAT_VECTORS operation.
3632 if (IsConcat) {
3633 SmallVector<SDValue, 8> ConcatOps;
3634 for (auto Src : ConcatSrcs) {
3635 if (Src < 0)
3636 ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3637 else if (Src == 0)
3638 ConcatOps.push_back(Src1);
3639 else
3640 ConcatOps.push_back(Src2);
3642 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3643 return;
3647 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3648 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3649 EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3650 PaddedMaskNumElts);
3652 // Pad both vectors with undefs to make them the same length as the mask.
3653 SDValue UndefVal = DAG.getUNDEF(SrcVT);
3655 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3656 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3657 MOps1[0] = Src1;
3658 MOps2[0] = Src2;
3660 Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3661 Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3663 // Readjust mask for new input vector length.
3664 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3665 for (unsigned i = 0; i != MaskNumElts; ++i) {
3666 int Idx = Mask[i];
3667 if (Idx >= (int)SrcNumElts)
3668 Idx -= SrcNumElts - PaddedMaskNumElts;
3669 MappedOps[i] = Idx;
3672 SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3674 // If the concatenated vector was padded, extract a subvector with the
3675 // correct number of elements.
3676 if (MaskNumElts != PaddedMaskNumElts)
3677 Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3678 DAG.getVectorIdxConstant(0, DL));
3680 setValue(&I, Result);
3681 return;
3684 if (SrcNumElts > MaskNumElts) {
3685 // Analyze the access pattern of the vector to see if we can extract
3686 // two subvectors and do the shuffle.
3687 int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3688 bool CanExtract = true;
3689 for (int Idx : Mask) {
3690 unsigned Input = 0;
3691 if (Idx < 0)
3692 continue;
3694 if (Idx >= (int)SrcNumElts) {
3695 Input = 1;
3696 Idx -= SrcNumElts;
3699 // If all the indices come from the same MaskNumElts sized portion of
3700 // the sources we can use extract. Also make sure the extract wouldn't
3701 // extract past the end of the source.
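// E.g. (illustrative): a <4 x i32> shuffle whose mask only references
// elements 8..11 of its two <16 x i32> sources can be lowered by first taking
// an EXTRACT_SUBVECTOR of four elements starting at index 8 from each source
// and then shuffling the two small vectors.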
3702 int NewStartIdx = alignDown(Idx, MaskNumElts);
3703 if (NewStartIdx + MaskNumElts > SrcNumElts ||
3704 (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3705 CanExtract = false;
3706 // Make sure we always update StartIdx as we use it to track if all
3707 // elements are undef.
3708 StartIdx[Input] = NewStartIdx;
3711 if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3712 setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3713 return;
3715 if (CanExtract) {
3716 // Extract appropriate subvector and generate a vector shuffle
3717 for (unsigned Input = 0; Input < 2; ++Input) {
3718 SDValue &Src = Input == 0 ? Src1 : Src2;
3719 if (StartIdx[Input] < 0)
3720 Src = DAG.getUNDEF(VT);
3721 else {
3722 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3723 DAG.getVectorIdxConstant(StartIdx[Input], DL));
3727 // Calculate new mask.
3728 SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3729 for (int &Idx : MappedOps) {
3730 if (Idx >= (int)SrcNumElts)
3731 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3732 else if (Idx >= 0)
3733 Idx -= StartIdx[0];
3736 setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3737 return;
3741 // We can't use either concat vectors or extract subvectors, so fall back to
3742 // replacing the shuffle with per-element extracts and a build vector.
3744 EVT EltVT = VT.getVectorElementType();
3745 SmallVector<SDValue,8> Ops;
3746 for (int Idx : Mask) {
3747 SDValue Res;
3749 if (Idx < 0) {
3750 Res = DAG.getUNDEF(EltVT);
3751 } else {
3752 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3753 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3755 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
3756 DAG.getVectorIdxConstant(Idx, DL));
3759 Ops.push_back(Res);
3762 setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3765 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3766 ArrayRef<unsigned> Indices;
3767 if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3768 Indices = IV->getIndices();
3769 else
3770 Indices = cast<ConstantExpr>(&I)->getIndices();
3772 const Value *Op0 = I.getOperand(0);
3773 const Value *Op1 = I.getOperand(1);
3774 Type *AggTy = I.getType();
3775 Type *ValTy = Op1->getType();
3776 bool IntoUndef = isa<UndefValue>(Op0);
3777 bool FromUndef = isa<UndefValue>(Op1);
3779 unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3781 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3782 SmallVector<EVT, 4> AggValueVTs;
3783 ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3784 SmallVector<EVT, 4> ValValueVTs;
3785 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3787 unsigned NumAggValues = AggValueVTs.size();
3788 unsigned NumValValues = ValValueVTs.size();
3789 SmallVector<SDValue, 4> Values(NumAggValues);
3791 // Ignore an insertvalue that produces an empty object
3792 if (!NumAggValues) {
3793 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3794 return;
3797 SDValue Agg = getValue(Op0);
3798 unsigned i = 0;
3799 // Copy the beginning value(s) from the original aggregate.
3800 for (; i != LinearIndex; ++i)
3801 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3802 SDValue(Agg.getNode(), Agg.getResNo() + i);
3803 // Copy values from the inserted value(s).
3804 if (NumValValues) {
3805 SDValue Val = getValue(Op1);
3806 for (; i != LinearIndex + NumValValues; ++i)
3807 Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3808 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3810 // Copy remaining value(s) from the original aggregate.
3811 for (; i != NumAggValues; ++i)
3812 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3813 SDValue(Agg.getNode(), Agg.getResNo() + i);
3815 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3816 DAG.getVTList(AggValueVTs), Values));
3819 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3820 ArrayRef<unsigned> Indices;
3821 if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3822 Indices = EV->getIndices();
3823 else
3824 Indices = cast<ConstantExpr>(&I)->getIndices();
3826 const Value *Op0 = I.getOperand(0);
3827 Type *AggTy = Op0->getType();
3828 Type *ValTy = I.getType();
3829 bool OutOfUndef = isa<UndefValue>(Op0);
3831 unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3834 SmallVector<EVT, 4> ValValueVTs;
3835 ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3837 unsigned NumValValues = ValValueVTs.size();
3839 // Ignore an extractvalue that produces an empty object
3840 if (!NumValValues) {
3841 setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3842 return;
3845 SmallVector<SDValue, 4> Values(NumValValues);
3847 SDValue Agg = getValue(Op0);
3848 // Copy out the selected value(s).
3849 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3850 Values[i - LinearIndex] =
3851 OutOfUndef ?
3852 DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3853 SDValue(Agg.getNode(), Agg.getResNo() + i);
3855 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3856 DAG.getVTList(ValValueVTs), Values));
3859 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3860 Value *Op0 = I.getOperand(0);
3861 // Note that the pointer operand may be a vector of pointers. Take the scalar
3862 // element which holds a pointer.
3863 unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3864 SDValue N = getValue(Op0);
3865 SDLoc dl = getCurSDLoc();
3866 auto &TLI = DAG.getTargetLoweringInfo();
3868 // Normalize a vector GEP: all scalar operands should be converted to
3869 // splat vectors.
3870 bool IsVectorGEP = I.getType()->isVectorTy();
3871 ElementCount VectorElementCount =
3872 IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
3873 : ElementCount::getFixed(0);
3875 if (IsVectorGEP && !N.getValueType().isVector()) {
3876 LLVMContext &Context = *DAG.getContext();
3877 EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
3878 if (VectorElementCount.isScalable())
3879 N = DAG.getSplatVector(VT, dl, N);
3880 else
3881 N = DAG.getSplatBuildVector(VT, dl, N);
3884 for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3885 GTI != E; ++GTI) {
3886 const Value *Idx = GTI.getOperand();
3887 if (StructType *StTy = GTI.getStructTypeOrNull()) {
3888 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3889 if (Field) {
3890 // N = N + Offset
3891 uint64_t Offset =
3892 DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
3894 // In an inbounds GEP with an offset that is nonnegative even when
3895 // interpreted as signed, assume there is no unsigned overflow.
3896 SDNodeFlags Flags;
3897 if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3898 Flags.setNoUnsignedWrap(true);
3900 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3901 DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3903 } else {
3904 // IdxSize is the width of the arithmetic according to IR semantics.
3905 // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
3906 // (and fix up the result later).
3907 unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3908 MVT IdxTy = MVT::getIntegerVT(IdxSize);
3909 TypeSize ElementSize =
3910 DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
3911 // We intentionally mask away the high bits here; ElementSize may not
3912 // fit in IdxTy.
3913 APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
3914 bool ElementScalable = ElementSize.isScalable();
3916 // If this is a scalar constant or a splat vector of constants,
3917 // handle it quickly.
3918 const auto *C = dyn_cast<Constant>(Idx);
3919 if (C && isa<VectorType>(C->getType()))
3920 C = C->getSplatValue();
3922 const auto *CI = dyn_cast_or_null<ConstantInt>(C);
3923 if (CI && CI->isZero())
3924 continue;
3925 if (CI && !ElementScalable) {
3926 APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
3927 LLVMContext &Context = *DAG.getContext();
3928 SDValue OffsVal;
3929 if (IsVectorGEP)
3930 OffsVal = DAG.getConstant(
3931 Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
3932 else
3933 OffsVal = DAG.getConstant(Offs, dl, IdxTy);
3935 // In an inbounds GEP with an offset that is nonnegative even when
3936 // interpreted as signed, assume there is no unsigned overflow.
3937 SDNodeFlags Flags;
3938 if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3939 Flags.setNoUnsignedWrap(true);
3941 OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
3943 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3944 continue;
3947 // N = N + Idx * ElementMul;
3948 SDValue IdxN = getValue(Idx);
3950 if (!IdxN.getValueType().isVector() && IsVectorGEP) {
3951 EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
3952 VectorElementCount);
3953 if (VectorElementCount.isScalable())
3954 IdxN = DAG.getSplatVector(VT, dl, IdxN);
3955 else
3956 IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3959 // If the index is smaller or larger than intptr_t, truncate or extend
3960 // it.
3961 IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3963 if (ElementScalable) {
3964 EVT VScaleTy = N.getValueType().getScalarType();
3965 SDValue VScale = DAG.getNode(
3966 ISD::VSCALE, dl, VScaleTy,
3967 DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
3968 if (IsVectorGEP)
3969 VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
3970 IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
3971 } else {
3972 // If this is a multiply by a power of two, turn it into a shl
3973 // immediately. This is a very common case.
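// For example, indexing an array of i32 (ElementMul == 4) becomes IdxN << 2
// rather than IdxN * 4; a non-power-of-two element size, such as a 12-byte
// struct, still uses an ISD::MUL below.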
3974 if (ElementMul != 1) {
3975 if (ElementMul.isPowerOf2()) {
3976 unsigned Amt = ElementMul.logBase2();
3977 IdxN = DAG.getNode(ISD::SHL, dl,
3978 N.getValueType(), IdxN,
3979 DAG.getConstant(Amt, dl, IdxN.getValueType()));
3980 } else {
3981 SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
3982 IdxN.getValueType());
3983 IdxN = DAG.getNode(ISD::MUL, dl,
3984 N.getValueType(), IdxN, Scale);
3989 N = DAG.getNode(ISD::ADD, dl,
3990 N.getValueType(), N, IdxN);
3994 MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
3995 MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
3996 if (IsVectorGEP) {
3997 PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
3998 PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
4001 if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
4002 N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
4004 setValue(&I, N);
4007 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
4008 // If this is a fixed sized alloca in the entry block of the function,
4009 // allocate it statically on the stack.
4010 if (FuncInfo.StaticAllocaMap.count(&I))
4011 return; // getValue will auto-populate this.
4013 SDLoc dl = getCurSDLoc();
4014 Type *Ty = I.getAllocatedType();
4015 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4016 auto &DL = DAG.getDataLayout();
4017 uint64_t TySize = DL.getTypeAllocSize(Ty);
4018 MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
4020 SDValue AllocSize = getValue(I.getArraySize());
4022 EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
4023 if (AllocSize.getValueType() != IntPtr)
4024 AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
4026 AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
4027 AllocSize,
4028 DAG.getConstant(TySize, dl, IntPtr));
4030 // Handle alignment. If the requested alignment is less than or equal to
4031 // the stack alignment, ignore it. If it is greater than the stack
4032 // alignment, we record it in the DYNAMIC_STACKALLOC node.
4033 Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
4034 if (*Alignment <= StackAlign)
4035 Alignment = None;
4037 const uint64_t StackAlignMask = StackAlign.value() - 1U;
4038 // Round the size of the allocation up to the stack alignment size
4039 // by adding SA-1 to the size. This doesn't overflow because we're computing
4040 // an address inside an alloca.
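// E.g. with a 16-byte stack alignment, a request for 20 bytes becomes
// (20 + 15) & ~15 == 32 bytes.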
4041 SDNodeFlags Flags;
4042 Flags.setNoUnsignedWrap(true);
4043 AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
4044 DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
4046 // Mask out the low bits for alignment purposes.
4047 AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
4048 DAG.getConstant(~StackAlignMask, dl, IntPtr));
4050 SDValue Ops[] = {
4051 getRoot(), AllocSize,
4052 DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
4053 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
4054 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
4055 setValue(&I, DSA);
4056 DAG.setRoot(DSA.getValue(1));
4058 assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
4061 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
4062 if (I.isAtomic())
4063 return visitAtomicLoad(I);
4065 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4066 const Value *SV = I.getOperand(0);
4067 if (TLI.supportSwiftError()) {
4068 // Swifterror values can come from either a function parameter with
4069 // swifterror attribute or an alloca with swifterror attribute.
4070 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
4071 if (Arg->hasSwiftErrorAttr())
4072 return visitLoadFromSwiftError(I);
4075 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
4076 if (Alloca->isSwiftError())
4077 return visitLoadFromSwiftError(I);
4081 SDValue Ptr = getValue(SV);
4083 Type *Ty = I.getType();
4084 Align Alignment = I.getAlign();
4086 AAMDNodes AAInfo = I.getAAMetadata();
4087 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4089 SmallVector<EVT, 4> ValueVTs, MemVTs;
4090 SmallVector<uint64_t, 4> Offsets;
4091 ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
4092 unsigned NumValues = ValueVTs.size();
4093 if (NumValues == 0)
4094 return;
4096 bool isVolatile = I.isVolatile();
4098 SDValue Root;
4099 bool ConstantMemory = false;
4100 if (isVolatile)
4101 // Serialize volatile loads with other side effects.
4102 Root = getRoot();
4103 else if (NumValues > MaxParallelChains)
4104 Root = getMemoryRoot();
4105 else if (AA &&
4106 AA->pointsToConstantMemory(MemoryLocation(
4108 LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
4109 AAInfo))) {
4110 // Do not serialize (non-volatile) loads of constant memory with anything.
4111 Root = DAG.getEntryNode();
4112 ConstantMemory = true;
4113 } else {
4114 // Do not serialize non-volatile loads against each other.
4115 Root = DAG.getRoot();
4118 SDLoc dl = getCurSDLoc();
4120 if (isVolatile)
4121 Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
4123 // An aggregate load cannot wrap around the address space, so offsets to its
4124 // parts don't wrap either.
4125 SDNodeFlags Flags;
4126 Flags.setNoUnsignedWrap(true);
4128 SmallVector<SDValue, 4> Values(NumValues);
4129 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
4130 EVT PtrVT = Ptr.getValueType();
4132 MachineMemOperand::Flags MMOFlags
4133 = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
4135 unsigned ChainI = 0;
4136 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4137 // Serializing loads here may result in excessive register pressure, and
4138 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
4139 // could recover a bit by hoisting nodes upward in the chain by recognizing
4140 // they are side-effect free or do not alias. The optimizer should really
4141 // avoid this case by converting large object/array copies to llvm.memcpy
4142 // (MaxParallelChains should always remain as a failsafe).
4143 if (ChainI == MaxParallelChains) {
4144 assert(PendingLoads.empty() && "PendingLoads must be serialized first");
4145 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4146 makeArrayRef(Chains.data(), ChainI));
4147 Root = Chain;
4148 ChainI = 0;
4150 SDValue A = DAG.getNode(ISD::ADD, dl,
4151 PtrVT, Ptr,
4152 DAG.getConstant(Offsets[i], dl, PtrVT),
4153 Flags);
4155 SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
4156 MachinePointerInfo(SV, Offsets[i]), Alignment,
4157 MMOFlags, AAInfo, Ranges);
4158 Chains[ChainI] = L.getValue(1);
4160 if (MemVTs[i] != ValueVTs[i])
4161 L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
4163 Values[i] = L;
4166 if (!ConstantMemory) {
4167 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4168 makeArrayRef(Chains.data(), ChainI));
4169 if (isVolatile)
4170 DAG.setRoot(Chain);
4171 else
4172 PendingLoads.push_back(Chain);
4175 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
4176 DAG.getVTList(ValueVTs), Values));
4179 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
4180 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4181 "call visitStoreToSwiftError when backend supports swifterror");
4183 SmallVector<EVT, 4> ValueVTs;
4184 SmallVector<uint64_t, 4> Offsets;
4185 const Value *SrcV = I.getOperand(0);
4186 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
4187 SrcV->getType(), ValueVTs, &Offsets);
4188 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4189 "expect a single EVT for swifterror");
4191 SDValue Src = getValue(SrcV);
4192 // Create a virtual register, then update the virtual register.
4193 Register VReg =
4194 SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
4195 // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
4196 // Chain can be getRoot or getControlRoot.
4197 SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
4198 SDValue(Src.getNode(), Src.getResNo()));
4199 DAG.setRoot(CopyNode);
4202 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
4203 assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
4204 "call visitLoadFromSwiftError when backend supports swifterror");
4206 assert(!I.isVolatile() &&
4207 !I.hasMetadata(LLVMContext::MD_nontemporal) &&
4208 !I.hasMetadata(LLVMContext::MD_invariant_load) &&
4209 "Support volatile, non temporal, invariant for load_from_swift_error");
4211 const Value *SV = I.getOperand(0);
4212 Type *Ty = I.getType();
4213 assert(
4214 (!AA ||
4215 !AA->pointsToConstantMemory(MemoryLocation(
4216 SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
4217 I.getAAMetadata()))) &&
4218 "load_from_swift_error should not be constant memory");
4220 SmallVector<EVT, 4> ValueVTs;
4221 SmallVector<uint64_t, 4> Offsets;
4222 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
4223 ValueVTs, &Offsets);
4224 assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
4225 "expect a single EVT for swifterror");
4227 // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
4228 SDValue L = DAG.getCopyFromReg(
4229 getRoot(), getCurSDLoc(),
4230 SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
4232 setValue(&I, L);
4235 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
4236 if (I.isAtomic())
4237 return visitAtomicStore(I);
4239 const Value *SrcV = I.getOperand(0);
4240 const Value *PtrV = I.getOperand(1);
4242 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4243 if (TLI.supportSwiftError()) {
4244 // Swifterror values can come from either a function parameter with
4245 // swifterror attribute or an alloca with swifterror attribute.
4246 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
4247 if (Arg->hasSwiftErrorAttr())
4248 return visitStoreToSwiftError(I);
4251 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
4252 if (Alloca->isSwiftError())
4253 return visitStoreToSwiftError(I);
4257 SmallVector<EVT, 4> ValueVTs, MemVTs;
4258 SmallVector<uint64_t, 4> Offsets;
4259 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
4260 SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
4261 unsigned NumValues = ValueVTs.size();
4262 if (NumValues == 0)
4263 return;
4265 // Get the lowered operands. Note that we do this after
4266 // checking if NumValues is zero, because with zero values
4267 // the operands won't have values in the map.
4268 SDValue Src = getValue(SrcV);
4269 SDValue Ptr = getValue(PtrV);
4271 SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
4272 SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
4273 SDLoc dl = getCurSDLoc();
4274 Align Alignment = I.getAlign();
4275 AAMDNodes AAInfo = I.getAAMetadata();
4277 auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
4279 // An aggregate store cannot wrap around the address space, so offsets to its
4280 // parts don't wrap either.
4281 SDNodeFlags Flags;
4282 Flags.setNoUnsignedWrap(true);
4284 unsigned ChainI = 0;
4285 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
4286 // See visitLoad comments.
4287 if (ChainI == MaxParallelChains) {
4288 SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4289 makeArrayRef(Chains.data(), ChainI));
4290 Root = Chain;
4291 ChainI = 0;
4293 SDValue Add =
4294 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
4295 SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
4296 if (MemVTs[i] != ValueVTs[i])
4297 Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
4298 SDValue St =
4299 DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
4300 Alignment, MMOFlags, AAInfo);
4301 Chains[ChainI] = St;
4304 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
4305 makeArrayRef(Chains.data(), ChainI));
4306 DAG.setRoot(StoreNode);
4309 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
4310 bool IsCompressing) {
4311 SDLoc sdl = getCurSDLoc();
4313 auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4314 MaybeAlign &Alignment) {
4315 // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
4316 Src0 = I.getArgOperand(0);
4317 Ptr = I.getArgOperand(1);
4318 Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
4319 Mask = I.getArgOperand(3);
4321 auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4322 MaybeAlign &Alignment) {
4323 // llvm.masked.compressstore.*(Src0, Ptr, Mask)
4324 Src0 = I.getArgOperand(0);
4325 Ptr = I.getArgOperand(1);
4326 Mask = I.getArgOperand(2);
4327 Alignment = None;
4330 Value *PtrOperand, *MaskOperand, *Src0Operand;
4331 MaybeAlign Alignment;
4332 if (IsCompressing)
4333 getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4334 else
4335 getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4337 SDValue Ptr = getValue(PtrOperand);
4338 SDValue Src0 = getValue(Src0Operand);
4339 SDValue Mask = getValue(MaskOperand);
4340 SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
4342 EVT VT = Src0.getValueType();
4343 if (!Alignment)
4344 Alignment = DAG.getEVTAlign(VT);
4346 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4347 MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
4348 MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
4349 SDValue StoreNode =
4350 DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
4351 ISD::UNINDEXED, false /* Truncating */, IsCompressing);
4352 DAG.setRoot(StoreNode);
4353 setValue(&I, StoreNode);
4356 // Get a uniform base for the Gather/Scatter intrinsic.
4357 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
4358 // We try to represent it as a base pointer + vector of indices.
4359 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
4360 // The first operand of the GEP may be a single pointer or a vector of pointers.
4361 // Example:
4362 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
4363 // or
4364 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
4365 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
4367 // When the first GEP operand is a single pointer, it is the uniform base we
4368 // are looking for. If the first operand of the GEP is a splat vector, we
4369 // extract the splat value and use it as the uniform base.
4370 // In all other cases the function returns 'false'.
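// For the second example above, the uniform base is %ptr, the index vector is
// %ind, and the scale is sizeof(i32) == 4 (the GEP's result element size).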
4371 static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
4372 ISD::MemIndexType &IndexType, SDValue &Scale,
4373 SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
4374 SelectionDAG& DAG = SDB->DAG;
4375 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4376 const DataLayout &DL = DAG.getDataLayout();
4378 assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
4380 // Handle splat constant pointer.
4381 if (auto *C = dyn_cast<Constant>(Ptr)) {
4382 C = C->getSplatValue();
4383 if (!C)
4384 return false;
4386 Base = SDB->getValue(C);
4388 ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
4389 EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
4390 Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
4391 IndexType = ISD::SIGNED_SCALED;
4392 Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
4393 return true;
4396 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
4397 if (!GEP || GEP->getParent() != CurBB)
4398 return false;
4400 if (GEP->getNumOperands() != 2)
4401 return false;
4403 const Value *BasePtr = GEP->getPointerOperand();
4404 const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
4406 // Make sure the base is scalar and the index is a vector.
4407 if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
4408 return false;
4410 Base = SDB->getValue(BasePtr);
4411 Index = SDB->getValue(IndexVal);
4412 IndexType = ISD::SIGNED_SCALED;
4413 Scale = DAG.getTargetConstant(
4414 DL.getTypeAllocSize(GEP->getResultElementType()),
4415 SDB->getCurSDLoc(), TLI.getPointerTy(DL));
4416 return true;
4419 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
4420 SDLoc sdl = getCurSDLoc();
4422 // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4423 const Value *Ptr = I.getArgOperand(1);
4424 SDValue Src0 = getValue(I.getArgOperand(0));
4425 SDValue Mask = getValue(I.getArgOperand(3));
4426 EVT VT = Src0.getValueType();
4427 Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
4428 ->getMaybeAlignValue()
4429 .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
4430 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4432 SDValue Base;
4433 SDValue Index;
4434 ISD::MemIndexType IndexType;
4435 SDValue Scale;
4436 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
4437 I.getParent());
4439 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4440 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4441 MachinePointerInfo(AS), MachineMemOperand::MOStore,
4442 // TODO: Make MachineMemOperands aware of scalable
4443 // vectors.
4444 MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
4445 if (!UniformBase) {
4446 Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4447 Index = getValue(Ptr);
4448 IndexType = ISD::SIGNED_UNSCALED;
4449 Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4452 EVT IdxVT = Index.getValueType();
4453 EVT EltTy = IdxVT.getVectorElementType();
4454 if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
4455 EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
4456 Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
4459 SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
4460 SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
4461 Ops, MMO, IndexType, false);
4462 DAG.setRoot(Scatter);
4463 setValue(&I, Scatter);
4466 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4467 SDLoc sdl = getCurSDLoc();
4469 auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4470 MaybeAlign &Alignment) {
4471 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4472 Ptr = I.getArgOperand(0);
4473 Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
4474 Mask = I.getArgOperand(2);
4475 Src0 = I.getArgOperand(3);
4477 auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
4478 MaybeAlign &Alignment) {
4479 // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4480 Ptr = I.getArgOperand(0);
4481 Alignment = None;
4482 Mask = I.getArgOperand(1);
4483 Src0 = I.getArgOperand(2);
4486 Value *PtrOperand, *MaskOperand, *Src0Operand;
4487 MaybeAlign Alignment;
4488 if (IsExpanding)
4489 getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4490 else
4491 getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4493 SDValue Ptr = getValue(PtrOperand);
4494 SDValue Src0 = getValue(Src0Operand);
4495 SDValue Mask = getValue(MaskOperand);
4496 SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
4498 EVT VT = Src0.getValueType();
4499 if (!Alignment)
4500 Alignment = DAG.getEVTAlign(VT);
4502 AAMDNodes AAInfo = I.getAAMetadata();
4503 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4505 // Do not serialize masked loads of constant memory with anything.
4506 MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
4507 bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
4509 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4511 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4512 MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
4513 MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
4515 SDValue Load =
4516 DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
4517 ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
4518 if (AddToChain)
4519 PendingLoads.push_back(Load.getValue(1));
4520 setValue(&I, Load);
4523 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4524 SDLoc sdl = getCurSDLoc();
4526 // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4527 const Value *Ptr = I.getArgOperand(0);
4528 SDValue Src0 = getValue(I.getArgOperand(3));
4529 SDValue Mask = getValue(I.getArgOperand(2));
4531 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4532 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4533 Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
4534 ->getMaybeAlignValue()
4535 .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
4537 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4539 SDValue Root = DAG.getRoot();
4540 SDValue Base;
4541 SDValue Index;
4542 ISD::MemIndexType IndexType;
4543 SDValue Scale;
4544 bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
4545 I.getParent());
4546 unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
4547 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4548 MachinePointerInfo(AS), MachineMemOperand::MOLoad,
4549 // TODO: Make MachineMemOperands aware of scalable
4550 // vectors.
4551 MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
4553 if (!UniformBase) {
4554 Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4555 Index = getValue(Ptr);
4556 IndexType = ISD::SIGNED_UNSCALED;
4557 Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4560 EVT IdxVT = Index.getValueType();
4561 EVT EltTy = IdxVT.getVectorElementType();
4562 if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
4563 EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
4564 Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
4567 SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4568 SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4569 Ops, MMO, IndexType, ISD::NON_EXTLOAD);
4571 PendingLoads.push_back(Gather.getValue(1));
4572 setValue(&I, Gather);
4575 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4576 SDLoc dl = getCurSDLoc();
4577 AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
4578 AtomicOrdering FailureOrdering = I.getFailureOrdering();
4579 SyncScope::ID SSID = I.getSyncScopeID();
4581 SDValue InChain = getRoot();
4583 MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4584 SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4586 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4587 auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
4589 MachineFunction &MF = DAG.getMachineFunction();
4590 MachineMemOperand *MMO = MF.getMachineMemOperand(
4591 MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
4592 DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
4593 FailureOrdering);
4595 SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
4596 dl, MemVT, VTs, InChain,
4597 getValue(I.getPointerOperand()),
4598 getValue(I.getCompareOperand()),
4599 getValue(I.getNewValOperand()), MMO);
4601 SDValue OutChain = L.getValue(2);
4603 setValue(&I, L);
4604 DAG.setRoot(OutChain);
4607 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4608 SDLoc dl = getCurSDLoc();
4609 ISD::NodeType NT;
4610 switch (I.getOperation()) {
4611 default: llvm_unreachable("Unknown atomicrmw operation");
4612 case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4613 case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4614 case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4615 case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4616 case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4617 case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4618 case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4619 case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4620 case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4621 case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4622 case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4623 case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
4624 case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
4626 AtomicOrdering Ordering = I.getOrdering();
4627 SyncScope::ID SSID = I.getSyncScopeID();
4629 SDValue InChain = getRoot();
4631 auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
4632 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4633 auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
4635 MachineFunction &MF = DAG.getMachineFunction();
4636 MachineMemOperand *MMO = MF.getMachineMemOperand(
4637 MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
4638 DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
4640 SDValue L =
4641 DAG.getAtomic(NT, dl, MemVT, InChain,
4642 getValue(I.getPointerOperand()), getValue(I.getValOperand()),
4643 MMO);
4645 SDValue OutChain = L.getValue(1);
4647 setValue(&I, L);
4648 DAG.setRoot(OutChain);
4651 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4652 SDLoc dl = getCurSDLoc();
4653 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4654 SDValue Ops[3];
4655 Ops[0] = getRoot();
4656 Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
4657 TLI.getFenceOperandTy(DAG.getDataLayout()));
4658 Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
4659 TLI.getFenceOperandTy(DAG.getDataLayout()));
4660 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4663 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4664 SDLoc dl = getCurSDLoc();
4665 AtomicOrdering Order = I.getOrdering();
4666 SyncScope::ID SSID = I.getSyncScopeID();
4668 SDValue InChain = getRoot();
4670 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4671 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4672 EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
4674 if (!TLI.supportsUnalignedAtomics() &&
4675 I.getAlignment() < MemVT.getSizeInBits() / 8)
4676 report_fatal_error("Cannot generate unaligned atomic load");
4678 auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
4680 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4681 MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
4682 I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
4684 InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4686 SDValue Ptr = getValue(I.getPointerOperand());
4688 if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
4689 // TODO: Once this is better exercised by tests, it should be merged with
4690 // the normal path for loads to prevent future divergence.
4691 SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
4692 if (MemVT != VT)
4693 L = DAG.getPtrExtOrTrunc(L, dl, VT);
4695 setValue(&I, L);
4696 SDValue OutChain = L.getValue(1);
4697 if (!I.isUnordered())
4698 DAG.setRoot(OutChain);
4699 else
4700 PendingLoads.push_back(OutChain);
4701 return;
4704 SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
4705 Ptr, MMO);
4707 SDValue OutChain = L.getValue(1);
4708 if (MemVT != VT)
4709 L = DAG.getPtrExtOrTrunc(L, dl, VT);
4711 setValue(&I, L);
4712 DAG.setRoot(OutChain);
4715 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4716 SDLoc dl = getCurSDLoc();
4718 AtomicOrdering Ordering = I.getOrdering();
4719 SyncScope::ID SSID = I.getSyncScopeID();
4721 SDValue InChain = getRoot();
4723 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4724 EVT MemVT =
4725 TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4727 if (I.getAlignment() < MemVT.getSizeInBits() / 8)
4728 report_fatal_error("Cannot generate unaligned atomic store");
4730 auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
4732 MachineFunction &MF = DAG.getMachineFunction();
4733 MachineMemOperand *MMO = MF.getMachineMemOperand(
4734 MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
4735 I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
4737 SDValue Val = getValue(I.getValueOperand());
4738 if (Val.getValueType() != MemVT)
4739 Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
4740 SDValue Ptr = getValue(I.getPointerOperand());
4742 if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
4743 // TODO: Once this is better exercised by tests, it should be merged with
4744 // the normal path for stores to prevent future divergence.
4745 SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
4746 DAG.setRoot(S);
4747 return;
4749 SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
4750 Ptr, Val, MMO);
4753 DAG.setRoot(OutChain);
4756 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4757 /// node.
4758 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4759 unsigned Intrinsic) {
4760 // Ignore the callsite's attributes. A specific call site may be marked with
4761 // readnone, but the lowering code will expect the chain based on the
4762 // definition.
4763 const Function *F = I.getCalledFunction();
4764 bool HasChain = !F->doesNotAccessMemory();
4765 bool OnlyLoad = HasChain && F->onlyReadsMemory();
4767 // Build the operand list.
4768 SmallVector<SDValue, 8> Ops;
4769 if (HasChain) { // If this intrinsic has side-effects, chainify it.
4770 if (OnlyLoad) {
4771 // We don't need to serialize loads against other loads.
4772 Ops.push_back(DAG.getRoot());
4773 } else {
4774 Ops.push_back(getRoot());
4778 // Info is set by getTgtMemIntrinsic.
4779 TargetLowering::IntrinsicInfo Info;
4780 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4781 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4782 DAG.getMachineFunction(),
4783 Intrinsic);
4785 // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4786 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4787 Info.opc == ISD::INTRINSIC_W_CHAIN)
4788 Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4789 TLI.getPointerTy(DAG.getDataLayout())));
4791 // Add all operands of the call to the operand list.
4792 for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
4793 const Value *Arg = I.getArgOperand(i);
4794 if (!I.paramHasAttr(i, Attribute::ImmArg)) {
4795 Ops.push_back(getValue(Arg));
4796 continue;
4799 // Use TargetConstant instead of a regular constant for immarg.
4800 EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
4801 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
4802 assert(CI->getBitWidth() <= 64 &&
4803 "large intrinsic immediates not handled");
4804 Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
4805 } else {
4806 Ops.push_back(
4807 DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
4811 SmallVector<EVT, 4> ValueVTs;
4812 ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4814 if (HasChain)
4815 ValueVTs.push_back(MVT::Other);
4817 SDVTList VTs = DAG.getVTList(ValueVTs);
4819 // Propagate fast-math-flags from IR to node(s).
4820 SDNodeFlags Flags;
4821 if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
4822 Flags.copyFMF(*FPMO);
4823 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
4825 // Create the node.
4826 SDValue Result;
4827 if (IsTgtIntrinsic) {
4828 // This is a target intrinsic that touches memory.
4829 Result =
4830 DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
4831 MachinePointerInfo(Info.ptrVal, Info.offset),
4832 Info.align, Info.flags, Info.size,
4833 I.getAAMetadata());
4834 } else if (!HasChain) {
4835 Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4836 } else if (!I.getType()->isVoidTy()) {
4837 Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4838 } else {
4839 Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4842 if (HasChain) {
4843 SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4844 if (OnlyLoad)
4845 PendingLoads.push_back(Chain);
4846 else
4847 DAG.setRoot(Chain);
4850 if (!I.getType()->isVoidTy()) {
4851 if (!isa<VectorType>(I.getType()))
4852 Result = lowerRangeToAssertZExt(DAG, I, Result);
4854 MaybeAlign Alignment = I.getRetAlign();
4855 if (!Alignment)
4856 Alignment = F->getAttributes().getRetAlignment();
4857 // Insert `assertalign` node if there's an alignment.
4858 if (InsertAssertAlign && Alignment) {
4859 Result =
4860 DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
4863 setValue(&I, Result);
4867 /// GetSignificand - Get the significand and build it into a floating-point
4868 /// number with exponent of 1:
4870 /// Op = (Op & 0x007fffff) | 0x3f800000;
4872 /// where Op is the i32 bit pattern of the floating-point value.
4873 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4874 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4875 DAG.getConstant(0x007fffff, dl, MVT::i32));
4876 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4877 DAG.getConstant(0x3f800000, dl, MVT::i32));
4878 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4881 /// GetExponent - Get the exponent:
4883 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4885 /// where Op is the i32 bit pattern of the floating-point value.
4886 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4887 const TargetLowering &TLI, const SDLoc &dl) {
4888 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4889 DAG.getConstant(0x7f800000, dl, MVT::i32));
4890 SDValue t1 = DAG.getNode(
4891 ISD::SRL, dl, MVT::i32, t0,
4892 DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4893 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4894 DAG.getConstant(127, dl, MVT::i32));
4895 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
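// Illustrative sketch (not part of the upstream lowering): the same two bit
// manipulations as GetSignificand/GetExponent, written as plain host C++ on an
// IEEE-754 binary32 value. The names and includes below exist only for this
// example.
#include <cstdint>
#include <cstring>

static float exampleSignificandOf(float Op) {
  uint32_t Bits;
  std::memcpy(&Bits, &Op, sizeof(Bits));
  // Keep the mantissa, force a scale factor of 1 (biased exponent 127).
  Bits = (Bits & 0x007fffffu) | 0x3f800000u;
  std::memcpy(&Op, &Bits, sizeof(Op));
  return Op; // result is in [1.0, 2.0)
}

static float exampleExponentOf(float Op) {
  uint32_t Bits;
  std::memcpy(&Bits, &Op, sizeof(Bits));
  return (float)(int)(((Bits & 0x7f800000u) >> 23) - 127); // unbiased exponent
}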
4898 /// getF32Constant - Get a 32-bit floating-point constant.
4899 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4900 const SDLoc &dl) {
4901 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4902 MVT::f32);
4905 static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4906 SelectionDAG &DAG) {
4907 // TODO: What fast-math-flags should be set on the floating-point nodes?
4909 // IntegerPartOfX = (int32_t)t0;
4910 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4912 // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4913 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4914 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4916 // IntegerPartOfX <<= 23;
4917 IntegerPartOfX = DAG.getNode(
4918 ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4919 DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4920 DAG.getDataLayout())));
4922 SDValue TwoToFractionalPartOfX;
4923 if (LimitFloatPrecision <= 6) {
4924 // For floating-point precision of 6:
4926 // TwoToFractionalPartOfX =
4927 // 0.997535578f +
4928 // (0.735607626f + 0.252464424f * x) * x;
4930 // error 0.0144103317, which is 6 bits
4931 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4932 getF32Constant(DAG, 0x3e814304, dl));
4933 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4934 getF32Constant(DAG, 0x3f3c50c8, dl));
4935 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4936 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4937 getF32Constant(DAG, 0x3f7f5e7e, dl));
4938 } else if (LimitFloatPrecision <= 12) {
4939 // For floating-point precision of 12:
4941 // TwoToFractionalPartOfX =
4942 // 0.999892986f +
4943 // (0.696457318f +
4944 // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4946 // error 0.000107046256, which is 13 to 14 bits
4947 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4948 getF32Constant(DAG, 0x3da235e3, dl));
4949 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4950 getF32Constant(DAG, 0x3e65b8f3, dl));
4951 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4952 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4953 getF32Constant(DAG, 0x3f324b07, dl));
4954 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4955 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4956 getF32Constant(DAG, 0x3f7ff8fd, dl));
4957 } else { // LimitFloatPrecision <= 18
4958 // For floating-point precision of 18:
4960 // TwoToFractionalPartOfX =
4961 // 0.999999982f +
4962 // (0.693148872f +
4963 // (0.240227044f +
4964 // (0.554906021e-1f +
4965 // (0.961591928e-2f +
4966 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4967 // error 2.47208000*10^(-7), which is better than 18 bits
4968 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4969 getF32Constant(DAG, 0x3924b03e, dl));
4970 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4971 getF32Constant(DAG, 0x3ab24b87, dl));
4972 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4973 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4974 getF32Constant(DAG, 0x3c1d8c17, dl));
4975 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4976 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4977 getF32Constant(DAG, 0x3d634a1d, dl));
4978 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4979 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4980 getF32Constant(DAG, 0x3e75fe14, dl));
4981 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4982 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4983 getF32Constant(DAG, 0x3f317234, dl));
4984 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4985 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4986 getF32Constant(DAG, 0x3f800000, dl));
4989 // Add the exponent into the result in integer domain.
4990 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4991 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4992 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
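// Illustrative sketch (not part of the upstream lowering): the same
// integer/fractional split on a host float, using the LimitFloatPrecision <= 6
// polynomial; the hex bit-pattern constants in the DAG code above encode the
// same coefficients. expandExp and expandPow below feed this scheme with
// Op * log2(e) and Op * log2(10) respectively. Assumes IEEE-754 binary32 and a
// T0 small enough that the exponent field does not overflow.
#include <cstdint>
#include <cstring>

static float exampleExp2(float T0) {
  int IntegerPart = (int)T0;         // FP_TO_SINT: truncate toward zero
  float X = T0 - (float)IntegerPart; // fractional part
  // 2^X on the fractional part, 6-bit-accurate minimax polynomial.
  float TwoToFrac = 0.997535578f + (0.735607626f + 0.252464424f * X) * X;
  // Fold the integer part into the exponent field of the binary32 result,
  // exactly as the final BITCAST/ADD in the function above does.
  uint32_t Bits;
  std::memcpy(&Bits, &TwoToFrac, sizeof(Bits));
  Bits += (uint32_t)IntegerPart << 23;
  float Result;
  std::memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}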
4995 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4996 /// limited-precision mode.
4997 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4998 const TargetLowering &TLI, SDNodeFlags Flags) {
4999 if (Op.getValueType() == MVT::f32 &&
5000 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5002 // Put the exponent in the right bit position for later addition to the
5003 // final result:
5005 // t0 = Op * log2(e)
5007 // TODO: What fast-math-flags should be set here?
5008 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
5009 DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
5010 return getLimitedPrecisionExp2(t0, dl, DAG);
5013 // No special expansion.
5014 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
5017 /// expandLog - Lower a log intrinsic. Handles the special sequences for
5018 /// limited-precision mode.
5019 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5020 const TargetLowering &TLI, SDNodeFlags Flags) {
5021 // TODO: What fast-math-flags should be set on the floating-point nodes?
5023 if (Op.getValueType() == MVT::f32 &&
5024 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5025 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5027 // Scale the exponent by log(2).
5028 SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
5029 SDValue LogOfExponent =
5030 DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5031 DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
5033 // Get the significand and build it into a floating-point number with
5034 // exponent of 1.
5035 SDValue X = GetSignificand(DAG, Op1, dl);
5037 SDValue LogOfMantissa;
5038 if (LimitFloatPrecision <= 6) {
5039 // For floating-point precision of 6:
5041 // LogOfMantissa =
5042 // -1.1609546f +
5043 // (1.4034025f - 0.23903021f * x) * x;
5045 // error 0.0034276066, which is better than 8 bits
5046 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5047 getF32Constant(DAG, 0xbe74c456, dl));
5048 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5049 getF32Constant(DAG, 0x3fb3a2b1, dl));
5050 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5051 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5052 getF32Constant(DAG, 0x3f949a29, dl));
5053 } else if (LimitFloatPrecision <= 12) {
5054 // For floating-point precision of 12:
5056 // LogOfMantissa =
5057 // -1.7417939f +
5058 // (2.8212026f +
5059 // (-1.4699568f +
5060 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
5062 // error 0.000061011436, which is 14 bits
5063 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5064 getF32Constant(DAG, 0xbd67b6d6, dl));
5065 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5066 getF32Constant(DAG, 0x3ee4f4b8, dl));
5067 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5068 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5069 getF32Constant(DAG, 0x3fbc278b, dl));
5070 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5071 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5072 getF32Constant(DAG, 0x40348e95, dl));
5073 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5074 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5075 getF32Constant(DAG, 0x3fdef31a, dl));
5076 } else { // LimitFloatPrecision <= 18
5077 // For floating-point precision of 18:
5079 // LogOfMantissa =
5080 // -2.1072184f +
5081 // (4.2372794f +
5082 // (-3.7029485f +
5083 // (2.2781945f +
5084 // (-0.87823314f +
5085 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
5087 // error 0.0000023660568, which is better than 18 bits
5088 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5089 getF32Constant(DAG, 0xbc91e5ac, dl));
5090 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5091 getF32Constant(DAG, 0x3e4350aa, dl));
5092 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5093 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5094 getF32Constant(DAG, 0x3f60d3e3, dl));
5095 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5096 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5097 getF32Constant(DAG, 0x4011cdf0, dl));
5098 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5099 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5100 getF32Constant(DAG, 0x406cfd1c, dl));
5101 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5102 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5103 getF32Constant(DAG, 0x408797cb, dl));
5104 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5105 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5106 getF32Constant(DAG, 0x4006dcab, dl));
5109 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
5112 // No special expansion.
5113 return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
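// Illustrative sketch (not part of the upstream lowering): the same
// exponent/significand split on a host float, with the 6-bit-accurate
// polynomial from the LimitFloatPrecision <= 6 branch above. expandLog2 and
// expandLog10 below differ only in how the exponent is scaled and in the
// polynomial coefficients. Assumes IEEE-754 binary32 and Op > 0.
#include <cstdint>
#include <cstring>

static float exampleLog(float Op) {
  uint32_t Bits;
  std::memcpy(&Bits, &Op, sizeof(Bits));
  float Exponent = (float)(int)(((Bits & 0x7f800000u) >> 23) - 127);
  uint32_t MantBits = (Bits & 0x007fffffu) | 0x3f800000u;
  float X;
  std::memcpy(&X, &MantBits, sizeof(X)); // significand, in [1.0, 2.0)
  float LogOfMantissa = -1.1609546f + (1.4034025f - 0.23903021f * X) * X;
  return Exponent * 0.69314718f /*ln(2)*/ + LogOfMantissa;
}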
5116 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
5117 /// limited-precision mode.
5118 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5119 const TargetLowering &TLI, SDNodeFlags Flags) {
5120 // TODO: What fast-math-flags should be set on the floating-point nodes?
5122 if (Op.getValueType() == MVT::f32 &&
5123 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5124 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5126 // Get the exponent.
5127 SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
5129 // Get the significand and build it into a floating-point number with
5130 // exponent of 1.
5131 SDValue X = GetSignificand(DAG, Op1, dl);
5133 // Different possible minimax polynomial approximations of the significand,
5134 // for various degrees of accuracy over [1,2].
5135 SDValue Log2ofMantissa;
5136 if (LimitFloatPrecision <= 6) {
5137 // For floating-point precision of 6:
5139 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
5141 // error 0.0049451742, which is more than 7 bits
5142 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5143 getF32Constant(DAG, 0xbeb08fe0, dl));
5144 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5145 getF32Constant(DAG, 0x40019463, dl));
5146 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5147 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5148 getF32Constant(DAG, 0x3fd6633d, dl));
5149 } else if (LimitFloatPrecision <= 12) {
5150 // For floating-point precision of 12:
5152 // Log2ofMantissa =
5153 // -2.51285454f +
5154 // (4.07009056f +
5155 // (-2.12067489f +
5156 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
5158 // error 0.0000876136000, which is better than 13 bits
5159 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5160 getF32Constant(DAG, 0xbda7262e, dl));
5161 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5162 getF32Constant(DAG, 0x3f25280b, dl));
5163 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5164 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5165 getF32Constant(DAG, 0x4007b923, dl));
5166 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5167 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5168 getF32Constant(DAG, 0x40823e2f, dl));
5169 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5170 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5171 getF32Constant(DAG, 0x4020d29c, dl));
5172 } else { // LimitFloatPrecision <= 18
5173 // For floating-point precision of 18:
5175 // Log2ofMantissa =
5176 // -3.0400495f +
5177 // (6.1129976f +
5178 // (-5.3420409f +
5179 // (3.2865683f +
5180 // (-1.2669343f +
5181 // (0.27515199f -
5182 // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
5184 // error 0.0000018516, which is better than 18 bits
5185 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5186 getF32Constant(DAG, 0xbcd2769e, dl));
5187 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5188 getF32Constant(DAG, 0x3e8ce0b9, dl));
5189 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5190 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5191 getF32Constant(DAG, 0x3fa22ae7, dl));
5192 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5193 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
5194 getF32Constant(DAG, 0x40525723, dl));
5195 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5196 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
5197 getF32Constant(DAG, 0x40aaf200, dl));
5198 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5199 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
5200 getF32Constant(DAG, 0x40c39dad, dl));
5201 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
5202 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
5203 getF32Constant(DAG, 0x4042902c, dl));
5206 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
5209 // No special expansion.
5210 return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
5213 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
5214 /// limited-precision mode.
5215 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5216 const TargetLowering &TLI, SDNodeFlags Flags) {
5217 // TODO: What fast-math-flags should be set on the floating-point nodes?
5219 if (Op.getValueType() == MVT::f32 &&
5220 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5221 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
5223 // Scale the exponent by log10(2) [0.30102999f].
5224 SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
5225 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
5226 getF32Constant(DAG, 0x3e9a209a, dl));
5228 // Get the significand and build it into a floating-point number with
5229 // exponent of 1.
5230 SDValue X = GetSignificand(DAG, Op1, dl);
5232 SDValue Log10ofMantissa;
5233 if (LimitFloatPrecision <= 6) {
5234 // For floating-point precision of 6:
5236 // Log10ofMantissa =
5237 // -0.50419619f +
5238 // (0.60948995f - 0.10380950f * x) * x;
5240 // error 0.0014886165, which is 6 bits
5241 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5242 getF32Constant(DAG, 0xbdd49a13, dl));
5243 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
5244 getF32Constant(DAG, 0x3f1c0789, dl));
5245 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5246 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
5247 getF32Constant(DAG, 0x3f011300, dl));
5248 } else if (LimitFloatPrecision <= 12) {
5249 // For floating-point precision of 12:
5251 // Log10ofMantissa =
5252 // -0.64831180f +
5253 // (0.91751397f +
5254 // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
5256 // error 0.00019228036, which is better than 12 bits
5257 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5258 getF32Constant(DAG, 0x3d431f31, dl));
5259 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5260 getF32Constant(DAG, 0x3ea21fb2, dl));
5261 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5262 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5263 getF32Constant(DAG, 0x3f6ae232, dl));
5264 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5265 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5266 getF32Constant(DAG, 0x3f25f7c3, dl));
5267 } else { // LimitFloatPrecision <= 18
5268 // For floating-point precision of 18:
5270 // Log10ofMantissa =
5271 // -0.84299375f +
5272 // (1.5327582f +
5273 // (-1.0688956f +
5274 // (0.49102474f +
5275 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
5277 // error 0.0000037995730, which is better than 18 bits
5278 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
5279 getF32Constant(DAG, 0x3c5d51ce, dl));
5280 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
5281 getF32Constant(DAG, 0x3e00685a, dl));
5282 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
5283 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
5284 getF32Constant(DAG, 0x3efb6798, dl));
5285 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
5286 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
5287 getF32Constant(DAG, 0x3f88d192, dl));
5288 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
5289 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
5290 getF32Constant(DAG, 0x3fc4316c, dl));
5291 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
5292 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
5293 getF32Constant(DAG, 0x3f57ce70, dl));
5296 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
5299 // No special expansion.
5300 return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
5303 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
5304 /// limited-precision mode.
5305 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
5306 const TargetLowering &TLI, SDNodeFlags Flags) {
5307 if (Op.getValueType() == MVT::f32 &&
5308 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
5309 return getLimitedPrecisionExp2(Op, dl, DAG);
5311 // No special expansion.
5312 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
5315 /// expandPow - Lower a pow intrinsic. Handles the special sequences for
5316 /// limited-precision mode when the base is 10.0f.
5317 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
5318 SelectionDAG &DAG, const TargetLowering &TLI,
5319 SDNodeFlags Flags) {
5320 bool IsExp10 = false;
5321 if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
5322 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
5323 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
5324 APFloat Ten(10.0f);
5325 IsExp10 = LHSC->isExactlyValue(Ten);
5329 // TODO: What fast-math-flags should be set on the FMUL node?
5330 if (IsExp10) {
5331 // Put the exponent in the right bit position for later addition to the
5332 // final result:
5334 // #define LOG2OF10 3.3219281f
5335 // t0 = Op * LOG2OF10;
5336 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
5337 getF32Constant(DAG, 0x40549a78, dl));
5338 return getLimitedPrecisionExp2(t0, dl, DAG);
5341 // No special expansion.
5342 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
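// Illustrative sketch (not part of the upstream lowering): the identity used
// above. 0x40549a78 is log2(10) ~= 3.3219281 as a binary32 bit pattern, so
// pow(10, x) becomes exp2(x * log2(10)). <cmath> is pulled in only for this
// host-side check.
#include <cmath>

static float examplePow10(float X) {
  return std::exp2(X * 3.3219281f); // == std::pow(10.0f, X) up to rounding
}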
5345 /// ExpandPowI - Expand a llvm.powi intrinsic.
5346 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
5347 SelectionDAG &DAG) {
5348 // If RHS is a constant, we can expand this out to a multiplication tree,
5349 // otherwise we end up lowering to a call to __powidf2 (for example). When
5350 // optimizing for size, we only want to do this if the expansion would produce
5351 // a small number of multiplies, otherwise we do the full expansion.
5352 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5353 // Get the exponent as a positive value.
5354 unsigned Val = RHSC->getSExtValue();
5355 if ((int)Val < 0) Val = -Val;
5357 // powi(x, 0) -> 1.0
5358 if (Val == 0)
5359 return DAG.getConstantFP(1.0, DL, LHS.getValueType());
5361 bool OptForSize = DAG.shouldOptForSize();
5362 if (!OptForSize ||
5363 // If optimizing for size, don't insert too many multiplies.
5364 // This inserts up to 5 multiplies.
5365 countPopulation(Val) + Log2_32(Val) < 7) {
5366 // We use the simple binary decomposition method to generate the multiply
5367 // sequence. There are more optimal ways to do this (for example,
5368 // powi(x,15) generates one more multiply than it should), but this has
5369 // the benefit of being both really simple and much better than a libcall.
5370 SDValue Res; // Logically starts equal to 1.0
5371 SDValue CurSquare = LHS;
5372 // TODO: Intrinsics should have fast-math-flags that propagate to these
5373 // nodes.
5374 while (Val) {
5375 if (Val & 1) {
5376 if (Res.getNode())
5377 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
5378 else
5379 Res = CurSquare; // 1.0*CurSquare.
5382 CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
5383 CurSquare, CurSquare);
5384 Val >>= 1;
5387 // If the original was negative, invert the result, producing 1/(x*x*x).
5388 if (RHSC->getSExtValue() < 0)
5389 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
5390 DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
5391 return Res;
5395 // Otherwise, expand to a libcall.
5396 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
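// Illustrative sketch (not part of the upstream lowering): the same binary
// decomposition on host doubles. For an exponent with P set bits and highest
// set bit at position L, it needs on the order of P + L multiplies, which is
// what the countPopulation + Log2_32 heuristic above bounds.
static double examplePowi(double X, int N) {
  unsigned Val = N < 0 ? -(unsigned)N : (unsigned)N;
  double Res = 1.0;
  double CurSquare = X; // x, x^2, x^4, x^8, ...
  while (Val) {
    if (Val & 1)
      Res *= CurSquare; // fold in this power-of-two factor
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return N < 0 ? 1.0 / Res : Res; // powi(x, -n) == 1 / powi(x, n)
}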
5399 static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
5400 SDValue LHS, SDValue RHS, SDValue Scale,
5401 SelectionDAG &DAG, const TargetLowering &TLI) {
5402 EVT VT = LHS.getValueType();
5403 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
5404 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
5405 LLVMContext &Ctx = *DAG.getContext();
5407 // If the type is legal but the operation isn't, this node might survive all
5408 // the way to operation legalization. If we end up there and we do not have
5409 // the ability to widen the type (if VT*2 is not legal), we cannot expand the
5410 // node.
5412 // Coax the legalizer into expanding the node during type legalization instead
5413 // by bumping the size by one bit. This will force it to Promote, enabling the
5414 // early expansion and avoiding the need to expand later.
5416 // We don't have to do this if Scale is 0; that can always be expanded, unless
5417 // it's a saturating signed operation. Those can experience true integer
5418 // division overflow, a case which we must avoid.
5420 // FIXME: We wouldn't have to do this (or any of the early
5421 // expansion/promotion) if it was possible to expand a libcall of an
5422 // illegal type during operation legalization. But it's not, so things
5423 // get a bit hacky.
5424 unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
5425 if ((ScaleInt > 0 || (Saturating && Signed)) &&
5426 (TLI.isTypeLegal(VT) ||
5427 (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
5428 TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
5429 Opcode, VT, ScaleInt);
5430 if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
5431 EVT PromVT;
5432 if (VT.isScalarInteger())
5433 PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
5434 else if (VT.isVector()) {
5435 PromVT = VT.getVectorElementType();
5436 PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
5437 PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
5438 } else
5439 llvm_unreachable("Wrong VT for DIVFIX?");
5440 if (Signed) {
5441 LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
5442 RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
5443 } else {
5444 LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
5445 RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
5447 EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
5448 // For saturating operations, we need to shift up the LHS to get the
5449 // proper saturation width, and then shift down again afterwards.
5450 if (Saturating)
5451 LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
5452 DAG.getConstant(1, DL, ShiftTy));
5453 SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
5454 if (Saturating)
5455 Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
5456 DAG.getConstant(1, DL, ShiftTy));
5457 return DAG.getZExtOrTrunc(Res, DL, VT);
5461 return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
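// Illustrative sketch (not part of the upstream lowering): what a scalar
// signed DIVFIX computes, on host integers. With scale S the operands are
// fixed-point numbers with S fractional bits, so the quotient is
// (LHS << S) / RHS evaluated in a wider type; the widening here is the same
// idea as the promotion above. Saturation and the intrinsic's exact rounding
// rules are omitted, and the final narrowing may wrap where the saturating
// variants would clamp.
static int exampleSDivFix(int LHS, int RHS, unsigned Scale) {
  long long Wide = (long long)LHS << Scale; // widen before shifting so the
                                            // shift cannot overflow
  return (int)(Wide / RHS);
}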
5464 // getUnderlyingArgRegs - Find underlying registers used for a truncated,
5466 // bitcasted, or split argument. Returns a list of <Register, size in bits> pairs.
5466 static void
5467 getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
5468 const SDValue &N) {
5469 switch (N.getOpcode()) {
5470 case ISD::CopyFromReg: {
5471 SDValue Op = N.getOperand(1);
5472 Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
5473 Op.getValueType().getSizeInBits());
5474 return;
5476 case ISD::BITCAST:
5477 case ISD::AssertZext:
5478 case ISD::AssertSext:
5479 case ISD::TRUNCATE:
5480 getUnderlyingArgRegs(Regs, N.getOperand(0));
5481 return;
5482 case ISD::BUILD_PAIR:
5483 case ISD::BUILD_VECTOR:
5484 case ISD::CONCAT_VECTORS:
5485 for (SDValue Op : N->op_values())
5486 getUnderlyingArgRegs(Regs, Op);
5487 return;
5488 default:
5489 return;
5493 /// If the DbgValueInst is a dbg_value of a function argument, create the
5494 /// corresponding DBG_VALUE machine instruction for it now. At the end of
5495 /// instruction selection, they will be inserted into the entry BB.
5496 /// We don't currently support this for variadic dbg_values, as they shouldn't
5497 /// appear for function arguments or in the prologue.
5498 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
5499 const Value *V, DILocalVariable *Variable, DIExpression *Expr,
5500 DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
5501 const Argument *Arg = dyn_cast<Argument>(V);
5502 if (!Arg)
5503 return false;
5505 MachineFunction &MF = DAG.getMachineFunction();
5506 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
5508 // Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
5509 // we've been asked to pursue.
5510 auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
5511 bool Indirect) {
5512 if (Reg.isVirtual() && MF.useDebugInstrRef()) {
5513 // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
5514 // pointing at the VReg, which will be patched up later.
5515 auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
5516 auto MIB = BuildMI(MF, DL, Inst);
5517 MIB.addReg(Reg);
5518 MIB.addImm(0);
5519 MIB.addMetadata(Variable);
5520 auto *NewDIExpr = FragExpr;
5521 // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
5522 // the DIExpression.
5523 if (Indirect)
5524 NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
5525 MIB.addMetadata(NewDIExpr);
5526 return MIB;
5527 } else {
5528 // Create a completely standard DBG_VALUE.
5529 auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
5530 return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
5534 if (!IsDbgDeclare) {
5535 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5536 // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
5537 // the entry block.
5538 bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
5539 if (!IsInEntryBlock)
5540 return false;
5542 // ArgDbgValues are hoisted to the beginning of the entry block. So we
5543 // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
5544 // variable that is also a parameter.
5546 // Although, if we are at the top of the entry block already, we can still
5547 // emit using ArgDbgValue. This might catch some situations when the
5548 // dbg.value refers to an argument that isn't used in the entry block, so
5549 // any CopyToReg node would be optimized out and the only way to express
5550 // this DBG_VALUE is by using the physical reg (or FI) as done in this
5551 // method. ArgDbgValues are hoisted to the beginning of the entry block. So
5552 // we should only emit as ArgDbgValue if the Variable is an argument to the
5553 // current function, and the dbg.value intrinsic is found in the entry
5554 // block.
5555 bool VariableIsFunctionInputArg = Variable->isParameter() &&
5556 !DL->getInlinedAt();
5557 bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
5558 if (!IsInPrologue && !VariableIsFunctionInputArg)
5559 return false;
5561 // Here we assume that a function argument on IR level only can be used to
5562 // describe one input parameter on source level. If we for example have
5563 // source code like this
5565 // struct A { long x, y; };
5566 // void foo(struct A a, long b) {
5567 // ...
5568 // b = a.x;
5569 // ...
5570 // }
5572 // and IR like this
5574 // define void @foo(i32 %a1, i32 %a2, i32 %b) {
5575 // entry:
5576 // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
5577 // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
5578 // call void @llvm.dbg.value(metadata i32 %b, "b",
5579 // ...
5580 // call void @llvm.dbg.value(metadata i32 %a1, "b"
5581 // ...
5583 // then the last dbg.value is describing a parameter "b" using a value that
5584 // is an argument. But since we have already used %a1 to describe a parameter
5585 // we should not handle that last dbg.value here (that would result in an
5586 // incorrect hoisting of the DBG_VALUE to the function entry).
5587 // Notice that we allow one dbg.value per IR level argument, to accommodate
5588 // for the situation with fragments above.
5589 if (VariableIsFunctionInputArg) {
5590 unsigned ArgNo = Arg->getArgNo();
5591 if (ArgNo >= FuncInfo.DescribedArgs.size())
5592 FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
5593 else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
5594 return false;
5595 FuncInfo.DescribedArgs.set(ArgNo);
5599 bool IsIndirect = false;
5600 Optional<MachineOperand> Op;
5601 // Some arguments' frame index is recorded during argument lowering.
5602 int FI = FuncInfo.getArgumentFrameIndex(Arg);
5603 if (FI != std::numeric_limits<int>::max())
5604 Op = MachineOperand::CreateFI(FI);
5606 SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
5607 if (!Op && N.getNode()) {
5608 getUnderlyingArgRegs(ArgRegsAndSizes, N);
5609 Register Reg;
5610 if (ArgRegsAndSizes.size() == 1)
5611 Reg = ArgRegsAndSizes.front().first;
5613 if (Reg && Reg.isVirtual()) {
5614 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5615 Register PR = RegInfo.getLiveInPhysReg(Reg);
5616 if (PR)
5617 Reg = PR;
5619 if (Reg) {
5620 Op = MachineOperand::CreateReg(Reg, false);
5621 IsIndirect = IsDbgDeclare;
5625 if (!Op && N.getNode()) {
5626 // Check if frame index is available.
5627 SDValue LCandidate = peekThroughBitcasts(N);
5628 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
5629 if (FrameIndexSDNode *FINode =
5630 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
5631 Op = MachineOperand::CreateFI(FINode->getIndex());
5634 if (!Op) {
5635 // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
5636 auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
5637 SplitRegs) {
5638 unsigned Offset = 0;
5639 for (const auto &RegAndSize : SplitRegs) {
5640 // If the expression is already a fragment, the current register
5641 // offset+size might extend beyond the fragment. In this case, only
5642 // the register bits that are inside the fragment are relevant.
5643 int RegFragmentSizeInBits = RegAndSize.second;
5644 if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
5645 uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
5646 // The register is entirely outside the expression fragment,
5647 // so is irrelevant for debug info.
5648 if (Offset >= ExprFragmentSizeInBits)
5649 break;
5650 // The register is partially outside the expression fragment, only
5651 // the low bits within the fragment are relevant for debug info.
5652 if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
5653 RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
5657 auto FragmentExpr = DIExpression::createFragmentExpression(
5658 Expr, Offset, RegFragmentSizeInBits);
5659 Offset += RegAndSize.second;
5660 // If a valid fragment expression cannot be created, the variable's
5661 // correct value cannot be determined and so it is set as Undef.
5662 if (!FragmentExpr) {
5663 SDDbgValue *SDV = DAG.getConstantDbgValue(
5664 Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
5665 DAG.AddDbgValue(SDV, false);
5666 continue;
5668 MachineInstr *NewMI =
5669 MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
5670 FuncInfo.ArgDbgValues.push_back(NewMI);
5674 // Check if ValueMap has reg number.
5675 DenseMap<const Value *, Register>::const_iterator
5676 VMI = FuncInfo.ValueMap.find(V);
5677 if (VMI != FuncInfo.ValueMap.end()) {
5678 const auto &TLI = DAG.getTargetLoweringInfo();
5679 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
5680 V->getType(), None);
5681 if (RFV.occupiesMultipleRegs()) {
5682 splitMultiRegDbgValue(RFV.getRegsAndSizes());
5683 return true;
5686 Op = MachineOperand::CreateReg(VMI->second, false);
5687 IsIndirect = IsDbgDeclare;
5688 } else if (ArgRegsAndSizes.size() > 1) {
5689 // This was split due to the calling convention, and no virtual register
5690 // mapping exists for the value.
5691 splitMultiRegDbgValue(ArgRegsAndSizes);
5692 return true;
5696 if (!Op)
5697 return false;
5699 assert(Variable->isValidLocationForIntrinsic(DL) &&
5700 "Expected inlined-at fields to agree");
5701 MachineInstr *NewMI = nullptr;
5703 if (Op->isReg())
5704 NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
5705 else
5706 NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
5707 Variable, Expr);
5709 FuncInfo.ArgDbgValues.push_back(NewMI);
5710 return true;
5713 /// Return the appropriate SDDbgValue based on N.
5714 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
5715 DILocalVariable *Variable,
5716 DIExpression *Expr,
5717 const DebugLoc &dl,
5718 unsigned DbgSDNodeOrder) {
5719 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
5720 // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
5721 // stack slot locations.
5723 // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
5724 // debug values here after optimization:
5726 // dbg.value(i32* %px, !"int *px", !DIExpression()), and
5727 // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
5729 // Both describe the direct values of their associated variables.
5730 return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
5731 /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5733 return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
5734 /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5737 static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
5738 switch (Intrinsic) {
5739 case Intrinsic::smul_fix:
5740 return ISD::SMULFIX;
5741 case Intrinsic::umul_fix:
5742 return ISD::UMULFIX;
5743 case Intrinsic::smul_fix_sat:
5744 return ISD::SMULFIXSAT;
5745 case Intrinsic::umul_fix_sat:
5746 return ISD::UMULFIXSAT;
5747 case Intrinsic::sdiv_fix:
5748 return ISD::SDIVFIX;
5749 case Intrinsic::udiv_fix:
5750 return ISD::UDIVFIX;
5751 case Intrinsic::sdiv_fix_sat:
5752 return ISD::SDIVFIXSAT;
5753 case Intrinsic::udiv_fix_sat:
5754 return ISD::UDIVFIXSAT;
5755 default:
5756 llvm_unreachable("Unhandled fixed point intrinsic");
5760 void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
5761 const char *FunctionName) {
5762 assert(FunctionName && "FunctionName must not be nullptr");
5763 SDValue Callee = DAG.getExternalSymbol(
5764 FunctionName,
5765 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
5766 LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
5769 /// Given a @llvm.call.preallocated.setup, return the corresponding
5770 /// preallocated call.
5771 static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
5772 assert(cast<CallBase>(PreallocatedSetup)
5773 ->getCalledFunction()
5774 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
5775 "expected call_preallocated_setup Value");
5776 for (auto *U : PreallocatedSetup->users()) {
5777 auto *UseCall = cast<CallBase>(U);
5778 const Function *Fn = UseCall->getCalledFunction();
5779 if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
5780 return UseCall;
5783 llvm_unreachable("expected corresponding call to preallocated setup/arg");
5786 /// Lower the call to the specified intrinsic function.
5787 void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
5788 unsigned Intrinsic) {
5789 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5790 SDLoc sdl = getCurSDLoc();
5791 DebugLoc dl = getCurDebugLoc();
5792 SDValue Res;
5794 SDNodeFlags Flags;
5795 if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
5796 Flags.copyFMF(*FPOp);
5798 switch (Intrinsic) {
5799 default:
5800 // By default, turn this into a target intrinsic node.
5801 visitTargetIntrinsic(I, Intrinsic);
5802 return;
5803 case Intrinsic::vscale: {
5804 match(&I, m_VScale(DAG.getDataLayout()));
5805 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5806 setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
5807 return;
5809 case Intrinsic::vastart: visitVAStart(I); return;
5810 case Intrinsic::vaend: visitVAEnd(I); return;
5811 case Intrinsic::vacopy: visitVACopy(I); return;
5812 case Intrinsic::returnaddress:
5813 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
5814 TLI.getPointerTy(DAG.getDataLayout()),
5815 getValue(I.getArgOperand(0))));
5816 return;
5817 case Intrinsic::addressofreturnaddress:
5818 setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
5819 TLI.getPointerTy(DAG.getDataLayout())));
5820 return;
5821 case Intrinsic::sponentry:
5822 setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
5823 TLI.getFrameIndexTy(DAG.getDataLayout())));
5824 return;
5825 case Intrinsic::frameaddress:
5826 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
5827 TLI.getFrameIndexTy(DAG.getDataLayout()),
5828 getValue(I.getArgOperand(0))));
5829 return;
5830 case Intrinsic::read_volatile_register:
5831 case Intrinsic::read_register: {
5832 Value *Reg = I.getArgOperand(0);
5833 SDValue Chain = getRoot();
5834 SDValue RegName =
5835 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5836 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5837 Res = DAG.getNode(ISD::READ_REGISTER, sdl,
5838 DAG.getVTList(VT, MVT::Other), Chain, RegName);
5839 setValue(&I, Res);
5840 DAG.setRoot(Res.getValue(1));
5841 return;
5843 case Intrinsic::write_register: {
5844 Value *Reg = I.getArgOperand(0);
5845 Value *RegValue = I.getArgOperand(1);
5846 SDValue Chain = getRoot();
5847 SDValue RegName =
5848 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5849 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
5850 RegName, getValue(RegValue)));
5851 return;
5853 case Intrinsic::memcpy: {
5854 const auto &MCI = cast<MemCpyInst>(I);
5855 SDValue Op1 = getValue(I.getArgOperand(0));
5856 SDValue Op2 = getValue(I.getArgOperand(1));
5857 SDValue Op3 = getValue(I.getArgOperand(2));
5858 // @llvm.memcpy defines 0 and 1 to both mean no alignment.
5859 Align DstAlign = MCI.getDestAlign().valueOrOne();
5860 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
5861 Align Alignment = commonAlignment(DstAlign, SrcAlign);
5862 bool isVol = MCI.isVolatile();
5863 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5864 // FIXME: Support passing different dest/src alignments to the memcpy DAG
5865 // node.
5866 SDValue Root = isVol ? getRoot() : getMemoryRoot();
5867 SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
5868 /* AlwaysInline */ false, isTC,
5869 MachinePointerInfo(I.getArgOperand(0)),
5870 MachinePointerInfo(I.getArgOperand(1)),
5871 I.getAAMetadata());
5872 updateDAGForMaybeTailCall(MC);
5873 return;
5875 case Intrinsic::memcpy_inline: {
5876 const auto &MCI = cast<MemCpyInlineInst>(I);
5877 SDValue Dst = getValue(I.getArgOperand(0));
5878 SDValue Src = getValue(I.getArgOperand(1));
5879 SDValue Size = getValue(I.getArgOperand(2));
5880 assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
5881 // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
5882 Align DstAlign = MCI.getDestAlign().valueOrOne();
5883 Align SrcAlign = MCI.getSourceAlign().valueOrOne();
5884 Align Alignment = commonAlignment(DstAlign, SrcAlign);
5885 bool isVol = MCI.isVolatile();
5886 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5887 // FIXME: Support passing different dest/src alignments to the memcpy DAG
5888 // node.
5889 SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
5890 /* AlwaysInline */ true, isTC,
5891 MachinePointerInfo(I.getArgOperand(0)),
5892 MachinePointerInfo(I.getArgOperand(1)),
5893 I.getAAMetadata());
5894 updateDAGForMaybeTailCall(MC);
5895 return;
5897 case Intrinsic::memset: {
5898 const auto &MSI = cast<MemSetInst>(I);
5899 SDValue Op1 = getValue(I.getArgOperand(0));
5900 SDValue Op2 = getValue(I.getArgOperand(1));
5901 SDValue Op3 = getValue(I.getArgOperand(2));
5902 // @llvm.memset defines 0 and 1 to both mean no alignment.
5903 Align Alignment = MSI.getDestAlign().valueOrOne();
5904 bool isVol = MSI.isVolatile();
5905 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5906 SDValue Root = isVol ? getRoot() : getMemoryRoot();
5907 SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
5908 MachinePointerInfo(I.getArgOperand(0)),
5909 I.getAAMetadata());
5910 updateDAGForMaybeTailCall(MS);
5911 return;
5913 case Intrinsic::memmove: {
5914 const auto &MMI = cast<MemMoveInst>(I);
5915 SDValue Op1 = getValue(I.getArgOperand(0));
5916 SDValue Op2 = getValue(I.getArgOperand(1));
5917 SDValue Op3 = getValue(I.getArgOperand(2));
5918 // @llvm.memmove defines 0 and 1 to both mean no alignment.
5919 Align DstAlign = MMI.getDestAlign().valueOrOne();
5920 Align SrcAlign = MMI.getSourceAlign().valueOrOne();
5921 Align Alignment = commonAlignment(DstAlign, SrcAlign);
5922 bool isVol = MMI.isVolatile();
5923 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5924 // FIXME: Support passing different dest/src alignments to the memmove DAG
5925 // node.
5926 SDValue Root = isVol ? getRoot() : getMemoryRoot();
5927 SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
5928 isTC, MachinePointerInfo(I.getArgOperand(0)),
5929 MachinePointerInfo(I.getArgOperand(1)),
5930 I.getAAMetadata());
5931 updateDAGForMaybeTailCall(MM);
5932 return;
5934 case Intrinsic::memcpy_element_unordered_atomic: {
5935 const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
5936 SDValue Dst = getValue(MI.getRawDest());
5937 SDValue Src = getValue(MI.getRawSource());
5938 SDValue Length = getValue(MI.getLength());
5940 unsigned DstAlign = MI.getDestAlignment();
5941 unsigned SrcAlign = MI.getSourceAlignment();
5942 Type *LengthTy = MI.getLength()->getType();
5943 unsigned ElemSz = MI.getElementSizeInBytes();
5944 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5945 SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
5946 SrcAlign, Length, LengthTy, ElemSz, isTC,
5947 MachinePointerInfo(MI.getRawDest()),
5948 MachinePointerInfo(MI.getRawSource()));
5949 updateDAGForMaybeTailCall(MC);
5950 return;
5952 case Intrinsic::memmove_element_unordered_atomic: {
5953 auto &MI = cast<AtomicMemMoveInst>(I);
5954 SDValue Dst = getValue(MI.getRawDest());
5955 SDValue Src = getValue(MI.getRawSource());
5956 SDValue Length = getValue(MI.getLength());
5958 unsigned DstAlign = MI.getDestAlignment();
5959 unsigned SrcAlign = MI.getSourceAlignment();
5960 Type *LengthTy = MI.getLength()->getType();
5961 unsigned ElemSz = MI.getElementSizeInBytes();
5962 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5963 SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
5964 SrcAlign, Length, LengthTy, ElemSz, isTC,
5965 MachinePointerInfo(MI.getRawDest()),
5966 MachinePointerInfo(MI.getRawSource()));
5967 updateDAGForMaybeTailCall(MC);
5968 return;
5970 case Intrinsic::memset_element_unordered_atomic: {
5971 auto &MI = cast<AtomicMemSetInst>(I);
5972 SDValue Dst = getValue(MI.getRawDest());
5973 SDValue Val = getValue(MI.getValue());
5974 SDValue Length = getValue(MI.getLength());
5976 unsigned DstAlign = MI.getDestAlignment();
5977 Type *LengthTy = MI.getLength()->getType();
5978 unsigned ElemSz = MI.getElementSizeInBytes();
5979 bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
5980 SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
5981 LengthTy, ElemSz, isTC,
5982 MachinePointerInfo(MI.getRawDest()));
5983 updateDAGForMaybeTailCall(MC);
5984 return;
5986 case Intrinsic::call_preallocated_setup: {
5987 const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
5988 SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
5989 SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
5990 getRoot(), SrcValue);
5991 setValue(&I, Res);
5992 DAG.setRoot(Res);
5993 return;
5995 case Intrinsic::call_preallocated_arg: {
5996 const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
5997 SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
5998 SDValue Ops[3];
5999 Ops[0] = getRoot();
6000 Ops[1] = SrcValue;
6001 Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
6002 MVT::i32); // arg index
6003 SDValue Res = DAG.getNode(
6004 ISD::PREALLOCATED_ARG, sdl,
6005 DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
6006 setValue(&I, Res);
6007 DAG.setRoot(Res.getValue(1));
6008 return;
6010 case Intrinsic::dbg_addr:
6011 case Intrinsic::dbg_declare: {
6012 // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
6013 // they are non-variadic.
6014 const auto &DI = cast<DbgVariableIntrinsic>(I);
6015 assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
6016 DILocalVariable *Variable = DI.getVariable();
6017 DIExpression *Expression = DI.getExpression();
6018 dropDanglingDebugInfo(Variable, Expression);
6019 assert(Variable && "Missing variable");
6020 LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
6021 << "\n");
6022 // Check whether the address is missing, undef, or otherwise unusable.
6023 const Value *Address = DI.getVariableLocationOp(0);
6024 if (!Address || isa<UndefValue>(Address) ||
6025 (Address->use_empty() && !isa<Argument>(Address))) {
6026 LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
6027 << " (bad/undef/unused-arg address)\n");
6028 return;
6031 bool isParameter = Variable->isParameter() || isa<Argument>(Address);
6033 // Check if this variable can be described by a frame index, typically
6034 // either as a static alloca or a byval parameter.
6035 int FI = std::numeric_limits<int>::max();
6036 if (const auto *AI =
6037 dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
6038 if (AI->isStaticAlloca()) {
6039 auto I = FuncInfo.StaticAllocaMap.find(AI);
6040 if (I != FuncInfo.StaticAllocaMap.end())
6041 FI = I->second;
6043 } else if (const auto *Arg = dyn_cast<Argument>(
6044 Address->stripInBoundsConstantOffsets())) {
6045 FI = FuncInfo.getArgumentFrameIndex(Arg);
6048 // llvm.dbg.addr is control dependent and always generates indirect
6049 // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
6050 // the MachineFunction variable table.
6051 if (FI != std::numeric_limits<int>::max()) {
6052 if (Intrinsic == Intrinsic::dbg_addr) {
6053 SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
6054 Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
6055 dl, SDNodeOrder);
6056 DAG.AddDbgValue(SDV, isParameter);
6057 } else {
6058 LLVM_DEBUG(dbgs() << "Skipping " << DI
6059 << " (variable info stashed in MF side table)\n");
6061 return;
6064 SDValue &N = NodeMap[Address];
6065 if (!N.getNode() && isa<Argument>(Address))
6066 // Check unused arguments map.
6067 N = UnusedArgNodeMap[Address];
6068 SDDbgValue *SDV;
6069 if (N.getNode()) {
6070 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
6071 Address = BCI->getOperand(0);
6072 // Parameters are handled specially.
6073 auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
6074 if (isParameter && FINode) {
6075 // Byval parameter. We have a frame index at this point.
6076 SDV =
6077 DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
6078 /*IsIndirect*/ true, dl, SDNodeOrder);
6079 } else if (isa<Argument>(Address)) {
6080 // Address is an argument, so try to emit its dbg value using
6081 // virtual register info from the FuncInfo.ValueMap.
6082 EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
6083 return;
6084 } else {
6085 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
6086 true, dl, SDNodeOrder);
6088 DAG.AddDbgValue(SDV, isParameter);
6089 } else {
6090 // If Address is an argument then try to emit its dbg value using
6091 // virtual register info from the FuncInfo.ValueMap.
6092 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
6093 N)) {
6094 LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
6095 << " (could not emit func-arg dbg_value)\n");
6098 return;
6100 case Intrinsic::dbg_label: {
6101 const DbgLabelInst &DI = cast<DbgLabelInst>(I);
6102 DILabel *Label = DI.getLabel();
6103 assert(Label && "Missing label");
6105 SDDbgLabel *SDV;
6106 SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
6107 DAG.AddDbgLabel(SDV);
6108 return;
6110 case Intrinsic::dbg_value: {
6111 const DbgValueInst &DI = cast<DbgValueInst>(I);
6112 assert(DI.getVariable() && "Missing variable");
6114 DILocalVariable *Variable = DI.getVariable();
6115 DIExpression *Expression = DI.getExpression();
6116 dropDanglingDebugInfo(Variable, Expression);
6117 SmallVector<Value *, 4> Values(DI.getValues());
6118 if (Values.empty())
6119 return;
6121 if (llvm::is_contained(Values, nullptr))
6122 return;
6124 bool IsVariadic = DI.hasArgList();
6125 if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
6126 SDNodeOrder, IsVariadic))
6127 addDanglingDebugInfo(&DI, dl, SDNodeOrder);
6128 return;
6131 case Intrinsic::eh_typeid_for: {
6132 // Find the type id for the given typeinfo.
6133 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
6134 unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
6135 Res = DAG.getConstant(TypeID, sdl, MVT::i32);
6136 setValue(&I, Res);
6137 return;
6140 case Intrinsic::eh_return_i32:
6141 case Intrinsic::eh_return_i64:
6142 DAG.getMachineFunction().setCallsEHReturn(true);
6143 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
6144 MVT::Other,
6145 getControlRoot(),
6146 getValue(I.getArgOperand(0)),
6147 getValue(I.getArgOperand(1))));
6148 return;
6149 case Intrinsic::eh_unwind_init:
6150 DAG.getMachineFunction().setCallsUnwindInit(true);
6151 return;
6152 case Intrinsic::eh_dwarf_cfa:
6153 setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
6154 TLI.getPointerTy(DAG.getDataLayout()),
6155 getValue(I.getArgOperand(0))));
6156 return;
6157 case Intrinsic::eh_sjlj_callsite: {
6158 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
6159 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
6160 assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
6161 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
6163 MMI.setCurrentCallSite(CI->getZExtValue());
6164 return;
6166 case Intrinsic::eh_sjlj_functioncontext: {
6167 // Get and store the index of the function context.
6168 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6169 AllocaInst *FnCtx =
6170 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
6171 int FI = FuncInfo.StaticAllocaMap[FnCtx];
6172 MFI.setFunctionContextIndex(FI);
6173 return;
6175 case Intrinsic::eh_sjlj_setjmp: {
6176 SDValue Ops[2];
6177 Ops[0] = getRoot();
6178 Ops[1] = getValue(I.getArgOperand(0));
6179 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
6180 DAG.getVTList(MVT::i32, MVT::Other), Ops);
6181 setValue(&I, Op.getValue(0));
6182 DAG.setRoot(Op.getValue(1));
6183 return;
6185 case Intrinsic::eh_sjlj_longjmp:
6186 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
6187 getRoot(), getValue(I.getArgOperand(0))));
6188 return;
6189 case Intrinsic::eh_sjlj_setup_dispatch:
6190 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
6191 getRoot()));
6192 return;
6193 case Intrinsic::masked_gather:
6194 visitMaskedGather(I);
6195 return;
6196 case Intrinsic::masked_load:
6197 visitMaskedLoad(I);
6198 return;
6199 case Intrinsic::masked_scatter:
6200 visitMaskedScatter(I);
6201 return;
6202 case Intrinsic::masked_store:
6203 visitMaskedStore(I);
6204 return;
6205 case Intrinsic::masked_expandload:
6206 visitMaskedLoad(I, true /* IsExpanding */);
6207 return;
6208 case Intrinsic::masked_compressstore:
6209 visitMaskedStore(I, true /* IsCompressing */);
6210 return;
6211 case Intrinsic::powi:
6212 setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
6213 getValue(I.getArgOperand(1)), DAG));
6214 return;
6215 case Intrinsic::log:
6216 setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
6217 return;
6218 case Intrinsic::log2:
6219 setValue(&I,
6220 expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
6221 return;
6222 case Intrinsic::log10:
6223 setValue(&I,
6224 expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
6225 return;
6226 case Intrinsic::exp:
6227 setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
6228 return;
6229 case Intrinsic::exp2:
6230 setValue(&I,
6231 expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
6232 return;
6233 case Intrinsic::pow:
6234 setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
6235 getValue(I.getArgOperand(1)), DAG, TLI, Flags));
6236 return;
6237 case Intrinsic::sqrt:
6238 case Intrinsic::fabs:
6239 case Intrinsic::sin:
6240 case Intrinsic::cos:
6241 case Intrinsic::floor:
6242 case Intrinsic::ceil:
6243 case Intrinsic::trunc:
6244 case Intrinsic::rint:
6245 case Intrinsic::nearbyint:
6246 case Intrinsic::round:
6247 case Intrinsic::roundeven:
6248 case Intrinsic::canonicalize: {
6249 unsigned Opcode;
6250 switch (Intrinsic) {
6251 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6252 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
6253 case Intrinsic::fabs: Opcode = ISD::FABS; break;
6254 case Intrinsic::sin: Opcode = ISD::FSIN; break;
6255 case Intrinsic::cos: Opcode = ISD::FCOS; break;
6256 case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
6257 case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
6258 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
6259 case Intrinsic::rint: Opcode = ISD::FRINT; break;
6260 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
6261 case Intrinsic::round: Opcode = ISD::FROUND; break;
6262 case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
6263 case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
6266 setValue(&I, DAG.getNode(Opcode, sdl,
6267 getValue(I.getArgOperand(0)).getValueType(),
6268 getValue(I.getArgOperand(0)), Flags));
6269 return;
6271 case Intrinsic::lround:
6272 case Intrinsic::llround:
6273 case Intrinsic::lrint:
6274 case Intrinsic::llrint: {
6275 unsigned Opcode;
6276 switch (Intrinsic) {
6277 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6278 case Intrinsic::lround: Opcode = ISD::LROUND; break;
6279 case Intrinsic::llround: Opcode = ISD::LLROUND; break;
6280 case Intrinsic::lrint: Opcode = ISD::LRINT; break;
6281 case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
6284 EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6285 setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
6286 getValue(I.getArgOperand(0))));
6287 return;
6289 case Intrinsic::minnum:
6290 setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
6291 getValue(I.getArgOperand(0)).getValueType(),
6292 getValue(I.getArgOperand(0)),
6293 getValue(I.getArgOperand(1)), Flags));
6294 return;
6295 case Intrinsic::maxnum:
6296 setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
6297 getValue(I.getArgOperand(0)).getValueType(),
6298 getValue(I.getArgOperand(0)),
6299 getValue(I.getArgOperand(1)), Flags));
6300 return;
6301 case Intrinsic::minimum:
6302 setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
6303 getValue(I.getArgOperand(0)).getValueType(),
6304 getValue(I.getArgOperand(0)),
6305 getValue(I.getArgOperand(1)), Flags));
6306 return;
6307 case Intrinsic::maximum:
6308 setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
6309 getValue(I.getArgOperand(0)).getValueType(),
6310 getValue(I.getArgOperand(0)),
6311 getValue(I.getArgOperand(1)), Flags));
6312 return;
6313 case Intrinsic::copysign:
6314 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
6315 getValue(I.getArgOperand(0)).getValueType(),
6316 getValue(I.getArgOperand(0)),
6317 getValue(I.getArgOperand(1)), Flags));
6318 return;
6319 case Intrinsic::arithmetic_fence: {
6320 setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
6321 getValue(I.getArgOperand(0)).getValueType(),
6322 getValue(I.getArgOperand(0)), Flags));
6323 return;
6325 case Intrinsic::fma:
6326 setValue(&I, DAG.getNode(
6327 ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
6328 getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
6329 getValue(I.getArgOperand(2)), Flags));
6330 return;
6331 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
6332 case Intrinsic::INTRINSIC:
6333 #include "llvm/IR/ConstrainedOps.def"
6334 visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
6335 return;
6336 #define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
6337 #include "llvm/IR/VPIntrinsics.def"
6338 visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
6339 return;
6340 case Intrinsic::fmuladd: {
6341 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
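// Emit a single fused multiply-add when FP op fusion is permitted and the
// target reports FMA as faster; otherwise fall back to separate FMUL/FADD.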
6342 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
6343 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
6344 setValue(&I, DAG.getNode(ISD::FMA, sdl,
6345 getValue(I.getArgOperand(0)).getValueType(),
6346 getValue(I.getArgOperand(0)),
6347 getValue(I.getArgOperand(1)),
6348 getValue(I.getArgOperand(2)), Flags));
6349 } else {
6350 // TODO: Intrinsic calls should have fast-math-flags.
6351 SDValue Mul = DAG.getNode(
6352 ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
6353 getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
6354 SDValue Add = DAG.getNode(ISD::FADD, sdl,
6355 getValue(I.getArgOperand(0)).getValueType(),
6356 Mul, getValue(I.getArgOperand(2)), Flags);
6357 setValue(&I, Add);
6359 return;
6361 case Intrinsic::convert_to_fp16:
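// Round the operand to f16, then bitcast the half value to the i16 bit
// pattern that the intrinsic returns.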
6362 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
6363 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
6364 getValue(I.getArgOperand(0)),
6365 DAG.getTargetConstant(0, sdl,
6366 MVT::i32))));
6367 return;
6368 case Intrinsic::convert_from_fp16:
6369 setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
6370 TLI.getValueType(DAG.getDataLayout(), I.getType()),
6371 DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
6372 getValue(I.getArgOperand(0)))));
6373 return;
6374 case Intrinsic::fptosi_sat: {
6375 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6376 setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
6377 getValue(I.getArgOperand(0)),
6378 DAG.getValueType(VT.getScalarType())));
6379 return;
6381 case Intrinsic::fptoui_sat: {
6382 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6383 setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
6384 getValue(I.getArgOperand(0)),
6385 DAG.getValueType(VT.getScalarType())));
6386 return;
6388 case Intrinsic::set_rounding:
6389 Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
6390 {getRoot(), getValue(I.getArgOperand(0))});
6391 setValue(&I, Res);
6392 DAG.setRoot(Res.getValue(0));
6393 return;
6394 case Intrinsic::pcmarker: {
6395 SDValue Tmp = getValue(I.getArgOperand(0));
6396 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
6397 return;
6399 case Intrinsic::readcyclecounter: {
6400 SDValue Op = getRoot();
6401 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
6402 DAG.getVTList(MVT::i64, MVT::Other), Op);
6403 setValue(&I, Res);
6404 DAG.setRoot(Res.getValue(1));
6405 return;
6407 case Intrinsic::bitreverse:
6408 setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
6409 getValue(I.getArgOperand(0)).getValueType(),
6410 getValue(I.getArgOperand(0))));
6411 return;
6412 case Intrinsic::bswap:
6413 setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
6414 getValue(I.getArgOperand(0)).getValueType(),
6415 getValue(I.getArgOperand(0))));
6416 return;
6417 case Intrinsic::cttz: {
6418 SDValue Arg = getValue(I.getArgOperand(0));
6419 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
6420 EVT Ty = Arg.getValueType();
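// The second operand is the "zero is poison" flag: if it is nonzero, a zero
// input produces poison, so the _ZERO_UNDEF node can be used.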
6421 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
6422 sdl, Ty, Arg));
6423 return;
6425 case Intrinsic::ctlz: {
6426 SDValue Arg = getValue(I.getArgOperand(0));
6427 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
6428 EVT Ty = Arg.getValueType();
6429 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
6430 sdl, Ty, Arg));
6431 return;
6433 case Intrinsic::ctpop: {
6434 SDValue Arg = getValue(I.getArgOperand(0));
6435 EVT Ty = Arg.getValueType();
6436 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
6437 return;
6439 case Intrinsic::fshl:
6440 case Intrinsic::fshr: {
6441 bool IsFSHL = Intrinsic == Intrinsic::fshl;
6442 SDValue X = getValue(I.getArgOperand(0));
6443 SDValue Y = getValue(I.getArgOperand(1));
6444 SDValue Z = getValue(I.getArgOperand(2));
6445 EVT VT = X.getValueType();
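// A funnel shift with identical inputs is a rotate; emit the rotate node
// directly in that case.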
6447 if (X == Y) {
6448 auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
6449 setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
6450 } else {
6451 auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
6452 setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
6454 return;
6456 case Intrinsic::sadd_sat: {
6457 SDValue Op1 = getValue(I.getArgOperand(0));
6458 SDValue Op2 = getValue(I.getArgOperand(1));
6459 setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
6460 return;
6462 case Intrinsic::uadd_sat: {
6463 SDValue Op1 = getValue(I.getArgOperand(0));
6464 SDValue Op2 = getValue(I.getArgOperand(1));
6465 setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
6466 return;
6468 case Intrinsic::ssub_sat: {
6469 SDValue Op1 = getValue(I.getArgOperand(0));
6470 SDValue Op2 = getValue(I.getArgOperand(1));
6471 setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
6472 return;
6474 case Intrinsic::usub_sat: {
6475 SDValue Op1 = getValue(I.getArgOperand(0));
6476 SDValue Op2 = getValue(I.getArgOperand(1));
6477 setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
6478 return;
6480 case Intrinsic::sshl_sat: {
6481 SDValue Op1 = getValue(I.getArgOperand(0));
6482 SDValue Op2 = getValue(I.getArgOperand(1));
6483 setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
6484 return;
6486 case Intrinsic::ushl_sat: {
6487 SDValue Op1 = getValue(I.getArgOperand(0));
6488 SDValue Op2 = getValue(I.getArgOperand(1));
6489 setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
6490 return;
6492 case Intrinsic::smul_fix:
6493 case Intrinsic::umul_fix:
6494 case Intrinsic::smul_fix_sat:
6495 case Intrinsic::umul_fix_sat: {
6496 SDValue Op1 = getValue(I.getArgOperand(0));
6497 SDValue Op2 = getValue(I.getArgOperand(1));
6498 SDValue Op3 = getValue(I.getArgOperand(2));
6499 setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
6500 Op1.getValueType(), Op1, Op2, Op3));
6501 return;
6503 case Intrinsic::sdiv_fix:
6504 case Intrinsic::udiv_fix:
6505 case Intrinsic::sdiv_fix_sat:
6506 case Intrinsic::udiv_fix_sat: {
6507 SDValue Op1 = getValue(I.getArgOperand(0));
6508 SDValue Op2 = getValue(I.getArgOperand(1));
6509 SDValue Op3 = getValue(I.getArgOperand(2));
6510 setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
6511 Op1, Op2, Op3, DAG, TLI));
6512 return;
6514 case Intrinsic::smax: {
6515 SDValue Op1 = getValue(I.getArgOperand(0));
6516 SDValue Op2 = getValue(I.getArgOperand(1));
6517 setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
6518 return;
6520 case Intrinsic::smin: {
6521 SDValue Op1 = getValue(I.getArgOperand(0));
6522 SDValue Op2 = getValue(I.getArgOperand(1));
6523 setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
6524 return;
6526 case Intrinsic::umax: {
6527 SDValue Op1 = getValue(I.getArgOperand(0));
6528 SDValue Op2 = getValue(I.getArgOperand(1));
6529 setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
6530 return;
6532 case Intrinsic::umin: {
6533 SDValue Op1 = getValue(I.getArgOperand(0));
6534 SDValue Op2 = getValue(I.getArgOperand(1));
6535 setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
6536 return;
6538 case Intrinsic::abs: {
6539 // TODO: Preserve "int min is poison" arg in SDAG?
6540 SDValue Op1 = getValue(I.getArgOperand(0));
6541 setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
6542 return;
6544 case Intrinsic::stacksave: {
6545 SDValue Op = getRoot();
6546 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
6547 Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
6548 setValue(&I, Res);
6549 DAG.setRoot(Res.getValue(1));
6550 return;
6552 case Intrinsic::stackrestore:
6553 Res = getValue(I.getArgOperand(0));
6554 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
6555 return;
6556 case Intrinsic::get_dynamic_area_offset: {
6557 SDValue Op = getRoot();
6558 EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
6559 EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
6560 // The result type for @llvm.get.dynamic.area.offset should match the
6561 // target's PtrTy.
6562 if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
6563 report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
6564 " intrinsic!");
6565 Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
6566 Op);
6567 DAG.setRoot(Op);
6568 setValue(&I, Res);
6569 return;
6571 case Intrinsic::stackguard: {
6572 MachineFunction &MF = DAG.getMachineFunction();
6573 const Module &M = *MF.getFunction().getParent();
6574 SDValue Chain = getRoot();
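// Either use the target's dedicated LOAD_STACK_GUARD node or fall back to a
// volatile load of the stack guard global the target selects.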
6575 if (TLI.useLoadStackGuardNode()) {
6576 Res = getLoadStackGuard(DAG, sdl, Chain);
6577 } else {
6578 EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
6579 const Value *Global = TLI.getSDagStackGuard(M);
6580 Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
6581 Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
6582 MachinePointerInfo(Global, 0), Align,
6583 MachineMemOperand::MOVolatile);
6585 if (TLI.useStackGuardXorFP())
6586 Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
6587 DAG.setRoot(Chain);
6588 setValue(&I, Res);
6589 return;
6591 case Intrinsic::stackprotector: {
6592 // Emit code into the DAG to store the stack guard onto the stack.
6593 MachineFunction &MF = DAG.getMachineFunction();
6594 MachineFrameInfo &MFI = MF.getFrameInfo();
6595 SDValue Src, Chain = getRoot();
6597 if (TLI.useLoadStackGuardNode())
6598 Src = getLoadStackGuard(DAG, sdl, Chain);
6599 else
6600 Src = getValue(I.getArgOperand(0)); // The guard's value.
6602 AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
6604 int FI = FuncInfo.StaticAllocaMap[Slot];
6605 MFI.setStackProtectorIndex(FI);
6606 EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
6608 SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
6610 // Store the stack protector onto the stack.
6611 Res = DAG.getStore(
6612 Chain, sdl, Src, FIN,
6613 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
6614 MaybeAlign(), MachineMemOperand::MOVolatile);
6615 setValue(&I, Res);
6616 DAG.setRoot(Res);
6617 return;
6619 case Intrinsic::objectsize:
6620 llvm_unreachable("llvm.objectsize.* should have been lowered already");
6622 case Intrinsic::is_constant:
6623 llvm_unreachable("llvm.is.constant.* should have been lowered already");
6625 case Intrinsic::annotation:
6626 case Intrinsic::ptr_annotation:
6627 case Intrinsic::launder_invariant_group:
6628 case Intrinsic::strip_invariant_group:
6629 // Drop the intrinsic, but forward the value
6630 setValue(&I, getValue(I.getOperand(0)));
6631 return;
6633 case Intrinsic::assume:
6634 case Intrinsic::experimental_noalias_scope_decl:
6635 case Intrinsic::var_annotation:
6636 case Intrinsic::sideeffect:
6637 // Discard annotate attributes, noalias scope declarations, assumptions, and
6638 // artificial side-effects.
6639 return;
6641 case Intrinsic::codeview_annotation: {
6642 // Emit a label associated with this metadata.
6643 MachineFunction &MF = DAG.getMachineFunction();
6644 MCSymbol *Label =
6645 MF.getMMI().getContext().createTempSymbol("annotation", true);
6646 Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
6647 MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
6648 Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
6649 DAG.setRoot(Res);
6650 return;
6653 case Intrinsic::init_trampoline: {
6654 const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
6656 SDValue Ops[6];
6657 Ops[0] = getRoot();
6658 Ops[1] = getValue(I.getArgOperand(0));
6659 Ops[2] = getValue(I.getArgOperand(1));
6660 Ops[3] = getValue(I.getArgOperand(2));
6661 Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
6662 Ops[5] = DAG.getSrcValue(F);
6664 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
6666 DAG.setRoot(Res);
6667 return;
6669 case Intrinsic::adjust_trampoline:
6670 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
6671 TLI.getPointerTy(DAG.getDataLayout()),
6672 getValue(I.getArgOperand(0))));
6673 return;
6674 case Intrinsic::gcroot: {
6675 assert(DAG.getMachineFunction().getFunction().hasGC() &&
6676 "only valid in functions with gc specified, enforced by Verifier");
6677 assert(GFI && "implied by previous");
6678 const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
6679 const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
6681 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
6682 GFI->addStackRoot(FI->getIndex(), TypeMap);
6683 return;
6685 case Intrinsic::gcread:
6686 case Intrinsic::gcwrite:
6687 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
6688 case Intrinsic::flt_rounds:
6689 Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
6690 setValue(&I, Res);
6691 DAG.setRoot(Res.getValue(1));
6692 return;
6694 case Intrinsic::expect:
6695 // Just replace __builtin_expect(exp, c) with EXP.
6696 setValue(&I, getValue(I.getArgOperand(0)));
6697 return;
6699 case Intrinsic::ubsantrap:
6700 case Intrinsic::debugtrap:
6701 case Intrinsic::trap: {
6702 StringRef TrapFuncName =
6703 I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
6704 if (TrapFuncName.empty()) {
6705 switch (Intrinsic) {
6706 case Intrinsic::trap:
6707 DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
6708 break;
6709 case Intrinsic::debugtrap:
6710 DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
6711 break;
6712 case Intrinsic::ubsantrap:
6713 DAG.setRoot(DAG.getNode(
6714 ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
6715 DAG.getTargetConstant(
6716 cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
6717 MVT::i32)));
6718 break;
6719 default: llvm_unreachable("unknown trap intrinsic");
6721 return;
6723 TargetLowering::ArgListTy Args;
6724 if (Intrinsic == Intrinsic::ubsantrap) {
6725 Args.push_back(TargetLoweringBase::ArgListEntry());
6726 Args[0].Val = I.getArgOperand(0);
6727 Args[0].Node = getValue(Args[0].Val);
6728 Args[0].Ty = Args[0].Val->getType();
6731 TargetLowering::CallLoweringInfo CLI(DAG);
6732 CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
6733 CallingConv::C, I.getType(),
6734 DAG.getExternalSymbol(TrapFuncName.data(),
6735 TLI.getPointerTy(DAG.getDataLayout())),
6736 std::move(Args));
6738 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
6739 DAG.setRoot(Result.second);
6740 return;
6743 case Intrinsic::uadd_with_overflow:
6744 case Intrinsic::sadd_with_overflow:
6745 case Intrinsic::usub_with_overflow:
6746 case Intrinsic::ssub_with_overflow:
6747 case Intrinsic::umul_with_overflow:
6748 case Intrinsic::smul_with_overflow: {
6749 ISD::NodeType Op;
6750 switch (Intrinsic) {
6751 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6752 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
6753 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
6754 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
6755 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
6756 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
6757 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
6759 SDValue Op1 = getValue(I.getArgOperand(0));
6760 SDValue Op2 = getValue(I.getArgOperand(1));
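// The node produces two results: the arithmetic value and an overflow flag,
// which is i1 (or a vector of i1 for vector operands).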
6762 EVT ResultVT = Op1.getValueType();
6763 EVT OverflowVT = MVT::i1;
6764 if (ResultVT.isVector())
6765 OverflowVT = EVT::getVectorVT(
6766 *Context, OverflowVT, ResultVT.getVectorElementCount());
6768 SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
6769 setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
6770 return;
6772 case Intrinsic::prefetch: {
6773 SDValue Ops[5];
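// Operand 1 is the read/write specifier: 0 marks a read prefetch and 1 a
// write prefetch.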
6774 unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
6775 auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
6776 Ops[0] = DAG.getRoot();
6777 Ops[1] = getValue(I.getArgOperand(0));
6778 Ops[2] = getValue(I.getArgOperand(1));
6779 Ops[3] = getValue(I.getArgOperand(2));
6780 Ops[4] = getValue(I.getArgOperand(3));
6781 SDValue Result = DAG.getMemIntrinsicNode(
6782 ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
6783 EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
6784 /* align */ None, Flags);
6786 // Chain the prefetch in parallel with any pending loads, to stay out of
6787 // the way of later optimizations.
6788 PendingLoads.push_back(Result);
6789 Result = getRoot();
6790 DAG.setRoot(Result);
6791 return;
6793 case Intrinsic::lifetime_start:
6794 case Intrinsic::lifetime_end: {
6795 bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
6796 // Stack coloring is not enabled in O0, discard region information.
6797 if (TM.getOptLevel() == CodeGenOpt::None)
6798 return;
6800 const int64_t ObjectSize =
6801 cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
6802 Value *const ObjectPtr = I.getArgOperand(1);
6803 SmallVector<const Value *, 4> Allocas;
6804 getUnderlyingObjects(ObjectPtr, Allocas);
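// Emit one lifetime marker per underlying static alloca, keyed by its frame
// index so stack coloring can make use of the region information.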
6806 for (const Value *Alloca : Allocas) {
6807 const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
6809 // Could not find an Alloca.
6810 if (!LifetimeObject)
6811 continue;
6813 // First check that the Alloca is static, otherwise it won't have a
6814 // valid frame index.
6815 auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
6816 if (SI == FuncInfo.StaticAllocaMap.end())
6817 return;
6819 const int FrameIndex = SI->second;
6820 int64_t Offset;
6821 if (GetPointerBaseWithConstantOffset(
6822 ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
6823 Offset = -1; // Cannot determine offset from alloca to lifetime object.
6824 Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
6825 Offset);
6826 DAG.setRoot(Res);
6828 return;
6830 case Intrinsic::pseudoprobe: {
6831 auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
6832 auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
6833 auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
6834 Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
6835 DAG.setRoot(Res);
6836 return;
6838 case Intrinsic::invariant_start:
6839 // Discard region information.
6840 setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
6841 return;
6842 case Intrinsic::invariant_end:
6843 // Discard region information.
6844 return;
6845 case Intrinsic::clear_cache:
6846 // FunctionName may be null.
6847 if (const char *FunctionName = TLI.getClearCacheBuiltinName())
6848 lowerCallToExternalSymbol(I, FunctionName);
6849 return;
6850 case Intrinsic::donothing:
6851 case Intrinsic::seh_try_begin:
6852 case Intrinsic::seh_scope_begin:
6853 case Intrinsic::seh_try_end:
6854 case Intrinsic::seh_scope_end:
6855 // ignore
6856 return;
6857 case Intrinsic::experimental_stackmap:
6858 visitStackmap(I);
6859 return;
6860 case Intrinsic::experimental_patchpoint_void:
6861 case Intrinsic::experimental_patchpoint_i64:
6862 visitPatchpoint(I);
6863 return;
6864 case Intrinsic::experimental_gc_statepoint:
6865 LowerStatepoint(cast<GCStatepointInst>(I));
6866 return;
6867 case Intrinsic::experimental_gc_result:
6868 visitGCResult(cast<GCResultInst>(I));
6869 return;
6870 case Intrinsic::experimental_gc_relocate:
6871 visitGCRelocate(cast<GCRelocateInst>(I));
6872 return;
6873 case Intrinsic::instrprof_increment:
6874 llvm_unreachable("instrprof failed to lower an increment");
6875 case Intrinsic::instrprof_value_profile:
6876 llvm_unreachable("instrprof failed to lower a value profiling call");
6877 case Intrinsic::localescape: {
6878 MachineFunction &MF = DAG.getMachineFunction();
6879 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
6881 // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
6882 // is the same on all targets.
6883 for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
6884 Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
6885 if (isa<ConstantPointerNull>(Arg))
6886 continue; // Skip null pointers. They represent a hole in index space.
6887 AllocaInst *Slot = cast<AllocaInst>(Arg);
6888 assert(FuncInfo.StaticAllocaMap.count(Slot) &&
6889 "can only escape static allocas");
6890 int FI = FuncInfo.StaticAllocaMap[Slot];
6891 MCSymbol *FrameAllocSym =
6892 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6893 GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
6894 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
6895 TII->get(TargetOpcode::LOCAL_ESCAPE))
6896 .addSym(FrameAllocSym)
6897 .addFrameIndex(FI);
6900 return;
6903 case Intrinsic::localrecover: {
6904 // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
6905 MachineFunction &MF = DAG.getMachineFunction();
6907 // Get the symbol that defines the frame offset.
6908 auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
6909 auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
6910 unsigned IdxVal =
6911 unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
6912 MCSymbol *FrameAllocSym =
6913 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6914 GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
6916 Value *FP = I.getArgOperand(1);
6917 SDValue FPVal = getValue(FP);
6918 EVT PtrVT = FPVal.getValueType();
6920 // Create an MCSymbol for the label to avoid any target lowering
6921 // that would make this PC-relative.
6922 SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
6923 SDValue OffsetVal =
6924 DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
6926 // Add the offset to the FP.
6927 SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
6928 setValue(&I, Add);
6930 return;
6933 case Intrinsic::eh_exceptionpointer:
6934 case Intrinsic::eh_exceptioncode: {
6935 // Get the exception pointer vreg, copy from it, and resize it to fit.
6936 const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
6937 MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
6938 const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
6939 unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
6940 SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
6941 if (Intrinsic == Intrinsic::eh_exceptioncode)
6942 N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
6943 setValue(&I, N);
6944 return;
6946 case Intrinsic::xray_customevent: {
6947 // Here we want to make sure that the intrinsic behaves as if it has a
6948 // specific calling convention; this is currently only supported on x86_64.
6949 // FIXME: Support other platforms later.
6950 const auto &Triple = DAG.getTarget().getTargetTriple();
6951 if (Triple.getArch() != Triple::x86_64)
6952 return;
6954 SmallVector<SDValue, 8> Ops;
6956 // We want the arguments to always be passed in registers.
6957 SDValue LogEntryVal = getValue(I.getArgOperand(0));
6958 SDValue StrSizeVal = getValue(I.getArgOperand(1));
6959 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6960 SDValue Chain = getRoot();
6961 Ops.push_back(LogEntryVal);
6962 Ops.push_back(StrSizeVal);
6963 Ops.push_back(Chain);
6965 // We need to enforce the calling convention for the call site so that
6966 // argument ordering is handled correctly and so that register allocation
6967 // can see which registers may be clobbered and must be preserved across
6968 // calls to the intrinsic.
6969 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
6970 sdl, NodeTys, Ops);
6971 SDValue patchableNode = SDValue(MN, 0);
6972 DAG.setRoot(patchableNode);
6973 setValue(&I, patchableNode);
6974 return;
6976 case Intrinsic::xray_typedevent: {
6977 // Here we want to make sure that the intrinsic behaves as if it has a
6978 // specific calling convention; this is currently only supported on x86_64.
6979 // FIXME: Support other platforms later.
6980 const auto &Triple = DAG.getTarget().getTargetTriple();
6981 if (Triple.getArch() != Triple::x86_64)
6982 return;
6984 SmallVector<SDValue, 8> Ops;
6986 // We want the arguments to always be passed in registers.
6987 // It is unclear how manipulating the SelectionDAG here alone forces
6988 // callers to provide arguments in registers instead of on the stack.
6989 SDValue LogTypeId = getValue(I.getArgOperand(0));
6990 SDValue LogEntryVal = getValue(I.getArgOperand(1));
6991 SDValue StrSizeVal = getValue(I.getArgOperand(2));
6992 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6993 SDValue Chain = getRoot();
6994 Ops.push_back(LogTypeId);
6995 Ops.push_back(LogEntryVal);
6996 Ops.push_back(StrSizeVal);
6997 Ops.push_back(Chain);
6999 // We need to enforce the calling convention for the call site so that
7000 // argument ordering is handled correctly and so that register allocation
7001 // can see which registers may be clobbered and must be preserved across
7002 // calls to the intrinsic.
7003 MachineSDNode *MN = DAG.getMachineNode(
7004 TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
7005 SDValue patchableNode = SDValue(MN, 0);
7006 DAG.setRoot(patchableNode);
7007 setValue(&I, patchableNode);
7008 return;
7010 case Intrinsic::experimental_deoptimize:
7011 LowerDeoptimizeCall(&I);
7012 return;
7013 case Intrinsic::experimental_stepvector:
7014 visitStepVector(I);
7015 return;
7016 case Intrinsic::vector_reduce_fadd:
7017 case Intrinsic::vector_reduce_fmul:
7018 case Intrinsic::vector_reduce_add:
7019 case Intrinsic::vector_reduce_mul:
7020 case Intrinsic::vector_reduce_and:
7021 case Intrinsic::vector_reduce_or:
7022 case Intrinsic::vector_reduce_xor:
7023 case Intrinsic::vector_reduce_smax:
7024 case Intrinsic::vector_reduce_smin:
7025 case Intrinsic::vector_reduce_umax:
7026 case Intrinsic::vector_reduce_umin:
7027 case Intrinsic::vector_reduce_fmax:
7028 case Intrinsic::vector_reduce_fmin:
7029 visitVectorReduce(I, Intrinsic);
7030 return;
7032 case Intrinsic::icall_branch_funnel: {
7033 SmallVector<SDValue, 16> Ops;
7034 Ops.push_back(getValue(I.getArgOperand(0)));
7036 int64_t Offset;
7037 auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
7038 I.getArgOperand(1), Offset, DAG.getDataLayout()));
7039 if (!Base)
7040 report_fatal_error(
7041 "llvm.icall.branch.funnel operand must be a GlobalValue");
7042 Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
7044 struct BranchFunnelTarget {
7045 int64_t Offset;
7046 SDValue Target;
7048 SmallVector<BranchFunnelTarget, 8> Targets;
7050 for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
7051 auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
7052 I.getArgOperand(Op), Offset, DAG.getDataLayout()));
7053 if (ElemBase != Base)
7054 report_fatal_error("all llvm.icall.branch.funnel operands must refer "
7055 "to the same GlobalValue");
7057 SDValue Val = getValue(I.getArgOperand(Op + 1));
7058 auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
7059 if (!GA)
7060 report_fatal_error(
7061 "llvm.icall.branch.funnel operand must be a GlobalValue");
7062 Targets.push_back({Offset, DAG.getTargetGlobalAddress(
7063 GA->getGlobal(), sdl, Val.getValueType(),
7064 GA->getOffset())});
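// Sort the targets by their offset from the common base so the
// (offset, target) operand pairs are emitted in ascending offset order.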
7066 llvm::sort(Targets,
7067 [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
7068 return T1.Offset < T2.Offset;
7071 for (auto &T : Targets) {
7072 Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
7073 Ops.push_back(T.Target);
7076 Ops.push_back(DAG.getRoot()); // Chain
7077 SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
7078 MVT::Other, Ops),
7080 DAG.setRoot(N);
7081 setValue(&I, N);
7082 HasTailCall = true;
7083 return;
7086 case Intrinsic::wasm_landingpad_index:
7087 // The information this intrinsic contained has been transferred to the
7088 // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
7089 // delete it now.
7090 return;
7092 case Intrinsic::aarch64_settag:
7093 case Intrinsic::aarch64_settag_zero: {
7094 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7095 bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
7096 SDValue Val = TSI.EmitTargetCodeForSetTag(
7097 DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
7098 getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
7099 ZeroMemory);
7100 DAG.setRoot(Val);
7101 setValue(&I, Val);
7102 return;
7104 case Intrinsic::ptrmask: {
7105 SDValue Ptr = getValue(I.getOperand(0));
7106 SDValue Const = getValue(I.getOperand(1));
7108 EVT PtrVT = Ptr.getValueType();
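// llvm.ptrmask lowers to a plain AND of the pointer with the mask,
// zero-extended or truncated to the pointer width.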
7109 setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
7110 DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
7111 return;
7113 case Intrinsic::get_active_lane_mask: {
7114 EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
7115 SDValue Index = getValue(I.getOperand(0));
7116 EVT ElementVT = Index.getValueType();
7118 if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
7119 visitTargetIntrinsic(I, Intrinsic);
7120 return;
7123 SDValue TripCount = getValue(I.getOperand(1));
7124 auto VecTy = CCVT.changeVectorElementType(ElementVT);
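// Expand to: mask[i] = (uadd.sat(Index, i) < TripCount). The saturating add
// keeps lanes that would wrap past the maximum value from spuriously
// comparing below the trip count.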
7126 SDValue VectorIndex, VectorTripCount;
7127 if (VecTy.isScalableVector()) {
7128 VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
7129 VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
7130 } else {
7131 VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
7132 VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
7134 SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
7135 SDValue VectorInduction = DAG.getNode(
7136 ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
7137 SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
7138 VectorTripCount, ISD::CondCode::SETULT);
7139 setValue(&I, SetCC);
7140 return;
7142 case Intrinsic::experimental_vector_insert: {
7143 SDValue Vec = getValue(I.getOperand(0));
7144 SDValue SubVec = getValue(I.getOperand(1));
7145 SDValue Index = getValue(I.getOperand(2));
7147 // The intrinsic's index type is i64, but the SDNode requires an index type
7148 // suitable for the target. Convert the index as required.
7149 MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7150 if (Index.getValueType() != VectorIdxTy)
7151 Index = DAG.getVectorIdxConstant(
7152 cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
7154 EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
7155 setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
7156 Index));
7157 return;
7159 case Intrinsic::experimental_vector_extract: {
7160 SDValue Vec = getValue(I.getOperand(0));
7161 SDValue Index = getValue(I.getOperand(1));
7162 EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
7164 // The intrinsic's index type is i64, but the SDNode requires an index type
7165 // suitable for the target. Convert the index as required.
7166 MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7167 if (Index.getValueType() != VectorIdxTy)
7168 Index = DAG.getVectorIdxConstant(
7169 cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
7171 setValue(&I,
7172 DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
7173 return;
7175 case Intrinsic::experimental_vector_reverse:
7176 visitVectorReverse(I);
7177 return;
7178 case Intrinsic::experimental_vector_splice:
7179 visitVectorSplice(I);
7180 return;
7184 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
7185 const ConstrainedFPIntrinsic &FPI) {
7186 SDLoc sdl = getCurSDLoc();
7188 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7189 SmallVector<EVT, 4> ValueVTs;
7190 ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
7191 ValueVTs.push_back(MVT::Other); // Out chain
7193 // We do not need to serialize constrained FP intrinsics against
7194 // each other or against (nonvolatile) loads, so they can be
7195 // chained like loads.
7196 SDValue Chain = DAG.getRoot();
7197 SmallVector<SDValue, 4> Opers;
7198 Opers.push_back(Chain);
7199 if (FPI.isUnaryOp()) {
7200 Opers.push_back(getValue(FPI.getArgOperand(0)));
7201 } else if (FPI.isTernaryOp()) {
7202 Opers.push_back(getValue(FPI.getArgOperand(0)));
7203 Opers.push_back(getValue(FPI.getArgOperand(1)));
7204 Opers.push_back(getValue(FPI.getArgOperand(2)));
7205 } else {
7206 Opers.push_back(getValue(FPI.getArgOperand(0)));
7207 Opers.push_back(getValue(FPI.getArgOperand(1)));
7210 auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
7211 assert(Result.getNode()->getNumValues() == 2);
7213 // Push node to the appropriate list so that future instructions can be
7214 // chained up correctly.
7215 SDValue OutChain = Result.getValue(1);
7216 switch (EB) {
7217 case fp::ExceptionBehavior::ebIgnore:
7218 // The only reason why ebIgnore nodes still need to be chained is that
7219 // they might depend on the current rounding mode, and therefore must
7220 // not be moved across instructions that may change that mode.
7221 LLVM_FALLTHROUGH;
7222 case fp::ExceptionBehavior::ebMayTrap:
7223 // These must not be moved across calls or instructions that may change
7224 // floating-point exception masks.
7225 PendingConstrainedFP.push_back(OutChain);
7226 break;
7227 case fp::ExceptionBehavior::ebStrict:
7228 // These must not be moved across calls or instructions that may change
7229 // floating-point exception masks or read floating-point exception flags.
7230 // In addition, they cannot be optimized out even if unused.
7231 PendingConstrainedFPStrict.push_back(OutChain);
7232 break;
7236 SDVTList VTs = DAG.getVTList(ValueVTs);
7237 fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
7239 SDNodeFlags Flags;
7240 if (EB == fp::ExceptionBehavior::ebIgnore)
7241 Flags.setNoFPExcept(true);
7243 if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
7244 Flags.copyFMF(*FPOp);
7246 unsigned Opcode;
7247 switch (FPI.getIntrinsicID()) {
7248 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
7249 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
7250 case Intrinsic::INTRINSIC: \
7251 Opcode = ISD::STRICT_##DAGN; \
7252 break;
7253 #include "llvm/IR/ConstrainedOps.def"
7254 case Intrinsic::experimental_constrained_fmuladd: {
7255 Opcode = ISD::STRICT_FMA;
7256 // Break fmuladd into fmul and fadd.
7257 if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
7258 !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
7259 ValueVTs[0])) {
7260 Opers.pop_back();
7261 SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
7262 pushOutChain(Mul, EB);
7263 Opcode = ISD::STRICT_FADD;
7264 Opers.clear();
7265 Opers.push_back(Mul.getValue(1));
7266 Opers.push_back(Mul.getValue(0));
7267 Opers.push_back(getValue(FPI.getArgOperand(2)));
7269 break;
7273 // A few strict DAG nodes carry additional operands that are not
7274 // set up by the default code above.
7275 switch (Opcode) {
7276 default: break;
7277 case ISD::STRICT_FP_ROUND:
7278 Opers.push_back(
7279 DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
7280 break;
7281 case ISD::STRICT_FSETCC:
7282 case ISD::STRICT_FSETCCS: {
7283 auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
7284 ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
7285 if (TM.Options.NoNaNsFPMath)
7286 Condition = getFCmpCodeWithoutNaN(Condition);
7287 Opers.push_back(DAG.getCondCode(Condition));
7288 break;
7292 SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
7293 pushOutChain(Result, EB);
7295 SDValue FPResult = Result.getValue(0);
7296 setValue(&FPI, FPResult);
7299 static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
7300 Optional<unsigned> ResOPC;
7301 switch (VPIntrin.getIntrinsicID()) {
7302 #define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
7303 #define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
7304 #define END_REGISTER_VP_INTRINSIC(VPID) break;
7305 #include "llvm/IR/VPIntrinsics.def"
7308 if (!ResOPC.hasValue())
7309 llvm_unreachable(
7310 "Inconsistency: no SDNode available for this VPIntrinsic!");
7312 if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
7313 *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
7314 if (VPIntrin.getFastMathFlags().allowReassoc())
7315 return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
7316 : ISD::VP_REDUCE_FMUL;
7319 return ResOPC.getValue();
7322 void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
7323 SmallVector<SDValue, 7> &OpValues,
7324 bool IsGather) {
7325 SDLoc DL = getCurSDLoc();
7326 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7327 Value *PtrOperand = VPIntrin.getArgOperand(0);
7328 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7329 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7330 const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
7331 SDValue LD;
7332 bool AddToChain = true;
7333 if (!IsGather) {
7334 // Do not serialize variable-length loads of constant memory with
7335 // anything.
7336 if (!Alignment)
7337 Alignment = DAG.getEVTAlign(VT);
7338 MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
7339 AddToChain = !AA || !AA->pointsToConstantMemory(ML);
7340 SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
7341 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7342 MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
7343 MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
7344 LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
7345 MMO, false /*IsExpanding */);
7346 } else {
7347 if (!Alignment)
7348 Alignment = DAG.getEVTAlign(VT.getScalarType());
7349 unsigned AS =
7350 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
7351 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7352 MachinePointerInfo(AS), MachineMemOperand::MOLoad,
7353 MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
7354 SDValue Base, Index, Scale;
7355 ISD::MemIndexType IndexType;
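// Try to split the vector of pointers into a single scalar base plus a
// vector index; if that fails, gather through the pointer vector itself
// with a zero base.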
7356 bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
7357 this, VPIntrin.getParent());
7358 if (!UniformBase) {
7359 Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
7360 Index = getValue(PtrOperand);
7361 IndexType = ISD::SIGNED_UNSCALED;
7362 Scale =
7363 DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
7365 EVT IdxVT = Index.getValueType();
7366 EVT EltTy = IdxVT.getVectorElementType();
7367 if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
7368 EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
7369 Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
7371 LD = DAG.getGatherVP(
7372 DAG.getVTList(VT, MVT::Other), VT, DL,
7373 {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
7374 IndexType);
7376 if (AddToChain)
7377 PendingLoads.push_back(LD.getValue(1));
7378 setValue(&VPIntrin, LD);
7381 void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
7382 SmallVector<SDValue, 7> &OpValues,
7383 bool IsScatter) {
7384 SDLoc DL = getCurSDLoc();
7385 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7386 Value *PtrOperand = VPIntrin.getArgOperand(1);
7387 EVT VT = OpValues[0].getValueType();
7388 MaybeAlign Alignment = VPIntrin.getPointerAlignment();
7389 AAMDNodes AAInfo = VPIntrin.getAAMetadata();
7390 SDValue ST;
7391 if (!IsScatter) {
7392 if (!Alignment)
7393 Alignment = DAG.getEVTAlign(VT);
7394 SDValue Ptr = OpValues[1];
7395 SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
7396 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7397 MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
7398 MemoryLocation::UnknownSize, *Alignment, AAInfo);
7399 ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
7400 OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
7401 /* IsTruncating */ false, /*IsCompressing*/ false);
7402 } else {
7403 if (!Alignment)
7404 Alignment = DAG.getEVTAlign(VT.getScalarType());
7405 unsigned AS =
7406 PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
7407 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
7408 MachinePointerInfo(AS), MachineMemOperand::MOStore,
7409 MemoryLocation::UnknownSize, *Alignment, AAInfo);
7410 SDValue Base, Index, Scale;
7411 ISD::MemIndexType IndexType;
7412 bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
7413 this, VPIntrin.getParent());
7414 if (!UniformBase) {
7415 Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
7416 Index = getValue(PtrOperand);
7417 IndexType = ISD::SIGNED_UNSCALED;
7418 Scale =
7419 DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
7421 EVT IdxVT = Index.getValueType();
7422 EVT EltTy = IdxVT.getVectorElementType();
7423 if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
7424 EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
7425 Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
7427 ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
7428 {getMemoryRoot(), OpValues[0], Base, Index, Scale,
7429 OpValues[2], OpValues[3]},
7430 MMO, IndexType);
7432 DAG.setRoot(ST);
7433 setValue(&VPIntrin, ST);
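/// Lower a vector-predicated (VP) intrinsic: zero-extend the explicit vector
/// length operand to the target's EVL type, then either emit a generic VP node
/// or defer to the dedicated load/gather and store/scatter helpers above.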
7436 void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
7437 const VPIntrinsic &VPIntrin) {
7438 SDLoc DL = getCurSDLoc();
7439 unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
7441 SmallVector<EVT, 4> ValueVTs;
7442 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7443 ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
7444 SDVTList VTs = DAG.getVTList(ValueVTs);
7446 auto EVLParamPos =
7447 VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
7449 MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
7450 assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
7451 "Unexpected target EVL type");
7453 // Request operands.
7454 SmallVector<SDValue, 7> OpValues;
7455 for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
7456 auto Op = getValue(VPIntrin.getArgOperand(I));
7457 if (I == EVLParamPos)
7458 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
7459 OpValues.push_back(Op);
7462 switch (Opcode) {
7463 default: {
7464 SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
7465 setValue(&VPIntrin, Result);
7466 break;
7468 case ISD::VP_LOAD:
7469 case ISD::VP_GATHER:
7470 visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
7471 Opcode == ISD::VP_GATHER);
7472 break;
7473 case ISD::VP_STORE:
7474 case ISD::VP_SCATTER:
7475 visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
7476 break;
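/// Emit the EH label that opens the try range of an invoke and record the
/// SjLj call-site index for its landing pad; returns the updated chain.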
7480 SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
7481 const BasicBlock *EHPadBB,
7482 MCSymbol *&BeginLabel) {
7483 MachineFunction &MF = DAG.getMachineFunction();
7484 MachineModuleInfo &MMI = MF.getMMI();
7486 // Insert a label before the invoke call to mark the try range. This can be
7487 // used to detect deletion of the invoke via the MachineModuleInfo.
7488 BeginLabel = MMI.getContext().createTempSymbol();
7490 // For SjLj, keep track of which landing pads go with which invokes
7491 // so as to maintain the ordering of pads in the LSDA.
7492 unsigned CallSiteIndex = MMI.getCurrentCallSite();
7493 if (CallSiteIndex) {
7494 MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
7495 LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
7497 // Now that the call site is handled, stop tracking it.
7498 MMI.setCurrentCallSite(0);
7501 return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
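/// Emit the EH label that closes the try range and register the label range
/// with WinEHFuncInfo or MachineModuleInfo, as the EH personality requires.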
7504 SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
7505 const BasicBlock *EHPadBB,
7506 MCSymbol *BeginLabel) {
7507 assert(BeginLabel && "BeginLabel should've been set");
7509 MachineFunction &MF = DAG.getMachineFunction();
7510 MachineModuleInfo &MMI = MF.getMMI();
7512 // Insert a label at the end of the invoke call to mark the try range. This
7513 // can be used to detect deletion of the invoke via the MachineModuleInfo.
7514 MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
7515 Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
7517 // Inform MachineModuleInfo of range.
7518 auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
7519 // Some platforms (e.g. wasm) use funclet-style IR but do not actually use
7520 // outlined funclets and their LSDA info style.
7521 if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
7522 assert(II && "II should've been set");
7523 WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
7524 EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
7525 } else if (!isScopedEHPersonality(Pers)) {
7526 assert(EHPadBB);
7527 MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
7530 return Chain;
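/// Lower a call that may unwind to \p EHPadBB: bracket it with EH labels when
/// an EH pad is present and perform the actual lowering through
/// TargetLowering::LowerCallTo, returning the {result, chain} pair.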
7533 std::pair<SDValue, SDValue>
7534 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
7535 const BasicBlock *EHPadBB) {
7536 MCSymbol *BeginLabel = nullptr;
7538 if (EHPadBB) {
7539 // Both PendingLoads and PendingExports must be flushed here;
7540 // this call might not return.
7541 (void)getRoot();
7542 DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
7543 CLI.setChain(getRoot());
7546 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7547 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
7549 assert((CLI.IsTailCall || Result.second.getNode()) &&
7550 "Non-null chain expected with non-tail call!");
7551 assert((Result.second.getNode() || !Result.first.getNode()) &&
7552 "Null value expected with tail call!");
7554 if (!Result.second.getNode()) {
7555 // As a special case, a null chain means that a tail call has been emitted
7556 // and the DAG root is already updated.
7557 HasTailCall = true;
7559 // Since there's no actual continuation from this block, nothing can
7560 // rely on us setting vregs for them.
7561 PendingExports.clear();
7562 } else {
7563 DAG.setRoot(Result.second);
7566 if (EHPadBB) {
7567 DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
7568 BeginLabel));
7571 return Result;
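/// Lower a call or invoke of \p Callee: build the argument list (including
/// swifterror and cfguardtarget handling), decide whether a tail call is still
/// permitted, and emit the call via lowerInvokable.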
7574 void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
7575 bool isTailCall,
7576 bool isMustTailCall,
7577 const BasicBlock *EHPadBB) {
7578 auto &DL = DAG.getDataLayout();
7579 FunctionType *FTy = CB.getFunctionType();
7580 Type *RetTy = CB.getType();
7582 TargetLowering::ArgListTy Args;
7583 Args.reserve(CB.arg_size());
7585 const Value *SwiftErrorVal = nullptr;
7586 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7588 if (isTailCall) {
7589 // Avoid emitting tail calls in functions with the disable-tail-calls
7590 // attribute.
7591 auto *Caller = CB.getParent()->getParent();
7592 if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
7593 "true" && !isMustTailCall)
7594 isTailCall = false;
7596 // We can't tail call inside a function with a swifterror argument. Lowering
7597 // does not support this yet. It would have to move into the swifterror
7598 // register before the call.
7599 if (TLI.supportSwiftError() &&
7600 Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
7601 isTailCall = false;
7604 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
7605 TargetLowering::ArgListEntry Entry;
7606 const Value *V = *I;
7608 // Skip empty types
7609 if (V->getType()->isEmptyTy())
7610 continue;
7612 SDValue ArgNode = getValue(V);
7613 Entry.Node = ArgNode; Entry.Ty = V->getType();
7615 Entry.setAttributes(&CB, I - CB.arg_begin());
7617 // Use swifterror virtual register as input to the call.
7618 if (Entry.IsSwiftError && TLI.supportSwiftError()) {
7619 SwiftErrorVal = V;
7620 // We find the virtual register for the actual swifterror argument.
7621 // Instead of using the Value, we use the virtual register.
7622 Entry.Node =
7623 DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
7624 EVT(TLI.getPointerTy(DL)));
7627 Args.push_back(Entry);
7629 // If we have an explicit sret argument that is an Instruction (i.e., it
7630 // might point to function-local memory), we can't meaningfully tail-call.
7631 if (Entry.IsSRet && isa<Instruction>(V))
7632 isTailCall = false;
7635 // If call site has a cfguardtarget operand bundle, create and add an
7636 // additional ArgListEntry.
7637 if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
7638 TargetLowering::ArgListEntry Entry;
7639 Value *V = Bundle->Inputs[0];
7640 SDValue ArgNode = getValue(V);
7641 Entry.Node = ArgNode;
7642 Entry.Ty = V->getType();
7643 Entry.IsCFGuardTarget = true;
7644 Args.push_back(Entry);
7647 // Check if target-independent constraints permit a tail call here.
7648 // Target-dependent constraints are checked within TLI->LowerCallTo.
7649 if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
7650 isTailCall = false;
7652 // Disable tail calls if there is a swifterror argument. Targets have not
7653 // been updated to support tail calls.
7654 if (TLI.supportSwiftError() && SwiftErrorVal)
7655 isTailCall = false;
7657 TargetLowering::CallLoweringInfo CLI(DAG);
7658 CLI.setDebugLoc(getCurSDLoc())
7659 .setChain(getRoot())
7660 .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
7661 .setTailCall(isTailCall)
7662 .setConvergent(CB.isConvergent())
7663 .setIsPreallocated(
7664 CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
7665 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
7667 if (Result.first.getNode()) {
7668 Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
7669 setValue(&CB, Result.first);
7672 // The last element of CLI.InVals has the SDValue for swifterror return.
7673 // Here we copy it to a virtual register and update SwiftErrorMap for
7674 // book-keeping.
7675 if (SwiftErrorVal && TLI.supportSwiftError()) {
7676 // Get the last element of InVals.
7677 SDValue Src = CLI.InVals.back();
7678 Register VReg =
7679 SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
7680 SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
7681 DAG.setRoot(CopyNode);
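/// Load \p LoadVT bits from \p PtrVal for an expanded memcmp, constant-folding
/// the load when the source is constant memory (e.g. a string literal).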
7685 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
7686 SelectionDAGBuilder &Builder) {
7687 // Check to see if this load can be trivially constant folded, e.g. if the
7688 // input is from a string literal.
7689 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
7690 // Cast pointer to the type we really want to load.
7691 Type *LoadTy =
7692 Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
7693 if (LoadVT.isVector())
7694 LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
7696 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
7697 PointerType::getUnqual(LoadTy));
7699 if (const Constant *LoadCst =
7700 ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
7701 LoadTy, Builder.DAG.getDataLayout()))
7702 return Builder.getValue(LoadCst);
7705 // Otherwise, we have to emit the load. If the pointer is to unfoldable but
7706 // still constant memory, the input chain can be the entry node.
7707 SDValue Root;
7708 bool ConstantMemory = false;
7710 // Do not serialize (non-volatile) loads of constant memory with anything.
7711 if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
7712 Root = Builder.DAG.getEntryNode();
7713 ConstantMemory = true;
7714 } else {
7715 // Do not serialize non-volatile loads against each other.
7716 Root = Builder.DAG.getRoot();
7719 SDValue Ptr = Builder.getValue(PtrVal);
7720 SDValue LoadVal =
7721 Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
7722 MachinePointerInfo(PtrVal), Align(1));
7724 if (!ConstantMemory)
7725 Builder.PendingLoads.push_back(LoadVal.getValue(1));
7726 return LoadVal;
7729 /// Record the value for an instruction that produces an integer result,
7730 /// converting the type where necessary.
7731 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
7732 SDValue Value,
7733 bool IsSigned) {
7734 EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
7735 I.getType(), true);
7736 if (IsSigned)
7737 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
7738 else
7739 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
7740 setValue(&I, Value);
7743 /// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
7744 /// true and lower it. Otherwise return false, and it will be lowered like a
7745 /// normal call.
7746 /// The caller already checked that \p I calls the appropriate LibFunc with a
7747 /// correct prototype.
7748 bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
7749 const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
7750 const Value *Size = I.getArgOperand(2);
7751 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
7752 if (CSize && CSize->getZExtValue() == 0) {
7753 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
7754 I.getType(), true);
7755 setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
7756 return true;
7759 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7760 std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
7761 DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
7762 getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
7763 if (Res.first.getNode()) {
7764 processIntegerCallValue(I, Res.first, true);
7765 PendingLoads.push_back(Res.second);
7766 return true;
7769 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
7770 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
7771 if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
7772 return false;
7774 // If the target has a fast compare for the given size, it will return a
7775 // preferred load type for that size. Require that the load VT is legal and
7776 // that the target supports unaligned loads of that type. Otherwise, return
7777 // INVALID.
7778 auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
7779 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7780 MVT LVT = TLI.hasFastEqualityCompare(NumBits);
7781 if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
7782 // TODO: Handle 5 byte compare as 4-byte + 1 byte.
7783 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
7784 // TODO: Check alignment of src and dest ptrs.
7785 unsigned DstAS = LHS->getType()->getPointerAddressSpace();
7786 unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
7787 if (!TLI.isTypeLegal(LVT) ||
7788 !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
7789 !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
7790 LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
7793 return LVT;
7796 // This turns into unaligned loads. We only do this if the target natively
7797 // supports the MVT we'll be loading or if it is small enough (<= 4) that
7798 // we'll only produce a small number of byte loads.
7799 MVT LoadVT;
7800 unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
7801 switch (NumBitsToCompare) {
7802 default:
7803 return false;
7804 case 16:
7805 LoadVT = MVT::i16;
7806 break;
7807 case 32:
7808 LoadVT = MVT::i32;
7809 break;
7810 case 64:
7811 case 128:
7812 case 256:
7813 LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
7814 break;
7817 if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
7818 return false;
7820 SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
7821 SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
7823 // Bitcast to a wide integer type if the loads are vectors.
7824 if (LoadVT.isVector()) {
7825 EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
7826 LoadL = DAG.getBitcast(CmpVT, LoadL);
7827 LoadR = DAG.getBitcast(CmpVT, LoadR);
7830 SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
7831 processIntegerCallValue(I, Cmp, false);
7832 return true;
7835 /// See if we can lower a memchr call into an optimized form. If so, return
7836 /// true and lower it. Otherwise return false, and it will be lowered like a
7837 /// normal call.
7838 /// The caller already checked that \p I calls the appropriate LibFunc with a
7839 /// correct prototype.
7840 bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
7841 const Value *Src = I.getArgOperand(0);
7842 const Value *Char = I.getArgOperand(1);
7843 const Value *Length = I.getArgOperand(2);
7845 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7846 std::pair<SDValue, SDValue> Res =
7847 TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
7848 getValue(Src), getValue(Char), getValue(Length),
7849 MachinePointerInfo(Src));
7850 if (Res.first.getNode()) {
7851 setValue(&I, Res.first);
7852 PendingLoads.push_back(Res.second);
7853 return true;
7856 return false;
7859 /// See if we can lower a mempcpy call into an optimized form. If so, return
7860 /// true and lower it. Otherwise return false, and it will be lowered like a
7861 /// normal call.
7862 /// The caller already checked that \p I calls the appropriate LibFunc with a
7863 /// correct prototype.
7864 bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
7865 SDValue Dst = getValue(I.getArgOperand(0));
7866 SDValue Src = getValue(I.getArgOperand(1));
7867 SDValue Size = getValue(I.getArgOperand(2));
7869 Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
7870 Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
7871 // DAG::getMemcpy needs Alignment to be defined.
7872 Align Alignment = std::min(DstAlign, SrcAlign);
7874 bool isVol = false;
7875 SDLoc sdl = getCurSDLoc();
7877 // In the mempcpy context we need to pass in a false value for isTailCall
7878 // because the return pointer needs to be adjusted by the size of
7879 // the copied memory.
7880 SDValue Root = isVol ? getRoot() : getMemoryRoot();
7881 SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
7882 /*isTailCall=*/false,
7883 MachinePointerInfo(I.getArgOperand(0)),
7884 MachinePointerInfo(I.getArgOperand(1)),
7885 I.getAAMetadata());
7886 assert(MC.getNode() != nullptr &&
7887 "** memcpy should not be lowered as TailCall in mempcpy context **");
7888 DAG.setRoot(MC);
7890 // Check if Size needs to be truncated or extended.
7891 Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
7893 // Adjust return pointer to point just past the last dst byte.
7894 SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
7895 Dst, Size);
7896 setValue(&I, DstPlusSize);
7897 return true;
7900 /// See if we can lower a strcpy call into an optimized form. If so, return
7901 /// true and lower it, otherwise return false and it will be lowered like a
7902 /// normal call.
7903 /// The caller already checked that \p I calls the appropriate LibFunc with a
7904 /// correct prototype.
7905 bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
7906 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7908 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7909 std::pair<SDValue, SDValue> Res =
7910 TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
7911 getValue(Arg0), getValue(Arg1),
7912 MachinePointerInfo(Arg0),
7913 MachinePointerInfo(Arg1), isStpcpy);
7914 if (Res.first.getNode()) {
7915 setValue(&I, Res.first);
7916 DAG.setRoot(Res.second);
7917 return true;
7920 return false;
7923 /// See if we can lower a strcmp call into an optimized form. If so, return
7924 /// true and lower it, otherwise return false and it will be lowered like a
7925 /// normal call.
7926 /// The caller already checked that \p I calls the appropriate LibFunc with a
7927 /// correct prototype.
7928 bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
7929 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7931 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7932 std::pair<SDValue, SDValue> Res =
7933 TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
7934 getValue(Arg0), getValue(Arg1),
7935 MachinePointerInfo(Arg0),
7936 MachinePointerInfo(Arg1));
7937 if (Res.first.getNode()) {
7938 processIntegerCallValue(I, Res.first, true);
7939 PendingLoads.push_back(Res.second);
7940 return true;
7943 return false;
7946 /// See if we can lower a strlen call into an optimized form. If so, return
7947 /// true and lower it, otherwise return false and it will be lowered like a
7948 /// normal call.
7949 /// The caller already checked that \p I calls the appropriate LibFunc with a
7950 /// correct prototype.
7951 bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
7952 const Value *Arg0 = I.getArgOperand(0);
7954 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7955 std::pair<SDValue, SDValue> Res =
7956 TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
7957 getValue(Arg0), MachinePointerInfo(Arg0));
7958 if (Res.first.getNode()) {
7959 processIntegerCallValue(I, Res.first, false);
7960 PendingLoads.push_back(Res.second);
7961 return true;
7964 return false;
7967 /// See if we can lower a strnlen call into an optimized form. If so, return
7968 /// true and lower it, otherwise return false and it will be lowered like a
7969 /// normal call.
7970 /// The caller already checked that \p I calls the appropriate LibFunc with a
7971 /// correct prototype.
7972 bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
7973 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
7975 const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
7976 std::pair<SDValue, SDValue> Res =
7977 TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
7978 getValue(Arg0), getValue(Arg1),
7979 MachinePointerInfo(Arg0));
7980 if (Res.first.getNode()) {
7981 processIntegerCallValue(I, Res.first, false);
7982 PendingLoads.push_back(Res.second);
7983 return true;
7986 return false;
7989 /// See if we can lower a unary floating-point operation into an SDNode with
7990 /// the specified Opcode. If so, return true and lower it, otherwise return
7991 /// false and it will be lowered like a normal call.
7992 /// The caller already checked that \p I calls the appropriate LibFunc with a
7993 /// correct prototype.
7994 bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
7995 unsigned Opcode) {
7996 // We already checked this call's prototype; verify it doesn't modify errno.
7997 if (!I.onlyReadsMemory())
7998 return false;
8000 SDNodeFlags Flags;
8001 Flags.copyFMF(cast<FPMathOperator>(I));
8003 SDValue Tmp = getValue(I.getArgOperand(0));
8004 setValue(&I,
8005 DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
8006 return true;
8009 /// See if we can lower a binary floating-point operation into an SDNode with
8010 /// the specified Opcode. If so, return true and lower it. Otherwise return
8011 /// false, and it will be lowered like a normal call.
8012 /// The caller already checked that \p I calls the appropriate LibFunc with a
8013 /// correct prototype.
8014 bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
8015 unsigned Opcode) {
8016 // We already checked this call's prototype; verify it doesn't modify errno.
8017 if (!I.onlyReadsMemory())
8018 return false;
8020 SDNodeFlags Flags;
8021 Flags.copyFMF(cast<FPMathOperator>(I));
8023 SDValue Tmp0 = getValue(I.getArgOperand(0));
8024 SDValue Tmp1 = getValue(I.getArgOperand(1));
8025 EVT VT = Tmp0.getValueType();
8026 setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
8027 return true;
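/// Lower a call instruction: dispatch inline asm and intrinsic calls, try the
/// optimized lowerings for recognized libc/libm functions, and otherwise fall
/// back to a normal call via LowerCallTo.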
8030 void SelectionDAGBuilder::visitCall(const CallInst &I) {
8031 // Handle inline assembly differently.
8032 if (I.isInlineAsm()) {
8033 visitInlineAsm(I);
8034 return;
8037 if (Function *F = I.getCalledFunction()) {
8038 diagnoseDontCall(I);
8040 if (F->isDeclaration()) {
8041 // Is this an LLVM intrinsic or a target-specific intrinsic?
8042 unsigned IID = F->getIntrinsicID();
8043 if (!IID)
8044 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
8045 IID = II->getIntrinsicID(F);
8047 if (IID) {
8048 visitIntrinsicCall(I, IID);
8049 return;
8053 // Check for well-known libc/libm calls. If the function is internal, it
8054 // can't be a library call. Don't do the check if marked as nobuiltin for
8055 // some reason or the call site requires strict floating point semantics.
8056 LibFunc Func;
8057 if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
8058 F->hasName() && LibInfo->getLibFunc(*F, Func) &&
8059 LibInfo->hasOptimizedCodeGen(Func)) {
8060 switch (Func) {
8061 default: break;
8062 case LibFunc_bcmp:
8063 if (visitMemCmpBCmpCall(I))
8064 return;
8065 break;
8066 case LibFunc_copysign:
8067 case LibFunc_copysignf:
8068 case LibFunc_copysignl:
8069 // We already checked this call's prototype; verify it doesn't modify
8070 // errno.
8071 if (I.onlyReadsMemory()) {
8072 SDValue LHS = getValue(I.getArgOperand(0));
8073 SDValue RHS = getValue(I.getArgOperand(1));
8074 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
8075 LHS.getValueType(), LHS, RHS));
8076 return;
8078 break;
8079 case LibFunc_fabs:
8080 case LibFunc_fabsf:
8081 case LibFunc_fabsl:
8082 if (visitUnaryFloatCall(I, ISD::FABS))
8083 return;
8084 break;
8085 case LibFunc_fmin:
8086 case LibFunc_fminf:
8087 case LibFunc_fminl:
8088 if (visitBinaryFloatCall(I, ISD::FMINNUM))
8089 return;
8090 break;
8091 case LibFunc_fmax:
8092 case LibFunc_fmaxf:
8093 case LibFunc_fmaxl:
8094 if (visitBinaryFloatCall(I, ISD::FMAXNUM))
8095 return;
8096 break;
8097 case LibFunc_sin:
8098 case LibFunc_sinf:
8099 case LibFunc_sinl:
8100 if (visitUnaryFloatCall(I, ISD::FSIN))
8101 return;
8102 break;
8103 case LibFunc_cos:
8104 case LibFunc_cosf:
8105 case LibFunc_cosl:
8106 if (visitUnaryFloatCall(I, ISD::FCOS))
8107 return;
8108 break;
8109 case LibFunc_sqrt:
8110 case LibFunc_sqrtf:
8111 case LibFunc_sqrtl:
8112 case LibFunc_sqrt_finite:
8113 case LibFunc_sqrtf_finite:
8114 case LibFunc_sqrtl_finite:
8115 if (visitUnaryFloatCall(I, ISD::FSQRT))
8116 return;
8117 break;
8118 case LibFunc_floor:
8119 case LibFunc_floorf:
8120 case LibFunc_floorl:
8121 if (visitUnaryFloatCall(I, ISD::FFLOOR))
8122 return;
8123 break;
8124 case LibFunc_nearbyint:
8125 case LibFunc_nearbyintf:
8126 case LibFunc_nearbyintl:
8127 if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
8128 return;
8129 break;
8130 case LibFunc_ceil:
8131 case LibFunc_ceilf:
8132 case LibFunc_ceill:
8133 if (visitUnaryFloatCall(I, ISD::FCEIL))
8134 return;
8135 break;
8136 case LibFunc_rint:
8137 case LibFunc_rintf:
8138 case LibFunc_rintl:
8139 if (visitUnaryFloatCall(I, ISD::FRINT))
8140 return;
8141 break;
8142 case LibFunc_round:
8143 case LibFunc_roundf:
8144 case LibFunc_roundl:
8145 if (visitUnaryFloatCall(I, ISD::FROUND))
8146 return;
8147 break;
8148 case LibFunc_trunc:
8149 case LibFunc_truncf:
8150 case LibFunc_truncl:
8151 if (visitUnaryFloatCall(I, ISD::FTRUNC))
8152 return;
8153 break;
8154 case LibFunc_log2:
8155 case LibFunc_log2f:
8156 case LibFunc_log2l:
8157 if (visitUnaryFloatCall(I, ISD::FLOG2))
8158 return;
8159 break;
8160 case LibFunc_exp2:
8161 case LibFunc_exp2f:
8162 case LibFunc_exp2l:
8163 if (visitUnaryFloatCall(I, ISD::FEXP2))
8164 return;
8165 break;
8166 case LibFunc_memcmp:
8167 if (visitMemCmpBCmpCall(I))
8168 return;
8169 break;
8170 case LibFunc_mempcpy:
8171 if (visitMemPCpyCall(I))
8172 return;
8173 break;
8174 case LibFunc_memchr:
8175 if (visitMemChrCall(I))
8176 return;
8177 break;
8178 case LibFunc_strcpy:
8179 if (visitStrCpyCall(I, false))
8180 return;
8181 break;
8182 case LibFunc_stpcpy:
8183 if (visitStrCpyCall(I, true))
8184 return;
8185 break;
8186 case LibFunc_strcmp:
8187 if (visitStrCmpCall(I))
8188 return;
8189 break;
8190 case LibFunc_strlen:
8191 if (visitStrLenCall(I))
8192 return;
8193 break;
8194 case LibFunc_strnlen:
8195 if (visitStrNLenCall(I))
8196 return;
8197 break;
8202 // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
8203 // have to do anything here to lower funclet bundles.
8204 // CFGuardTarget bundles are lowered in LowerCallTo.
8205 assert(!I.hasOperandBundlesOtherThan(
8206 {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
8207 LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
8208 LLVMContext::OB_clang_arc_attachedcall}) &&
8209 "Cannot lower calls with arbitrary operand bundles!");
8211 SDValue Callee = getValue(I.getCalledOperand());
8213 if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
8214 LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
8215 else
8216 // Check if we can potentially perform a tail call. More detailed checking
8217 // is done within LowerCallTo, after more information about the call is
8218 // known.
8219 LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
8222 namespace {
8224 /// AsmOperandInfo - This contains information for each constraint that we are
8225 /// lowering.
8226 class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
8227 public:
8228 /// CallOperand - If this is the result output operand or a clobber
8229 /// this is null, otherwise it is the incoming operand to the CallInst.
8230 /// This gets modified as the asm is processed.
8231 SDValue CallOperand;
8233 /// AssignedRegs - If this is a register or register class operand, this
8234 /// contains the set of registers corresponding to the operand.
8235 RegsForValue AssignedRegs;
8237 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
8238 : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
8241 /// Whether or not this operand accesses memory
8242 bool hasMemory(const TargetLowering &TLI) const {
8243 // Indirect operands access memory.
8244 if (isIndirect)
8245 return true;
8247 for (const auto &Code : Codes)
8248 if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
8249 return true;
8251 return false;
8254 /// getCallOperandValEVT - Return the EVT of the Value* that this operand
8255 /// corresponds to. If there is no Value* for this operand, it returns
8256 /// MVT::Other.
8257 EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
8258 const DataLayout &DL,
8259 llvm::Type *ParamElemType) const {
8260 if (!CallOperandVal) return MVT::Other;
8262 if (isa<BasicBlock>(CallOperandVal))
8263 return TLI.getProgramPointerTy(DL);
8265 llvm::Type *OpTy = CallOperandVal->getType();
8267 // FIXME: code duplicated from TargetLowering::ParseConstraints().
8268 // If this is an indirect operand, the operand is a pointer to the
8269 // accessed type.
8270 if (isIndirect) {
8271 OpTy = ParamElemType;
8272 assert(OpTy && "Indirect operand must have elementtype attribute");
8275 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
8276 if (StructType *STy = dyn_cast<StructType>(OpTy))
8277 if (STy->getNumElements() == 1)
8278 OpTy = STy->getElementType(0);
8280 // If OpTy is not a single value, it may be a struct/union that we
8281 // can tile with integers.
8282 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
8283 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
8284 switch (BitSize) {
8285 default: break;
8286 case 1:
8287 case 8:
8288 case 16:
8289 case 32:
8290 case 64:
8291 case 128:
8292 OpTy = IntegerType::get(Context, BitSize);
8293 break;
8297 return TLI.getAsmOperandValueType(DL, OpTy, true);
8302 } // end anonymous namespace
8304 /// Make sure that the output operand \p OpInfo and its corresponding input
8305 /// operand \p MatchingOpInfo have compatible constraint types (otherwise error
8306 /// out).
8307 static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
8308 SDISelAsmOperandInfo &MatchingOpInfo,
8309 SelectionDAG &DAG) {
8310 if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
8311 return;
8313 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
8314 const auto &TLI = DAG.getTargetLoweringInfo();
8316 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
8317 TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
8318 OpInfo.ConstraintVT);
8319 std::pair<unsigned, const TargetRegisterClass *> InputRC =
8320 TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
8321 MatchingOpInfo.ConstraintVT);
8322 if ((OpInfo.ConstraintVT.isInteger() !=
8323 MatchingOpInfo.ConstraintVT.isInteger()) ||
8324 (MatchRC.second != InputRC.second)) {
8325 // FIXME: error out in a more elegant fashion
8326 report_fatal_error("Unsupported asm: input constraint"
8327 " with a matching output constraint of"
8328 " incompatible type!");
8330 MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
8333 /// Get a direct memory input to behave well as an indirect operand.
8334 /// This may introduce stores, hence the need for a \p Chain.
8335 /// \return The (possibly updated) chain.
8336 static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
8337 SDISelAsmOperandInfo &OpInfo,
8338 SelectionDAG &DAG) {
8339 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8341 // If we don't have an indirect input, put it in the constpool if we can,
8342 // otherwise spill it to a stack slot.
8343 // TODO: This isn't quite right. We need to handle these according to
8344 // the addressing mode that the constraint wants. Also, this may take
8345 // an additional register for the computation and we don't want that
8346 // either.
8348 // If the operand is a float, integer, or vector constant, spill to a
8349 // constant pool entry to get its address.
8350 const Value *OpVal = OpInfo.CallOperandVal;
8351 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
8352 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
8353 OpInfo.CallOperand = DAG.getConstantPool(
8354 cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
8355 return Chain;
8358 // Otherwise, create a stack slot and emit a store to it before the asm.
8359 Type *Ty = OpVal->getType();
8360 auto &DL = DAG.getDataLayout();
8361 uint64_t TySize = DL.getTypeAllocSize(Ty);
8362 MachineFunction &MF = DAG.getMachineFunction();
8363 int SSFI = MF.getFrameInfo().CreateStackObject(
8364 TySize, DL.getPrefTypeAlign(Ty), false);
8365 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
8366 Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
8367 MachinePointerInfo::getFixedStack(MF, SSFI),
8368 TLI.getMemValueType(DL, Ty));
8369 OpInfo.CallOperand = StackSlot;
8371 return Chain;
8374 /// GetRegistersForValue - Assign registers (virtual or physical) for the
8375 /// specified operand. We prefer to assign virtual registers, to allow the
8376 /// register allocator to handle the assignment process. However, if the asm
8377 /// uses features that we can't model on machineinstrs, we have SDISel do the
8378 /// allocation. This produces generally horrible, but correct, code.
8380 /// OpInfo describes the operand
8381 /// RefOpInfo describes the matching operand if any, the operand otherwise
8382 static llvm::Optional<unsigned>
8383 getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
8384 SDISelAsmOperandInfo &OpInfo,
8385 SDISelAsmOperandInfo &RefOpInfo) {
8386 LLVMContext &Context = *DAG.getContext();
8387 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8389 MachineFunction &MF = DAG.getMachineFunction();
8390 SmallVector<unsigned, 4> Regs;
8391 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8393 // No work to do for memory operations.
8394 if (OpInfo.ConstraintType == TargetLowering::C_Memory)
8395 return None;
8397 // If this is a constraint for a single physreg, or a constraint for a
8398 // register class, find it.
8399 unsigned AssignedReg;
8400 const TargetRegisterClass *RC;
8401 std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
8402 &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
8403 // RC is unset only on failure. Return immediately.
8404 if (!RC)
8405 return None;
8407 // Get the actual register value type. This is important, because the user
8408 // may have asked for (e.g.) the AX register in i32 type. We need to
8409 // remember that AX is actually i16 to get the right extension.
8410 const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
8412 if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
8413 // If this is an FP operand in an integer register (or vice versa), or more
8414 // generally if the operand value disagrees with the register class we plan
8415 // to stick it in, fix the operand type.
8417 // If this is an input value, the bitcast to the new type is done now.
8418 // Bitcast for output value is done at the end of visitInlineAsm().
8419 if ((OpInfo.Type == InlineAsm::isOutput ||
8420 OpInfo.Type == InlineAsm::isInput) &&
8421 !TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
8422 // Try to convert to the first EVT that the reg class contains. If the
8423 // types are identical size, use a bitcast to convert (e.g. two differing
8424 // vector types). Note: output bitcast is done at the end of
8425 // visitInlineAsm().
8426 if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
8427 // Exclude indirect inputs while they are unsupported because the code
8428 // to perform the load is missing and thus OpInfo.CallOperand still
8429 // refers to the input address rather than the pointed-to value.
8430 if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
8431 OpInfo.CallOperand =
8432 DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
8433 OpInfo.ConstraintVT = RegVT;
8434 // If the operand is an FP value and we want it in integer registers,
8435 // use the corresponding integer type. This turns an f64 value into
8436 // i64, which can be passed with two i32 values on a 32-bit machine.
8437 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
8438 MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
8439 if (OpInfo.Type == InlineAsm::isInput)
8440 OpInfo.CallOperand =
8441 DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
8442 OpInfo.ConstraintVT = VT;
8447 // No need to allocate a matching input constraint since the constraint it's
8448 // matching to has already been allocated.
8449 if (OpInfo.isMatchingInputConstraint())
8450 return None;
8452 EVT ValueVT = OpInfo.ConstraintVT;
8453 if (OpInfo.ConstraintVT == MVT::Other)
8454 ValueVT = RegVT;
8456 // Initialize NumRegs.
8457 unsigned NumRegs = 1;
8458 if (OpInfo.ConstraintVT != MVT::Other)
8459 NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
8461 // If this is a constraint for a specific physical register, like {r17},
8462 // assign it now.
8464 // If this is associated with a specific register, initialize the iterator to
8465 // the correct place. If virtual, make sure we have enough registers.
8467 // Initialize iterator if necessary
8468 TargetRegisterClass::iterator I = RC->begin();
8469 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8471 // Do not check for single registers.
8472 if (AssignedReg) {
8473 I = std::find(I, RC->end(), AssignedReg);
8474 if (I == RC->end()) {
8475 // RC does not contain the selected register, which indicates a
8476 // mismatch between the register and the required type/bitwidth.
8477 return {AssignedReg};
8481 for (; NumRegs; --NumRegs, ++I) {
8482 assert(I != RC->end() && "Ran out of registers to allocate!");
8483 Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
8484 Regs.push_back(R);
8487 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
8488 return None;
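/// Return the index into \p AsmNodeOperands of the flag word describing inline
/// asm operand \p OperandNo, whose definition has already been emitted.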
8491 static unsigned
8492 findMatchingInlineAsmOperand(unsigned OperandNo,
8493 const std::vector<SDValue> &AsmNodeOperands) {
8494 // Scan until we find the definition we already emitted of this operand.
8495 unsigned CurOp = InlineAsm::Op_FirstOperand;
8496 for (; OperandNo; --OperandNo) {
8497 // Advance to the next operand.
8498 unsigned OpFlag =
8499 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
8500 assert((InlineAsm::isRegDefKind(OpFlag) ||
8501 InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
8502 InlineAsm::isMemKind(OpFlag)) &&
8503 "Skipped past definitions?");
8504 CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
8506 return CurOp;
8509 namespace {
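/// Accumulates the extra operand flags (HasSideEffects, IsAlignStack,
/// AsmDialect, MayLoad, MayStore) attached to an INLINEASM node.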
8511 class ExtraFlags {
8512 unsigned Flags = 0;
8514 public:
8515 explicit ExtraFlags(const CallBase &Call) {
8516 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
8517 if (IA->hasSideEffects())
8518 Flags |= InlineAsm::Extra_HasSideEffects;
8519 if (IA->isAlignStack())
8520 Flags |= InlineAsm::Extra_IsAlignStack;
8521 if (Call.isConvergent())
8522 Flags |= InlineAsm::Extra_IsConvergent;
8523 Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
8526 void update(const TargetLowering::AsmOperandInfo &OpInfo) {
8527 // Ideally, we would only check against memory constraints. However, the
8528 // meaning of an Other constraint can be target-specific and we can't easily
8529 // reason about it. Therefore, be conservative and set MayLoad/MayStore
8530 // for Other constraints as well.
8531 if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
8532 OpInfo.ConstraintType == TargetLowering::C_Other) {
8533 if (OpInfo.Type == InlineAsm::isInput)
8534 Flags |= InlineAsm::Extra_MayLoad;
8535 else if (OpInfo.Type == InlineAsm::isOutput)
8536 Flags |= InlineAsm::Extra_MayStore;
8537 else if (OpInfo.Type == InlineAsm::isClobber)
8538 Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
8542 unsigned get() const { return Flags; }
8545 } // end anonymous namespace
8547 /// visitInlineAsm - Handle a call to an InlineAsm object.
8548 void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
8549 const BasicBlock *EHPadBB) {
8550 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
8552 /// ConstraintOperands - Information about all of the constraints.
8553 SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
8555 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8556 TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
8557 DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
8559 // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
8560 // AsmDialect, MayLoad, MayStore).
8561 bool HasSideEffect = IA->hasSideEffects();
8562 ExtraFlags ExtraInfo(Call);
8564 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
8565 unsigned ResNo = 0; // ResNo - The result number of the next output.
8566 for (auto &T : TargetConstraints) {
8567 ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
8568 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
8570 // Compute the value type for each operand.
8571 if (OpInfo.hasArg()) {
8572 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
8573 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
8574 Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo);
8575 EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
8576 DAG.getDataLayout(), ParamElemTy);
8577 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
8578 ArgNo++;
8579 } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
8580 // The return value of the call is this value. As such, there is no
8581 // corresponding argument.
8582 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
8583 if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
8584 OpInfo.ConstraintVT = TLI.getSimpleValueType(
8585 DAG.getDataLayout(), STy->getElementType(ResNo));
8586 } else {
8587 assert(ResNo == 0 && "Asm only has one result!");
8588 OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
8589 DAG.getDataLayout(), Call.getType()).getSimpleVT();
8591 ++ResNo;
8592 } else {
8593 OpInfo.ConstraintVT = MVT::Other;
8596 if (!HasSideEffect)
8597 HasSideEffect = OpInfo.hasMemory(TLI);
8599 // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
8600 // FIXME: Could we compute this on OpInfo rather than T?
8602 // Compute the constraint code and ConstraintType to use.
8603 TLI.ComputeConstraintToUse(T, SDValue());
8605 if (T.ConstraintType == TargetLowering::C_Immediate &&
8606 OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
8607 // We've delayed emitting a diagnostic like we do for the "n" constraint
8608 // because inlining could cause an integer constant to show up.
8609 return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
8610 "' expects an integer constant "
8611 "expression");
8613 ExtraInfo.update(T);
8616 // We won't need to flush pending loads if this asm doesn't touch
8617 // memory and is nonvolatile.
8618 SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
8620 bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
8621 if (EmitEHLabels) {
8622 assert(EHPadBB && "InvokeInst must have an EHPadBB");
8624 bool IsCallBr = isa<CallBrInst>(Call);
8626 if (IsCallBr || EmitEHLabels) {
8627 // If this is a callbr or invoke we need to flush pending exports since
8628 // inlineasm_br and invoke are terminators.
8629 // We need to do this before nodes are glued to the inlineasm_br node.
8630 Chain = getControlRoot();
8633 MCSymbol *BeginLabel = nullptr;
8634 if (EmitEHLabels) {
8635 Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
8638 // Second pass over the constraints: compute which constraint option to use.
8639 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
8640 // If this is an output operand with a matching input operand, look up the
8641 // matching input. If their types mismatch, e.g. one is an integer, the
8642 // other is floating point, or their sizes are different, flag it as an
8643 // error.
8644 if (OpInfo.hasMatchingInput()) {
8645 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
8646 patchMatchingInput(OpInfo, Input, DAG);
8649 // Compute the constraint code and ConstraintType to use.
8650 TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
8652 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
8653 OpInfo.Type == InlineAsm::isClobber)
8654 continue;
8656 // If this is a memory input, and if the operand is not indirect, do what we
8657 // need to provide an address for the memory input.
8658 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
8659 !OpInfo.isIndirect) {
8660 assert((OpInfo.isMultipleAlternative ||
8661 (OpInfo.Type == InlineAsm::isInput)) &&
8662 "Can only indirectify direct input operands!");
8664 // Memory operands really want the address of the value.
8665 Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
8667 // There is no longer a Value* corresponding to this operand.
8668 OpInfo.CallOperandVal = nullptr;
8670 // It is now an indirect operand.
8671 OpInfo.isIndirect = true;
8676 // AsmNodeOperands - The operands for the ISD::INLINEASM node.
8677 std::vector<SDValue> AsmNodeOperands;
8678 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
8679 AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
8680 IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
8682 // If we have a !srcloc metadata node associated with it, we want to attach
8683 // this to the ultimately generated inline asm machineinstr. To do this, we
8684 // pass in the third operand as this (potentially null) inline asm MDNode.
8685 const MDNode *SrcLoc = Call.getMetadata("srcloc");
8686 AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
8688 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
8689 // bits as operand 3.
8690 AsmNodeOperands.push_back(DAG.getTargetConstant(
8691 ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
8693 // Third pass: Loop over operands to prepare DAG-level operands. As part of
8694 // this, assign virtual and physical registers for inputs and outputs.
8695 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
8696 // Assign Registers.
8697 SDISelAsmOperandInfo &RefOpInfo =
8698 OpInfo.isMatchingInputConstraint()
8699 ? ConstraintOperands[OpInfo.getMatchedOperand()]
8700 : OpInfo;
8701 const auto RegError =
8702 getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
8703 if (RegError.hasValue()) {
8704 const MachineFunction &MF = DAG.getMachineFunction();
8705 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8706 const char *RegName = TRI.getName(RegError.getValue());
8707 emitInlineAsmError(Call, "register '" + Twine(RegName) +
8708 "' allocated for constraint '" +
8709 Twine(OpInfo.ConstraintCode) +
8710 "' does not match required type");
8711 return;
8714 auto DetectWriteToReservedRegister = [&]() {
8715 const MachineFunction &MF = DAG.getMachineFunction();
8716 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8717 for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
8718 if (Register::isPhysicalRegister(Reg) &&
8719 TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
8720 const char *RegName = TRI.getName(Reg);
8721 emitInlineAsmError(Call, "write to reserved register '" +
8722 Twine(RegName) + "'");
8723 return true;
8726 return false;
8729 switch (OpInfo.Type) {
8730 case InlineAsm::isOutput:
8731 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
8732 unsigned ConstraintID =
8733 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
8734 assert(ConstraintID != InlineAsm::Constraint_Unknown &&
8735 "Failed to convert memory constraint code to constraint id.");
8737 // Add information to the INLINEASM node to know about this output.
8738 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
8739 OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
8740 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
8741 MVT::i32));
8742 AsmNodeOperands.push_back(OpInfo.CallOperand);
8743 } else {
8744 // Otherwise, this outputs to a register (directly for C_Register /
8745 // C_RegisterClass, and a target-defined fashion for
8746 // C_Immediate/C_Other). Find a register that we can use.
8747 if (OpInfo.AssignedRegs.Regs.empty()) {
8748 emitInlineAsmError(
8749 Call, "couldn't allocate output register for constraint '" +
8750 Twine(OpInfo.ConstraintCode) + "'");
8751 return;
8754 if (DetectWriteToReservedRegister())
8755 return;
8757 // Add information to the INLINEASM node to know that this register is
8758 // set.
8759 OpInfo.AssignedRegs.AddInlineAsmOperands(
8760 OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
8761 : InlineAsm::Kind_RegDef,
8762 false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
8764 break;
8766 case InlineAsm::isInput: {
8767 SDValue InOperandVal = OpInfo.CallOperand;
8769 if (OpInfo.isMatchingInputConstraint()) {
8770 // If this is required to match an output register we have already set,
8771 // just use its register.
8772 auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
8773 AsmNodeOperands);
8774 unsigned OpFlag =
8775 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
8776 if (InlineAsm::isRegDefKind(OpFlag) ||
8777 InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
8778 // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
8779 if (OpInfo.isIndirect) {
8780 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
8781 emitInlineAsmError(Call, "inline asm not supported yet: "
8782 "don't know how to handle tied "
8783 "indirect register inputs");
8784 return;
8787 SmallVector<unsigned, 4> Regs;
8788 MachineFunction &MF = DAG.getMachineFunction();
8789 MachineRegisterInfo &MRI = MF.getRegInfo();
8790 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8791 auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
8792 Register TiedReg = R->getReg();
8793 MVT RegVT = R->getSimpleValueType(0);
8794 const TargetRegisterClass *RC =
8795 TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
8796 : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
8797 : TRI.getMinimalPhysRegClass(TiedReg);
8798 unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
8799 for (unsigned i = 0; i != NumRegs; ++i)
8800 Regs.push_back(MRI.createVirtualRegister(RC));
8802 RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
8804 SDLoc dl = getCurSDLoc();
8805 // Use the produced MatchedRegs object to copy the input value into the matched registers.
8806 MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
8807 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
8808 true, OpInfo.getMatchedOperand(), dl,
8809 DAG, AsmNodeOperands);
8810 break;
8813 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
8814 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
8815 "Unexpected number of operands");
8816 // Add information to the INLINEASM node to know about this input.
8817 // See InlineAsm.h isUseOperandTiedToDef.
8818 OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
8819 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
8820 OpInfo.getMatchedOperand());
8821 AsmNodeOperands.push_back(DAG.getTargetConstant(
8822 OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
8823 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
8824 break;
8827 // Treat indirect 'X' constraint as memory.
8828 if (OpInfo.ConstraintType == TargetLowering::C_Other &&
8829 OpInfo.isIndirect)
8830 OpInfo.ConstraintType = TargetLowering::C_Memory;
8832 if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
8833 OpInfo.ConstraintType == TargetLowering::C_Other) {
8834 std::vector<SDValue> Ops;
8835 TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
8836 Ops, DAG);
8837 if (Ops.empty()) {
8838 if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
8839 if (isa<ConstantSDNode>(InOperandVal)) {
8840 emitInlineAsmError(Call, "value out of range for constraint '" +
8841 Twine(OpInfo.ConstraintCode) + "'");
8842 return;
8845 emitInlineAsmError(Call,
8846 "invalid operand for inline asm constraint '" +
8847 Twine(OpInfo.ConstraintCode) + "'");
8848 return;
8851 // Add information to the INLINEASM node to know about this input.
8852 unsigned ResOpType =
8853 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
8854 AsmNodeOperands.push_back(DAG.getTargetConstant(
8855 ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
8856 llvm::append_range(AsmNodeOperands, Ops);
8857 break;
8860 if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
8861 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
8862 assert(InOperandVal.getValueType() ==
8863 TLI.getPointerTy(DAG.getDataLayout()) &&
8864 "Memory operands expect pointer values");
8866 unsigned ConstraintID =
8867 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
8868 assert(ConstraintID != InlineAsm::Constraint_Unknown &&
8869 "Failed to convert memory constraint code to constraint id.");
8871 // Add information to the INLINEASM node to know about this input.
8872 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
8873 ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
8874 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
8875 getCurSDLoc(),
8876 MVT::i32));
8877 AsmNodeOperands.push_back(InOperandVal);
8878 break;
8881 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
8882 OpInfo.ConstraintType == TargetLowering::C_Register) &&
8883 "Unknown constraint type!");
8885 // TODO: Support this.
8886 if (OpInfo.isIndirect) {
8887 emitInlineAsmError(
8888 Call, "Don't know how to handle indirect register inputs yet "
8889 "for constraint '" +
8890 Twine(OpInfo.ConstraintCode) + "'");
8891 return;
8894 // Copy the input into the appropriate registers.
8895 if (OpInfo.AssignedRegs.Regs.empty()) {
8896 emitInlineAsmError(Call,
8897 "couldn't allocate input reg for constraint '" +
8898 Twine(OpInfo.ConstraintCode) + "'");
8899 return;
8902 if (DetectWriteToReservedRegister())
8903 return;
8905 SDLoc dl = getCurSDLoc();
8907 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
8908 &Call);
8910 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
8911 dl, DAG, AsmNodeOperands);
8912 break;
8914 case InlineAsm::isClobber:
8915 // Add the clobbered value to the operand list, so that the register
8916 // allocator is aware that the physreg got clobbered.
8917 if (!OpInfo.AssignedRegs.Regs.empty())
8918 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
8919 false, 0, getCurSDLoc(), DAG,
8920 AsmNodeOperands);
8921 break;
8925 // Finish up input operands. Set the input chain and add the flag last.
8926 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
8927 if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
8929 unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
8930 Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
8931 DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
8932 Flag = Chain.getValue(1);
8934 // Do additional work to generate outputs.
8936 SmallVector<EVT, 1> ResultVTs;
8937 SmallVector<SDValue, 1> ResultValues;
8938 SmallVector<SDValue, 8> OutChains;
8940 llvm::Type *CallResultType = Call.getType();
8941 ArrayRef<Type *> ResultTypes;
8942 if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
8943 ResultTypes = StructResult->elements();
8944 else if (!CallResultType->isVoidTy())
8945 ResultTypes = makeArrayRef(CallResultType);
8947 auto CurResultType = ResultTypes.begin();
8948 auto handleRegAssign = [&](SDValue V) {
8949 assert(CurResultType != ResultTypes.end() && "Unexpected value");
8950 assert((*CurResultType)->isSized() && "Unexpected unsized type");
8951 EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
8952 ++CurResultType;
8953 // If the type of the inline asm call site return value is different but has
8954 // the same size as the type of the asm output, bitcast it. One example of this
8955 // is for vectors with different width / number of elements. This can
8956 // happen for register classes that can contain multiple different value
8957 // types. The preg or vreg allocated may not have the same VT as was
8958 // expected.
8960 // This can also happen for a return value that disagrees with the register
8961 // class it is put in, eg. a double in a general-purpose register on a
8962 // 32-bit machine.
8963 if (ResultVT != V.getValueType() &&
8964 ResultVT.getSizeInBits() == V.getValueSizeInBits())
8965 V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
8966 else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
8967 V.getValueType().isInteger()) {
8968 // If a result value was tied to an input value, the computed result
8969 // may have a wider width than the expected result. Extract the
8970 // relevant portion.
8971 V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
8973 assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
8974 ResultVTs.push_back(ResultVT);
8975 ResultValues.push_back(V);
8978 // Deal with output operands.
8979 for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
8980 if (OpInfo.Type == InlineAsm::isOutput) {
8981 SDValue Val;
8982 // Skip trivial output operands.
8983 if (OpInfo.AssignedRegs.Regs.empty())
8984 continue;
8986 switch (OpInfo.ConstraintType) {
8987 case TargetLowering::C_Register:
8988 case TargetLowering::C_RegisterClass:
8989 Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
8990 Chain, &Flag, &Call);
8991 break;
8992 case TargetLowering::C_Immediate:
8993 case TargetLowering::C_Other:
8994 Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
8995 OpInfo, DAG);
8996 break;
8997 case TargetLowering::C_Memory:
8998 break; // Already handled.
8999 case TargetLowering::C_Unknown:
9000 assert(false && "Unexpected unknown constraint");
9003 // Indirect outputs manifest as stores. Record output chains.
9004 if (OpInfo.isIndirect) {
9005 const Value *Ptr = OpInfo.CallOperandVal;
9006 assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
9007 SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
9008 MachinePointerInfo(Ptr));
9009 OutChains.push_back(Store);
9010 } else {
9011 // Generate CopyFromRegs to associated registers.
9012 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
9013 if (Val.getOpcode() == ISD::MERGE_VALUES) {
9014 for (const SDValue &V : Val->op_values())
9015 handleRegAssign(V);
9016 } else
9017 handleRegAssign(Val);
9022 // Set results.
9023 if (!ResultValues.empty()) {
9024 assert(CurResultType == ResultTypes.end() &&
9025 "Mismatch in number of ResultTypes");
9026 assert(ResultValues.size() == ResultTypes.size() &&
9027 "Mismatch in number of output operands in asm result");
9029 SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
9030 DAG.getVTList(ResultVTs), ResultValues);
9031 setValue(&Call, V);
9034 // Collect store chains.
9035 if (!OutChains.empty())
9036 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
9038 if (EmitEHLabels) {
9039 Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
9042 // Only update the root if the inline assembly has a memory effect.
9043 if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
9044 EmitEHLabels)
9045 DAG.setRoot(Chain);
9048 void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
9049 const Twine &Message) {
9050 LLVMContext &Ctx = *DAG.getContext();
9051 Ctx.emitError(&Call, Message);
9053 // Make sure we leave the DAG in a valid state
9054 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9055 SmallVector<EVT, 1> ValueVTs;
9056 ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
9058 if (ValueVTs.empty())
9059 return;
9061 SmallVector<SDValue, 1> Ops;
9062 for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
9063 Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
9065 setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
9068 void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
9069 DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
9070 MVT::Other, getRoot(),
9071 getValue(I.getArgOperand(0)),
9072 DAG.getSrcValue(I.getArgOperand(0))));
9075 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
9076 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9077 const DataLayout &DL = DAG.getDataLayout();
9078 SDValue V = DAG.getVAArg(
9079 TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
9080 getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
9081 DL.getABITypeAlign(I.getType()).value());
9082 DAG.setRoot(V.getValue(1));
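// The va_arg value is produced with its in-memory type; a pointer result may
// need to be extended or truncated to the pointer value type.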
9084 if (I.getType()->isPointerTy())
9085 V = DAG.getPtrExtOrTrunc(
9086 V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
9087 setValue(&I, V);
9090 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
9091 DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
9092 MVT::Other, getRoot(),
9093 getValue(I.getArgOperand(0)),
9094 DAG.getSrcValue(I.getArgOperand(0))));
9097 void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
9098 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
9099 MVT::Other, getRoot(),
9100 getValue(I.getArgOperand(0)),
9101 getValue(I.getArgOperand(1)),
9102 DAG.getSrcValue(I.getArgOperand(0)),
9103 DAG.getSrcValue(I.getArgOperand(1))));
9106 SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
9107 const Instruction &I,
9108 SDValue Op) {
9109 const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
9110 if (!Range)
9111 return Op;
9113 ConstantRange CR = getConstantRangeFromMetadata(*Range);
9114 if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
9115 return Op;
9117 APInt Lo = CR.getUnsignedMin();
9118 if (!Lo.isMinValue())
9119 return Op;
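// The range starts at zero, so the value is known to fit in the active bits
// of the unsigned maximum; assert zero-extension from that width.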
9121 APInt Hi = CR.getUnsignedMax();
9122 unsigned Bits = std::max(Hi.getActiveBits(),
9123 static_cast<unsigned>(IntegerType::MIN_INT_BITS));
9125 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
9127 SDLoc SL = getCurSDLoc();
9129 SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
9130 DAG.getValueType(SmallVT));
9131 unsigned NumVals = Op.getNode()->getNumValues();
9132 if (NumVals == 1)
9133 return ZExt;
9135 SmallVector<SDValue, 4> Ops;
9137 Ops.push_back(ZExt);
9138 for (unsigned I = 1; I != NumVals; ++I)
9139 Ops.push_back(Op.getValue(I));
9141 return DAG.getMergeValues(Ops, SL);
9144 /// Populate a CallLoweringInfo (into \p CLI) based on the properties of
9145 /// the call being lowered.
9147 /// This is a helper for lowering intrinsics that follow a target calling
9148 /// convention or require stack pointer adjustment. Only a subset of the
9149 /// intrinsic's operands need to participate in the calling convention.
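/// For example, visitPatchpoint (below) lowers only the actual call arguments
/// through this helper, skipping the patchpoint meta-operands.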
9150 void SelectionDAGBuilder::populateCallLoweringInfo(
9151 TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
9152 unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
9153 bool IsPatchPoint) {
9154 TargetLowering::ArgListTy Args;
9155 Args.reserve(NumArgs);
9157 // Populate the argument list.
9158 // Attributes for args start at offset 1, after the return attribute.
9159 for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
9160 ArgI != ArgE; ++ArgI) {
9161 const Value *V = Call->getOperand(ArgI);
9163 assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
9165 TargetLowering::ArgListEntry Entry;
9166 Entry.Node = getValue(V);
9167 Entry.Ty = V->getType();
9168 Entry.setAttributes(Call, ArgI);
9169 Args.push_back(Entry);
9172 CLI.setDebugLoc(getCurSDLoc())
9173 .setChain(getRoot())
9174 .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
9175 .setDiscardResult(Call->use_empty())
9176 .setIsPatchPoint(IsPatchPoint)
9177 .setIsPreallocated(
9178 Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
9181 /// Add a stack map intrinsic call's live variable operands to a stackmap
9182 /// or patchpoint target node's operand list.
9184 /// Constants are converted to TargetConstants purely as an optimization to
9185 /// avoid constant materialization and register allocation.
9187 /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
9188 /// generate address computation nodes, and so FinalizeISel can convert the
9189 /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
9190 /// address materialization and register allocation, but may also be required
9191 /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
9192 /// alloca in the entry block, then the runtime may assume that the alloca's
9193 /// StackMap location can be read immediately after compilation and that the
9194 /// location is valid at any point during execution (this is similar to the
9195 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
9196 /// only available in a register, then the runtime would need to trap when
9197 /// execution reaches the StackMap in order to read the alloca's location.
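/// For example, a live value that is the constant 42 is encoded as the
/// operand pair <ConstantOp, 42>, while an entry-block alloca becomes a
/// TargetFrameIndex that FinalizeISel can rewrite into a DirectMemRefOp
/// stackmap location.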
9198 static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
9199 const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
9200 SelectionDAGBuilder &Builder) {
9201 for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) {
9202 SDValue OpVal = Builder.getValue(Call.getArgOperand(i));
9203 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
9204 Ops.push_back(
9205 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
9206 Ops.push_back(
9207 Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
9208 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
9209 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
9210 Ops.push_back(Builder.DAG.getTargetFrameIndex(
9211 FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
9212 } else
9213 Ops.push_back(OpVal);
9217 /// Lower llvm.experimental.stackmap directly to its target opcode.
9218 void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
9219 // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
9220 // [live variables...])
9222 assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
9224 SDValue Chain, InFlag, Callee, NullPtr;
9225 SmallVector<SDValue, 32> Ops;
9227 SDLoc DL = getCurSDLoc();
9228 Callee = getValue(CI.getCalledOperand());
9229 NullPtr = DAG.getIntPtrConstant(0, DL, true);
9231 // The stackmap intrinsic only records the live variables (the arguments
9232 // passed to it) and emits NOPs (if requested). Unlike the patchpoint
9233 // intrinsic, this won't be lowered to a function call. This means we don't
9234 // have to worry about calling conventions and target specific lowering code.
9235 // Instead we perform the call lowering right here.
9237 // chain, flag = CALLSEQ_START(chain, 0, 0)
9238 // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
9239 // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
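// Illustrative example:
//   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %a, i64 %b)
// records %a and %b as live values for patch ID 42 and requests an 8-byte
// shadow; no actual call is emitted.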
9241 Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
9242 InFlag = Chain.getValue(1);
9244 // Add the <id> and <numBytes> constants.
9245 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
9246 Ops.push_back(DAG.getTargetConstant(
9247 cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
9248 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
9249 Ops.push_back(DAG.getTargetConstant(
9250 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
9251 MVT::i32));
9253 // Push live variables for the stack map.
9254 addStackMapLiveVars(CI, 2, DL, Ops, *this);
9256 // We are not pushing any register mask info here on the operands list,
9257 // because the stackmap doesn't clobber anything.
9259 // Push the chain and the glue flag.
9260 Ops.push_back(Chain);
9261 Ops.push_back(InFlag);
9263 // Create the STACKMAP node.
9264 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9265 SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
9266 Chain = SDValue(SM, 0);
9267 InFlag = Chain.getValue(1);
9269 Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
9271 // Stackmaps don't generate values, so nothing goes into the NodeMap.
9273 // Set the root to the target-lowered call chain.
9274 DAG.setRoot(Chain);
9276 // Inform the Frame Information that we have a stackmap in this function.
9277 FuncInfo.MF->getFrameInfo().setHasStackMap();
9280 /// Lower llvm.experimental.patchpoint directly to its target opcode.
9281 void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
9282 const BasicBlock *EHPadBB) {
9283 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
9284 // i32 <numBytes>,
9285 // i8* <target>,
9286 // i32 <numArgs>,
9287 // [Args...],
9288 // [live variables...])
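// Illustrative example:
//   call void @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* %f,
//                                                i32 2, i64 %x, i64 %y)
// reserves 12 bytes of patchable code for patch ID 3 and calls %f with the
// two arguments %x and %y.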
9290 CallingConv::ID CC = CB.getCallingConv();
9291 bool IsAnyRegCC = CC == CallingConv::AnyReg;
9292 bool HasDef = !CB.getType()->isVoidTy();
9293 SDLoc dl = getCurSDLoc();
9294 SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
9296 // Handle immediate and symbolic callees.
9297 if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
9298 Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
9299 /*isTarget=*/true);
9300 else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
9301 Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
9302 SDLoc(SymbolicCallee),
9303 SymbolicCallee->getValueType(0));
9305 // Get the real number of arguments participating in the call <numArgs>
9306 SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
9307 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
9309 // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
9310 // Intrinsics include all meta-operands up to but not including CC.
9311 unsigned NumMetaOpers = PatchPointOpers::CCPos;
9312 assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
9313 "Not enough arguments provided to the patchpoint intrinsic");
9315 // For AnyRegCC the arguments are lowered later on manually.
9316 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
9317 Type *ReturnTy =
9318 IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
9320 TargetLowering::CallLoweringInfo CLI(DAG);
9321 populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
9322 ReturnTy, true);
9323 std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
9325 SDNode *CallEnd = Result.second.getNode();
9326 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
9327 CallEnd = CallEnd->getOperand(0).getNode();
9329 // Get a call instruction from the call sequence chain.
9330 // Tail calls are not allowed.
9331 assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
9332 "Expected a callseq node.");
9333 SDNode *Call = CallEnd->getOperand(0).getNode();
9334 bool HasGlue = Call->getGluedNode();
9336 // Replace the target specific call node with the patchable intrinsic.
9337 SmallVector<SDValue, 8> Ops;
9339 // Add the <id> and <numBytes> constants.
9340 SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
9341 Ops.push_back(DAG.getTargetConstant(
9342 cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
9343 SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
9344 Ops.push_back(DAG.getTargetConstant(
9345 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
9346 MVT::i32));
9348 // Add the callee.
9349 Ops.push_back(Callee);
9351 // Adjust <numArgs> to account for any arguments that have been passed on the
9352 // stack instead.
9353 // Call Node: Chain, Target, {Args}, RegMask, [Glue]
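// Subtracting 3 (or 4 when glued) removes the Chain, Target and RegMask
// (and Glue) operands, leaving just the register arguments.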
9354 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
9355 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
9356 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
9358 // Add the calling convention
9359 Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
9361 // Add the arguments we omitted previously. The register allocator should
9362 // place these in any free register.
9363 if (IsAnyRegCC)
9364 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
9365 Ops.push_back(getValue(CB.getArgOperand(i)));
9367 // Push the arguments from the call instruction up to the register mask.
9368 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
9369 Ops.append(Call->op_begin() + 2, e);
9371 // Push live variables for the stack map.
9372 addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
9374 // Push the register mask info.
9375 if (HasGlue)
9376 Ops.push_back(*(Call->op_end()-2));
9377 else
9378 Ops.push_back(*(Call->op_end()-1));
9380 // Push the chain (this is originally the first operand of the call, but
9381 // now becomes the last or second-to-last operand).
9382 Ops.push_back(*(Call->op_begin()));
9384 // Push the glue flag (last operand).
9385 if (HasGlue)
9386 Ops.push_back(*(Call->op_end()-1));
9388 SDVTList NodeTys;
9389 if (IsAnyRegCC && HasDef) {
9390 // Create the return types based on the intrinsic definition
9391 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9392 SmallVector<EVT, 3> ValueVTs;
9393 ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
9394 assert(ValueVTs.size() == 1 && "Expected only one return value type.");
9396 // There is always a chain and a glue type at the end
9397 ValueVTs.push_back(MVT::Other);
9398 ValueVTs.push_back(MVT::Glue);
9399 NodeTys = DAG.getVTList(ValueVTs);
9400 } else
9401 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9403 // Replace the target specific call node with a PATCHPOINT node.
9404 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
9405 dl, NodeTys, Ops);
9407 // Update the NodeMap.
9408 if (HasDef) {
9409 if (IsAnyRegCC)
9410 setValue(&CB, SDValue(MN, 0));
9411 else
9412 setValue(&CB, Result.first);
9415 // Fixup the consumers of the intrinsic. The chain and glue may be used in the
9416 // call sequence. Furthermore the location of the chain and glue can change
9417 // when the AnyReg calling convention is used and the intrinsic returns a
9418 // value.
9419 if (IsAnyRegCC && HasDef) {
9420 SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
9421 SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
9422 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
9423 } else
9424 DAG.ReplaceAllUsesWith(Call, MN);
9425 DAG.DeleteNode(Call);
9427 // Inform the Frame Information that we have a patchpoint in this function.
9428 FuncInfo.MF->getFrameInfo().setHasPatchPoint();
9431 void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
9432 unsigned Intrinsic) {
9433 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9434 SDValue Op1 = getValue(I.getArgOperand(0));
9435 SDValue Op2;
9436 if (I.arg_size() > 1)
9437 Op2 = getValue(I.getArgOperand(1));
9438 SDLoc dl = getCurSDLoc();
9439 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
9440 SDValue Res;
9441 SDNodeFlags SDFlags;
9442 if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
9443 SDFlags.copyFMF(*FPMO);
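// For fadd/fmul, the 'reassoc' flag allows an unordered reduction with the
// scalar start value (Op1) applied afterwards; otherwise a sequential
// in-order reduction must be used.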
9445 switch (Intrinsic) {
9446 case Intrinsic::vector_reduce_fadd:
9447 if (SDFlags.hasAllowReassociation())
9448 Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
9449 DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
9450 SDFlags);
9451 else
9452 Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
9453 break;
9454 case Intrinsic::vector_reduce_fmul:
9455 if (SDFlags.hasAllowReassociation())
9456 Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
9457 DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
9458 SDFlags);
9459 else
9460 Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
9461 break;
9462 case Intrinsic::vector_reduce_add:
9463 Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
9464 break;
9465 case Intrinsic::vector_reduce_mul:
9466 Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
9467 break;
9468 case Intrinsic::vector_reduce_and:
9469 Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
9470 break;
9471 case Intrinsic::vector_reduce_or:
9472 Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
9473 break;
9474 case Intrinsic::vector_reduce_xor:
9475 Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
9476 break;
9477 case Intrinsic::vector_reduce_smax:
9478 Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
9479 break;
9480 case Intrinsic::vector_reduce_smin:
9481 Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
9482 break;
9483 case Intrinsic::vector_reduce_umax:
9484 Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
9485 break;
9486 case Intrinsic::vector_reduce_umin:
9487 Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
9488 break;
9489 case Intrinsic::vector_reduce_fmax:
9490 Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
9491 break;
9492 case Intrinsic::vector_reduce_fmin:
9493 Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
9494 break;
9495 default:
9496 llvm_unreachable("Unhandled vector reduce intrinsic");
9498 setValue(&I, Res);
9501 /// Returns an AttributeList representing the attributes applied to the return
9502 /// value of the given call.
9503 static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
9504 SmallVector<Attribute::AttrKind, 2> Attrs;
9505 if (CLI.RetSExt)
9506 Attrs.push_back(Attribute::SExt);
9507 if (CLI.RetZExt)
9508 Attrs.push_back(Attribute::ZExt);
9509 if (CLI.IsInReg)
9510 Attrs.push_back(Attribute::InReg);
9512 return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
9513 Attrs);
9516 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
9517 /// implementation, which just calls LowerCall.
9518 /// FIXME: When all targets are
9519 /// migrated to using LowerCall, this hook should be integrated into SDISel.
9520 std::pair<SDValue, SDValue>
9521 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
9522 // Handle the incoming return values from the call.
9523 CLI.Ins.clear();
9524 Type *OrigRetTy = CLI.RetTy;
9525 SmallVector<EVT, 4> RetTys;
9526 SmallVector<uint64_t, 4> Offsets;
9527 auto &DL = CLI.DAG.getDataLayout();
9528 ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
9530 if (CLI.IsPostTypeLegalization) {
9531 // If we are lowering a libcall after legalization, split the return type.
9532 SmallVector<EVT, 4> OldRetTys;
9533 SmallVector<uint64_t, 4> OldOffsets;
9534 RetTys.swap(OldRetTys);
9535 Offsets.swap(OldOffsets);
9537 for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
9538 EVT RetVT = OldRetTys[i];
9539 uint64_t Offset = OldOffsets[i];
9540 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
9541 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
9542 unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
9543 RetTys.append(NumRegs, RegisterVT);
9544 for (unsigned j = 0; j != NumRegs; ++j)
9545 Offsets.push_back(Offset + j * RegisterVTByteSZ);
9549 SmallVector<ISD::OutputArg, 4> Outs;
9550 GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
9552 bool CanLowerReturn =
9553 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
9554 CLI.IsVarArg, Outs, CLI.RetTy->getContext());
9556 SDValue DemoteStackSlot;
9557 int DemoteStackIdx = -100;
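// If the target cannot return this value in registers, demote the return:
// pass a hidden sret pointer to a fresh stack slot and load the results back
// out of it after the call.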
9558 if (!CanLowerReturn) {
9559 // FIXME: equivalent assert?
9560 // assert(!CS.hasInAllocaArgument() &&
9561 // "sret demotion is incompatible with inalloca");
9562 uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
9563 Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
9564 MachineFunction &MF = CLI.DAG.getMachineFunction();
9565 DemoteStackIdx =
9566 MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
9567 Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
9568 DL.getAllocaAddrSpace());
9570 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
9571 ArgListEntry Entry;
9572 Entry.Node = DemoteStackSlot;
9573 Entry.Ty = StackSlotPtrType;
9574 Entry.IsSExt = false;
9575 Entry.IsZExt = false;
9576 Entry.IsInReg = false;
9577 Entry.IsSRet = true;
9578 Entry.IsNest = false;
9579 Entry.IsByVal = false;
9580 Entry.IsByRef = false;
9581 Entry.IsReturned = false;
9582 Entry.IsSwiftSelf = false;
9583 Entry.IsSwiftAsync = false;
9584 Entry.IsSwiftError = false;
9585 Entry.IsCFGuardTarget = false;
9586 Entry.Alignment = Alignment;
9587 CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
9588 CLI.NumFixedArgs += 1;
9589 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
9591 // sret demotion isn't compatible with tail-calls, since the sret argument
9592 // points into the caller's stack frame.
9593 CLI.IsTailCall = false;
9594 } else {
9595 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
9596 CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
9597 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
9598 ISD::ArgFlagsTy Flags;
9599 if (NeedsRegBlock) {
9600 Flags.setInConsecutiveRegs();
9601 if (I == RetTys.size() - 1)
9602 Flags.setInConsecutiveRegsLast();
9604 EVT VT = RetTys[I];
9605 MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
9606 CLI.CallConv, VT);
9607 unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
9608 CLI.CallConv, VT);
9609 for (unsigned i = 0; i != NumRegs; ++i) {
9610 ISD::InputArg MyFlags;
9611 MyFlags.Flags = Flags;
9612 MyFlags.VT = RegisterVT;
9613 MyFlags.ArgVT = VT;
9614 MyFlags.Used = CLI.IsReturnValueUsed;
9615 if (CLI.RetTy->isPointerTy()) {
9616 MyFlags.Flags.setPointer();
9617 MyFlags.Flags.setPointerAddrSpace(
9618 cast<PointerType>(CLI.RetTy)->getAddressSpace());
9620 if (CLI.RetSExt)
9621 MyFlags.Flags.setSExt();
9622 if (CLI.RetZExt)
9623 MyFlags.Flags.setZExt();
9624 if (CLI.IsInReg)
9625 MyFlags.Flags.setInReg();
9626 CLI.Ins.push_back(MyFlags);
9631 // We push the swifterror return as the last element of CLI.Ins.
9632 ArgListTy &Args = CLI.getArgs();
9633 if (supportSwiftError()) {
9634 for (const ArgListEntry &Arg : Args) {
9635 if (Arg.IsSwiftError) {
9636 ISD::InputArg MyFlags;
9637 MyFlags.VT = getPointerTy(DL);
9638 MyFlags.ArgVT = EVT(getPointerTy(DL));
9639 MyFlags.Flags.setSwiftError();
9640 CLI.Ins.push_back(MyFlags);
9645 // Handle all of the outgoing arguments.
9646 CLI.Outs.clear();
9647 CLI.OutVals.clear();
9648 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
9649 SmallVector<EVT, 4> ValueVTs;
9650 ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
9651 // FIXME: Split arguments if CLI.IsPostTypeLegalization
9652 Type *FinalType = Args[i].Ty;
9653 if (Args[i].IsByVal)
9654 FinalType = Args[i].IndirectType;
9655 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
9656 FinalType, CLI.CallConv, CLI.IsVarArg, DL);
9657 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
9658 ++Value) {
9659 EVT VT = ValueVTs[Value];
9660 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
9661 SDValue Op = SDValue(Args[i].Node.getNode(),
9662 Args[i].Node.getResNo() + Value);
9663 ISD::ArgFlagsTy Flags;
9665 // Certain targets (such as MIPS) may have a different ABI alignment
9666 // for a type depending on the context. Give the target a chance to
9667 // specify the alignment it wants.
9668 const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
9669 Flags.setOrigAlign(OriginalAlignment);
9671 if (Args[i].Ty->isPointerTy()) {
9672 Flags.setPointer();
9673 Flags.setPointerAddrSpace(
9674 cast<PointerType>(Args[i].Ty)->getAddressSpace());
9676 if (Args[i].IsZExt)
9677 Flags.setZExt();
9678 if (Args[i].IsSExt)
9679 Flags.setSExt();
9680 if (Args[i].IsInReg) {
9681 // If we are using the vectorcall calling convention, a structure that is
9682 // passed InReg is surely an HVA (homogeneous vector aggregate).
9683 if (CLI.CallConv == CallingConv::X86_VectorCall &&
9684 isa<StructType>(FinalType)) {
9685 // The first value of the structure is marked HvaStart; all values get Hva.
9686 if (0 == Value)
9687 Flags.setHvaStart();
9688 Flags.setHva();
9690 // Set InReg Flag
9691 Flags.setInReg();
9693 if (Args[i].IsSRet)
9694 Flags.setSRet();
9695 if (Args[i].IsSwiftSelf)
9696 Flags.setSwiftSelf();
9697 if (Args[i].IsSwiftAsync)
9698 Flags.setSwiftAsync();
9699 if (Args[i].IsSwiftError)
9700 Flags.setSwiftError();
9701 if (Args[i].IsCFGuardTarget)
9702 Flags.setCFGuardTarget();
9703 if (Args[i].IsByVal)
9704 Flags.setByVal();
9705 if (Args[i].IsByRef)
9706 Flags.setByRef();
9707 if (Args[i].IsPreallocated) {
9708 Flags.setPreallocated();
9709 // Set the byval flag for CCAssignFn callbacks that don't know about
9710 // preallocated. This way we can know how many bytes we should've
9711 // allocated and how many bytes a callee cleanup function will pop. If
9712 // we port preallocated to more targets, we'll have to add custom
9713 // preallocated handling in the various CC lowering callbacks.
9714 Flags.setByVal();
9716 if (Args[i].IsInAlloca) {
9717 Flags.setInAlloca();
9718 // Set the byval flag for CCAssignFn callbacks that don't know about
9719 // inalloca. This way we can know how many bytes we should've allocated
9720 // and how many bytes a callee cleanup function will pop. If we port
9721 // inalloca to more targets, we'll have to add custom inalloca handling
9722 // in the various CC lowering callbacks.
9723 Flags.setByVal();
9725 Align MemAlign;
9726 if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
9727 unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
9728 Flags.setByValSize(FrameSize);
9730 // Size and alignment should come from the FE; the BE will guess when this info is not there, but there are cases it cannot get right.
9731 if (auto MA = Args[i].Alignment)
9732 MemAlign = *MA;
9733 else
9734 MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
9735 } else if (auto MA = Args[i].Alignment) {
9736 MemAlign = *MA;
9737 } else {
9738 MemAlign = OriginalAlignment;
9740 Flags.setMemAlign(MemAlign);
9741 if (Args[i].IsNest)
9742 Flags.setNest();
9743 if (NeedsRegBlock)
9744 Flags.setInConsecutiveRegs();
9746 MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
9747 CLI.CallConv, VT);
9748 unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
9749 CLI.CallConv, VT);
9750 SmallVector<SDValue, 4> Parts(NumParts);
9751 ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
9753 if (Args[i].IsSExt)
9754 ExtendKind = ISD::SIGN_EXTEND;
9755 else if (Args[i].IsZExt)
9756 ExtendKind = ISD::ZERO_EXTEND;
9758 // Conservatively only handle 'returned' on non-vectors that can be lowered,
9759 // for now.
9760 if (Args[i].IsReturned && !Op.getValueType().isVector() &&
9761 CanLowerReturn) {
9762 assert((CLI.RetTy == Args[i].Ty ||
9763 (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
9764 CLI.RetTy->getPointerAddressSpace() ==
9765 Args[i].Ty->getPointerAddressSpace())) &&
9766 RetTys.size() == NumValues && "unexpected use of 'returned'");
9767 // Before passing 'returned' to the target lowering code, ensure that
9768 // either the register MVT and the actual EVT are the same size or that
9769 // the return value and argument are extended in the same way; in these
9770 // cases it's safe to pass the argument register value unchanged as the
9771 // return register value (although it's at the target's option whether
9772 // to do so)
9773 // TODO: allow code generation to take advantage of partially preserved
9774 // registers rather than clobbering the entire register when the
9775 // parameter extension method is not compatible with the return
9776 // extension method
9777 if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
9778 (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
9779 CLI.RetZExt == Args[i].IsZExt))
9780 Flags.setReturned();
9783 getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
9784 CLI.CallConv, ExtendKind);
9786 for (unsigned j = 0; j != NumParts; ++j) {
9787 // if it isn't first piece, alignment must be 1
9788 // For scalable vectors the scalable part is currently handled
9789 // by individual targets, so we just use the known minimum size here.
9790 ISD::OutputArg MyFlags(
9791 Flags, Parts[j].getValueType().getSimpleVT(), VT,
9792 i < CLI.NumFixedArgs, i,
9793 j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
9794 if (NumParts > 1 && j == 0)
9795 MyFlags.Flags.setSplit();
9796 else if (j != 0) {
9797 MyFlags.Flags.setOrigAlign(Align(1));
9798 if (j == NumParts - 1)
9799 MyFlags.Flags.setSplitEnd();
9802 CLI.Outs.push_back(MyFlags);
9803 CLI.OutVals.push_back(Parts[j]);
9806 if (NeedsRegBlock && Value == NumValues - 1)
9807 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
9811 SmallVector<SDValue, 4> InVals;
9812 CLI.Chain = LowerCall(CLI, InVals);
9814 // Update CLI.InVals to use outside of this function.
9815 CLI.InVals = InVals;
9817 // Verify that the target's LowerCall behaved as expected.
9818 assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
9819 "LowerCall didn't return a valid chain!");
9820 assert((!CLI.IsTailCall || InVals.empty()) &&
9821 "LowerCall emitted a return value for a tail call!");
9822 assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
9823 "LowerCall didn't emit the correct number of values!");
9825 // For a tail call, the return value is merely live-out and there aren't
9826 // any nodes in the DAG representing it. Return a special value to
9827 // indicate that a tail call has been emitted and no more Instructions
9828 // should be processed in the current block.
9829 if (CLI.IsTailCall) {
9830 CLI.DAG.setRoot(CLI.Chain);
9831 return std::make_pair(SDValue(), SDValue());
9834 #ifndef NDEBUG
9835 for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
9836 assert(InVals[i].getNode() && "LowerCall emitted a null value!");
9837 assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
9838 "LowerCall emitted a value with the wrong type!");
9840 #endif
9842 SmallVector<SDValue, 4> ReturnValues;
9843 if (!CanLowerReturn) {
9844 // The instruction result is the result of loading from the
9845 // hidden sret parameter.
9846 SmallVector<EVT, 1> PVTs;
9847 Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
9849 ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
9850 assert(PVTs.size() == 1 && "Pointers should fit in one register");
9851 EVT PtrVT = PVTs[0];
9853 unsigned NumValues = RetTys.size();
9854 ReturnValues.resize(NumValues);
9855 SmallVector<SDValue, 4> Chains(NumValues);
9857 // An aggregate return value cannot wrap around the address space, so
9858 // offsets to its parts don't wrap either.
9859 SDNodeFlags Flags;
9860 Flags.setNoUnsignedWrap(true);
9862 MachineFunction &MF = CLI.DAG.getMachineFunction();
9863 Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
9864 for (unsigned i = 0; i < NumValues; ++i) {
9865 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
9866 CLI.DAG.getConstant(Offsets[i], CLI.DL,
9867 PtrVT), Flags);
9868 SDValue L = CLI.DAG.getLoad(
9869 RetTys[i], CLI.DL, CLI.Chain, Add,
9870 MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
9871 DemoteStackIdx, Offsets[i]),
9872 HiddenSRetAlign);
9873 ReturnValues[i] = L;
9874 Chains[i] = L.getValue(1);
9877 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
9878 } else {
9879 // Collect the legal value parts into potentially illegal values
9880 // that correspond to the original function's return values.
9881 Optional<ISD::NodeType> AssertOp;
9882 if (CLI.RetSExt)
9883 AssertOp = ISD::AssertSext;
9884 else if (CLI.RetZExt)
9885 AssertOp = ISD::AssertZext;
9886 unsigned CurReg = 0;
9887 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
9888 EVT VT = RetTys[I];
9889 MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
9890 CLI.CallConv, VT);
9891 unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
9892 CLI.CallConv, VT);
9894 ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
9895 NumRegs, RegisterVT, VT, nullptr,
9896 CLI.CallConv, AssertOp));
9897 CurReg += NumRegs;
9900 // For a function returning void, there is no return value. We can't create
9901 // such a node, so we just return a null return value in that case; nothing
9902 // will actually look at the value anyway.
9903 if (ReturnValues.empty())
9904 return std::make_pair(SDValue(), CLI.Chain);
9907 SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
9908 CLI.DAG.getVTList(RetTys), ReturnValues);
9909 return std::make_pair(Res, CLI.Chain);
9912 /// Places new result values for the node in Results (their number
9913 /// and types must exactly match those of the original return values of
9914 /// the node), or leaves Results empty, which indicates that the node is not
9915 /// to be custom lowered after all.
9916 void TargetLowering::LowerOperationWrapper(SDNode *N,
9917 SmallVectorImpl<SDValue> &Results,
9918 SelectionDAG &DAG) const {
9919 SDValue Res = LowerOperation(SDValue(N, 0), DAG);
9921 if (!Res.getNode())
9922 return;
9924 // If the original node has one result, take the return value from
9925 // LowerOperation as is. It might not be result number 0.
9926 if (N->getNumValues() == 1) {
9927 Results.push_back(Res);
9928 return;
9931 // If the original node has multiple results, then the return node should
9932 // have the same number of results.
9933 assert((N->getNumValues() == Res->getNumValues()) &&
9934 "Lowering returned the wrong number of results!");
9936 // Place new result values based on N's result numbers.
9937 for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
9938 Results.push_back(Res.getValue(I));
9941 SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
9942 llvm_unreachable("LowerOperation not implemented for this target!");
9945 void
9946 SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
9947 SDValue Op = getNonRegisterValue(V);
9948 assert((Op.getOpcode() != ISD::CopyFromReg ||
9949 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
9950 "Copy from a reg to the same reg!");
9951 assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
9953 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9954 // If this is an InlineAsm we have to match the registers required, not the
9955 // notional registers required by the type.
9957 RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
9958 None); // This is not an ABI copy.
9959 SDValue Chain = DAG.getEntryNode();
9961 ISD::NodeType ExtendType = ISD::ANY_EXTEND;
9962 auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
9963 if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
9964 ExtendType = PreferredExtendIt->second;
9965 RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
9966 PendingExports.push_back(Chain);
9969 #include "llvm/CodeGen/SelectionDAGISel.h"
9971 /// isOnlyUsedInEntryBlock - If the specified argument is only used in the
9972 /// entry block, return true. This includes arguments used by switches, since
9973 /// the switch may expand into multiple basic blocks.
9974 static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
9975 // With FastISel active, we may be splitting blocks, so force creation
9976 // of virtual registers for all non-dead arguments.
9977 if (FastISel)
9978 return A->use_empty();
9980 const BasicBlock &Entry = A->getParent()->front();
9981 for (const User *U : A->users())
9982 if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
9983 return false; // Use not in entry block.
9985 return true;
9988 using ArgCopyElisionMapTy =
9989 DenseMap<const Argument *,
9990 std::pair<const AllocaInst *, const StoreInst *>>;
9992 /// Scan the entry block of the function in FuncInfo for arguments that look
9993 /// like copies into a local alloca. Record any copied arguments in
9994 /// ArgCopyElisionCandidates.
9995 static void
9996 findArgumentCopyElisionCandidates(const DataLayout &DL,
9997 FunctionLoweringInfo *FuncInfo,
9998 ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
9999 // Record the state of every static alloca used in the entry block. Argument
10000 // allocas are all used in the entry block, so we need approximately as many
10001 // entries as we have arguments.
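// Each alloca starts as Unknown; an argument store that fully initializes it
// marks it Elidable, and any other unanalyzed use marks it Clobbered.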
10002 enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
10003 SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
10004 unsigned NumArgs = FuncInfo->Fn->arg_size();
10005 StaticAllocas.reserve(NumArgs * 2);
10007 auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
10008 if (!V)
10009 return nullptr;
10010 V = V->stripPointerCasts();
10011 const auto *AI = dyn_cast<AllocaInst>(V);
10012 if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
10013 return nullptr;
10014 auto Iter = StaticAllocas.insert({AI, Unknown});
10015 return &Iter.first->second;
10018 // Look for stores of arguments to static allocas. Look through bitcasts and
10019 // GEPs to handle type coercions, as long as the alloca is fully initialized
10020 // by the store. Any non-store use of an alloca escapes it and any subsequent
10021 // unanalyzed store might write it.
10022 // FIXME: Handle structs initialized with multiple stores.
10023 for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
10024 // Look for stores, and handle non-store uses conservatively.
10025 const auto *SI = dyn_cast<StoreInst>(&I);
10026 if (!SI) {
10027 // We will look through cast uses, so ignore them completely.
10028 if (I.isCast())
10029 continue;
10030 // Ignore debug info and pseudo op intrinsics; they don't escape or store
10031 // to allocas.
10032 if (I.isDebugOrPseudoInst())
10033 continue;
10034 // This is an unknown instruction. Assume it escapes or writes to all
10035 // static alloca operands.
10036 for (const Use &U : I.operands()) {
10037 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
10038 *Info = StaticAllocaInfo::Clobbered;
10040 continue;
10043 // If the stored value is a static alloca, mark it as escaped.
10044 if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
10045 *Info = StaticAllocaInfo::Clobbered;
10047 // Check if the destination is a static alloca.
10048 const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
10049 StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
10050 if (!Info)
10051 continue;
10052 const AllocaInst *AI = cast<AllocaInst>(Dst);
10054 // Skip allocas that have been initialized or clobbered.
10055 if (*Info != StaticAllocaInfo::Unknown)
10056 continue;
10058 // Check if the stored value is an argument, and that this store fully
10059 // initializes the alloca.
10060 // If the argument type has padding bits we can't directly forward a pointer
10061 // as the upper bits may contain garbage.
10062 // Don't elide copies from the same argument twice.
10063 const Value *Val = SI->getValueOperand()->stripPointerCasts();
10064 const auto *Arg = dyn_cast<Argument>(Val);
10065 if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
10066 Arg->getType()->isEmptyTy() ||
10067 DL.getTypeStoreSize(Arg->getType()) !=
10068 DL.getTypeAllocSize(AI->getAllocatedType()) ||
10069 !DL.typeSizeEqualsStoreSize(Arg->getType()) ||
10070 ArgCopyElisionCandidates.count(Arg)) {
10071 *Info = StaticAllocaInfo::Clobbered;
10072 continue;
10075 LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
10076 << '\n');
10078 // Mark this alloca and store for argument copy elision.
10079 *Info = StaticAllocaInfo::Elidable;
10080 ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
10082 // Stop scanning if we've seen all arguments. This will happen early in -O0
10083 // builds, which is useful, because -O0 builds have large entry blocks and
10084 // many allocas.
10085 if (ArgCopyElisionCandidates.size() == NumArgs)
10086 break;
10090 /// Try to elide argument copies from memory into a local alloca. Succeeds if
10091 /// ArgVal is a load from a suitable fixed stack object.
10092 static void tryToElideArgumentCopy(
10093 FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
10094 DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
10095 SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
10096 ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
10097 SDValue ArgVal, bool &ArgHasUses) {
10098 // Check if this is a load from a fixed stack object.
10099 auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
10100 if (!LNode)
10101 return;
10102 auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
10103 if (!FINode)
10104 return;
10106 // Check that the fixed stack object is the right size and alignment.
10107 // Look at the alignment that the user wrote on the alloca instead of looking
10108 // at the stack object.
10109 auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
10110 assert(ArgCopyIter != ArgCopyElisionCandidates.end());
10111 const AllocaInst *AI = ArgCopyIter->second.first;
10112 int FixedIndex = FINode->getIndex();
10113 int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
10114 int OldIndex = AllocaIndex;
10115 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
10116 if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
10117 LLVM_DEBUG(
10118 dbgs() << " argument copy elision failed due to bad fixed stack "
10119 "object size\n");
10120 return;
10122 Align RequiredAlignment = AI->getAlign();
10123 if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
10124 LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
10125 "greater than stack argument alignment ("
10126 << DebugStr(RequiredAlignment) << " vs "
10127 << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
10128 return;
10131 // Perform the elision. Delete the old stack object and replace its only use
10132 // in the variable info map. Mark the stack object as mutable.
10133 LLVM_DEBUG({
10134 dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
10135 << " Replacing frame index " << OldIndex << " with " << FixedIndex
10136 << '\n';
10138 MFI.RemoveStackObject(OldIndex);
10139 MFI.setIsImmutableObjectIndex(FixedIndex, false);
10140 AllocaIndex = FixedIndex;
10141 ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
10142 Chains.push_back(ArgVal.getValue(1));
10144 // Avoid emitting code for the store implementing the copy.
10145 const StoreInst *SI = ArgCopyIter->second.second;
10146 ElidedArgCopyInstrs.insert(SI);
10148 // Check for uses of the argument again so that we can avoid exporting ArgVal
10149 // if it isn't used by anything other than the store.
10150 for (const Value *U : Arg.users()) {
10151 if (U != SI) {
10152 ArgHasUses = true;
10153 break;
10158 void SelectionDAGISel::LowerArguments(const Function &F) {
10159 SelectionDAG &DAG = SDB->DAG;
10160 SDLoc dl = SDB->getCurSDLoc();
10161 const DataLayout &DL = DAG.getDataLayout();
10162 SmallVector<ISD::InputArg, 16> Ins;
10164 // In Naked functions we aren't going to save any registers.
10165 if (F.hasFnAttribute(Attribute::Naked))
10166 return;
10168 if (!FuncInfo->CanLowerReturn) {
10169 // Put in an sret pointer parameter before all the other parameters.
10170 SmallVector<EVT, 1> ValueVTs;
10171 ComputeValueVTs(*TLI, DAG.getDataLayout(),
10172 F.getReturnType()->getPointerTo(
10173 DAG.getDataLayout().getAllocaAddrSpace()),
10174 ValueVTs);
10176 // NOTE: Assuming that a pointer will never break down to more than one VT
10177 // or one register.
10178 ISD::ArgFlagsTy Flags;
10179 Flags.setSRet();
10180 MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
10181 ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
10182 ISD::InputArg::NoArgIndex, 0);
10183 Ins.push_back(RetArg);
10186 // Look for stores of arguments to static allocas. Mark such arguments with a
10187 // flag to ask the target to give us the memory location of that argument if
10188 // available.
10189 ArgCopyElisionMapTy ArgCopyElisionCandidates;
10190 findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
10191 ArgCopyElisionCandidates);
10193 // Set up the incoming argument description vector.
10194 for (const Argument &Arg : F.args()) {
10195 unsigned ArgNo = Arg.getArgNo();
10196 SmallVector<EVT, 4> ValueVTs;
10197 ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
10198 bool isArgValueUsed = !Arg.use_empty();
10199 unsigned PartBase = 0;
10200 Type *FinalType = Arg.getType();
10201 if (Arg.hasAttribute(Attribute::ByVal))
10202 FinalType = Arg.getParamByValType();
10203 bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
10204 FinalType, F.getCallingConv(), F.isVarArg(), DL);
10205 for (unsigned Value = 0, NumValues = ValueVTs.size();
10206 Value != NumValues; ++Value) {
10207 EVT VT = ValueVTs[Value];
10208 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
10209 ISD::ArgFlagsTy Flags;
10212 if (Arg.getType()->isPointerTy()) {
10213 Flags.setPointer();
10214 Flags.setPointerAddrSpace(
10215 cast<PointerType>(Arg.getType())->getAddressSpace());
10217 if (Arg.hasAttribute(Attribute::ZExt))
10218 Flags.setZExt();
10219 if (Arg.hasAttribute(Attribute::SExt))
10220 Flags.setSExt();
10221 if (Arg.hasAttribute(Attribute::InReg)) {
10222 // If we are using the vectorcall calling convention, a structure that is
10223 // passed InReg is surely an HVA (homogeneous vector aggregate).
10224 if (F.getCallingConv() == CallingConv::X86_VectorCall &&
10225 isa<StructType>(Arg.getType())) {
10226 // The first value of the structure is marked HvaStart; all values get Hva.
10227 if (0 == Value)
10228 Flags.setHvaStart();
10229 Flags.setHva();
10231 // Set InReg Flag
10232 Flags.setInReg();
10234 if (Arg.hasAttribute(Attribute::StructRet))
10235 Flags.setSRet();
10236 if (Arg.hasAttribute(Attribute::SwiftSelf))
10237 Flags.setSwiftSelf();
10238 if (Arg.hasAttribute(Attribute::SwiftAsync))
10239 Flags.setSwiftAsync();
10240 if (Arg.hasAttribute(Attribute::SwiftError))
10241 Flags.setSwiftError();
10242 if (Arg.hasAttribute(Attribute::ByVal))
10243 Flags.setByVal();
10244 if (Arg.hasAttribute(Attribute::ByRef))
10245 Flags.setByRef();
10246 if (Arg.hasAttribute(Attribute::InAlloca)) {
10247 Flags.setInAlloca();
10248 // Set the byval flag for CCAssignFn callbacks that don't know about
10249 // inalloca. This way we can know how many bytes we should've allocated
10250 // and how many bytes a callee cleanup function will pop. If we port
10251 // inalloca to more targets, we'll have to add custom inalloca handling
10252 // in the various CC lowering callbacks.
10253 Flags.setByVal();
10255 if (Arg.hasAttribute(Attribute::Preallocated)) {
10256 Flags.setPreallocated();
10257 // Set the byval flag for CCAssignFn callbacks that don't know about
10258 // preallocated. This way we can know how many bytes we should've
10259 // allocated and how many bytes a callee cleanup function will pop. If
10260 // we port preallocated to more targets, we'll have to add custom
10261 // preallocated handling in the various CC lowering callbacks.
10262 Flags.setByVal();
10265 // Certain targets (such as MIPS) may have a different ABI alignment
10266 // for a type depending on the context. Give the target a chance to
10267 // specify the alignment it wants.
10268 const Align OriginalAlignment(
10269 TLI->getABIAlignmentForCallingConv(ArgTy, DL));
10270 Flags.setOrigAlign(OriginalAlignment);
10272 Align MemAlign;
10273 Type *ArgMemTy = nullptr;
10274 if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
10275 Flags.isByRef()) {
10276 if (!ArgMemTy)
10277 ArgMemTy = Arg.getPointeeInMemoryValueType();
10279 uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
10281 // For in-memory arguments, size and alignment should be passed from FE.
10282 // BE will guess if this info is not there but there are cases it cannot
10283 // get right.
10284 if (auto ParamAlign = Arg.getParamStackAlign())
10285 MemAlign = *ParamAlign;
10286 else if ((ParamAlign = Arg.getParamAlign()))
10287 MemAlign = *ParamAlign;
10288 else
10289 MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
10290 if (Flags.isByRef())
10291 Flags.setByRefSize(MemSize);
10292 else
10293 Flags.setByValSize(MemSize);
10294 } else if (auto ParamAlign = Arg.getParamStackAlign()) {
10295 MemAlign = *ParamAlign;
10296 } else {
10297 MemAlign = OriginalAlignment;
10299 Flags.setMemAlign(MemAlign);
10301 if (Arg.hasAttribute(Attribute::Nest))
10302 Flags.setNest();
10303 if (NeedsRegBlock)
10304 Flags.setInConsecutiveRegs();
10305 if (ArgCopyElisionCandidates.count(&Arg))
10306 Flags.setCopyElisionCandidate();
10307 if (Arg.hasAttribute(Attribute::Returned))
10308 Flags.setReturned();
10310 MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
10311 *CurDAG->getContext(), F.getCallingConv(), VT);
10312 unsigned NumRegs = TLI->getNumRegistersForCallingConv(
10313 *CurDAG->getContext(), F.getCallingConv(), VT);
10314 for (unsigned i = 0; i != NumRegs; ++i) {
10315 // For scalable vectors, use the minimum size; individual targets
10316 // are responsible for handling scalable vector arguments and
10317 // return values.
10318 ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
10319 ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
10320 if (NumRegs > 1 && i == 0)
10321 MyFlags.Flags.setSplit();
10322 // If it isn't the first piece, the alignment must be 1.
10323 else if (i > 0) {
10324 MyFlags.Flags.setOrigAlign(Align(1));
10325 if (i == NumRegs - 1)
10326 MyFlags.Flags.setSplitEnd();
10328 Ins.push_back(MyFlags);
10330 if (NeedsRegBlock && Value == NumValues - 1)
10331 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
10332 PartBase += VT.getStoreSize().getKnownMinSize();
10336 // Call the target to set up the argument values.
10337 SmallVector<SDValue, 8> InVals;
10338 SDValue NewRoot = TLI->LowerFormalArguments(
10339 DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
10341 // Verify that the target's LowerFormalArguments behaved as expected.
10342 assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
10343 "LowerFormalArguments didn't return a valid chain!");
10344 assert(InVals.size() == Ins.size() &&
10345 "LowerFormalArguments didn't emit the correct number of values!");
10346 LLVM_DEBUG({
10347 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
10348 assert(InVals[i].getNode() &&
10349 "LowerFormalArguments emitted a null value!");
10350 assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
10351 "LowerFormalArguments emitted a value with the wrong type!");
10355 // Update the DAG with the new chain value resulting from argument lowering.
10356 DAG.setRoot(NewRoot);
10358 // Set up the argument values.
10359 unsigned i = 0;
10360 if (!FuncInfo->CanLowerReturn) {
10361 // Create a virtual register for the sret pointer, and put in a copy
10362 // from the sret argument into it.
10363 SmallVector<EVT, 1> ValueVTs;
10364 ComputeValueVTs(*TLI, DAG.getDataLayout(),
10365 F.getReturnType()->getPointerTo(
10366 DAG.getDataLayout().getAllocaAddrSpace()),
10367 ValueVTs);
10368 MVT VT = ValueVTs[0].getSimpleVT();
10369 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
10370 Optional<ISD::NodeType> AssertOp = None;
10371 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
10372 nullptr, F.getCallingConv(), AssertOp);
10374 MachineFunction& MF = SDB->DAG.getMachineFunction();
10375 MachineRegisterInfo& RegInfo = MF.getRegInfo();
10376 Register SRetReg =
10377 RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
10378 FuncInfo->DemoteRegister = SRetReg;
10379 NewRoot =
10380 SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
10381 DAG.setRoot(NewRoot);
10383 // i indexes lowered arguments. Bump it past the hidden sret argument.
10384 ++i;
10387 SmallVector<SDValue, 4> Chains;
10388 DenseMap<int, int> ArgCopyElisionFrameIndexMap;
10389 for (const Argument &Arg : F.args()) {
10390 SmallVector<SDValue, 4> ArgValues;
10391 SmallVector<EVT, 4> ValueVTs;
10392 ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
10393 unsigned NumValues = ValueVTs.size();
10394 if (NumValues == 0)
10395 continue;
10397 bool ArgHasUses = !Arg.use_empty();
10399 // Elide the copying store if the target loaded this argument from a
10400 // suitable fixed stack object.
10401 if (Ins[i].Flags.isCopyElisionCandidate()) {
10402 tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
10403 ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
10404 InVals[i], ArgHasUses);
10407 // If this argument is unused then remember its value. It is used to generate
10408 // debugging information.
10409 bool isSwiftErrorArg =
10410 TLI->supportSwiftError() &&
10411 Arg.hasAttribute(Attribute::SwiftError);
10412 if (!ArgHasUses && !isSwiftErrorArg) {
10413 SDB->setUnusedArgValue(&Arg, InVals[i]);
10415 // Also remember any frame index for use in FastISel.
10416 if (FrameIndexSDNode *FI =
10417 dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
10418 FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
10421 for (unsigned Val = 0; Val != NumValues; ++Val) {
10422 EVT VT = ValueVTs[Val];
10423 MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
10424 F.getCallingConv(), VT);
10425 unsigned NumParts = TLI->getNumRegistersForCallingConv(
10426 *CurDAG->getContext(), F.getCallingConv(), VT);
10428 // Even an apparent 'unused' swifterror argument needs to be returned. So
10429 // we do generate a copy for it that can be used on return from the
10430 // function.
10431 if (ArgHasUses || isSwiftErrorArg) {
10432 Optional<ISD::NodeType> AssertOp;
10433 if (Arg.hasAttribute(Attribute::SExt))
10434 AssertOp = ISD::AssertSext;
10435 else if (Arg.hasAttribute(Attribute::ZExt))
10436 AssertOp = ISD::AssertZext;
10438 ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
10439 PartVT, VT, nullptr,
10440 F.getCallingConv(), AssertOp));
10443 i += NumParts;
10446 // We don't need to do anything else for unused arguments.
10447 if (ArgValues.empty())
10448 continue;
10450 // Note down frame index.
10451 if (FrameIndexSDNode *FI =
10452 dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
10453 FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
10455 SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
10456 SDB->getCurSDLoc());
10458 SDB->setValue(&Arg, Res);
10459 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
10460 // We want to associate the argument with the frame index, among
10461 // involved operands, that correspond to the lowest address. The
10462 // getCopyFromParts function, called earlier, is swapping the order of
10463 // the operands to BUILD_PAIR depending on endianness. The result of
10464 // that swapping is that the least significant bits of the argument will
10465 // be in the first operand of the BUILD_PAIR node, and the most
10466 // significant bits will be in the second operand.
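// For example, an i64 argument split into two i32 parts: operand 0 of the
// BUILD_PAIR always holds the least significant half, so on a big-endian
// target (where the most significant half lives at the lower address) the
// low-address operand is operand 1, and on a little-endian target it is
// operand 0.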
10467 unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
10468 if (LoadSDNode *LNode =
10469 dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
10470 if (FrameIndexSDNode *FI =
10471 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
10472 FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
10475 // Analyses past this point are naive and don't expect an assertion.
10476 if (Res.getOpcode() == ISD::AssertZext)
10477 Res = Res.getOperand(0);
10479 // Update the SwiftErrorVRegDefMap.
10480 if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
10481 unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
10482 if (Register::isVirtualRegister(Reg))
10483 SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
10484 Reg);
10487 // If this argument is live outside of the entry block, insert a copy from
10488 // wherever we got it to the vreg that other BB's will reference it as.
10489 if (Res.getOpcode() == ISD::CopyFromReg) {
10490 // If we can, though, try to skip creating an unnecessary vreg.
10491 // FIXME: This isn't very clean... it would be nice to make this more
10492 // general.
10493 unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
10494 if (Register::isVirtualRegister(Reg)) {
10495 FuncInfo->ValueMap[&Arg] = Reg;
10496 continue;
10499 if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
10500 FuncInfo->InitializeRegForValue(&Arg);
10501 SDB->CopyToExportRegsIfNeeded(&Arg);
10505 if (!Chains.empty()) {
10506 Chains.push_back(NewRoot);
10507 NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
10510 DAG.setRoot(NewRoot);
10512 assert(i == InVals.size() && "Argument register count mismatch!");
10514 // If any argument copy elisions occurred and we have debug info, update the
10515 // stale frame indices used in the dbg.declare variable info table.
10516 MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
10517 if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
10518 for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
10519 auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
10520 if (I != ArgCopyElisionFrameIndexMap.end())
10521 VI.Slot = I->second;
10525 // Finally, if the target has anything special to do, allow it to do so.
10526 emitFunctionEntryCode();
10529 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
10530 /// ensure constants are generated when needed. Remember the virtual registers
10531 /// that need to be added to the Machine PHI nodes as input. We cannot just
10532 /// directly add them, because expansion might result in multiple MBB's for one
10533 /// BB. As such, the start of the BB might correspond to a different MBB than
10534 /// the end.
10535 void
10536 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
10537 const Instruction *TI = LLVMBB->getTerminator();
10539 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
10541 // Check PHI nodes in successors that expect a value to be available from this
10542 // block.
10543 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
10544 const BasicBlock *SuccBB = TI->getSuccessor(succ);
10545 if (!isa<PHINode>(SuccBB->begin())) continue;
10546 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
10548 // If this terminator has multiple identical successors (common for
10549 // switches), only handle each succ once.
10550 if (!SuccsHandled.insert(SuccMBB).second)
10551 continue;
10553 MachineBasicBlock::iterator MBBI = SuccMBB->begin();
10555 // At this point we know that there is a 1-1 correspondence between LLVM PHI
10556 // nodes and Machine PHI nodes, but the incoming operands have not been
10557 // emitted yet.
10558 for (const PHINode &PN : SuccBB->phis()) {
10559 // Ignore dead phi's.
10560 if (PN.use_empty())
10561 continue;
10563 // Skip empty types
10564 if (PN.getType()->isEmptyTy())
10565 continue;
10567 unsigned Reg;
10568 const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
10570 if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
10571 unsigned &RegOut = ConstantsOut[C];
10572 if (RegOut == 0) {
10573 RegOut = FuncInfo.CreateRegs(C);
10574 CopyValueToVirtualRegister(C, RegOut);
10576 Reg = RegOut;
10577 } else {
10578 DenseMap<const Value *, Register>::iterator I =
10579 FuncInfo.ValueMap.find(PHIOp);
10580 if (I != FuncInfo.ValueMap.end())
10581 Reg = I->second;
10582 else {
10583 assert(isa<AllocaInst>(PHIOp) &&
10584 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
10585 "Didn't codegen value into a register!??");
10586 Reg = FuncInfo.CreateRegs(PHIOp);
10587 CopyValueToVirtualRegister(PHIOp, Reg);
10591 // Remember that this register needs to be added to the machine PHI node as
10592 // the input for this MBB.
10593 SmallVector<EVT, 4> ValueVTs;
10594 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10595 ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
10596 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
10597 EVT VT = ValueVTs[vti];
10598 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
10599 for (unsigned i = 0, e = NumRegisters; i != e; ++i)
10600 FuncInfo.PHINodesToUpdate.push_back(
10601 std::make_pair(&*MBBI++, Reg + i));
10602 Reg += NumRegisters;
10607 ConstantsOut.clear();
10610 MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
10611 MachineFunction::iterator I(MBB);
10612 if (++I == FuncInfo.MF->end())
10613 return nullptr;
10614 return &*I;
10617 /// During lowering new call nodes can be created (such as memset, etc.).
10618 /// Those will become new roots of the current DAG, but complications arise
10619 /// when they are tail calls. In such cases, the call lowering will update
10620 /// the root, but the builder still needs to know that a tail call has been
10621 /// lowered in order to avoid generating an additional return.
10622 void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
10623 // If the node is null, we do have a tail call.
10624 if (MaybeTC.getNode() != nullptr)
10625 DAG.setRoot(MaybeTC);
10626 else
10627 HasTailCall = true;
10630 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
10631 MachineBasicBlock *SwitchMBB,
10632 MachineBasicBlock *DefaultMBB) {
10633 MachineFunction *CurMF = FuncInfo.MF;
10634 MachineBasicBlock *NextMBB = nullptr;
10635 MachineFunction::iterator BBI(W.MBB);
10636 if (++BBI != FuncInfo.MF->end())
10637 NextMBB = &*BBI;
10639 unsigned Size = W.LastCluster - W.FirstCluster + 1;
10641 BranchProbabilityInfo *BPI = FuncInfo.BPI;
10643 if (Size == 2 && W.MBB == SwitchMBB) {
10644 // If any two of the cases have the same destination, and if one value
10645 // is the same as the other, but has one bit unset that the other has set,
10646 // use bit manipulation to do two compares at once. For example:
10647 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
10648 // TODO: This could be extended to merge any 2 cases in switches with 3
10649 // cases.
10650 // TODO: Handle cases where W.CaseBB != SwitchBB.
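// Worked example: for "X == 6 || X == 4", SmallValue = 4 (0b100) and
// BigValue = 6 (0b110) differ only in bit 1, so CommonBit = 0b010 is a power
// of two. Setting that bit with (X | 2) maps both 4 and 6 to 6, and the
// single compare (X | 2) == 6 covers both original cases.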
10651 CaseCluster &Small = *W.FirstCluster;
10652 CaseCluster &Big = *W.LastCluster;
10654 if (Small.Low == Small.High && Big.Low == Big.High &&
10655 Small.MBB == Big.MBB) {
10656 const APInt &SmallValue = Small.Low->getValue();
10657 const APInt &BigValue = Big.Low->getValue();
10659 // Check that there is only one bit different.
10660 APInt CommonBit = BigValue ^ SmallValue;
10661 if (CommonBit.isPowerOf2()) {
10662 SDValue CondLHS = getValue(Cond);
10663 EVT VT = CondLHS.getValueType();
10664 SDLoc DL = getCurSDLoc();
10666 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
10667 DAG.getConstant(CommonBit, DL, VT));
10668 SDValue Cond = DAG.getSetCC(
10669 DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
10670 ISD::SETEQ);
10672 // Update successor info.
10673 // Both Small and Big will jump to Small.BB, so we sum up the
10674 // probabilities.
10675 addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
10676 if (BPI)
10677 addSuccessorWithProb(
10678 SwitchMBB, DefaultMBB,
10679 // The default destination is the first successor in IR.
10680 BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
10681 else
10682 addSuccessorWithProb(SwitchMBB, DefaultMBB);
10684 // Insert the true branch.
10685 SDValue BrCond =
10686 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
10687 DAG.getBasicBlock(Small.MBB));
10688 // Insert the false branch.
10689 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
10690 DAG.getBasicBlock(DefaultMBB));
10692 DAG.setRoot(BrCond);
10693 return;
10698 if (TM.getOptLevel() != CodeGenOpt::None) {
10699 // Here, we order cases by probability so the most likely case will be
10700 // checked first. However, two clusters can have the same probability in
10701 // which case their relative ordering is non-deterministic. So we use Low
10702 // as a tie-breaker as clusters are guaranteed to never overlap.
10703 llvm::sort(W.FirstCluster, W.LastCluster + 1,
10704 [](const CaseCluster &a, const CaseCluster &b) {
10705 return a.Prob != b.Prob ?
10706 a.Prob > b.Prob :
10707 a.Low->getValue().slt(b.Low->getValue());
10710 // Rearrange the case blocks so that the last one falls through if possible
10711 // without changing the order of probabilities.
10712 for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
10713 --I;
10714 if (I->Prob > W.LastCluster->Prob)
10715 break;
10716 if (I->Kind == CC_Range && I->MBB == NextMBB) {
10717 std::swap(*I, *W.LastCluster);
10718 break;
10723 // Compute total probability.
10724 BranchProbability DefaultProb = W.DefaultProb;
10725 BranchProbability UnhandledProbs = DefaultProb;
10726 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
10727 UnhandledProbs += I->Prob;
10729 MachineBasicBlock *CurMBB = W.MBB;
10730 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
10731 bool FallthroughUnreachable = false;
10732 MachineBasicBlock *Fallthrough;
10733 if (I == W.LastCluster) {
10734 // For the last cluster, fall through to the default destination.
10735 Fallthrough = DefaultMBB;
10736 FallthroughUnreachable = isa<UnreachableInst>(
10737 DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
10738 } else {
10739 Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
10740 CurMF->insert(BBI, Fallthrough);
10741 // Put Cond in a virtual register to make it available from the new blocks.
10742 ExportFromCurrentBlock(Cond);
10744 UnhandledProbs -= I->Prob;
10746 switch (I->Kind) {
10747 case CC_JumpTable: {
10748 // FIXME: Optimize away range check based on pivot comparisons.
10749 JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
10750 SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
10752 // The jump block hasn't been inserted yet; insert it here.
10753 MachineBasicBlock *JumpMBB = JT->MBB;
10754 CurMF->insert(BBI, JumpMBB);
10756 auto JumpProb = I->Prob;
10757 auto FallthroughProb = UnhandledProbs;
10759 // If the default statement is a target of the jump table, we evenly
10760 // distribute the default probability to successors of CurMBB. Also
10761 // update the probability on the edge from JumpMBB to Fallthrough.
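// For example, with DefaultProb = 20%: half of it (10%) is added to the edge
// into the jump table (JumpProb) and subtracted from the fallthrough edge
// (FallthroughProb), while the JumpMBB -> DefaultMBB edge inside the table is
// set to the remaining 10%.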
10762 for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
10763 SE = JumpMBB->succ_end();
10764 SI != SE; ++SI) {
10765 if (*SI == DefaultMBB) {
10766 JumpProb += DefaultProb / 2;
10767 FallthroughProb -= DefaultProb / 2;
10768 JumpMBB->setSuccProbability(SI, DefaultProb / 2);
10769 JumpMBB->normalizeSuccProbs();
10770 break;
10774 if (FallthroughUnreachable)
10775 JTH->FallthroughUnreachable = true;
10777 if (!JTH->FallthroughUnreachable)
10778 addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
10779 addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
10780 CurMBB->normalizeSuccProbs();
10782 // The jump table header will be inserted in our current block, do the
10783 // range check, and fall through to our fallthrough block.
10784 JTH->HeaderBB = CurMBB;
10785 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
10787 // If we're in the right place, emit the jump table header right now.
10788 if (CurMBB == SwitchMBB) {
10789 visitJumpTableHeader(*JT, *JTH, SwitchMBB);
10790 JTH->Emitted = true;
10792 break;
10794 case CC_BitTests: {
10795 // FIXME: Optimize away range check based on pivot comparisons.
10796 BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
10798 // The bit test blocks haven't been inserted yet; insert them here.
10799 for (BitTestCase &BTC : BTB->Cases)
10800 CurMF->insert(BBI, BTC.ThisBB);
10802 // Fill in fields of the BitTestBlock.
10803 BTB->Parent = CurMBB;
10804 BTB->Default = Fallthrough;
10806 BTB->DefaultProb = UnhandledProbs;
10807 // If the cases in the bit test don't form a contiguous range, we evenly
10808 // distribute the probability on the edge to Fallthrough to two
10809 // successors of CurMBB.
10810 if (!BTB->ContiguousRange) {
10811 BTB->Prob += DefaultProb / 2;
10812 BTB->DefaultProb -= DefaultProb / 2;
10815 if (FallthroughUnreachable)
10816 BTB->FallthroughUnreachable = true;
10818 // If we're in the right place, emit the bit test header right now.
10819 if (CurMBB == SwitchMBB) {
10820 visitBitTestHeader(*BTB, SwitchMBB);
10821 BTB->Emitted = true;
10823 break;
10825 case CC_Range: {
10826 const Value *RHS, *LHS, *MHS;
10827 ISD::CondCode CC;
10828 if (I->Low == I->High) {
10829 // Check Cond == I->Low.
10830 CC = ISD::SETEQ;
10831 LHS = Cond;
10832 RHS = I->Low;
10833 MHS = nullptr;
10834 } else {
10835 // Check I->Low <= Cond <= I->High.
10836 CC = ISD::SETLE;
10837 LHS = I->Low;
10838 MHS = Cond;
10839 RHS = I->High;
10842 // If Fallthrough is unreachable, fold away the comparison.
10843 if (FallthroughUnreachable)
10844 CC = ISD::SETTRUE;
10846 // The false probability is the sum of all unhandled cases.
10847 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
10848 getCurSDLoc(), I->Prob, UnhandledProbs);
10850 if (CurMBB == SwitchMBB)
10851 visitSwitchCase(CB, SwitchMBB);
10852 else
10853 SL->SwitchCases.push_back(CB);
10855 break;
10858 CurMBB = Fallthrough;
10862 unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
10863 CaseClusterIt First,
10864 CaseClusterIt Last) {
10865 return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
10866 if (X.Prob != CC.Prob)
10867 return X.Prob > CC.Prob;
10869 // Ties are broken by comparing the case value.
10870 return X.Low->getValue().slt(CC.Low->getValue());
10874 void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
10875 const SwitchWorkListItem &W,
10876 Value *Cond,
10877 MachineBasicBlock *SwitchMBB) {
10878 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
10879 "Clusters not sorted?");
10881 assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
10883 // Balance the tree based on branch probabilities to create a near-optimal (in
10884 // terms of search time given key frequency) binary search tree. See e.g. Kurt
10885 // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
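// For example, given four clusters with probabilities 50%, 20%, 20% and 10%
// (and a negligible default probability), the greedy walk below places the
// 50% cluster alone on the left of the pivot and the other three on the
// right, balancing the probability mass at roughly 50%/50% instead of simply
// splitting by cluster count.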
10886 CaseClusterIt LastLeft = W.FirstCluster;
10887 CaseClusterIt FirstRight = W.LastCluster;
10888 auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
10889 auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
10891 // Move LastLeft and FirstRight towards each other from opposite directions to
10892 // find a partitioning of the clusters which balances the probability on both
10893 // sides. If LeftProb and RightProb are equal, alternate which side is
10894 // taken to ensure 0-probability nodes are distributed evenly.
10895 unsigned I = 0;
10896 while (LastLeft + 1 < FirstRight) {
10897 if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
10898 LeftProb += (++LastLeft)->Prob;
10899 else
10900 RightProb += (--FirstRight)->Prob;
10901 I++;
10904 while (true) {
10905 // Our binary search tree differs from a typical BST in that ours can have up
10906 // to three values in each leaf. The pivot selection above doesn't take that
10907 // into account, which means the tree might require more nodes and be less
10908 // efficient. We compensate for this here.
10910 unsigned NumLeft = LastLeft - W.FirstCluster + 1;
10911 unsigned NumRight = W.LastCluster - FirstRight + 1;
10913 if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
10914 // If one side has less than 3 clusters, and the other has more than 3,
10915 // consider taking a cluster from the other side.
10917 if (NumLeft < NumRight) {
10918 // Consider moving the first cluster on the right to the left side.
10919 CaseCluster &CC = *FirstRight;
10920 unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10921 unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10922 if (LeftSideRank <= RightSideRank) {
10923 // Moving the cluster to the left does not demote it.
10924 ++LastLeft;
10925 ++FirstRight;
10926 continue;
10928 } else {
10929 assert(NumRight < NumLeft);
10930 // Consider moving the last element on the left to the right side.
10931 CaseCluster &CC = *LastLeft;
10932 unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10933 unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10934 if (RightSideRank <= LeftSideRank) {
10935 // Moving the cluster to the right does not demote it.
10936 --LastLeft;
10937 --FirstRight;
10938 continue;
10942 break;
10945 assert(LastLeft + 1 == FirstRight);
10946 assert(LastLeft >= W.FirstCluster);
10947 assert(FirstRight <= W.LastCluster);
10949 // Use the first element on the right as pivot since we will make less-than
10950 // comparisons against it.
10951 CaseClusterIt PivotCluster = FirstRight;
10952 assert(PivotCluster > W.FirstCluster);
10953 assert(PivotCluster <= W.LastCluster);
10955 CaseClusterIt FirstLeft = W.FirstCluster;
10956 CaseClusterIt LastRight = W.LastCluster;
10958 const ConstantInt *Pivot = PivotCluster->Low;
10960 // New blocks will be inserted immediately after the current one.
10961 MachineFunction::iterator BBI(W.MBB);
10962 ++BBI;
10964 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
10965 // we can branch to its destination directly if it's squeezed exactly in
10966 // between the known lower bound and Pivot - 1.
10967 MachineBasicBlock *LeftMBB;
10968 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
10969 FirstLeft->Low == W.GE &&
10970 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
10971 LeftMBB = FirstLeft->MBB;
10972 } else {
10973 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10974 FuncInfo.MF->insert(BBI, LeftMBB);
10975 WorkList.push_back(
10976 {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
10977 // Put Cond in a virtual register to make it available from the new blocks.
10978 ExportFromCurrentBlock(Cond);
10981 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
10982 // single cluster, RHS.Low == Pivot, and we can branch to its destination
10983 // directly if RHS.High equals the current upper bound.
10984 MachineBasicBlock *RightMBB;
10985 if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
10986 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
10987 RightMBB = FirstRight->MBB;
10988 } else {
10989 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10990 FuncInfo.MF->insert(BBI, RightMBB);
10991 WorkList.push_back(
10992 {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
10993 // Put Cond in a virtual register to make it available from the new blocks.
10994 ExportFromCurrentBlock(Cond);
10997 // Create the CaseBlock record that will be used to lower the branch.
10998 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
10999 getCurSDLoc(), LeftProb, RightProb);
11001 if (W.MBB == SwitchMBB)
11002 visitSwitchCase(CB, SwitchMBB);
11003 else
11004 SL->SwitchCases.push_back(CB);
11007 // Scale CaseProb after peeling a case with the probability of PeeledCaseProb
11008 // from the switch statement.
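// For example, if the peeled case had probability 80%, a remaining case with
// probability 10% is rescaled to 10% / (100% - 80%) = 50% of the new,
// smaller switch.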
11009 static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
11010 BranchProbability PeeledCaseProb) {
11011 if (PeeledCaseProb == BranchProbability::getOne())
11012 return BranchProbability::getZero();
11013 BranchProbability SwitchProb = PeeledCaseProb.getCompl();
11015 uint32_t Numerator = CaseProb.getNumerator();
11016 uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
11017 return BranchProbability(Numerator, std::max(Numerator, Denominator));
11020 // Try to peel the top probability case if it exceeds the threshold.
11021 // Return current MachineBasicBlock for the switch statement if the peeling
11022 // does not occur.
11023 // If the peeling is performed, return the newly created MachineBasicBlock
11024 // for the peeled switch statement. Also update Clusters to remove the peeled
11025 // case. PeeledCaseProb is the BranchProbability for the peeled case.
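// For example, in a switch where a single case carries 90% of the
// probability, that case is emitted first as its own compare-and-branch out
// of SwitchMBB; the remaining cases are then lowered as a separate, much
// colder switch rooted at the newly created block, with their probabilities
// rescaled accordingly.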
11026 MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
11027 const SwitchInst &SI, CaseClusterVector &Clusters,
11028 BranchProbability &PeeledCaseProb) {
11029 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
11030 // Don't perform if there is only one cluster or optimizing for size.
11031 if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
11032 TM.getOptLevel() == CodeGenOpt::None ||
11033 SwitchMBB->getParent()->getFunction().hasMinSize())
11034 return SwitchMBB;
11036 BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
11037 unsigned PeeledCaseIndex = 0;
11038 bool SwitchPeeled = false;
11039 for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
11040 CaseCluster &CC = Clusters[Index];
11041 if (CC.Prob < TopCaseProb)
11042 continue;
11043 TopCaseProb = CC.Prob;
11044 PeeledCaseIndex = Index;
11045 SwitchPeeled = true;
11047 if (!SwitchPeeled)
11048 return SwitchMBB;
11050 LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
11051 << TopCaseProb << "\n");
11053 // Record the MBB for the peeled switch statement.
11054 MachineFunction::iterator BBI(SwitchMBB);
11055 ++BBI;
11056 MachineBasicBlock *PeeledSwitchMBB =
11057 FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
11058 FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
11060 ExportFromCurrentBlock(SI.getCondition());
11061 auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
11062 SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
11063 nullptr, nullptr, TopCaseProb.getCompl()};
11064 lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
11066 Clusters.erase(PeeledCaseIt);
11067 for (CaseCluster &CC : Clusters) {
11068 LLVM_DEBUG(
11069 dbgs() << "Scale the probability for one cluster, before scaling: "
11070 << CC.Prob << "\n");
11071 CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
11072 LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
11074 PeeledCaseProb = TopCaseProb;
11075 return PeeledSwitchMBB;
11078 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
11079 // Extract cases from the switch.
11080 BranchProbabilityInfo *BPI = FuncInfo.BPI;
11081 CaseClusterVector Clusters;
11082 Clusters.reserve(SI.getNumCases());
11083 for (auto I : SI.cases()) {
11084 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
11085 const ConstantInt *CaseVal = I.getCaseValue();
11086 BranchProbability Prob =
11087 BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
11088 : BranchProbability(1, SI.getNumCases() + 1);
11089 Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
11092 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
11094 // Cluster adjacent cases with the same destination. We do this at all
11095 // optimization levels because it's cheap to do and will make codegen faster
11096 // if there are many clusters.
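// For example, "case 1: case 2: case 3:" all branching to the same block are
// merged into a single cluster covering the range [1, 3].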
11097 sortAndRangeify(Clusters);
11099 // The branch probability of the peeled case.
11100 BranchProbability PeeledCaseProb = BranchProbability::getZero();
11101 MachineBasicBlock *PeeledSwitchMBB =
11102 peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
11104 // If there is only the default destination, jump there directly.
11105 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
11106 if (Clusters.empty()) {
11107 assert(PeeledSwitchMBB == SwitchMBB);
11108 SwitchMBB->addSuccessor(DefaultMBB);
11109 if (DefaultMBB != NextBlock(SwitchMBB)) {
11110 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
11111 getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
11113 return;
11116 SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
11117 SL->findBitTestClusters(Clusters, &SI);
11119 LLVM_DEBUG({
11120 dbgs() << "Case clusters: ";
11121 for (const CaseCluster &C : Clusters) {
11122 if (C.Kind == CC_JumpTable)
11123 dbgs() << "JT:";
11124 if (C.Kind == CC_BitTests)
11125 dbgs() << "BT:";
11127 C.Low->getValue().print(dbgs(), true);
11128 if (C.Low != C.High) {
11129 dbgs() << '-';
11130 C.High->getValue().print(dbgs(), true);
11132 dbgs() << ' ';
11134 dbgs() << '\n';
11137 assert(!Clusters.empty());
11138 SwitchWorkList WorkList;
11139 CaseClusterIt First = Clusters.begin();
11140 CaseClusterIt Last = Clusters.end() - 1;
11141 auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
11142 // Scale the branch probability for DefaultMBB if the peel occurs and
11143 // DefaultMBB is not replaced.
11144 if (PeeledCaseProb != BranchProbability::getZero() &&
11145 DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
11146 DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
11147 WorkList.push_back(
11148 {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
11150 while (!WorkList.empty()) {
11151 SwitchWorkListItem W = WorkList.pop_back_val();
11152 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
11154 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
11155 !DefaultMBB->getParent()->getFunction().hasMinSize()) {
11156 // For optimized builds, lower large range as a balanced binary tree.
11157 splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
11158 continue;
11161 lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
11165 void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
11166 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11167 auto DL = getCurSDLoc();
11168 EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
11169 setValue(&I, DAG.getStepVector(DL, ResultVT));
11172 void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
11173 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11174 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
11176 SDLoc DL = getCurSDLoc();
11177 SDValue V = getValue(I.getOperand(0));
11178 assert(VT == V.getValueType() && "Malformed vector.reverse!");
11180 if (VT.isScalableVector()) {
11181 setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
11182 return;
11185 // Use VECTOR_SHUFFLE for the fixed-length vector
11186 // to maintain existing behavior.
11187 SmallVector<int, 8> Mask;
11188 unsigned NumElts = VT.getVectorMinNumElements();
11189 for (unsigned i = 0; i != NumElts; ++i)
11190 Mask.push_back(NumElts - 1 - i);
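// E.g. for a fixed-length 4-element vector this builds the mask <3, 2, 1, 0>.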
11192 setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
11195 void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
11196 SmallVector<EVT, 4> ValueVTs;
11197 ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
11198 ValueVTs);
11199 unsigned NumValues = ValueVTs.size();
11200 if (NumValues == 0) return;
11202 SmallVector<SDValue, 4> Values(NumValues);
11203 SDValue Op = getValue(I.getOperand(0));
11205 for (unsigned i = 0; i != NumValues; ++i)
11206 Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
11207 SDValue(Op.getNode(), Op.getResNo() + i));
11209 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
11210 DAG.getVTList(ValueVTs), Values));
11213 void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
11214 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11215 EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
11217 SDLoc DL = getCurSDLoc();
11218 SDValue V1 = getValue(I.getOperand(0));
11219 SDValue V2 = getValue(I.getOperand(1));
11220 int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
11222 // VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
11223 if (VT.isScalableVector()) {
11224 MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
11225 setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
11226 DAG.getConstant(Imm, DL, IdxVT)));
11227 return;
11230 unsigned NumElts = VT.getVectorNumElements();
11232 uint64_t Idx = (NumElts + Imm) % NumElts;
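// For example, with NumElts = 4: Imm = 1 gives Idx = 1 and mask <1, 2, 3, 4>
// (the last three elements of V1 followed by the first element of V2), while
// Imm = -1 gives Idx = 3 and mask <3, 4, 5, 6> (the last element of V1
// followed by the first three elements of V2).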
11234 // Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
11235 SmallVector<int, 8> Mask;
11236 for (unsigned i = 0; i < NumElts; ++i)
11237 Mask.push_back(Idx + i);
11238 setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));